##// END OF EJS Templates
v2.9.2 :: Update 'insecure' option
eynilupu -
r11:67c74119d8e4
parent child
Show More
1 NO CONTENT: modified file, binary diff hidden
@@ -1,935 +1,935
1 1 from ckanapi import RemoteCKAN
2 2 from datetime import datetime
3 3 from tqdm import tqdm
4 4 #from ckanapi.errors import NotAuthorized, NotFound, ValidationError, SearchQueryError, SearchError, CKANAPIError, ServerIncompatibleError
5 5 import sys
6 6 import platform
7 7 import os
8 8 import tempfile
9 9 import shutil
10 10 import zipfile
11 11 import concurrent.futures
12 12 import requests
13 13 import json
14 14 #import pathlib
15 15 import uuid
16 16
17 17 if sys.version_info.major == 3:
18 18 from urllib.parse import urlparse
19 19 else:
20 20 import urlparse
21 21
22 22 class JROAPI():
23 23 """
24 24 FINALIDAD:
25 25 Script para administrar y obtener la data del repositorio por medio de APIs.
26 26
27 27 REQUISITIOS PREVIOS:
28 28 - Paso 1: Tener "pip [Python 2]" o "pip3 [Python 3]" instalado:
29 29 - Paso 2: Instalar lo siguiente como admininstrador:
30 30 En Python 2
31 31 - pip install ckanapi==4.5
32 32 - pip install requests
33 33 - pip install futures
34 34 - pip install tqdm
35 35 En Python > 3
36 36 - pip3 install ckanapi==4.5
37 37 - pip3 install requests
38 38 - pip3 install tqdm
39 39
40 40 FUNCIONES DISPONIBLES:
41 41 - action
42 42 - upload_file
43 43 - upload_multiple_files
44 44 - upload_multiple_files_advance
45 45 - show
46 46 - search
47 47 - create
48 48 - patch
49 49 - delete
50 50 - download_files
51 51
52 52 EJEMPLOS:
53 53 #1:
54 54 with JROAPI('http://demo.example.com', Authorization='#########') as <access_name>:
55 55 ... some operation(s) ...
56 56 #2:
57 57 <access_name> = JROAPI('http://example.com', Authorization='#########')
58 58 ... some operation(s) ...
59 59 <access_name>.ckan.close()
60 60
61 61 REPORTAR ALGUN PROBLEMA:
62 62 Debe enviar un correo a eynilupu@igp.gob.pe detallando los siguientes pasos:
63 63 1) Correo para contactarlo
64 64 2) Descripcion del problema
65 65 3) ¿En que paso o seccion encontro el problema?
66 66 4) ¿Cual era el resultado que usted esperaba?
67 67 """
def __init__(self, url, Authorization=None, secure=True):
    """
    PURPOSE:
        Create the CKAN client used by every other method of this class.

    PARAMETERS:
        url: base URL of the CKAN repository.
        Authorization: API key sent with the requests (optional).
        secure: TLS verification setting. True (default) keeps certificate
            verification on, False turns it off, and a non-empty string is
            used as the path of a CA bundle ("verify" semantics of requests).
    """
    #-------- Check Secure -------#
    # The same setting is reused for direct downloads (requests.get(verify=...)).
    self.verify = secure
    if secure is False or (isinstance(secure, str) and secure):
        # A dedicated session is the only way to hand the setting to RemoteCKAN,
        # so the API calls and the downloads verify certificates the same way.
        session = requests.Session()
        session.verify = secure
    else:
        session = None
    #------------------------------#
    self.url = url
    ua = 'CKAN_JRO/2.9.2 (+' + str(self.url) + ')'
    self.ckan = RemoteCKAN(self.url, apikey=Authorization, user_agent=ua, session=session)
    self.Authorization = Authorization
    # Path separator of the running platform ('\\' on Windows, '/' elsewhere).
    self.separator = os.sep

    # Size in bytes of each chunk read while streaming a download.
    self.chunk_size = 1024
    # Scratch attributes kept for the methods that still read them.
    self.list = []
    self.dict = {}
    self.str = ''
    self.check = 1
    self.cont = 0
95 95
def __enter__(self):
    """Enter a ``with`` block; the instance itself is the bound value."""
    return self
98 98
def __exit__(self, *exc_info):
    """Leave a ``with`` block: close the CKAN client, whatever the outcome was."""
    client = self.ckan
    client.close()
101 101
def action(self, action, **kwargs):
    """
    PURPOSE:
        Call any of the available APIs by name.

    AVAILABLE APIs:
        SEE: "GUIA DE SCRIPT.pdf"

    RETURNS:
        The result of the API call. If the call fails, the exception object
        is returned instead of being raised.

    EXAMPLE:
        <access_name>.action(<consuming API>, param_1 = <class 'param_1'>, ...)
    """
    #--------------- CASE: PACKAGE SEARCH ---------------#
    # Dotted names cannot be typed as Python keyword arguments, so the
    # underscore spelling is accepted and renamed before the call.
    if action == 'package_search':
        for facet in ('facet_mincount', 'facet_limit', 'facet_field'):
            if facet in kwargs:
                kwargs[facet.replace('_', '.')] = kwargs.pop(facet)
    #----------------------------------------------------#
    try:
        return getattr(self.ckan.action, action)(**kwargs)
    except Exception as exc:
        # KeyboardInterrupt / SystemExit are deliberately not caught here.
        return exc
127 127
def upload_file(self, dataset_id, file_path, file_date, file_type, **kwargs):
    '''
    PURPOSE:
        Upload a single file to a dataset of the repository.

    AVAILABLE PARAMETERS:
        SEE: "GUIA DE SCRIPT.pdf"

    RETURNS:
        The result of "resource_create" on success, an error message (str)
        when the file is missing or already present in the dataset, or the
        exception object when an API call fails.

    STRUCTURE:
        <access_name>.upload_file(dataset_id = <class 'str'>, file_date = <class 'str'>, file_path = <class 'str'>, file_type = <class 'str'>, param_1 = <class 'param_1'>, ...)
    '''
    # Extra resource fields are collected in a local dict so that values
    # from earlier calls are never sent again by accident.
    reserved = ('package_id', 'upload', 'voc_file_type', 'name')
    extra = dict((key1, value1) for key1, value1 in kwargs.items() if key1 not in reserved)

    #---------------------------#
    if 'others' not in kwargs:
        extra['others'] = ''
    elif isinstance(kwargs['others'], list):
        # Lists are sent JSON-encoded; any other value is forwarded as given.
        extra['others'] = json.dumps(kwargs['others'])
    #---------------------------#

    if not os.path.isfile(file_path):
        return 'File "%s" not exist' % (file_path)

    file_name = os.path.basename(file_path)

    #-------------------------PACKAGE SHOW-----------------------#
    try:
        dataset_show = getattr(self.ckan.action, 'package_show')(id=dataset_id)['resources']
    except Exception as exc:
        print('ERROR obtaining metadata dataset:: Use the "print" for more information')
        return exc

    # Resource names are compared case-insensitively.
    resources_name = [u['name'].lower() for u in dataset_show]
    if file_name.lower() in resources_name:
        return 'ERROR:: "%s" file already exist in this dataset' % (file_name)
    #------------------------------------------------------------#

    # The handle is closed as soon as the request finishes.
    with open(file_path, 'rb') as upload:
        try:
            return getattr(self.ckan.action, 'resource_create')(package_id=dataset_id, file_date=file_date, upload=upload, voc_file_type=file_type, name=file_name, **extra)
        except Exception as exc:
            return exc
182 182
def upload_multiple_files_advance(self, dataset_id, path_files, file_date, file_type, max_size=100, max_count=500, ignore_repetition=False, **kwargs):
    '''
    PURPOSE:
        Upload several files to a dataset of the repository, grouped in
        blocks that are sent with one "package_revise" call each.

    AVAILABLE PARAMETERS:
        SEE: "GUIA DE SCRIPT.pdf"
        max_size: upper bound, in MB, for one file and for one block.
        max_count: upper bound for the number of files in a block (1-999).
        ignore_repetition: when True, files whose name already exists in the
            dataset are skipped with a warning instead of aborting.

    RETURNS:
        The result of the last "package_revise" call on success, an error
        message (str) when validation fails, or the exception object when an
        API call fails.

    STRUCTURE:
        <access_name>.upload_multiple_files_advance(dataset_id = <class 'str'>, path_files = <class 'list of strings'>, file_date = <class 'str'>, file_type = <class 'str'>, param_1 = <class 'param_1'>, ...)
    '''
    #-------------------------PACKAGE SHOW-----------------------#
    try:
        dataset_show = getattr(self.ckan.action, 'package_show')(id=dataset_id)['resources']
    except Exception as exc:
        print('ERROR obtaining metadata dataset:: Use the "print" for more information')
        return exc
    #------------------------------------------------------------#
    # Names are compared case-insensitively.
    resources_name = set(u['name'].lower() for u in dataset_show)
    #------------------------------------------------------------#
    # Local dict: nothing left over from a previous call is ever sent.
    reserved = ('package_id', 'upload', 'voc_file_type', 'name')
    extra = dict((key1, value1) for key1, value1 in kwargs.items() if key1 not in reserved)
    if 'others' not in kwargs:
        extra['others'] = ''
    elif isinstance(kwargs['others'], list):
        extra['others'] = json.dumps(kwargs['others'])
    #---------------CASE : "path" or "path_list"-----------------#
    if type(path_files) is list:
        if len(path_files) == 0:
            return 'ERROR:: "path_list is empty"'
        # sorted() keeps the caller's list untouched.
        candidates = [(os.path.basename(u), u) for u in sorted(path_files)]
        must_exist = True
    elif type(path_files) is str:
        if not os.path.isdir(path_files):
            return 'ERROR:: Directory "%s" does not exist' % (path_files)
        path_order = sorted(f for f in os.listdir(path_files) if os.path.isfile(os.path.join(path_files, f)))
        if not path_order:
            return "ERROR:: There aren't files in this directory"
        candidates = [(name, os.path.join(path_files, name)) for name in path_order]
        must_exist = False
    else:
        return 'ERROR:: "path_files" must be a str or list'

    # Only names, paths and sizes are recorded here; files are opened later,
    # one block at a time, so an early return cannot leak open handles.
    total_list = []
    for name, full_path in candidates:
        if must_exist and not os.path.isfile(full_path):
            return 'File "%s" does not exist' % (full_path)
        if name.lower() in resources_name:
            if not ignore_repetition:
                return 'ERROR:: "%s" file already exist in this dataset' % (name)
            print('WARRING:: "'+ str(name) +'" file was ignored because already exist in this dataset')
        else:
            total_list.append({'name': name, 'path': full_path, 'size': os.stat(full_path).st_size})
    if not total_list:
        return "ERROR:: No file(s) found to upload"
    #------------------------------------------------------------#
    # "package_revise" matches the dataset by id when a UUID is given and by name otherwise.
    try:
        uuid.UUID(str(dataset_id), version=4)
        package_id_or_name = '"id": "' + str(dataset_id) + '"'
    except ValueError:
        package_id_or_name = '"name": "' + str(dataset_id) + '"'
    #------------------------------------------------------------#
    byte_limit = 1024 * 1024 * float(max_size)
    blocks = [[]]
    size_file = 0
    count_file = 0
    for value in total_list:
        if value['size'] > byte_limit:
            # Float division keeps the reported size right on Python 2 as well.
            return 'ERROR:: The size of the "%s" file is %sMB aprox, please change "max_size" value' % (value['name'], str(round(value['size']/(1024.0 * 1024), 2)))
        if not 1 <= int(max_count) <= 999:
            return 'ERROR:: The count of the number of files must be between 1 and 999, please change "max_count" value'

        size_file = size_file + value['size']
        count_file = count_file + 1
        if size_file <= byte_limit and count_file <= int(max_count):
            blocks[-1].append(value)
        else:
            # This file would overflow the current block: start a new one with it.
            blocks.append([value])
            size_file = value['size']
            count_file = 1
    #------------------------------------------------------------#
    print('BLOCK(S) IN TOTAL:: {}'.format(len(blocks)))
    for count1, block in enumerate(blocks):
        print('---- BLOCK N°{} ----'.format(count1 + 1))
        resource_extend = []
        files_dict = {}
        handles = []
        try:
            for count2, value2 in enumerate(block):
                handle = open(value2['path'], 'rb')
                handles.append(handle)
                resource = {'name': value2['name'], 'file_date': file_date, 'voc_file_type': file_type}
                resource.update(extra)
                # Negative index: position of the new resource counted from
                # the end of the list once the block has been appended.
                files_dict['update__resources__-'+ str(len(block)-count2) +'__upload'] = (value2['name'], handle)
                resource_extend.append(resource)

            print('BLOCK N°{} :: "{}" file(s) found >> uploading'.format(count1 + 1, len(block)))
            try:
                result = self.ckan.call_action(
                    'package_revise',
                    {'match': '{'+ str(package_id_or_name) +'}', 'update__resources__extend': json.dumps(resource_extend)},
                    files=files_dict
                )
            except Exception as exc:
                print('ERROR :: Use the "print" for more information')
                return exc
            print('BLOCK N°{} :: Uploaded file(s) successfully'.format(count1 + 1))
            if len(blocks) == count1 + 1:
                return result
        finally:
            for handle in handles:
                handle.close()
320 320
def upload_multiple_files(self, dataset_id, path_files, date_files, type_files, ignore_repetition=False, **kwargs):
    '''
    PURPOSE:
        Upload several files to a dataset of the repository, one
        "resource_create" call per file.

    AVAILABLE PARAMETERS:
        SEE: "GUIA DE SCRIPT.pdf"
        A list value is applied element by element (one item per file, in
        sorted file order); any other value is applied to every file.
        "others" follows its own rule: a tuple is per file, a list or str
        is shared by every file.

    RETURNS:
        A list with one entry per file (the API result, or the exception
        object if that upload failed), or an error message (str) when
        validation fails.

    STRUCTURE:
        <access_name>.upload_multiple_files(dataset_id = <class 'str'>, path_files = <class 'str'> or <class 'list of strings'>, date_files = <class 'str'> or <class 'list of strings'>, type_files = <class 'str'> or <class 'list of strings'>, param_1 = <class 'param_1'>, ...)
    '''
    #-------------------------PACKAGE SHOW-----------------------#
    try:
        dataset_show = getattr(self.ckan.action, 'package_show')(id=dataset_id)['resources']
    except Exception as exc:
        print('ERROR obtaining metadata dataset:: Use the "print" for more information')
        return exc
    #------------------------------------------------------------#
    # Names are compared case-insensitively.
    resources_name = set(u['name'].lower() for u in dataset_show)
    #---------------CASE : "path" or "path_list"-----------------#
    if type(path_files) is list:
        if len(path_files) == 0:
            return 'ERROR:: "path_list is empty"'
        # sorted() keeps the caller's list untouched.
        candidates = [(os.path.basename(u), u) for u in sorted(path_files)]
        must_exist = True
    elif type(path_files) is str:
        if not os.path.isdir(path_files):
            return 'ERROR:: Directory "%s" does not exist' % (path_files)
        path_order = sorted(f for f in os.listdir(path_files) if os.path.isfile(os.path.join(path_files, f)))
        if not path_order:
            return "ERROR:: There aren't files in this directory"
        candidates = [(name, os.path.join(path_files, name)) for name in path_order]
        must_exist = False
    else:
        return 'ERROR:: "path_files" must be a str or list'

    # 'upload' holds paths for now; each file is opened only while it is being sent.
    params_dict = {'upload': [], 'name': []}
    for name, full_path in candidates:
        if must_exist and not os.path.isfile(full_path):
            return 'File "%s" does not exist' % (full_path)
        if name.lower() in resources_name:
            if not ignore_repetition:
                return 'ERROR:: "%s" file already exist in this dataset' % (name)
            print('WARRING:: "'+ str(name) +'" file was ignored because already exist in this dataset')
        else:
            params_dict['upload'].append(full_path)
            params_dict['name'].append(name)
    #------------------------------------------------------------#
    # params_dict: per-file values (lists); params_no_dict: values shared by every file.
    params_no_dict = {'package_id': dataset_id}
    if type(date_files) is list:
        params_dict['file_date'] = date_files
    else:
        params_no_dict['file_date'] = date_files

    if type(type_files) is list:
        params_dict['voc_file_type'] = type_files
    else:
        params_no_dict['voc_file_type'] = type_files

    for key1, value1 in kwargs.items():
        if key1 in params_dict or key1 in params_no_dict or key1 == 'others':
            continue
        if type(value1) is list:
            params_dict[key1] = value1
        else:
            params_no_dict[key1] = value1
    #------------------------------------------#
    if 'others' not in kwargs:
        params_no_dict['others'] = ''
    elif isinstance(kwargs['others'], tuple):
        params_dict['others'] = [json.dumps(w) for w in kwargs['others']]
    elif isinstance(kwargs['others'], list):
        params_no_dict['others'] = json.dumps(kwargs['others'])
    elif isinstance(kwargs['others'], str):
        params_no_dict['others'] = kwargs['others']
    else:
        return 'ERROR:: "others" must be a tuple, list or str'
    #------------------------------------------#
    if len(set(len(value2) for value2 in params_dict.values())) > 1:
        return 'ERROR:: All lists must be the same length: %s' % (len(params_dict['name']))
    #------------------------------------------------------------#
    print('"{}" file(s) found >> uploading'.format(len(params_dict['name'])))
    # Fresh list on every call: results of earlier calls are never mixed in.
    results = []
    for v, file_name in enumerate(params_dict['name']):
        send = dict((key_dict, value_dict[v]) for key_dict, value_dict in params_dict.items())
        send.update(params_no_dict)
        with open(send['upload'], 'rb') as handle:
            send['upload'] = handle
            try:
                results.append(getattr(self.ckan.action, 'resource_create')(**send))
                print('File #{} :: "{}" was uploaded successfully'.format(v+1, file_name))
            except Exception as exc:
                results.append(exc)
                print('File #{} :: Error uploading "{}" file'.format(v+1, file_name))
    return results
    #------------------------------------------------------------#
452 452
def show(self, type_option, id, **kwargs):
    '''
    PURPOSE:
        Look up one specific object of the repository.

    AVAILABLE PARAMETERS:
        SEE: "GUIA DE SCRIPT.pdf"

    RETURNS:
        The result of the matching "*_show" / "*_list_for_user" API call, an
        error message (str) for an invalid "type_option", or the exception
        object when the API call fails.

    STRUCTURE:
        <access_name>.show(type_option = <class 'str'>, id = <class 'str'>, param_1 = <class 'param_1'>, ...)
    '''
    if type(type_option) is not str:
        return 'ERROR:: "type_option" must be a str'

    # type_option -> CKAN action that answers it.
    api_by_option = {
        'dataset': 'package_show',
        'resource': 'resource_show',
        'project': 'organization_show',
        'collaborator': 'package_collaborator_list_for_user',
        'member': 'organization_list_for_user',
        'vocabulary': 'vocabulary_show',
        'tag': 'tag_show',
        'user': 'user_show',
        'job': 'job_show',
    }
    if type_option not in api_by_option:
        return 'ERROR:: "type_option = %s" is not accepted' % (type_option)

    if type_option == 'tag' and 'vocabulary_id' not in kwargs:
        print('Missing "vocabulary_id" value: assume it is a free tag')

    try:
        return getattr(self.ckan.action, api_by_option[type_option])(id=id, **kwargs)
    except Exception as exc:
        # KeyboardInterrupt / SystemExit are deliberately not caught here.
        return exc
493 493
def search(self, type_option, query=None, **kwargs):
    '''
    PURPOSE:
        Search datasets, resources or tags that satisfy some criteria.

    AVAILABLE PARAMETERS:
        SEE: "GUIA DE SCRIPT.pdf"

    RETURNS:
        The result of "package_search", "resources_search" or "tag_search",
        an error message (str) when an argument is invalid, or the exception
        object when the API call fails.

    STRUCTURE:
        <access_name>.search(type_option = <class 'str'>, query = <class 'dict'>, param_1 = <class 'param_1'>, ...)
    '''
    if type(type_option) is not str:
        return 'ERROR:: "type_option" must be <str>'

    def date_error(field):
        # Error message for query[field], or None when it is a valid <YYYY-MM-DD> str.
        value = query[field]
        if type(value) is not str:
            return '"%s" must be <str>' % (field)
        try:
            datetime.strptime(value, '%Y-%m-%d')
        except ValueError:
            return '"%s" incorrect: "%s"' % (field, value)
        # strptime also accepts "2020-1-1"; the length check rejects it.
        if len(value) != 10:
            return '"%s", must be: <YYYY-MM-DD>' % (field)
        return None

    # Request parameters live in a local dict so that nothing from an
    # earlier call (of this or another method) is sent again.
    params = {}
    try:
        if type_option == 'dataset':
            key_replace = ('fq', 'fq_list', 'include_private')
            key_point = ('facet_mincount', 'facet_limit', 'facet_field')
            for key1, value1 in kwargs.items():
                if key1 in key_replace:
                    continue
                if key1 in key_point:
                    params[key1.replace('_', '.')] = value1
                else:
                    params[key1] = value1

            if query is not None:
                if type(query) is not dict:
                    return '"query" must be <dict>'
                params['fq_list'] = []
                handled = []
                for field in ('dataset_start_date', 'dataset_end_date'):
                    if field not in query:
                        continue
                    message = date_error(field)
                    if message is not None:
                        return message
                    # Both dates are validated <YYYY-MM-DD> strings here, so
                    # comparing them as text is comparing them as dates.
                    if field == 'dataset_end_date' and 'dataset_start_date' in query:
                        if query['dataset_start_date'] > query['dataset_end_date']:
                            return '"dataset_end_date" must be greater than "dataset_start_date"'
                    params['fq_list'].append(field + ':"' + query[field] + '"')
                    handled.append(field)
                #----------------------------------------------------#
                for key, value in query.items():
                    if value is not None and key not in handled:
                        params['fq_list'].append(str(key)+':"'+str(value)+'"')

            return getattr(self.ckan.action, 'package_search')(include_private=True, **params)

        elif type_option == 'resource':
            for key1, value1 in kwargs.items():
                if key1 != 'fields':
                    params[key1] = value1

            if query is not None:
                if type(query) is not dict:
                    return '"query" must be <dict>'
                for field in ('file_date_min', 'file_date_max'):
                    if field not in query:
                        continue
                    message = date_error(field)
                    if message is not None:
                        return message
                    if field == 'file_date_max' and 'file_date_min' in query:
                        if query['file_date_min'] > query['file_date_max']:
                            return '"file_date_max" must be greater than "file_date_min"'
                params['query'] = query
            return getattr(self.ckan.action, 'resources_search')(**params)

        elif type_option == 'tag':
            for key1, value1 in kwargs.items():
                if key1 != 'fields':
                    params[key1] = value1

            if 'vocabulary_id' not in kwargs:
                print('Missing "vocabulary_id" value: tags that don’t belong to any vocabulary')
            else:
                print('Only tags that belong to "{}" vocabulary'.format(kwargs['vocabulary_id']))

            if query is not None:
                if type(query) is not dict:
                    return '"query" must be <dict>'
                if 'search' in query:
                    if type(query['search']) is list or type(query['search']) is str:
                        params['query'] = query['search']
                    else:
                        return '"search" must be <list> or <str>'
            return getattr(self.ckan.action, 'tag_search')(**params)

        else:
            return 'ERROR:: "type_option = %s" is not accepted' % (type_option)

    except Exception as exc:
        # KeyboardInterrupt / SystemExit are deliberately not caught here.
        return exc
629 629
def create(self, type_option, select=None, **kwargs):
    '''
    PURPOSE:
        Create an object in the repository.

    AVAILABLE PARAMETERS:
        SEE: "GUIA DE SCRIPT.pdf"
        select: only used with type_option = 'views'; 'resource' or 'dataset'.

    RETURNS:
        The result of the matching "*_create" API call, an error message
        (str) for an invalid "type_option" / "select", or the exception
        object when the API call fails.

    STRUCTURE:
        <access_name>.create(type_option = <class 'str'>, param_1 = <class 'param_1'>, ...)
    '''
    if type(type_option) is not str:
        return 'ERROR:: "type_option" must be <str>'

    # type_option -> CKAN action that creates it.
    api_by_option = {
        'dataset': 'package_create',
        'project': 'organization_create',
        'member': 'organization_member_create',
        'collaborator': 'package_collaborator_create',
        'vocabulary': 'vocabulary_create',
        'tag': 'tag_create',
        'user': 'user_create',
    }
    params = kwargs
    if type_option == 'views':
        if 'resource' == select:
            api_name = 'resource_create_default_resource_views'
            # "package" is dropped for this call; a local dict is used so no
            # parameter from an earlier call is sent along.
            params = dict((key1, value1) for key1, value1 in kwargs.items() if key1 != 'package')
        elif 'dataset' == select:
            api_name = 'package_create_default_resource_views'
        else:
            return 'ERROR:: "select = %s" is not accepted' % (select)
    elif type_option in api_by_option:
        api_name = api_by_option[type_option]
    else:
        return 'ERROR:: "type_option = %s" is not accepted' % (type_option)

    try:
        return getattr(self.ckan.action, api_name)(**params)
    except Exception as exc:
        # KeyboardInterrupt / SystemExit are deliberately not caught here.
        return exc
675 675
def patch(self, type_option, **kwargs):
    '''
    PURPOSE:
        Update an object of the repository.

    AVAILABLE PARAMETERS:
        SEE: "GUIA DE SCRIPT.pdf"

    RETURNS:
        The result of the matching API call, an error message (str) for an
        invalid "type_option", or the exception object when the API call
        fails.

    STRUCTURE:
        <access_name>.patch(type_option = <class 'str'>, param_1 = <class 'param_1'>, ...)
    '''
    if type(type_option) is not str:
        return 'ERROR:: "type_option" must be <str>'

    # type_option -> CKAN action. Members and collaborators are updated
    # through their "*_create" actions, exactly as before.
    api_by_option = {
        'dataset': 'package_patch',
        'project': 'organization_patch',
        'resource': 'resource_patch',
        'member': 'organization_member_create',
        'collaborator': 'package_collaborator_create',
    }
    if type_option not in api_by_option:
        return 'ERROR:: "type_option = %s" is not accepted' % (type_option)

    try:
        return getattr(self.ckan.action, api_by_option[type_option])(**kwargs)
    except Exception as exc:
        # KeyboardInterrupt / SystemExit are deliberately not caught here.
        return exc
706 706
def delete(self, type_option, select=None, **kwargs):
    '''
    PURPOSE:
        Delete and/or purge an object of the repository.

    AVAILABLE PARAMETERS:
        SEE: "GUIA DE SCRIPT.pdf"
        select: required for 'dataset' and 'project'; 'delete' or 'purge'.

    RETURNS:
        The result of the matching API call, an error message (str) for an
        invalid "type_option" / "select", or the exception object when the
        API call fails.

    STRUCTURE:
        <access_name>.delete(type_option = <class 'str'>, param_1 = <class 'param_1'>, ...)
    '''
    if type(type_option) is not str:
        return 'ERROR:: "type_option" must be <str>'

    # Objects for which the caller must choose between deleting and purging.
    api_by_select = {
        'dataset': (('delete', 'package_delete'), ('purge', 'dataset_purge')),
        'project': (('delete', 'organization_delete'), ('purge', 'organization_purge')),
    }
    # Objects with a single delete action.
    api_by_option = {
        'resource': 'resource_delete',
        'vocabulary': 'vocabulary_delete',
        'tag': 'tag_delete',
        'user': 'user_delete',
    }

    if type_option in api_by_select:
        if select is None:
            return 'ERROR:: "select" must not be "None"'
        api_name = None
        for option, name in api_by_select[type_option]:
            if option == select:
                api_name = name
        if api_name is None:
            return 'ERROR:: "select = %s" is not accepted' % (select)
    elif type_option in api_by_option:
        api_name = api_by_option[type_option]
    else:
        return 'ERROR:: "type_option = %s" is not accepted' % (type_option)

    try:
        return getattr(self.ckan.action, api_name)(**kwargs)
    except Exception as exc:
        # KeyboardInterrupt / SystemExit are deliberately not caught here.
        return exc
755 755
def f_status_note(self, total, result, path):
    """
    Write "status_note.txt" summarising which files were downloaded.

    PARAMETERS:
        total: dict whose 'name' entry lists every requested file name.
        result: dict whose 'name' entry lists the names that were downloaded.
        path: prefix the file name is appended to (no separator is added).
    """
    failed_count = len(total['name']) - len(result['name'])

    lines = ['DOWNLOADED FILE(S): "%s"' % (len(result['name'])) + '\n']
    for u in result['name']:
        lines.append(' - ' + u + '\n')
    lines.append('\n')

    lines.append('FAILED FILE(S): "%s"' % (failed_count) + '\n')
    if failed_count != 0:
        for u in total['name']:
            if u not in result['name']:
                lines.append(' - ' + u + '\n')
    else:
        lines.append(' "None"' + '\n')

    # Text mode already converts '\n' to the platform line ending; writing
    # os.linesep here would yield '\r\r\n' on Windows. The with-block also
    # guarantees the file is flushed and closed.
    with open(path + 'status_note.txt', 'w') as file_txt:
        file_txt.writelines(lines)
774 774
def f_name(self, name_dataset, ext, tempdir):
    """
    PURPOSE:
        Pick a file/directory name that does not exist yet inside "tempdir".
        The first candidate is "<name_dataset><ext>"; if it is taken, the
        candidates "<name_dataset>(1)<ext>", "<name_dataset>(2)<ext>", ...
        are tried in order.

    PARAMETERS:
        name_dataset -- base name (without extension).
        ext          -- extension including the dot, or '' for a directory.
        tempdir      -- directory to probe. It is concatenated as-is with
                        the candidate, so it must end with a path separator
                        (or be '' for the current directory).

    RETURNS:
        The first free name (name only, "tempdir" is not included).

    NOTE:
        Every call starts a fresh search. Previously the search relied on
        "self.check"/"self.cont" left over from the prior call, so a second
        call on the same object skipped the loop and returned the stale
        name. The attributes are still updated with the same final values
        for any code that reads them.
    """
    self.cont = 0
    candidate = name_dataset + ext
    while os.path.exists(tempdir + candidate):
        self.cont += 1
        candidate = name_dataset + '(' + str(self.cont) + ')' + ext

    # Publish the same end state the loop-based version left behind:
    # counter one past the chosen index, flag cleared, chosen name stored.
    self.cont += 1
    self.check = 0
    self.str = candidate
    return candidate
790 790
def f_zipdir(self, path, ziph, zip_name):
    """
    PURPOSE:
        Add every file found under "path" to an already opened zip archive,
        showing a progress bar per directory.

    PARAMETERS:
        path     -- directory to walk.
        ziph     -- object with a write(filename, arcname) method
                    (download_files passes an open zipfile.ZipFile).
        zip_name -- archive name; only used in the progress messages.

    RETURNS:
        None. The archive is not closed here.
    """
    # Archive names are made relative to the parent of "path", so the last
    # directory component of "path" is kept inside the zip.
    parent = os.path.join(path, '..')
    for folder, _, entries in os.walk(path):
        print('.....')
        print('Creating: "{}" >>'.format(zip_name))
        for entry in tqdm(iterable=entries, total=len(entries)):
            source = os.path.join(folder, entry)
            ziph.write(source, os.path.relpath(source, parent))
        print('Created >>')
799 799
def download_by_step(self, response, tempdir_name):
    """
    PURPOSE:
        Download one resource into "tempdir_name", streaming it in chunks of
        "self.chunk_size" bytes.

    PARAMETERS:
        response     -- dict with at least the keys 'url' and 'name'.
                        'url' may be rewritten in place (see below).
        tempdir_name -- destination directory. It is concatenated as-is with
                        response['name'], so it must end with a separator.

    RETURNS:
        None. The file is written only when the server answers 200. Request
        errors are swallowed on purpose (best effort): the caller decides
        what succeeded by listing the destination directory.
    """
    target = None
    try:
        # ---------- REPLACE URL --------- #
        # When this client is not pointed at www.igp.gob.pe but the resource
        # URL is, swap scheme and host for the ones of "self.url".
        own = urlparse(self.url)
        remote = urlparse(response['url'])
        if own.netloc != 'www.igp.gob.pe' and remote.netloc == 'www.igp.gob.pe':
            response['url'] = response['url'].replace(remote.scheme + '://' + remote.netloc,
                                                      own.scheme + '://' + own.netloc)
        #----------------------------------#
        with requests.get(response['url'], stream=True, headers={'Authorization': self.Authorization}, verify=self.verify) as resp:
            if resp.status_code == 200:
                target = tempdir_name + response['name']
                with open(target, 'wb') as file:
                    for chunk in resp.iter_content(chunk_size = self.chunk_size):
                        if chunk:
                            file.write(chunk)
    except requests.exceptions.RequestException:
        # "target" is only set once this call started writing, so a file
        # found there is a truncated download. Remove it; otherwise the
        # caller would count it as a successfully downloaded file.
        if target is not None and os.path.isfile(target):
            os.remove(target)
815 815
def download_files(self, **kwargs):
    '''
    PURPOSE:
        Custom function to download the existing files of a dataset.

    AVAILABLE PARAMETERS:
        SEE: "GUIA DE SCRIPT.pdf"
        Handled locally (not sent to the server):
            - zip <bool>:         pack the downloaded files into a .zip
                                  created in "path" (default False).
            - status_note <bool>: also write "status_note.txt" next to the
                                  downloaded files (default False).
            - path <str>:         existing, writable directory where the
                                  result is stored (default '': current
                                  directory).
        Every other keyword (including "id") is stored in "self.dict" and
        passed to the "url_resources" action.

    STRUCTURE:
        <access_name>.download_files(id = <class 'str'>, param_1 = <class 'param_1'>, ...)

    RETURNS:
        - 'DOWNLOAD FINISHED', 'NO FILES WERE DOWNLOADED' or 'FILES NOT FOUND'.
        - An 'ERROR:: ...' string when a local parameter is invalid.
        - The exception object raised by the "url_resources" call, if any.
    '''
    dict_local = {}
    #----------------------------------------------#
    # "zip" is validated before "status_note"; the first invalid one wins.
    for flag in ('zip', 'status_note'):
        if flag in kwargs:
            if not isinstance(kwargs[flag], bool):
                return 'ERROR:: "%s" must be: <class "bool">' % (flag)
            dict_local[flag] = kwargs[flag]
        else:
            dict_local[flag] = False
    #----------------------------------------------#
    if 'path' in kwargs:
        if not isinstance(kwargs['path'], str):
            return 'ERROR:: "path" must be: <class "str">'
        if not os.path.isdir(kwargs['path']):
            return 'ERROR:: "path" does not exist'

        if kwargs['path'][-1:] != self.separator:
            dict_local['path'] = kwargs['path']+self.separator
        else:
            dict_local['path'] = kwargs['path']

        # Write probe: create and delete a uniquely named file to make sure
        # the directory is writable. EnvironmentError exists on Python 2 and
        # 3 and covers IOError/OSError (PermissionError included), so one
        # branch serves both interpreters.
        txt = dict_local['path']+datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f")+'.txt'
        try:
            with open(txt, 'w'):
                pass
            os.remove(txt)
        except EnvironmentError:
            return 'ERROR:: Access denied, you are not authorized to write files: "%s"' % (dict_local['path'])
    else:
        dict_local['path'] = ''
    #----------------------------------------------#
    # Everything that is not a local option goes to the server call.
    for key, value in kwargs.items():
        if key not in dict_local:
            self.dict[key] = value
    try:
        response = getattr(self.ckan.action, 'url_resources')(**self.dict)
    except Exception:
        _, exc_value, _ = sys.exc_info()
        return exc_value

    if len(response) == 0:
        return 'FILES NOT FOUND'

    #--------------TEMP PATH---------------#
    tempdir = None
    if dict_local['zip']:
        tempdir = tempfile.mkdtemp(prefix=kwargs['id']+'-')+self.separator
    try:
        if dict_local['zip']:
            os.mkdir(tempdir+kwargs['id'])
            dir_name = tempdir + kwargs['id'] + self.separator
        else:
            folder = self.f_name(kwargs['id'], '', dict_local['path'])
            os.mkdir(dict_local['path'] + folder)
            dir_name = dict_local['path'] + folder + self.separator
        #-----------DOWNLOAD FILES-------------#
        print('.....')
        print('Downloading "{}" file(s) >>'.format(len(response)))
        name_total = {'name': []}
        # Leaving the "with" block waits for every submitted download.
        with concurrent.futures.ThreadPoolExecutor() as executor:
            for u in tqdm(iterable=response, total=len(response)):
                name_total['name'].append(u['name'])
                executor.submit(self.download_by_step, u, dir_name)
        # Success is judged by what actually landed in the directory.
        name_check = {}
        name_check['name'] = [f for f in os.listdir(dir_name) if os.path.isfile(os.path.join(dir_name, f))]
        print('"{}" downloaded file(s) successfully >>'.format(len(name_check['name'])))
        #--------------------------------------#
        if len(name_check['name']) == 0:
            # Nothing arrived: drop the empty target directory. In zip mode
            # it lives inside the temporary directory removed below.
            if not dict_local['zip']:
                if os.path.exists(dir_name[:-1]):
                    shutil.rmtree(dir_name[:-1])
            return 'NO FILES WERE DOWNLOADED'

        #----------Status Note---------#
        if dict_local['status_note']:
            print('.....')
            print('Creating: "status_note.txt" >>')
            self.f_status_note(name_total, name_check, dir_name)
            print('Created>>')
        #----------ZIP CREATE----------#
        if dict_local['zip']:
            zip_name = self.f_name(kwargs['id'], '.zip', dict_local['path'])
            # The context manager closes the archive even if zipping fails.
            with zipfile.ZipFile(dict_local['path'] + zip_name, 'w', zipfile.ZIP_DEFLATED, allowZip64=True) as ziph:
                self.f_zipdir(dir_name, ziph, zip_name)
        #------------------------------#
        print('.....')
        return 'DOWNLOAD FINISHED'
    finally:
        #Delete Temporal Path (zip mode only), also when an error occurred
        if tempdir is not None and os.path.exists(tempdir[:-1]):
            shutil.rmtree(tempdir[:-1])
General Comments 0
You need to be logged in to leave comments. Login now