##// END OF EJS Templates
v2.9.2 :: Add 'insecure' option for www.igp.gob.pe
eynilupu -
r10:95347668911e
parent child
Show More
1 NO CONTENT: modified file, binary diff hidden
@@ -1,916 +1,935
1 1 from ckanapi import RemoteCKAN
2 2 from datetime import datetime
3 3 from tqdm import tqdm
4 4 #from ckanapi.errors import NotAuthorized, NotFound, ValidationError, SearchQueryError, SearchError, CKANAPIError, ServerIncompatibleError
5 5 import sys
6 6 import platform
7 7 import os
8 8 import tempfile
9 9 import shutil
10 10 import zipfile
11 11 import concurrent.futures
12 12 import requests
13 13 import json
14 14 #import pathlib
15 15 import uuid
16 16
# Bind the ``urlparse`` *function* under the same name on both major versions,
# because the rest of the module calls it directly as ``urlparse(url)``.
if sys.version_info.major == 3:
    from urllib.parse import urlparse
else:
    # Python 2 keeps the function inside the ``urlparse`` module; importing
    # only the module would make every ``urlparse(url)`` call fail with
    # "'module' object is not callable".
    from urlparse import urlparse
21
17 22 class JROAPI():
18 23 """
19 24 FINALIDAD:
20 25 Script para administrar y obtener la data del repositorio por medio de APIs.
21 26
22 27 REQUISITIOS PREVIOS:
23 28 - Paso 1: Tener "pip [Python 2]" o "pip3 [Python 3]" instalado:
24 29 - Paso 2: Instalar lo siguiente como admininstrador:
25 30 En Python 2
26 31 - pip install ckanapi==4.5
27 32 - pip install requests
28 33 - pip install futures
29 34 - pip install tqdm
30 35 En Python > 3
31 36 - pip3 install ckanapi==4.5
32 37 - pip3 install requests
33 38 - pip3 install tqdm
34 39
35 40 FUNCIONES DISPONIBLES:
36 41 - action
37 42 - upload_file
38 43 - upload_multiple_files
39 44 - upload_multiple_files_advance
40 45 - show
41 46 - search
42 47 - create
43 48 - patch
44 49 - delete
45 50 - download_files
46 51
47 52 EJEMPLOS:
48 53 #1:
49 54 with JROAPI('http://demo.example.com', Authorization='#########') as <access_name>:
50 55 ... some operation(s) ...
51 56 #2:
52 57 <access_name> = JROAPI('http://example.com', Authorization='#########')
53 58 ... some operation(s) ...
54 59 <access_name>.ckan.close()
55 60
56 61 REPORTAR ALGUN PROBLEMA:
57 62 Debe enviar un correo a eynilupu@igp.gob.pe detallando los siguientes pasos:
58 63 1) Correo para contactarlo
59 64 2) Descripcion del problema
60 65 3) ¿En que paso o seccion encontro el problema?
61 66 4) ¿Cual era el resultado que usted esperaba?
62 67 """
def __init__(self, url, Authorization=None):
    """
    PURPOSE:
        Build the CKAN client used by every other method.

    PARAMETERS:
        url           -- base address of the repository.
        Authorization -- API key handed to ``RemoteCKAN`` and stored so it
                         can be sent as the ``Authorization`` header when
                         files are downloaded.
    """
    self.url = url

    # NOTE(review): certificate verification is switched off for *every*
    # https URL, not only for www.igp.gob.pe. This is the "insecure" option
    # and exposes the API key to man-in-the-middle attacks -- confirm that
    # this is really intended before relying on it.
    self.verify = None
    session = None
    if urlparse(url).scheme == 'https':
        self.verify = False
        session = requests.Session()
        session.verify = False

    ua = 'CKAN_JRO/2.9.2 (+' + str(self.url) + ')'
    self.ckan = RemoteCKAN(self.url, apikey=Authorization, user_agent=ua, session=session)
    self.Authorization = Authorization

    # Path separator used when building local paths.
    self.separator = '\\' if platform.system() == 'Windows' else '/'

    # Size in bytes of each chunk read while streaming a download.
    self.chunk_size = 1024

    # Scratch attributes shared by the other methods.
    self.list = []
    self.dict = {}
    self.str = ''
    self.check = 1
    self.cont = 0
81 95
def __enter__(self):
    """Return this object so ``with JROAPI(...) as name:`` binds it to ``name``."""
    return self
84 98
def __exit__(self, *args):
    """Close the CKAN client when the ``with`` block ends; exceptions are not suppressed."""
    client = self.ckan
    client.close()
87 101
def action(self, action, **kwargs):
    """
    PURPOSE:
        Call any CKAN API action by name.

    AVAILABLE APIs:
        See "GUIA DE SCRIPT.pdf".

    PARAMETERS:
        action   -- name of the API action, e.g. 'package_search'.
        **kwargs -- parameters forwarded to the action.

    RETURNS:
        The value returned by the action. If the call fails the exception
        object is returned instead of being raised.

    EXAMPLE:
        <access_name>.action(<consuming API>, param_1 = <class 'param_1'>, ...)
    """
    #--------------- CASE: PACKAGE SEARCH ---------------#
    if action == 'package_search':
        # A dot is not valid inside a Python keyword name, so callers write
        # facet_limit=... and it is renamed to 'facet.limit' here.
        # A local tuple is used on purpose: the shared ``self.list`` is read
        # by other methods and must not be overwritten by this call.
        for facet in ('facet_mincount', 'facet_limit', 'facet_field'):
            if facet in kwargs:
                kwargs[facet.replace('_', '.')] = kwargs.pop(facet)
    #----------------------------------------------------#
    try:
        return getattr(self.ckan.action, action)(**kwargs)
    except:
        # Errors are handed back to the caller, never raised (same contract
        # as the other methods of this class).
        return sys.exc_info()[1]
113 127
def upload_file(self, dataset_id, file_path, file_date, file_type, **kwargs):
    """
    PURPOSE:
        Upload a single file to a dataset of the repository.

    AVAILABLE PARAMETERS:
        See "GUIA DE SCRIPT.pdf".

    PARAMETERS:
        dataset_id -- id or name of the target dataset.
        file_path  -- local path of the file; its base name becomes the
                      resource name.
        file_date  -- value sent as the resource 'file_date'.
        file_type  -- value sent as the resource 'voc_file_type'.
        **kwargs   -- extra resource fields. 'package_id', 'upload',
                      'voc_file_type' and 'name' are ignored. 'others' is
                      JSON-encoded when it is a list and defaults to ''.

    RETURNS:
        The result of 'resource_create', an error message (str) when the
        file is missing or a resource with the same name already exists, or
        the exception object when an API call fails.

    STRUCTURE:
        <access_name>.upload_file(dataset_id = <class 'str'>, file_date = <class 'str'>, file_path = <class 'str'>, file_type = <class 'str'>, param_1 = <class 'param_1'>, ...)
    """
    # Extra fields are collected in a local dict: the shared ``self.dict``
    # keeps keys from earlier calls and would leak them into this upload.
    reserved = ('package_id', 'upload', 'voc_file_type', 'name')
    extra = dict((key, value) for key, value in kwargs.items() if key not in reserved)

    #---------------------------#
    if 'others' not in kwargs:
        extra['others'] = ''
    elif isinstance(kwargs['others'], list):
        extra['others'] = json.dumps(kwargs['others'])
    #---------------------------#

    if not os.path.isfile(file_path):
        return 'File "%s" not exist' % (file_path)

    file_name = os.path.basename(file_path)

    #-------------------------PACKAGE SHOW-----------------------#
    try:
        resources = self.ckan.action.package_show(id=dataset_id)['resources']
    except:
        print('ERROR obtaining metadata dataset:: Use the "print" for more information')
        return sys.exc_info()[1]

    # Resource names are compared case-insensitively.
    if file_name.lower() in [resource['name'].lower() for resource in resources]:
        return 'ERROR:: "%s" file already exist in this dataset' % (file_name)
    #------------------------------------------------------------#

    try:
        # The context manager closes the file whether or not the upload works.
        with open(file_path, 'rb') as handle:
            return self.ckan.action.resource_create(
                package_id=dataset_id,
                file_date=file_date,
                upload=handle,
                voc_file_type=file_type,
                name=file_name,
                **extra
            )
    except:
        return sys.exc_info()[1]
168 182
def upload_multiple_files_advance(self, dataset_id, path_files, file_date, file_type, max_size=100, max_count=500, ignore_repetition=False, **kwargs):
    """
    PURPOSE:
        Upload many files to a dataset, grouped in blocks that are sent with
        one 'package_revise' call each.

    AVAILABLE PARAMETERS:
        See "GUIA DE SCRIPT.pdf".

    PARAMETERS:
        dataset_id        -- id (UUID) or name of the target dataset.
        path_files        -- a directory (str) or a list of file paths.
        file_date         -- 'file_date' applied to every resource.
        file_type         -- 'voc_file_type' applied to every resource.
        max_size          -- maximum size of one block, in MB. A single file
                             bigger than this is an error.
        max_count         -- maximum number of files per block (1..999).
        ignore_repetition -- when True, files whose name already exists in
                             the dataset are skipped with a warning instead
                             of aborting the upload.
        **kwargs          -- extra resource fields. 'package_id', 'upload',
                             'voc_file_type' and 'name' are ignored. 'others'
                             is JSON-encoded when it is a list and defaults
                             to ''.

    RETURNS:
        The result of the last 'package_revise' call, an error message (str),
        or the exception object when an API call or a file open fails.

    STRUCTURE:
        <access_name>.upload_multiple_files_advance(dataset_id = <class 'str'>, path_files = <class 'list of strings'>, file_date = <class 'str'>, file_type = <class 'str'>, param_1 = <class 'param_1'>, ...)
    """
    #-------------------------PACKAGE SHOW-----------------------#
    try:
        resources = self.ckan.action.package_show(id=dataset_id)['resources']
    except:
        print('ERROR obtaining metadata dataset:: Use the "print" for more information')
        return sys.exc_info()[1]
    # Resource names are compared case-insensitively.
    resources_name = [resource['name'].lower() for resource in resources]
    #------------------------------------------------------------#
    # Extra fields live in a local dict: the shared ``self.dict`` keeps keys
    # from earlier calls and would leak them into these resources.
    reserved = ('package_id', 'upload', 'voc_file_type', 'name')
    extra = dict((key, value) for key, value in kwargs.items() if key not in reserved)
    if 'others' not in kwargs:
        extra['others'] = ''
    elif isinstance(kwargs['others'], list):
        extra['others'] = json.dumps(kwargs['others'])
    #---------------CASE : "path" or "path_list"-----------------#
    if isinstance(path_files, list):
        if not path_files:
            return 'ERROR:: "path_list is empty"'
        # sorted() leaves the caller's list untouched.
        candidates = [(os.path.basename(path), path) for path in sorted(path_files)]
    elif isinstance(path_files, str):
        if not os.path.isdir(path_files):
            return 'ERROR:: Directory "%s" does not exist' % (path_files)
        names = sorted(
            entry for entry in os.listdir(path_files)
            if os.path.isfile(os.path.join(path_files, entry))
        )
        if not names:
            return "ERROR:: There aren't files in this directory"
        candidates = [(name, os.path.join(path_files, name)) for name in names]
    else:
        return 'ERROR:: "path_files" must be a str or list'

    # Only names, paths and sizes are kept here. Files are opened block by
    # block further down so that no handle is left open on an early return.
    pending = []
    for name, path in candidates:
        if not os.path.isfile(path):
            return 'File "%s" does not exist' % (path)
        if name.lower() in resources_name:
            if not ignore_repetition:
                return 'ERROR:: "%s" file already exist in this dataset' % (name)
            print('WARRING:: "' + str(name) + '" file was ignored because already exist in this dataset')
            continue
        pending.append((name, path, os.stat(path).st_size))
    #------------------------------------------------------------#
    # A version-4 UUID is matched by "id", anything else by "name".
    # json.dumps keeps the match expression valid JSON for any identifier.
    try:
        uuid.UUID(str(dataset_id), version=4)
        match_key = 'id'
    except ValueError:
        match_key = 'name'
    match = json.dumps({match_key: str(dataset_id)})
    #------------------------------------------------------------#
    # Fill blocks in order; start a new one as soon as adding the next file
    # would exceed the size or the count limit.
    blocks = [[]]
    block_size = 0
    for name, path, size in pending:
        if size > 1024 * 1024 * float(max_size):
            return 'ERROR:: The size of the "%s" file is %sMB aprox, please change "max_size" value' % (name, str(round(size / (1024.0 * 1024), 2)))
        if not 1 <= int(max_count) <= 999:
            return 'ERROR:: The count of the number of files must be between 1 and 999, please change "max_count" value'

        block_size = block_size + size
        if block_size > 1024 * 1024 * float(max_size) or len(blocks[-1]) + 1 > int(max_count):
            blocks.append([])
            block_size = size
        blocks[-1].append((name, path))
    #------------------------------------------------------------#
    if not blocks[0]:
        return "ERROR:: No file(s) found to upload"

    print('BLOCK(S) IN TOTAL:: {}'.format(len(blocks)))
    result = None
    for number, block in enumerate(blocks, 1):
        print('---- BLOCK N°{} ----'.format(number))
        resource_extend = []
        for name, _ in block:
            resource = {'name': name, 'file_date': file_date, 'voc_file_type': file_type}
            resource.update(extra)
            resource_extend.append(resource)

        print('BLOCK N°{} :: "{}" file(s) found >> uploading'.format(number, len(block)))
        handles = []
        try:
            files_dict = {}
            for position, (name, path) in enumerate(block):
                handle = open(path, 'rb')
                handles.append(handle)
                # The resources were appended at the end of the dataset, so
                # each upload is addressed with a negative index: the first
                # file of the block is -len(block), the last one is -1.
                files_dict['update__resources__-' + str(len(block) - position) + '__upload'] = (name, handle)
            result = self.ckan.call_action(
                'package_revise',
                {'match': match, 'update__resources__extend': json.dumps(resource_extend)},
                files=files_dict
            )
            print('BLOCK N°{} :: Uploaded file(s) successfully'.format(number))
        except:
            print('ERROR :: Use the "print" for more information')
            return sys.exc_info()[1]
        finally:
            for handle in handles:
                handle.close()
    return result
306 320
def upload_multiple_files(self, dataset_id, path_files, date_files, type_files, ignore_repetition=False, **kwargs):
    """
    PURPOSE:
        Upload many files to a dataset, one 'resource_create' call per file.

    AVAILABLE PARAMETERS:
        See "GUIA DE SCRIPT.pdf".

    PARAMETERS:
        dataset_id        -- id or name of the target dataset.
        path_files        -- a directory (str) or a list of file paths.
        date_files        -- one 'file_date' for all files, or a list with
                             one value per file.
        type_files        -- one 'voc_file_type' for all files, or a list
                             with one value per file.
        ignore_repetition -- when True, files whose name already exists in
                             the dataset are skipped with a warning instead
                             of aborting the upload.
        **kwargs          -- extra resource fields; a list value is read as
                             one value per file. 'others' may be a tuple
                             (one JSON-encoded entry per file), a list
                             (JSON-encoded once) or a str.

    RETURNS:
        A list with one entry per uploaded file: the 'resource_create'
        result, or the exception object when that upload failed. An error
        message (str) is returned when validation fails, and the exception
        object when the dataset cannot be read.

    STRUCTURE:
        <access_name>.upload_multiple_files(dataset_id = <class 'str'>, path_files = <class 'str'> or <class 'list of strings'>, date_files = <class 'str'> or <class 'list of strings'>, type_files = <class 'str'> or <class 'list of strings'>, param_1 = <class 'param_1'>, ...)
    """
    #-------------------------PACKAGE SHOW-----------------------#
    try:
        resources = self.ckan.action.package_show(id=dataset_id)['resources']
    except:
        print('ERROR obtaining metadata dataset:: Use the "print" for more information')
        return sys.exc_info()[1]
    # Resource names are compared case-insensitively.
    resources_name = [resource['name'].lower() for resource in resources]
    #---------------CASE : "path" or "path_list"-----------------#
    if isinstance(path_files, list):
        if not path_files:
            return 'ERROR:: "path_list is empty"'
        # sorted() leaves the caller's list untouched.
        candidates = [(os.path.basename(path), path) for path in sorted(path_files)]
    elif isinstance(path_files, str):
        if not os.path.isdir(path_files):
            return 'ERROR:: Directory "%s" does not exist' % (path_files)
        found = sorted(
            entry for entry in os.listdir(path_files)
            if os.path.isfile(os.path.join(path_files, entry))
        )
        if not found:
            return "ERROR:: There aren't files in this directory"
        candidates = [(name, os.path.join(path_files, name)) for name in found]
    else:
        return 'ERROR:: "path_files" must be a str or list'

    # Paths are stored here and opened one at a time during the upload, so
    # no handle stays open when validation fails or after a file is sent.
    names = []
    paths = []
    for name, path in candidates:
        if not os.path.isfile(path):
            return 'File "%s" does not exist' % (path)
        if name.lower() in resources_name:
            if not ignore_repetition:
                return 'ERROR:: "%s" file already exist in this dataset' % (name)
            print('WARRING:: "' + str(name) + '" file was ignored because already exist in this dataset')
            continue
        names.append(name)
        paths.append(path)
    #------------------------------------------------------------#
    # per_file: one value per uploaded file. shared: same value for all.
    per_file = {'upload': paths, 'name': names}
    shared = {'package_id': dataset_id}

    if isinstance(date_files, list):
        per_file['file_date'] = date_files
    else:
        shared['file_date'] = date_files

    if isinstance(type_files, list):
        per_file['voc_file_type'] = type_files
    else:
        shared['voc_file_type'] = type_files

    for key, value in kwargs.items():
        # Keys already set above cannot be overridden; 'others' is handled below.
        if key in per_file or key in shared or key == 'others':
            continue
        if isinstance(value, list):
            per_file[key] = value
        else:
            shared[key] = value
    #------------------------------------------#
    if 'others' not in kwargs:
        shared['others'] = ''
    elif isinstance(kwargs['others'], tuple):
        per_file['others'] = [json.dumps(item) for item in kwargs['others']]
    elif isinstance(kwargs['others'], list):
        shared['others'] = json.dumps(kwargs['others'])
    elif isinstance(kwargs['others'], str):
        shared['others'] = kwargs['others']
    else:
        return 'ERROR:: "others" must be a tuple, list or str'
    #------------------------------------------#
    if len(set(len(values) for values in per_file.values())) > 1:
        return 'ERROR:: All lists must be the same length: %s' % (len(names))
    #------------------------------------------------------------#
    print('"{}" file(s) found >> uploading'.format(len(names)))
    # A fresh list per call: the shared ``self.list`` is also used as scratch
    # space by other methods and would add unrelated entries to the result.
    results = []
    for index, name in enumerate(names):
        try:
            send = dict((key, values[index]) for key, values in per_file.items())
            send.update(shared)
            with open(send['upload'], 'rb') as handle:
                send['upload'] = handle
                results.append(self.ckan.action.resource_create(**send))
            print('File #{} :: "{}" was uploaded successfully'.format(index + 1, name))
        except:
            results.append(sys.exc_info()[1])
            print('File #{} :: Error uploading "{}" file'.format(index + 1, name))
    return results
    #------------------------------------------------------------#
438 452
def show(self, type_option, id, **kwargs):
    """
    PURPOSE:
        Fetch one specific object from the repository.

    AVAILABLE PARAMETERS:
        See "GUIA DE SCRIPT.pdf".

    PARAMETERS:
        type_option -- 'dataset', 'resource', 'project', 'collaborator',
                       'member', 'vocabulary', 'tag', 'user' or 'job'.
        id          -- identifier passed to the API action as ``id``.
        **kwargs    -- extra parameters forwarded to the action.

    RETURNS:
        The action result, an error message (str) when ``type_option`` is
        not valid, or the exception object when the call fails.

    STRUCTURE:
        <access_name>.show(type_option = <class 'str'>, id = <class 'str'>, param_1 = <class 'param_1'>, ...)
    """
    if type(type_option) is not str:
        return 'ERROR:: "type_option" must be a str'

    # type_option -> API action that is called for it.
    endpoints = {
        'dataset': 'package_show',
        'resource': 'resource_show',
        'project': 'organization_show',
        'collaborator': 'package_collaborator_list_for_user',
        'member': 'organization_list_for_user',
        'vocabulary': 'vocabulary_show',
        'tag': 'tag_show',
        'user': 'user_show',
        'job': 'job_show',
    }
    if type_option not in endpoints:
        return 'ERROR:: "type_option = %s" is not accepted' % (type_option)

    try:
        if type_option == 'tag' and 'vocabulary_id' not in kwargs:
            print('Missing "vocabulary_id" value: assume it is a free tag')
        return getattr(self.ckan.action, endpoints[type_option])(id=id, **kwargs)
    except:
        return sys.exc_info()[1]
479 493
def search(self, type_option, query=None, **kwargs):
    """
    PURPOSE:
        Search datasets, resources or tags that satisfy some criteria.

    AVAILABLE PARAMETERS:
        See "GUIA DE SCRIPT.pdf".

    PARAMETERS:
        type_option -- 'dataset', 'resource' or 'tag'.
        query       -- dict with the search criteria, or None.
                       'dataset':  every key whose value is not None becomes
                                   a filter key:"value". 'dataset_start_date'
                                   and 'dataset_end_date' must be
                                   'YYYY-MM-DD' strings.
                       'resource': sent as the 'query' parameter.
                                   'file_date_min' and 'file_date_max' must
                                   be 'YYYY-MM-DD' strings.
                       'tag':      query['search'] (str or list) is sent as
                                   the 'query' parameter.
        **kwargs    -- extra parameters forwarded to the API action.
                       'dataset' ignores 'fq', 'fq_list' and
                       'include_private'; 'resource' and 'tag' ignore
                       'fields'.

    RETURNS:
        The action result, an error message (str) when the input is not
        valid, or the exception object when the call fails.

    STRUCTURE:
        <access_name>.search(type_option = <class 'str'>, query = <class 'dict'>, param_1 = <class 'param_1'>, ...)
    """
    if type(type_option) is not str:
        return 'ERROR:: "type_option" must be <str>'
    if type_option not in ('dataset', 'resource', 'tag'):
        return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
    if query is not None and type(query) is not dict:
        return '"query" must be <dict>'

    def _date_error(field):
        # Error message when query[field] is not a 'YYYY-MM-DD' str, else None.
        value = query[field]
        if type(value) is not str:
            return '"%s" must be <str>' % (field)
        try:
            datetime.strptime(value, '%Y-%m-%d')
        except ValueError:
            return '"%s" incorrect: "%s"' % (field, value)
        # strptime also accepts '2020-1-1'; only the zero-padded form is allowed.
        if len(value) != 10:
            return '"%s", must be: <YYYY-MM-DD>' % (field)
        return None

    def _range_error(low, high):
        # Validate both ends of a date range; error message or None.
        for field in (low, high):
            if field in query:
                message = _date_error(field)
                if message is not None:
                    return message
        # Zero-padded ISO dates sort chronologically as plain strings.
        if low in query and high in query and query[low] > query[high]:
            return '"%s" must be greater than "%s"' % (high, low)
        return None

    # Parameters are built in a local dict: the shared ``self.dict`` and
    # ``self.list`` keep values from earlier calls and would add old filters
    # to this search or hide keys of this query.
    params = {}
    try:
        if type_option == 'dataset':
            dotted = ('facet_mincount', 'facet_limit', 'facet_field')
            for key, value in kwargs.items():
                if key in ('fq', 'fq_list', 'include_private'):
                    continue
                # A dot is not valid in a keyword name: facet_x -> facet.x
                params[key.replace('_', '.') if key in dotted else key] = value

            if query is not None:
                message = _range_error('dataset_start_date', 'dataset_end_date')
                if message is not None:
                    return message
                date_keys = [key for key in ('dataset_start_date', 'dataset_end_date') if key in query]
                # Date filters first, then the remaining keys in query order.
                filters = [key + ':"' + query[key] + '"' for key in date_keys]
                for key, value in query.items():
                    if value is not None and key not in date_keys:
                        filters.append(str(key) + ':"' + str(value) + '"')
                params['fq_list'] = filters

            return self.ckan.action.package_search(include_private=True, **params)

        if type_option == 'resource':
            for key, value in kwargs.items():
                if key != 'fields':
                    params[key] = value

            if query is not None:
                message = _range_error('file_date_min', 'file_date_max')
                if message is not None:
                    return message
                params['query'] = query

            return self.ckan.action.resources_search(**params)

        # type_option == 'tag'
        for key, value in kwargs.items():
            if key != 'fields':
                params[key] = value

        if 'vocabulary_id' not in kwargs:
            print('Missing "vocabulary_id" value: tags that don’t belong to any vocabulary')
        else:
            print('Only tags that belong to "{}" vocabulary'.format(kwargs['vocabulary_id']))

        if query is not None and 'search' in query:
            if type(query['search']) is list or type(query['search']) is str:
                params['query'] = query['search']
            else:
                return '"search" must be <list> or <str>'

        return self.ckan.action.tag_search(**params)

    except:
        return sys.exc_info()[1]
615 629
def create(self, type_option, select=None, **kwargs):
    """
    PURPOSE:
        Create an object in the repository.

    AVAILABLE PARAMETERS:
        See "GUIA DE SCRIPT.pdf".

    PARAMETERS:
        type_option -- 'dataset', 'project', 'member', 'collaborator',
                       'vocabulary', 'tag', 'user' or 'views'.
        select      -- only read for 'views': 'resource' creates the default
                       views of one resource (the 'package' argument is not
                       sent), 'dataset' creates the default views of a
                       whole dataset.
        **kwargs    -- parameters forwarded to the API action.

    RETURNS:
        The action result, an error message (str) when ``type_option`` or
        ``select`` is not valid, or the exception object when the call fails.

    STRUCTURE:
        <access_name>.create(type_option = <class 'str'>, param_1 = <class 'param_1'>, ...)
    """
    if type(type_option) is not str:
        return 'ERROR:: "type_option" must be <str>'

    # type_option -> API action that receives kwargs unchanged.
    endpoints = {
        'dataset': 'package_create',
        'project': 'organization_create',
        'member': 'organization_member_create',
        'collaborator': 'package_collaborator_create',
        'vocabulary': 'vocabulary_create',
        'tag': 'tag_create',
        'user': 'user_create',
    }
    try:
        if type_option in endpoints:
            return getattr(self.ckan.action, endpoints[type_option])(**kwargs)

        if type_option == 'views':
            if select == 'resource':
                # Built in a local dict: the shared ``self.dict`` keeps keys
                # from earlier calls and would send them with this request.
                params = dict((key, value) for key, value in kwargs.items() if key != 'package')
                return self.ckan.action.resource_create_default_resource_views(**params)
            if select == 'dataset':
                return self.ckan.action.package_create_default_resource_views(**kwargs)
            return 'ERROR:: "select = %s" is not accepted' % (select,)

        return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
    except:
        return sys.exc_info()[1]
661 675
def patch(self, type_option, **kwargs):
    """
    PURPOSE:
        Update an existing object of the repository.

    AVAILABLE PARAMETERS:
        See "GUIA DE SCRIPT.pdf".

    PARAMETERS:
        type_option -- 'dataset', 'project', 'resource', 'member' or
                       'collaborator'. The last two call the same
                       '..._create' actions used by ``create``.
        **kwargs    -- parameters forwarded to the API action.

    RETURNS:
        The action result, an error message (str) when ``type_option`` is
        not valid, or the exception object when the call fails.

    STRUCTURE:
        <access_name>.patch(type_option = <class 'str'>, param_1 = <class 'param_1'>, ...)
    """
    if type(type_option) is not str:
        return 'ERROR:: "type_option" must be <str>'

    # type_option -> API action that is called for it.
    endpoints = {
        'dataset': 'package_patch',
        'project': 'organization_patch',
        'resource': 'resource_patch',
        'member': 'organization_member_create',
        'collaborator': 'package_collaborator_create',
    }
    if type_option not in endpoints:
        return 'ERROR:: "type_option = %s" is not accepted' % (type_option)

    try:
        return getattr(self.ckan.action, endpoints[type_option])(**kwargs)
    except:
        return sys.exc_info()[1]
692 706
def delete(self, type_option, select=None, **kwargs):
    """
    PURPOSE:
        Delete and/or purge an object of the repository.

    AVAILABLE PARAMETERS:
        See "GUIA DE SCRIPT.pdf".

    PARAMETERS:
        type_option -- 'dataset', 'project', 'resource', 'vocabulary', 'tag'
                       or 'user'.
        select      -- required for 'dataset' and 'project': 'delete' or
                       'purge'. Not read for the other types.
        **kwargs    -- parameters forwarded to the API action.

    RETURNS:
        The action result, an error message (str) when ``type_option`` or
        ``select`` is not valid, or the exception object when the call fails.

    STRUCTURE:
        <access_name>.delete(type_option = <class 'str'>, param_1 = <class 'param_1'>, ...)
    """
    if type(type_option) is not str:
        return 'ERROR:: "type_option" must be <str>'

    # Types that need "select": (select value, API action) pairs.
    with_select = {
        'dataset': (('delete', 'package_delete'), ('purge', 'dataset_purge')),
        'project': (('delete', 'organization_delete'), ('purge', 'organization_purge')),
    }
    # Types that map straight to one API action.
    direct = {
        'resource': 'resource_delete',
        'vocabulary': 'vocabulary_delete',
        'tag': 'tag_delete',
        'user': 'user_delete',
    }
    try:
        if type_option in with_select:
            if select is None:
                return 'ERROR:: "select" must not be "None"'
            for mode, endpoint in with_select[type_option]:
                if mode == select:
                    return getattr(self.ckan.action, endpoint)(**kwargs)
            return 'ERROR:: "select = %s" is not accepted' % (select)

        if type_option in direct:
            return getattr(self.ckan.action, direct[type_option])(**kwargs)

        return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
    except:
        return sys.exc_info()[1]
741 755
def f_status_note(self, total, result, path):
    """
    PURPOSE:
        Write 'status_note.txt' listing the downloaded and the failed files.

    PARAMETERS:
        total  -- dict whose 'name' entry lists every requested file name.
        result -- dict whose 'name' entry lists the names that were
                  downloaded.
        path   -- folder where the note is written. The file name is
                  appended as-is, so it must end with a path separator.

    The file is overwritten if it already exists.
    """
    downloaded = result['name']
    requested = total['name']
    failed_count = len(requested) - len(downloaded)

    lines = ['DOWNLOADED FILE(S): "%s"' % (len(downloaded))]
    lines.extend(' - ' + name for name in downloaded)
    lines.append('')

    lines.append('FAILED FILE(S): "%s"' % (failed_count))
    if failed_count != 0:
        done = set(downloaded)
        lines.extend(' - ' + name for name in requested if name not in done)
    else:
        lines.append(' "None"')

    # The file is opened in text mode, which already converts '\n' to the
    # platform line ending; writing os.linesep would give '\r\r\n' on Windows.
    # The context manager makes sure the note is flushed and closed.
    with open(path + 'status_note.txt', 'w') as file_txt:
        file_txt.write('\n'.join(lines) + '\n')
760 774
def f_name(self, name_dataset, ext, tempdir):
    """
    Return a file/folder name that does not yet exist inside *tempdir*.

    Parameters:
        name_dataset -- base name to use.
        ext          -- extension appended to the name ('' for a folder).
        tempdir      -- directory prefix; names are appended to it as-is, so
                        it must end with a path separator (or be '' for the
                        current directory).

    Returns "<name_dataset><ext>" when that path is free, otherwise the first
    free "<name_dataset>(N)<ext>" with N = 1, 2, 3, ...

    The search uses local state only. The previous implementation drove the
    loop from instance attributes that it left exhausted, so a later call on
    the same object could skip the search and return the previous name.
    """
    candidate = name_dataset + ext
    counter = 0
    while os.path.exists(tempdir + candidate):
        counter += 1
        candidate = name_dataset + '(' + str(counter) + ')' + ext
    return candidate
776 790
def f_zipdir(self, path, ziph, zip_name):
    """
    Add every file found under *path* to the open archive *ziph*.

    Parameters:
        path     -- directory that is walked recursively.
        ziph     -- archive object exposing write(filename, arcname).
        zip_name -- archive name, used only in the progress messages.

    Archive member names are computed relative to the parent of *path*, so
    the top folder itself appears inside the archive.
    """
    archive_root = os.path.join(path, '..')
    for current_dir, _, file_names in os.walk(path):
        print('.....')
        print('Creating: "{}" >>'.format(zip_name))
        progress = tqdm(iterable=file_names, total=len(file_names))
        for file_name in progress:
            source = os.path.join(current_dir, file_name)
            ziph.write(source, os.path.relpath(source, archive_root))
        print('Created >>')
785 799
def download_by_step(self, response, tempdir_name):
    """
    Stream one resource to disk in chunks of self.chunk_size.

    Parameters:
        response     -- dict with at least 'url' (download address) and
                        'name' (target file name). 'url' is rewritten in
                        place when the host substitution below applies.
        tempdir_name -- directory prefix; the file name is appended as-is.

    The file is written only when the server answers 200. Request errors are
    swallowed on purpose (best effort): the caller decides success by listing
    the files that exist afterwards. For that reason, a file left truncated
    by a failed stream is removed so it is not counted as downloaded.
    """
    # "urlparse" is the function on Python 3, but the module on Python 2
    # ("import urlparse"); resolve the callable in both cases.
    parse_url = getattr(urlparse, 'urlparse', urlparse)
    base = parse_url(self.url)
    source = parse_url(response['url'])
    # ---------- REPLACE URL --------- #
    # Resources registered under www.igp.gob.pe are fetched through the
    # host this client was configured with, when that host is different.
    if base.netloc != 'www.igp.gob.pe' and source.netloc == 'www.igp.gob.pe':
        response['url'] = response['url'].replace(source.scheme + '://' + source.netloc,
                                                  base.scheme + '://' + base.netloc)
    #----------------------------------#
    target = tempdir_name + response['name']
    started = False  # True once this call has created/truncated the target
    try:
        with requests.get(response['url'], stream=True, headers={'Authorization': self.Authorization}, verify=self.verify) as resp:
            if resp.status_code == 200:
                with open(target, 'wb') as file:
                    started = True
                    for chunk in resp.iter_content(chunk_size = self.chunk_size):
                        if chunk:
                            file.write(chunk)
    except requests.exceptions.RequestException:
        # Only delete what this call wrote; never touch a pre-existing file.
        if started and os.path.exists(target):
            os.remove(target)
796 815
def download_files(self, **kwargs):
    '''
    PURPOSE:
        Download the existing files of a dataset.

    AVAILABLE PARAMETERS:
        See: "GUIA DE SCRIPT.pdf"
        Handled locally (not sent to the API):
            zip         -- <bool> pack the downloaded files into "<id>.zip".
            status_note -- <bool> write "status_note.txt" next to the files.
            path        -- <str> existing, writable destination directory
                           (default: current directory).
        Every other keyword (including "id") is stored in self.dict and
        forwarded to the "url_resources" API action.

    STRUCTURE:
        <access_name>.download_files(id = <class 'str'>, param_1 = <class 'param_1'>, ...)

    RETURNS:
        A status string ('DOWNLOAD FINISHED', 'NO FILES WERE DOWNLOADED',
        'FILES NOT FOUND' or an 'ERROR:: ...' message), or the exception
        raised by the API call.
    '''
    dict_local = {}
    #----------------------------------------------#
    # Boolean switches, validated in the order: zip, status_note.
    for flag in ('zip', 'status_note'):
        if flag in kwargs:
            if not isinstance(kwargs[flag], bool):
                return 'ERROR:: "%s" must be: <class "bool">' % (flag)
            dict_local[flag] = kwargs[flag]
        else:
            dict_local[flag] = False
    #----------------------------------------------#
    if 'path' in kwargs:
        if not isinstance(kwargs['path'], str):
            return 'ERROR:: "path" must be: <class "str">'
        if not os.path.isdir(kwargs['path']):
            return 'ERROR:: "path" does not exist'
        if kwargs['path'][-1:] != self.separator:
            dict_local['path'] = kwargs['path'] + self.separator
        else:
            dict_local['path'] = kwargs['path']

        # Probe write access by creating and deleting a uniquely named file.
        # IOError/OSError covers Python 2 and 3 (PermissionError included).
        txt = dict_local['path'] + datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f") + '.txt'
        try:
            with open(txt, 'w'):
                pass
            os.remove(txt)
        except (IOError, OSError):
            return 'ERROR:: Access denied, you are not authorized to write files: "%s"' % (dict_local['path'])
    else:
        dict_local['path'] = ''
    #----------------------------------------------#
    # NOTE(review): self.dict is defined outside this method and is updated
    # in place, so keys from earlier calls may still be present - confirm.
    for key, value in kwargs.items():
        if key not in dict_local:
            self.dict[key] = value
    try:
        response = getattr(self.ckan.action, 'url_resources')(**self.dict)
    except Exception as exc_value:
        # The error is handed back to the caller instead of being raised.
        return exc_value

    if len(response) == 0:
        return 'FILES NOT FOUND'

    #--------------TEMP PATH---------------#
    temp_root = None  # only set when a temporary directory is created
    if dict_local['zip']:
        temp_root = tempfile.mkdtemp(prefix=kwargs['id'] + '-')
        download_root = temp_root + self.separator + kwargs['id']
        os.mkdir(download_root)
    else:
        folder = self.f_name(kwargs['id'], '', dict_local['path'])
        download_root = dict_local['path'] + folder
        os.mkdir(download_root)
    dir_name = download_root + self.separator

    try:
        #-----------DOWNLOAD FILES-------------#
        print('.....')
        print('Downloading "{}" file(s) >>'.format(len(response)))
        name_total = {'name': []}
        # Leaving the "with" block waits for every submitted download.
        with concurrent.futures.ThreadPoolExecutor() as executor:
            for u in tqdm(iterable=response, total=len(response)):
                name_total['name'].append(u['name'])
                executor.submit(self.download_by_step, u, dir_name)
        # Success is measured by what actually landed on disk.
        name_check = {'name': [f for f in os.listdir(dir_name)
                               if os.path.isfile(os.path.join(dir_name, f))]}
        print('"{}" downloaded file(s) successfully >>'.format(len(name_check['name'])))
        #--------------------------------------#
        if len(name_check['name']) == 0:
            # Nothing arrived: drop the empty destination folder (the
            # temporary directory, if any, is removed in "finally").
            if not dict_local['zip'] and os.path.exists(download_root):
                shutil.rmtree(download_root)
            return 'NO FILES WERE DOWNLOADED'

        #----------Status Note---------#
        if dict_local['status_note']:
            print('.....')
            print('Creating: "status_note.txt" >>')
            self.f_status_note(name_total, name_check, dir_name)
            print('Created>>')
        #----------ZIP CREATE----------#
        if dict_local['zip']:
            zip_name = self.f_name(kwargs['id'], '.zip', dict_local['path'])
            # The context manager closes the archive even if zipping fails.
            with zipfile.ZipFile(dict_local['path'] + zip_name, 'w', zipfile.ZIP_DEFLATED, allowZip64=True) as ziph:
                self.f_zipdir(dir_name, ziph, zip_name)
        #------------------------------#
        print('.....')
        return 'DOWNLOAD FINISHED'
    finally:
        # Delete the temporary path on every exit, including errors.
        if temp_root is not None and os.path.exists(temp_root):
            shutil.rmtree(temp_root)
General Comments 0
You need to be logged in to leave comments. Login now