##// END OF EJS Templates
v2.9.2 :: Uncomment 'upload_multiple_files_advance' function
eynilupu -
r16:39b05cc1efce
parent child
Show More
1 NO CONTENT: modified file, binary diff hidden
@@ -1,1046 +1,1044
1 1 from ckanapi import RemoteCKAN
2 2 from datetime import datetime
3 3 from tqdm import tqdm
4 4 from CKAN_JRO import logic_download
5 5 from CKAN_JRO import resource
6 6 #from ckanapi.errors import NotAuthorized, NotFound, ValidationError, SearchQueryError, SearchError, CKANAPIError, ServerIncompatibleError
7 7 import sys
8 8 import platform
9 9 import os
10 10 import tempfile
11 11 import shutil
12 12 import zipfile
13 13 import concurrent.futures
14 14 import requests
15 15 import json
16 16 #import pathlib
17 17 import uuid
18 18
19 19 if sys.version_info.major == 3:
20 20 from urllib.parse import urlparse
21 21 else:
22 22 import urlparse
23 23
24 24 class JROAPI():
25 25 """
26 26 FINALIDAD:
27 27 Script para administrar y obtener la data del repositorio por medio de APIs.
28 28
29 29 REQUISITOS PREVIOS:
30 30 - Paso 1: Tener "pip [Python 2]" o "pip3 [Python 3]" instalado:
31 31 - Paso 2: Instalar lo siguiente como administrador:
32 32 En Python 2
33 33 - pip install ckanapi==4.5
34 34 - pip install requests
35 35 - pip install futures
36 36 - pip install tqdm
37 37 En Python > 3
38 38 - pip3 install ckanapi==4.5
39 39 - pip3 install requests
40 40 - pip3 install tqdm
41 41
42 42 FUNCIONES DISPONIBLES:
43 43 - action
44 44 - upload_file
45 45 - upload_multiple_files
46 46 - upload_multiple_files_advance
47 47 - show
48 48 - search
49 49 - create
50 50 - patch
51 51 - delete
52 52 - download_files
53 53
54 54 EJEMPLOS:
55 55 #1:
56 56 with JROAPI('http://demo.example.com', Authorization='#########') as <access_name>:
57 57 ... some operation(s) ...
58 58 #2:
59 59 <access_name> = JROAPI('http://example.com', Authorization='#########')
60 60 ... some operation(s) ...
61 61 <access_name>.ckan.close()
62 62
63 63 REPORTAR ALGUN PROBLEMA:
64 64 Debe enviar un correo a eynilupu@igp.gob.pe detallando los siguientes pasos:
65 65 1) Correo para contactarlo
66 66 2) Descripcion del problema
67 67 3) ¿En que paso o seccion encontro el problema?
68 68 4) ¿Cual era el resultado que usted esperaba?
69 69 """
def __init__(self, url, Authorization=None, secure=True):
    """Create the CKAN client used by every other method.

    Args:
        url: Base URL of the CKAN repository.
        Authorization: API key forwarded to ``RemoteCKAN`` as ``apikey``.
        secure: Only the literal ``False`` disables TLS certificate
            verification (through a dedicated ``requests.Session``); any
            other value lets ``RemoteCKAN`` build its own session.
    """
    self.verify = secure
    # bool cannot be subclassed, so "falsy and a bool" is exactly "is False".
    if secure is False:
        session = requests.Session()
        session.verify = False
    else:
        session = None

    self.url = url
    ua = 'CKAN_JRO/2.9.2 (+' + str(self.url) + ')'
    self.ckan = RemoteCKAN(self.url, apikey=Authorization, user_agent=ua, session=session)
    self.Authorization = Authorization

    # os.sep already yields '\\' on Windows and '/' elsewhere.
    self.separator = os.sep

    self.chunk_size = 1024
    # Scratch attributes kept for the methods that still read them.
    self.list = []
    self.dict = {}
    self.str = ''
    self.check = 1
    self.cont = 0
97 97
def __enter__(self):
    """Enter the ``with`` block and hand back this same instance."""
    return self
100 100
def __exit__(self, *args):
    """Leave the ``with`` block by closing the underlying CKAN client.

    The exception details received in ``args`` are ignored, and nothing is
    returned, so an exception raised inside the block keeps propagating.
    """
    client = self.ckan
    client.close()
103 103
def action(self, action, **kwargs):
    """Call any API action exposed by the repository.

    The list of available actions is documented in "GUIA DE SCRIPT.pdf".

    Example:
        <access_name>.action(<consuming API>, param_1=<value>, ...)

    Args:
        action: Name of the API action to call.
        **kwargs: Parameters forwarded to the action. For
            ``package_search`` the keys ``facet_mincount``, ``facet_limit``
            and ``facet_field`` are renamed to their dotted form
            (``facet.mincount`` ...) before the call.

    Returns:
        The value returned by the action, or the exception instance when
        the call fails.
    """
    if action == 'package_search':
        # Dotted names cannot be written as Python keyword arguments, so
        # callers use underscores and they are translated here.
        for facet in ('facet_mincount', 'facet_limit', 'facet_field'):
            if facet in kwargs:
                kwargs[facet.replace('_', '.')] = kwargs.pop(facet)

    try:
        return getattr(self.ckan.action, action)(**kwargs)
    except Exception as exc:
        # Errors are returned instead of raised; KeyboardInterrupt and
        # SystemExit are deliberately left to propagate.
        return exc
129 129
def upload_file(self, dataset_id, file_date, file_type, file_path=False, url_or_path=False, ignore_repetition=False, **kwargs):
    """Create a single resource (optionally with an attached file) in a dataset.

    The available parameters are documented in "GUIA DE SCRIPT.pdf".

    Usage:
        <access_name>.upload_file(dataset_id=<str>, file_date=<str>,
            file_type=<str>, file_path=<str> | url_or_path=<str>, ...)

    Args:
        dataset_id: Dataset receiving the resource (sent as ``package_id``).
        file_date: Sent as the ``file_date`` field.
        file_type: Sent as the ``voc_file_type`` field.
        file_path: Local file to upload. Mutually exclusive with
            ``url_or_path``.
        url_or_path: Value sent as the resource ``url``.
        ignore_repetition: When true, a resource whose name already exists
            in the dataset only triggers a warning instead of an error.
        **kwargs: Extra resource fields. ``package_id``, ``upload`` and
            ``voc_file_type`` are ignored; a list given as ``others`` is
            JSON-encoded.

    Returns:
        The ``resource_create`` response, an error string for invalid
        input, or the exception instance when an API call fails.
    """
    # TODO: handle keyboard interruption explicitly (it now propagates).
    reserved = ('package_id', 'upload', 'voc_file_type')
    # Built per call: the request must not inherit fields from earlier calls.
    payload = dict((key, value) for key, value in kwargs.items() if key not in reserved)

    if 'others' not in kwargs:
        payload['others'] = ''
    elif isinstance(kwargs['others'], list):
        payload['others'] = json.dumps(kwargs['others'])

    has_file = isinstance(file_path, str)
    has_url = isinstance(url_or_path, str)
    if has_file and has_url:
        return 'ERROR:: Choose one: "file_path" or "url_or_path" parameters'

    if has_file:
        if not os.path.isfile(file_path):
            return 'File "%s" not exist' % (file_path)
        payload['name'] = os.path.basename(file_path)
    elif has_url:
        payload['url'] = url_or_path
        if 'name' not in payload:
            payload['name'] = os.path.basename(url_or_path)
    else:
        return 'ERROR: Verify "file_path" or "url_or_path" parameters: <class "str"> or choose one'

    #-------------------------PACKAGE SHOW-----------------------#
    try:
        resources = self.ckan.action.package_show(id=dataset_id)['resources']
    except Exception as exc:
        print('ERROR obtaining metadata dataset:: Use the "print" for more information')
        return exc

    existing_names = set(item['name'].lower() for item in resources)
    if payload['name'].lower() in existing_names:
        if not ignore_repetition:
            return 'ERROR:: "%s" resource already exist in this dataset' % (payload['name'])
        print('WARRING:: "' + str(payload['name']) + '" resource already exist in this dataset')
    #------------------------------------------------------------#

    if not has_file:
        try:
            return self.ckan.action.resource_create(package_id=dataset_id, file_date=file_date, voc_file_type=file_type, **payload)
        except Exception as exc:
            return exc

    # The file is opened only for the duration of the request so the handle
    # is always released, whatever the outcome.
    with open(file_path, 'rb') as handle:
        try:
            return self.ckan.action.resource_create(package_id=dataset_id, file_date=file_date, voc_file_type=file_type, upload=handle, **payload)
        except Exception as exc:
            return exc
199 199
def upload_multiple_files_advance(self, dataset_id, path_files, file_date, file_type, max_size=100, max_count=500, ignore_repetition=False, **kwargs):
    """Upload several files to a dataset in blocks, one ``package_revise`` call per block.

    The available parameters are documented in "GUIA DE SCRIPT.pdf".

    Usage:
        <access_name>.upload_multiple_files_advance(dataset_id=<str>,
            path_files=<str | list of str>, file_date=<str>,
            file_type=<str>, ...)

    Args:
        dataset_id: Dataset id or name. A value that parses as a UUID is
            matched by ``id``, anything else by ``name``.
        path_files: Either a directory (every regular file directly inside
            it is taken) or a list of file paths. Files are processed in
            sorted order; the caller's list is not modified.
        file_date: Stored as ``file_date`` on every new resource.
        file_type: Stored as ``voc_file_type`` on every new resource.
        max_size: Maximum accumulated size of a block, in MB. A single file
            larger than this is rejected.
        max_count: Maximum number of files per block (1 to 999).
        ignore_repetition: When true, files whose name already exists in
            the dataset are skipped with a warning instead of aborting.
        **kwargs: Extra fields copied to every resource. ``package_id``,
            ``upload``, ``voc_file_type`` and ``name`` are ignored; a list
            given as ``others`` is JSON-encoded.

    Returns:
        The response of the last ``package_revise`` call, an error string
        for invalid input, or the exception instance when an API call fails
        (remaining blocks are then not sent).
    """
    # TODO: handle keyboard interruption explicitly (it now propagates).
    #-------------------------PACKAGE SHOW-----------------------#
    try:
        resources = self.ckan.action.package_show(id=dataset_id)['resources']
    except Exception as exc:
        print('ERROR obtaining metadata dataset:: Use the "print" for more information')
        return exc
    existing_names = set(item['name'].lower() for item in resources)

    #----------------------EXTRA FIELDS--------------------------#
    reserved = ('package_id', 'upload', 'voc_file_type', 'name')
    # Built per call: resources must not inherit fields from earlier calls.
    extra = dict((key, value) for key, value in kwargs.items() if key not in reserved)
    if 'others' not in kwargs:
        extra['others'] = ''
    elif isinstance(kwargs['others'], list):
        extra['others'] = json.dumps(kwargs['others'])

    #---------------CASE: "path" or "path_list"------------------#
    # Only (name, path, size) is recorded here. Files are opened right
    # before their block is sent, so an early return cannot leak handles.
    pending = []
    if isinstance(path_files, list):
        if not path_files:
            return 'ERROR:: "path_list is empty"'
        for path in sorted(path_files):
            if not os.path.isfile(path):
                return 'File "%s" does not exist' % (path)
            name = os.path.basename(path)
            if name.lower() in existing_names:
                if not ignore_repetition:
                    return 'ERROR:: "%s" file already exist in this dataset' % (name)
                print('WARRING:: "' + str(name) + '" file was ignored because already exist in this dataset')
            else:
                pending.append((name, path, os.stat(path).st_size))
    elif isinstance(path_files, str):
        if not os.path.isdir(path_files):
            return 'ERROR:: Directory "%s" does not exist' % (path_files)
        names = sorted(f for f in os.listdir(path_files) if os.path.isfile(os.path.join(path_files, f)))
        if not names:
            return "ERROR:: There aren't files in this directory"
        for name in names:
            if name.lower() in existing_names:
                if not ignore_repetition:
                    return 'ERROR:: "%s" file already exist in this dataset' % (name)
                print('WARRING:: "' + name + '" file was ignored because already exist in this dataset')
            else:
                path = os.path.join(path_files, name)
                pending.append((name, path, os.stat(path).st_size))
    else:
        return 'ERROR:: "path_files" must be a str or list'

    #----------------------DATASET MATCH-------------------------#
    try:
        uuid.UUID(str(dataset_id), version=4)
        match = {'id': str(dataset_id)}
    except ValueError:
        match = {'name': str(dataset_id)}
    # json.dumps escapes quotes/backslashes that plain concatenation would not.
    match_json = json.dumps(match)

    #-------------------SPLIT INTO BLOCKS------------------------#
    blocks = [[]]
    block_bytes = 0
    for name, path, size in pending:
        byte_limit = 1024 * 1024 * float(max_size)
        if size > byte_limit:
            return 'ERROR:: The size of the "%s" file is %sMB aprox, please change "max_size" value' % (name, str(round(size / (1024.0 * 1024.0), 2)))
        if not 1 <= int(max_count) <= 999:
            return 'ERROR:: The count of the number of files must be between 1 and 999, please change "max_count" value'

        block_bytes = block_bytes + size
        if block_bytes > byte_limit or len(blocks[-1]) + 1 > int(max_count):
            # The current block is full: this file opens a new one.
            blocks.append([])
            block_bytes = size
        blocks[-1].append((name, path))

    if not blocks[0]:
        return "ERROR:: No file(s) found to upload"

    #------------------------UPLOAD------------------------------#
    print('BLOCK(S) IN TOTAL:: {}'.format(len(blocks)))
    result = None
    for number, block in enumerate(blocks, 1):
        print('---- BLOCK N°{} ----'.format(number))
        resource_extend = []
        files_dict = {}
        handles = []
        try:
            for position, (name, path) in enumerate(block):
                entry = {'name': name, 'file_date': file_date, 'voc_file_type': file_type}
                entry.update(extra)
                handle = open(path, 'rb')
                handles.append(handle)
                # Resources are appended to the dataset, so each upload is
                # addressed by its negative index counted from the end.
                files_dict['update__resources__-' + str(len(block) - position) + '__upload'] = (name, handle)
                resource_extend.append(entry)

            print('BLOCK N°{} :: "{}" file(s) found >> uploading'.format(number, len(block)))
            try:
                result = self.ckan.call_action(
                    'package_revise',
                    {'match': match_json, 'update__resources__extend': json.dumps(resource_extend)},
                    files=files_dict
                )
            except Exception as exc:
                print('ERROR :: Use the "print" for more information')
                return exc
            print('BLOCK N°{} :: Uploaded file(s) successfully'.format(number))
        finally:
            for handle in handles:
                handle.close()
    return result
339 337
def upload_multiple_files(self, dataset_id, path_files, date_files, type_files, ignore_repetition=False, **kwargs):
    """Upload several files to a dataset, one ``resource_create`` call per file.

    The available parameters are documented in "GUIA DE SCRIPT.pdf".

    Usage:
        <access_name>.upload_multiple_files(dataset_id=<str>,
            path_files=<str | list of str>,
            date_files=<str | list of str>,
            type_files=<str | list of str>, ...)

    Args:
        dataset_id: Dataset receiving the resources (sent as ``package_id``).
        path_files: Either a directory (every regular file directly inside
            it is taken) or a list of file paths. Files are processed in
            sorted order; the caller's list is not modified.
        date_files: ``file_date`` value; a list supplies one value per file.
        type_files: ``voc_file_type`` value; a list supplies one value per
            file.
        ignore_repetition: When true, files whose name already exists in
            the dataset are skipped with a warning instead of aborting.
        **kwargs: Extra resource fields; a list value supplies one value
            per file, anything else is shared by all files. ``others`` may
            be a tuple (one JSON-encoded entry per file), a list
            (JSON-encoded, shared) or a str (shared).

    Returns:
        A new list with one entry per uploaded file (the API response, or
        the exception instance when that upload failed); or an error string
        for invalid input; or the exception instance when the dataset
        lookup fails.
    """
    # TODO: handle keyboard interruption explicitly (it now propagates).
    #-------------------------PACKAGE SHOW-----------------------#
    try:
        resources = self.ckan.action.package_show(id=dataset_id)['resources']
    except Exception as exc:
        print('ERROR obtaining metadata dataset:: Use the "print" for more information')
        return exc
    existing_names = set(item['name'].lower() for item in resources)

    # Per-file values, index-aligned. 'upload' holds paths here; each file
    # is opened only while its own request is in flight.
    per_file = {'upload': [], 'name': []}

    #---------------CASE: "path" or "path_list"------------------#
    if isinstance(path_files, list):
        if not path_files:
            return 'ERROR:: "path_list is empty"'
        for path in sorted(path_files):
            if not os.path.isfile(path):
                return 'File "%s" does not exist' % (path)
            name = os.path.basename(path)
            if name.lower() in existing_names:
                if not ignore_repetition:
                    return 'ERROR:: "%s" file already exist in this dataset' % (name)
                print('WARRING:: "' + str(name) + '" file was ignored because already exist in this dataset')
            else:
                per_file['upload'].append(path)
                per_file['name'].append(name)
    elif isinstance(path_files, str):
        if not os.path.isdir(path_files):
            return 'ERROR:: Directory "%s" does not exist' % (path_files)
        names = sorted(f for f in os.listdir(path_files) if os.path.isfile(os.path.join(path_files, f)))
        if not names:
            return "ERROR:: There aren't files in this directory"
        for name in names:
            if name.lower() in existing_names:
                if not ignore_repetition:
                    return 'ERROR:: "%s" file already exist in this dataset' % (name)
                print('WARRING:: "' + str(name) + '" file was ignored because already exist in this dataset')
            else:
                per_file['upload'].append(os.path.join(path_files, name))
                per_file['name'].append(name)
    else:
        return 'ERROR:: "path_files" must be a str or list'

    #--------------------SHARED / PER-FILE-----------------------#
    shared = {'package_id': dataset_id}
    if isinstance(date_files, list):
        per_file['file_date'] = date_files
    else:
        shared['file_date'] = date_files

    if isinstance(type_files, list):
        per_file['voc_file_type'] = type_files
    else:
        shared['voc_file_type'] = type_files

    for key, value in kwargs.items():
        # Keys already decided above (and 'others', handled below) win.
        if key in per_file or key in shared or key == 'others':
            continue
        if isinstance(value, list):
            per_file[key] = value
        else:
            shared[key] = value

    if 'others' not in kwargs:
        shared['others'] = ''
    else:
        others = kwargs['others']
        if isinstance(others, tuple):
            per_file['others'] = [json.dumps(item) for item in others]
        elif isinstance(others, list):
            shared['others'] = json.dumps(others)
        elif isinstance(others, str):
            shared['others'] = others
        else:
            return 'ERROR:: "others" must be a tuple, list or str'

    if len(set(len(values) for values in per_file.values())) > 1:
        return 'ERROR:: All lists must be the same length: %s' % (len(per_file['name']))

    #------------------------UPLOAD------------------------------#
    # A fresh list per call: results of earlier calls are never mixed in.
    results = []
    print('"{}" file(s) found >> uploading'.format(len(per_file['name'])))
    for index, name in enumerate(per_file['name']):
        try:
            send = dict((key, values[index]) for key, values in per_file.items())
            send.update(shared)
            with open(send['upload'], 'rb') as handle:
                send['upload'] = handle
                results.append(self.ckan.action.resource_create(**send))
            print('File #{} :: "{}" was uploaded successfully'.format(index + 1, name))
        except Exception as exc:
            # Best effort: record the failure and continue with the next file.
            results.append(exc)
            print('File #{} :: Error uploading "{}" file'.format(index + 1, name))
    return results
471 469
def show(self, type_option, id, **kwargs):
    """Fetch one specific object from the repository.

    The available parameters are documented in "GUIA DE SCRIPT.pdf".

    Usage:
        <access_name>.show(type_option=<str>, id=<str>, ...)

    Args:
        type_option: Kind of object: ``dataset``, ``resource``,
            ``project``, ``collaborator``, ``member``, ``vocabulary``,
            ``tag``, ``user`` or ``job``.
        id: Identifier forwarded to the API action as ``id``.
        **kwargs: Extra parameters forwarded to the API action.

    Returns:
        The API response, an error string for an invalid ``type_option``,
        or the exception instance when the API call fails.
    """
    if not isinstance(type_option, str):
        return 'ERROR:: "type_option" must be a str'

    # type_option -> CKAN action that serves it.
    actions = {
        'dataset': 'package_show',
        'resource': 'resource_show',
        'project': 'organization_show',
        'collaborator': 'package_collaborator_list_for_user',
        'member': 'organization_list_for_user',
        'vocabulary': 'vocabulary_show',
        'tag': 'tag_show',
        'user': 'user_show',
        'job': 'job_show',
    }
    if type_option not in actions:
        return 'ERROR:: "type_option = %s" is not accepted' % (type_option)

    if type_option == 'tag' and 'vocabulary_id' not in kwargs:
        print('Missing "vocabulary_id" value: assume it is a free tag')

    try:
        return getattr(self.ckan.action, actions[type_option])(id=id, **kwargs)
    except Exception as exc:
        # Errors are returned instead of raised; KeyboardInterrupt and
        # SystemExit are deliberately left to propagate.
        return exc
512 510
def search(self, type_option, query=None, **kwargs):
    """Search datasets, resources or tags that satisfy some criteria.

    The available parameters are documented in "GUIA DE SCRIPT.pdf".

    Usage:
        <access_name>.search(type_option=<str>, query=<dict>, ...)

    Args:
        type_option: ``dataset``, ``resource`` or ``tag``.
        query: Optional dict of criteria.
            * ``dataset``: every non-None item becomes a ``key:"value"``
              entry of ``fq_list``; ``dataset_start_date`` and
              ``dataset_end_date`` must be ``YYYY-MM-DD`` strings.
            * ``resource``: forwarded whole as ``query``;
              ``file_date_min`` and ``file_date_max`` must be
              ``YYYY-MM-DD`` strings.
            * ``tag``: the ``search`` item (str or list) is forwarded as
              ``query``.
        **kwargs: Extra parameters forwarded to the API action. For
            ``dataset``, ``fq``, ``fq_list`` and ``include_private`` are
            dropped and the ``facet_*`` keys are renamed to their dotted
            form. For ``resource`` and ``tag``, ``fields`` is dropped.

    Returns:
        The API response, an error string for invalid input, or the
        exception instance when the API call fails.
    """
    if not isinstance(type_option, str):
        return 'ERROR:: "type_option" must be <str>'

    def date_error(field):
        # Error message when query[field] is not a YYYY-MM-DD str, else None.
        value = query[field]
        if not isinstance(value, str):
            return '"%s" must be <str>' % (field)
        try:
            datetime.strptime(value, '%Y-%m-%d')
        except ValueError:
            return '"%s" incorrect: "%s"' % (field, value)
        # strptime also accepts non-padded values such as 2020-1-1.
        if len(value) != 10:
            return '"%s", must be: <YYYY-MM-DD>' % (field)
        return None

    def range_error(low, high):
        # Validate both optional bounds and their order; None when valid.
        if low in query:
            message = date_error(low)
            if message is not None:
                return message
        if high in query:
            message = date_error(high)
            if message is not None:
                return message
            # Fixed-width ISO dates compare correctly as plain strings.
            if low in query and query[low] > query[high]:
                return '"%s" must be greater than "%s"' % (high, low)
        return None

    try:
        if type_option == 'dataset':
            dropped = ('fq', 'fq_list', 'include_private')
            dotted = ('facet_mincount', 'facet_limit', 'facet_field')
            # Built per call: the request must not inherit earlier state.
            params = {}
            for key, value in kwargs.items():
                if key in dropped:
                    continue
                if key in dotted:
                    params[key.replace('_', '.')] = value
                else:
                    params[key] = value

            if query is not None:
                if not isinstance(query, dict):
                    return '"query" must be <dict>'
                date_fields = ('dataset_start_date', 'dataset_end_date')
                message = range_error(*date_fields)
                if message is not None:
                    return message
                # Date filters first, then the remaining criteria in order.
                fq_list = [field + ':"' + query[field] + '"' for field in date_fields if field in query]
                for key, value in query.items():
                    if value is not None and key not in date_fields:
                        fq_list.append(str(key) + ':"' + str(value) + '"')
                params['fq_list'] = fq_list

            return self.ckan.action.package_search(include_private=True, **params)

        elif type_option == 'resource':
            params = dict((key, value) for key, value in kwargs.items() if key != 'fields')

            if query is not None:
                if not isinstance(query, dict):
                    return '"query" must be <dict>'
                message = range_error('file_date_min', 'file_date_max')
                if message is not None:
                    return message
                params['query'] = query
            return self.ckan.action.resources_search(**params)

        elif type_option == 'tag':
            params = dict((key, value) for key, value in kwargs.items() if key != 'fields')

            if 'vocabulary_id' not in kwargs:
                print('Missing "vocabulary_id" value: tags that don’t belong to any vocabulary')
            else:
                print('Only tags that belong to "{}" vocabulary'.format(kwargs['vocabulary_id']))

            if query is not None:
                if not isinstance(query, dict):
                    return '"query" must be <dict>'
                if 'search' in query:
                    if not isinstance(query['search'], (list, str)):
                        return '"search" must be <list> or <str>'
                    params['query'] = query['search']
            return self.ckan.action.tag_search(**params)

        else:
            return 'ERROR:: "type_option = %s" is not accepted' % (type_option)

    except Exception as exc:
        # Errors are returned instead of raised; KeyboardInterrupt and
        # SystemExit are deliberately left to propagate.
        return exc
648 646
def create(self, type_option, select=None, **kwargs):
    """Create an object in the repository.

    The available parameters are documented in "GUIA DE SCRIPT.pdf".

    Usage:
        <access_name>.create(type_option=<str>, param_1=<value>, ...)

    Args:
        type_option: ``dataset``, ``resource``, ``project``, ``member``,
            ``collaborator``, ``vocabulary``, ``tag``, ``user`` or
            ``views``.
        select: Only used by ``views``: ``resource`` or ``dataset`` picks
            which default-views action is called.
        **kwargs: Parameters forwarded to the API action. For
            ``views``/``resource`` the ``package`` key is dropped.

    Returns:
        The API response, an error string for invalid input, or the
        exception instance when the call fails.
    """
    if not isinstance(type_option, str):
        return 'ERROR:: "type_option" must be <str>'

    # type_option -> CKAN action that takes the keyword arguments as they are.
    direct = {
        'dataset': 'package_create',
        'project': 'organization_create',
        'member': 'organization_member_create',
        'collaborator': 'package_collaborator_create',
        'vocabulary': 'vocabulary_create',
        'tag': 'tag_create',
        'user': 'user_create',
    }
    try:
        if type_option in direct:
            return getattr(self.ckan.action, direct[type_option])(**kwargs)
        if type_option == 'resource':
            return resource.resource_create(self, **kwargs)
        if type_option == 'views':
            if select == 'resource':
                # Built per call: the request must not inherit earlier state.
                params = dict((key, value) for key, value in kwargs.items() if key != 'package')
                return self.ckan.action.resource_create_default_resource_views(**params)
            if select == 'dataset':
                return self.ckan.action.package_create_default_resource_views(**kwargs)
            return 'ERROR:: "select = %s" is not accepted' % (select)
        return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
    except Exception as exc:
        # Errors are returned instead of raised; KeyboardInterrupt and
        # SystemExit are deliberately left to propagate.
        return exc
696 694
def patch(self, type_option, **kwargs):
    """Update an existing object in the repository.

    The available parameters are documented in "GUIA DE SCRIPT.pdf".

    Usage:
        <access_name>.patch(type_option=<str>, param_1=<value>, ...)

    Args:
        type_option: ``dataset``, ``project``, ``resource``, ``member`` or
            ``collaborator``.
        **kwargs: Parameters forwarded to the API action.

    Returns:
        The API response, an error string for invalid input, or the
        exception instance when the call fails.
    """
    if not isinstance(type_option, str):
        return 'ERROR:: "type_option" must be <str>'

    # type_option -> CKAN action. 'member' and 'collaborator' go through the
    # corresponding *_create actions, as in the original implementation.
    # TODO: restrict 'dataset' to dataset-level fields (no resources).
    direct = {
        'dataset': 'package_patch',
        'project': 'organization_patch',
        'member': 'organization_member_create',
        'collaborator': 'package_collaborator_create',
    }
    try:
        if type_option in direct:
            return getattr(self.ckan.action, direct[type_option])(**kwargs)
        if type_option == 'resource':
            return resource.resource_patch(self, **kwargs)
        return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
    except Exception as exc:
        # Errors are returned instead of raised; KeyboardInterrupt and
        # SystemExit are deliberately left to propagate.
        return exc
728 726
def delete(self, type_option, select=None, **kwargs):
    """Delete and/or purge an object from the repository.

    The available parameters are documented in "GUIA DE SCRIPT.pdf".

    Usage:
        <access_name>.delete(type_option=<str>, param_1=<value>, ...)

    Args:
        type_option: ``dataset``, ``project``, ``resource``,
            ``vocabulary``, ``tag`` or ``user``.
        select: Required for ``dataset`` and ``project`` (``delete`` or
            ``purge``) and for ``resource`` (forwarded to
            ``resource.resource_delete``); ignored otherwise.
        **kwargs: Parameters forwarded to the API action.

    Returns:
        The API response, an error string for invalid input, or the
        exception instance when the call fails.
    """
    if not isinstance(type_option, str):
        return 'ERROR:: "type_option" must be <str>'

    # type_option -> {select -> CKAN action}
    by_select = {
        'dataset': {'delete': 'package_delete', 'purge': 'dataset_purge'},
        'project': {'delete': 'organization_delete', 'purge': 'organization_purge'},
    }
    # type_option -> CKAN action that needs no "select"
    direct = {
        'vocabulary': 'vocabulary_delete',
        'tag': 'tag_delete',
        'user': 'user_delete',
    }
    try:
        if type_option in by_select:
            if select is None:
                return 'ERROR:: "select" must not be "None"'
            if select not in ('delete', 'purge'):
                return 'ERROR:: "select = %s" is not accepted' % (select)
            return getattr(self.ckan.action, by_select[type_option][select])(**kwargs)
        if type_option == 'resource':
            if select is None:
                return 'ERROR:: "select" must not be "None"'
            return resource.resource_delete(self, select, **kwargs)
        if type_option in direct:
            return getattr(self.ckan.action, direct[type_option])(**kwargs)
        return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
    except Exception as exc:
        # Errors are returned instead of raised; KeyboardInterrupt and
        # SystemExit are deliberately left to propagate.
        return exc
780 778
def f_status_note(self, total, result, path):
    '''
    PURPOSE:
        Write "status_note.txt" listing which files were downloaded and which failed.

    PARAMETERS:
        total  : dict whose 'name' list holds every requested file name.
        result : dict whose 'name' list holds the file names that were downloaded.
        path   : prefix joined to 'status_note.txt' by plain concatenation, so a
                 directory must already end with its separator.
    '''
    downloaded = result['name']
    downloaded_set = set(downloaded)  # O(1) membership for the failed-file scan
    failed_count = len(total['name']) - len(downloaded)

    # The file is opened in text mode, so '\n' is the correct terminator on
    # every platform; writing os.linesep here would yield '\r\r\n' on Windows.
    # The context manager guarantees the handle is flushed and closed.
    with open(path + 'status_note.txt', 'w') as file_txt:
        file_txt.write('DOWNLOADED FILE(S): "%s"' % (len(downloaded)))
        file_txt.write('\n')
        for name in downloaded:
            file_txt.write(' - ' + name + '\n')
        file_txt.write('\n')

        file_txt.write('FAILED FILE(S): "%s"' % (failed_count))
        file_txt.write('\n')
        if failed_count != 0:
            for name in total['name']:
                if name not in downloaded_set:
                    file_txt.write(' - ' + name + '\n')
        else:
            file_txt.write(' "None"' + '\n')
799 797
def f_name(self, name_dataset, ext, tempdir):
    '''
    PURPOSE:
        Return the first free name among "<name_dataset><ext>",
        "<name_dataset>(1)<ext>", "<name_dataset>(2)<ext>", ... inside "tempdir".

    PARAMETERS:
        name_dataset : base name.
        ext          : suffix appended after the optional "(n)" counter (may be '').
        tempdir      : prefix joined by plain concatenation, so a directory must
                       already end with its separator ('' means current directory).

    RETURNS:
        The chosen name, without the "tempdir" prefix.

    NOTE:
        The search uses local variables only. Keeping the loop flag and counter
        on the instance made every call after the first skip the search and
        return the previously computed name.
    '''
    candidate = name_dataset + ext
    counter = 0
    while os.path.exists(tempdir + candidate):
        counter += 1
        candidate = name_dataset + '(' + str(counter) + ')' + ext
    return candidate
815 813
def f_zipdir(self, path, ziph, zip_name):
    '''
    PURPOSE:
        Add every file found under "path" to the already opened archive "ziph".

    PARAMETERS:
        path     : directory that is walked recursively.
        ziph     : archive object; only its write(filename, arcname) method is used.
        zip_name : archive name, used only in the progress messages.
    '''
    # Archive names are taken relative to the parent of "path", so the last
    # folder of "path" is kept as the top-level folder inside the archive.
    parent_dir = os.path.join(path, '..')
    for current_dir, _, file_names in os.walk(path):
        print('.....')
        print('Creating: "{}" >>'.format(zip_name))
        for file_name in tqdm(iterable=file_names, total=len(file_names)):
            source_file = os.path.join(current_dir, file_name)
            archive_name = os.path.relpath(source_file, parent_dir)
            ziph.write(source_file, archive_name)
        print('Created >>')
824 822
def download_by_step(self, response, tempdir_name):
    '''
    PURPOSE:
        Download one resource into "tempdir_name", streaming it in chunks.

    PARAMETERS:
        response     : dict with the keys 'url' and 'name'; 'url' may be
                       rewritten in place (see below).
        tempdir_name : prefix joined to response['name'] by plain concatenation,
                       so a directory must already end with its separator.

    RETURNS:
        None. Failures are silent on purpose: a response with a status other
        than 200 writes nothing, and request errors are swallowed. The caller
        decides what succeeded by listing the files present afterwards.
    '''
    target = tempdir_name + response['name']
    started_writing = False
    try:
        # ---------- REPLACE URL --------- #
        # When this client talks to a host other than www.igp.gob.pe but the
        # resource URL points there, fetch it from the client's own host.
        api_url = urlparse(self.url)
        file_url = urlparse(response['url'])
        if api_url.netloc != 'www.igp.gob.pe' and file_url.netloc == 'www.igp.gob.pe':
            response['url'] = response['url'].replace(
                file_url.scheme + '://' + file_url.netloc,
                api_url.scheme + '://' + api_url.netloc)
        #----------------------------------#
        with requests.get(response['url'], stream=True, headers={'Authorization': self.Authorization}, verify=self.verify) as resp:
            if resp.status_code == 200:
                started_writing = True
                with open(target, 'wb') as file:
                    for chunk in resp.iter_content(chunk_size=self.chunk_size):
                        if chunk:
                            file.write(chunk)
    except requests.exceptions.RequestException:
        # A stream that breaks midway leaves a truncated file behind, which
        # would later be counted as a successful download. Remove it, but only
        # if this call created it, so an earlier file with the same name is
        # never deleted because of a connection error.
        if started_writing and os.path.isfile(target):
            os.remove(target)
840 838
def download_files(self, **kwargs):
    '''
    PURPOSE:
        Custom function to download the existing files of a dataset.

    AVAILABLE PARAMETERS:
        SEE: "GUIA DE SCRIPT.pdf"
        Handled locally (not sent to the server):
          zip         : <bool>, default False. Pack the files into a .zip under "path".
          status_note : <bool>, default False. Write "status_note.txt" next to the files.
          path        : <str>, existing and writable directory; default '' (current directory).
        Every other keyword (including "id") is stored in self.dict and passed
        to the "url_resources" action.

    STRUCTURE:
        <access_name>.download_files(id = <class 'str'>, param_1 = <class 'param_1'>, ...)

    RETURNS:
        'DOWNLOAD FINISHED', 'NO FILES WERE DOWNLOADED', 'FILES NOT FOUND', an
        'ERROR:: ...' string for rejected arguments, or the exception instance
        raised by the "url_resources" action.
    '''
    dict_local = {}
    #----------------------------------------------#
    for flag in ('zip', 'status_note'):
        flag_value = kwargs.get(flag, False)
        if type(flag_value) is not bool:
            return 'ERROR:: "%s" must be: <class "bool">' % (flag)
        dict_local[flag] = flag_value
    #----------------------------------------------#
    if 'path' in kwargs:
        if type(kwargs['path']) is not str:
            return 'ERROR:: "path" must be: <class "str">'
        if not os.path.isdir(kwargs['path']):
            return 'ERROR:: "path" does not exist'
        dict_local['path'] = kwargs['path']
        if dict_local['path'][-1:] != self.separator:
            dict_local['path'] = dict_local['path'] + self.separator

        # Write probe: create and remove a uniquely named file. The same
        # handler works on Python 2 and 3 (PermissionError is an OSError).
        txt = dict_local['path'] + datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f") + '.txt'
        try:
            with open(txt, 'w'):
                pass
            os.remove(txt)
        except (IOError, OSError):
            return 'ERROR:: Access denied, you are not authorized to write files: "%s"' % (dict_local['path'])
    else:
        dict_local['path'] = ''
    #----------------------------------------------#
    for key, value in kwargs.items():
        if key not in dict_local:
            self.dict[key] = value
    try:
        response = getattr(self.ckan.action, 'url_resources')(**self.dict)
    except Exception:
        # Errors are handed back to the caller as a value; KeyboardInterrupt
        # and SystemExit are allowed to propagate.
        _, exc_value, _ = sys.exc_info()
        return exc_value

    if len(response) == 0:
        return 'FILES NOT FOUND'

    #--------------TEMP PATH---------------#
    tempdir = None
    if dict_local['zip']:
        # Files are staged in a temporary folder and only the .zip reaches "path".
        tempdir = tempfile.mkdtemp(prefix=kwargs['id'] + '-') + self.separator
        os.mkdir(tempdir + kwargs['id'])
        dir_name = tempdir + kwargs['id'] + self.separator
    else:
        folder = self.f_name(kwargs['id'], '', dict_local['path'])
        os.mkdir(dict_local['path'] + folder)
        dir_name = dict_local['path'] + folder + self.separator

    try:
        #-----------DOWNLOAD FILES-------------#
        print('.....')
        print('Downloading "{}" file(s) >>'.format(len(response)))
        name_total = {'name': []}
        # Leaving the "with" block waits for every submitted download.
        with concurrent.futures.ThreadPoolExecutor() as executor:
            for u in tqdm(iterable=response, total=len(response)):
                name_total['name'].append(u['name'])
                executor.submit(self.download_by_step, u, dir_name)
        # Success is judged by what is actually on disk.
        name_check = {}
        name_check['name'] = [f for f in os.listdir(dir_name) if os.path.isfile(os.path.join(dir_name, f))]
        print('"{}" downloaded file(s) successfully >>'.format(len(name_check['name'])))
        #--------------------------------------#
        if len(name_check['name']) == 0:
            # Nothing arrived: drop the empty destination folder (the staging
            # folder of the zip case is removed in the "finally" below).
            if not dict_local['zip'] and os.path.exists(dir_name[:-1]):
                shutil.rmtree(dir_name[:-1])
            return 'NO FILES WERE DOWNLOADED'

        #----------Status Note---------#
        if dict_local['status_note']:
            print('.....')
            print('Creating: "status_note.txt" >>')
            self.f_status_note(name_total, name_check, dir_name)
            print('Created>>')
        #----------ZIP CREATE----------#
        if dict_local['zip']:
            zip_name = self.f_name(kwargs['id'], '.zip', dict_local['path'])
            # The context manager closes the archive even if adding a file fails.
            with zipfile.ZipFile(dict_local['path'] + zip_name, 'w', zipfile.ZIP_DEFLATED, allowZip64=True) as ziph:
                self.f_zipdir(dir_name, ziph, zip_name)
        #------------------------------#
        print('.....')
        return 'DOWNLOAD FINISHED'
    finally:
        # Delete the temporary staging folder on every exit path, including errors.
        if tempdir is not None and os.path.exists(tempdir[:-1]):
            shutil.rmtree(tempdir[:-1])
961 959
def download_files_advance(self, id_or_name, processes=1, path=os.path.expanduser("~"), **kwargs):
    '''
    PURPOSE:
        Advanced custom function to download the existing files of one or more datasets.

    AVAILABLE PARAMETERS:
        SEE: "GUIA DE SCRIPT.pdf"
        id_or_name : <str> or <list>; list items are converted with str().
        processes  : <int>, default 1; forwarded as the '--processes' option.
        path       : <str>, existing and writable directory (default: the user's
                     home directory); forwarded as the '--datapackages' option.
        **kwargs   : forwarded unchanged to logic_download.dump_things_change.

    STRUCTURE:
        <access_name>.download_files_advance(id_or_name= <class 'str' or 'list'>, param_1 = <class 'param_1'>, ...)

    RETURNS:
        An 'ERROR:: ...' string for rejected arguments, otherwise whatever
        logic_download.dump_things_change returns.
    '''
    #------------------ PATH ----------------------#
    if not isinstance(path, str):
        return 'ERROR:: "path" must be: <class "str">'
    if not os.path.isdir(path):
        return 'ERROR:: "path" does not exist'
    if not path.endswith(os.sep):
        path = path + os.sep
    # Write probe: create and remove a uniquely named file.
    test_txt = path + datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f") + '.txt'
    try:
        with open(test_txt, 'w'):
            pass
        os.remove(test_txt)
    except (IOError, OSError):
        return 'ERROR:: Access denied, you are not authorized to write files: "%s"' % (path)

    #------------------ PROCESSES -----------------#
    # bool is a subclass of int; reject it explicitly so True/False are not
    # forwarded as the strings 'True'/'False'.
    if isinstance(processes, bool) or not isinstance(processes, int):
        return 'ERROR:: "processes" must be: <class "int">'

    #------------------ ID OR NAME ----------------#
    if isinstance(id_or_name, str):
        id_or_name = [id_or_name]
    elif isinstance(id_or_name, list):
        id_or_name = list(map(str, id_or_name))
    else:
        return 'ERROR:: dataset "id_or_name" must be: <class "str" or "list">'
    #----------------------------------------------#
    # Option mapping handed to dump_things_change. Keys that were listed but
    # disabled in the original are left out: --all, --gzip, --output,
    # --max-records, --output-json, --output-jsonl, --create-only, --help,
    # --input, --input-json, --start-record, --update-only, --upload-logo,
    # --upload-resources, --version, ACTION_NAME, KEY:JSON, KEY=STRING,
    # KEY@FILE, action, delete, groups, load, organizations, related, search,
    # users.
    arguments = {
        '--apikey': self.Authorization,
        '--ckan-user': None,
        '--config': None,
        '--datapackages': path,
        '--datastore-fields': False,
        '--get-request': False,
        '--insecure': not self.verify,
        # NOTE(review): hard-coded, machine-specific log location; presumably
        # it should be derived from "path" or made configurable - confirm how
        # dump_things_change uses it before changing.
        '--log': '/home/soporte/DUMP/download.txt',
        '--processes': str(processes),
        '--quiet': False,
        '--remote': self.url,
        '--worker': False,
        'ID_OR_NAME': id_or_name,
        'datasets': True,
        'dump': True,
    }
    return logic_download.dump_things_change(self.ckan, 'datasets', arguments, **kwargs)
General Comments 0
You need to be logged in to leave comments. Login now