##// END OF EJS Templates
v2.9.2 :: Add 'max_count' option in upload_multiple_files_advance function
eynilupu -
r6:9f9e218fed1d
parent child
Show More
1 NO CONTENT: modified file, binary diff hidden
@@ -1,871 +1,878
1 1 from ckanapi import RemoteCKAN
2 2 from datetime import datetime
3 3 from tqdm import tqdm
4 4 #from ckanapi.errors import NotAuthorized, NotFound, ValidationError, SearchQueryError, SearchError, CKANAPIError, ServerIncompatibleError
5 5 import sys
6 6 import platform
7 7 import os
8 8 import tempfile
9 9 import shutil
10 10 import zipfile
11 11 import concurrent.futures
12 12 import requests
13 13 import json
14 14 import pathlib
15 15 import uuid
16 16
17 17 class JROAPI():
18 18 """
19 19 FINALIDAD:
20 20 Script para administrar y obtener la data del repositorio por medio de APIs.
21 21
22 22 REQUISITOS PREVIOS:
23 23 - Paso 1: Tener "pip [Python 2]" o "pip3 [Python 3]" instalado:
24 24 - Paso 2: Instalar lo siguiente como administrador:
25 25 En Python 2
26 26 - pip install ckanapi==4.5
27 27 - pip install requests
28 28 - pip install pathlib
29 29 - pip install futures
30 30 - pip install tqdm
31 31 En Python > 3
32 32 - pip3 install ckanapi==4.5
33 33 - pip3 install requests
34 34 - pip3 install tqdm
35 35
36 36 FUNCIONES DISPONIBLES:
37 37 - action
38 38 - upload_file
39 39 - upload_multiple_files
40 40 - upload_multiple_files_advance
41 41 - show
42 42 - search
43 43 - create
44 44 - patch
45 45 - delete
46 46 - download_files
47 47
48 48 EJEMPLOS:
49 49 #1:
50 50 with JROAPI('http://demo.example.com', Authorization='#########') as <access_name>:
51 51 ... some operation(s) ...
52 52 #2:
53 53 <access_name> = JROAPI('http://example.com', Authorization='#########')
54 54 ... some operation(s) ...
55 55 <access_name>.ckan.close()
56 56
57 57 REPORTAR ALGUN PROBLEMA:
58 58 Debe enviar un correo a eynilupu@igp.gob.pe detallando los siguientes pasos:
59 59 1) Correo para contactarlo
60 60 2) Descripcion del problema
61 61 3) ¿En que paso o seccion encontro el problema?
62 62 4) ¿Cual era el resultado que usted esperaba?
63 63 """
def __init__(self, url, Authorization=None):
    """Open a CKAN client for ``url`` and set up the instance state.

    url: base address of the repository; it is also embedded in the
        user-agent string sent to the server.
    Authorization: API key handed to ``RemoteCKAN`` and kept on the
        instance so that ``download_by_step`` can send it as a header.
    """
    user_agent = 'CKAN_JRO/1.1 (+' + str(url) + ')'
    self.ckan = RemoteCKAN(url, apikey=Authorization, user_agent=user_agent)
    self.Authorization = Authorization

    # Path separator appended to directories by the download helpers.
    self.separator = '\\' if platform.system() == 'Windows' else '/'

    # Number of bytes requested per chunk while streaming a download.
    self.chunk_size = 1024

    # Scratch attributes shared by the other methods.
    self.list = []
    self.dict = {}
    self.str = ''

    # Loop flag and counter consumed by ``f_name``.
    self.check = 1
    self.cont = 0
81 81
82 82 def __enter__(self):
83 83 return self
84 84
def __exit__(self, *args):
    """Leave a ``with`` block by closing the underlying CKAN client.

    The exception details passed in ``args`` are ignored and nothing is
    returned, so an exception raised inside the block keeps propagating.
    """
    self.ckan.close()
87 87
def action(self, action, **kwargs):
    """Call an arbitrary API action by name.

    The available actions are listed in "GUIA DE SCRIPT.pdf".

    action: name of the API action, e.g. ``'package_search'``.
    kwargs: parameters forwarded to the action.  For ``package_search``
        the keys ``facet_mincount``, ``facet_limit`` and ``facet_field``
        are renamed to their dotted form (``facet.limit`` ...), which
        cannot be written as Python keyword names.

    Returns the value produced by the action.  If the call raises an
    ``Exception`` the exception object is returned instead of raised.
    ``KeyboardInterrupt`` and ``SystemExit`` are left to propagate.

    Example:
        <access_name>.action(<consuming API>, param_1=<value>, ...)
    """
    if action == 'package_search':
        for facet in ('facet_mincount', 'facet_limit', 'facet_field'):
            if facet in kwargs:
                kwargs[facet.replace('_', '.')] = kwargs.pop(facet)

    try:
        return getattr(self.ckan.action, action)(**kwargs)
    except Exception as exc:
        # Errors are reported to the caller as return values.
        return exc
113 113
def upload_file(self, dataset_id, file_path, file_date, file_type, **kwargs):
    """Upload a single file to a dataset as a new resource.

    The accepted extra parameters are listed in "GUIA DE SCRIPT.pdf".

    dataset_id: dataset that receives the resource (sent as ``package_id``).
    file_path: path of the file to upload; its base name becomes the
        resource name.
    file_date: sent as ``file_date``.
    file_type: sent as ``voc_file_type``.
    kwargs: additional resource fields.  ``package_id``, ``upload``,
        ``voc_file_type`` and ``name`` are ignored because they are
        derived from the positional arguments.  ``others`` defaults to
        ``''`` and is JSON-encoded when given as a list.  ``format``
        defaults to the upper-cased file suffixes without the leading
        dot (``data.tar.gz`` -> ``TAR.GZ``).

    Returns the result of ``resource_create``, an error string when
    ``file_path`` is not a file, or the exception object when the call
    fails.

    Structure:
        <access_name>.upload_file(dataset_id=<str>, file_date=<str>,
                                  file_path=<str>, file_type=<str>, ...)
    """
    reserved = ('package_id', 'upload', 'voc_file_type', 'name')
    # Built per call: nothing from a previous upload may leak into this one.
    params = {key: value for key, value in kwargs.items() if key not in reserved}

    if 'others' not in kwargs:
        params['others'] = ''
    elif isinstance(kwargs['others'], list):
        params['others'] = json.dumps(kwargs['others'])

    if not os.path.isfile(file_path):
        return 'File "%s" not exist' % (file_path)

    if 'format' not in params:
        suffix = ''.join(pathlib.Path(file_path).suffixes)
        if suffix:
            params['format'] = suffix.upper()[1:]

    try:
        # The handle is closed as soon as the request has finished.
        with open(file_path, 'rb') as handle:
            return self.ckan.action.resource_create(
                package_id=dataset_id,
                file_date=file_date,
                upload=handle,
                voc_file_type=file_type,
                name=pathlib.Path(file_path).name,
                **params
            )
    except Exception as exc:
        return exc
152 152
def upload_multiple_files_advance(self, dataset_id, path_files, file_date, file_type, max_size=100, max_count=500, ignore_repetition=False, **kwargs):
    """Upload many files to a dataset in blocks through ``package_revise``.

    The accepted extra parameters are listed in "GUIA DE SCRIPT.pdf".

    dataset_id: id or name of the target dataset.  A value that parses
        as a UUID is matched as ``id``, anything else as ``name``.
    path_files: either a list of file paths or the path of a directory
        whose regular files are uploaded.  Files are processed in sorted
        order; a list passed by the caller is not modified.
    file_date: value stored as ``file_date`` on every resource.
    file_type: value stored as ``voc_file_type`` on every resource.
    max_size: maximum size of one block, and of one file, in MB.
    max_count: maximum number of files per block (1 to 999).
    ignore_repetition: when true, files whose name (case-insensitive)
        already exists in the dataset are skipped with a warning; when
        false the first such file aborts the upload with an error.
    kwargs: extra fields copied onto every resource.  ``package_id``,
        ``upload``, ``voc_file_type`` and ``name`` are ignored.
        ``others`` defaults to ``''`` and is JSON-encoded when given as
        a list.  ``format`` defaults to the upper-cased file suffixes.

    Returns the result of the last ``package_revise`` call, an error
    string when validation fails, or the exception object when a request
    fails (blocks after the failing one are not sent).

    Structure:
        <access_name>.upload_multiple_files_advance(
            dataset_id=<str>, path_files=<str or list of str>,
            file_date=<str>, file_type=<str>, ...)
    """
    # ---- names already present in the dataset -------------------------
    try:
        current = self.ckan.action.package_show(id=dataset_id)['resources']
    except Exception as exc:
        print('ERROR obtaining metadata dataset:: Use the "print" for more information')
        return exc
    existing_names = set(resource['name'].lower() for resource in current)

    # ---- extra resource fields (built per call, never shared) ---------
    reserved = ('package_id', 'upload', 'voc_file_type', 'name')
    extras = {key: value for key, value in kwargs.items() if key not in reserved}
    if 'others' not in kwargs:
        extras['others'] = ''
    elif isinstance(kwargs['others'], list):
        extras['others'] = json.dumps(kwargs['others'])

    # ---- candidate paths: explicit list or directory content ----------
    if type(path_files) is list:
        if len(path_files) == 0:
            return 'ERROR:: "path_list is empty"'
        candidates = sorted(path_files)
    elif type(path_files) is str:
        if not os.path.isdir(path_files):
            return 'ERROR:: Directory "%s" does not exist' % (path_files)
        names = sorted(
            entry for entry in os.listdir(path_files)
            if os.path.isfile(os.path.join(path_files, entry))
        )
        if not names:
            return "ERROR:: There aren't files in this directory"
        candidates = [os.path.join(path_files, entry) for entry in names]
    else:
        return 'ERROR:: "path_files" must be a str or list'

    # Only paths and sizes are recorded here; files are opened block by
    # block below so that no handle is left open on an early return.
    pending = []
    for path in candidates:
        if not os.path.isfile(path):
            return 'File "%s" does not exist' % (path)
        name = pathlib.Path(path).name
        if name.lower() in existing_names:
            if not ignore_repetition:
                return 'ERROR:: "%s" file already exist in this dataset' % (name)
            print('WARRING:: "' + str(name) + '" file was ignored because already exist in this dataset')
            continue
        pending.append({'name': name, 'path': path, 'size': os.stat(path).st_size})

    if not pending:
        return "ERROR:: No file(s) found to upload"

    # ---- dataset selector for package_revise --------------------------
    try:
        uuid.UUID(str(dataset_id), version=4)
        match_key = 'id'
    except ValueError:
        match_key = 'name'
    # json.dumps escapes quotes/backslashes that plain concatenation
    # would copy verbatim into the JSON text.
    match = json.dumps({match_key: str(dataset_id)})

    # ---- split the files into blocks ----------------------------------
    count_limit = int(max_count)
    if not 1 <= count_limit <= 999:
        return 'ERROR:: The count of the number of files must be between 1 and 999, please change "max_count" value'
    size_limit = 1024 * 1024 * float(max_size)

    blocks = [[]]
    block_size = 0
    for item in pending:
        if item['size'] > size_limit:
            size_mb = round(item['size'] / (1024.0 * 1024.0), 2)
            return 'ERROR:: The size of the "%s" file is %sMB aprox, please change "max_size" value' % (item['name'], str(size_mb))
        block_size = block_size + item['size']
        if block_size > size_limit or len(blocks[-1]) >= count_limit:
            # Current block is full: this file starts the next one.
            blocks.append([])
            block_size = item['size']
        blocks[-1].append(item)

    # ---- upload block by block ----------------------------------------
    print('BLOCK(S) IN TOTAL:: {}'.format(len(blocks)))
    result = None
    for number, block in enumerate(blocks, 1):
        print('---- BLOCK N°{} ----'.format(number))
        resources = []
        files = {}
        handles = []
        try:
            for position, item in enumerate(block):
                resource = {
                    'name': item['name'],
                    'file_date': file_date,
                    'voc_file_type': file_type,
                }
                resource.update(extras)
                if 'format' not in resource:
                    suffix = ''.join(pathlib.Path(item['name']).suffixes)
                    if suffix:
                        resource['format'] = suffix.upper()[1:]

                handle = open(item['path'], 'rb')
                handles.append(handle)
                # The new resources are appended to the dataset, so they
                # are addressed from the end: -len(block) ... -1.
                upload_key = 'update__resources__-' + str(len(block) - position) + '__upload'
                files[upload_key] = (item['name'], handle)
                resources.append(resource)

            print('BLOCK N°{} :: "{}" file(s) found >> uploading'.format(number, len(block)))
            result = self.ckan.call_action(
                'package_revise',
                {'match': match, 'update__resources__extend': json.dumps(resources)},
                files=files
            )
            print('BLOCK N°{} :: Uploaded file(s) successfully'.format(number))
        except Exception as exc:
            print('ERROR :: Use the "print" for more information')
            return exc
        finally:
            for handle in handles:
                handle.close()
    return result
283 290
def upload_multiple_files(self, dataset_id, path_files, date_files, type_files, **kwargs):
    """Upload several files to a dataset, one ``resource_create`` per file.

    The accepted extra parameters are listed in "GUIA DE SCRIPT.pdf".

    dataset_id: dataset that receives the resources (``package_id``).
    path_files: either a list of file paths or the path of a directory
        whose regular files are uploaded.  Files are processed in sorted
        order; a list passed by the caller is not modified.
    date_files: ``file_date`` value; a list gives one value per file, any
        other value is applied to every file.
    type_files: ``voc_file_type`` value; list or single value as above.
    kwargs: extra resource fields.  A list value is read as one entry per
        file, any other value is shared by all files.  ``others`` is
        special: a tuple gives one JSON-encoded entry per file, a list is
        JSON-encoded and shared, a str is shared as is; it defaults to
        ``''``.  ``format`` defaults to the upper-cased suffixes of each
        file name (``''`` when there is none).

    Every per-file list must have as many entries as there are files.

    Returns an error string when validation fails; otherwise a list with
    one entry per file, in upload order, holding either the result of
    ``resource_create`` or the exception raised for that file.

    Structure:
        <access_name>.upload_multiple_files(
            dataset_id=<str>, path_files=<str or list of str>,
            date_files=<str or list of str>,
            type_files=<str or list of str>, ...)
    """
    # ---- files to upload: explicit list or directory content ----------
    if type(path_files) is list:
        if len(path_files) == 0:
            return 'ERROR:: "path_list is empty"'
        paths = sorted(path_files)
        for path in paths:
            if not os.path.isfile(path):
                return 'File "%s" does not exist' % (path)
    elif type(path_files) is str:
        if not os.path.isdir(path_files):
            return 'ERROR:: Directory "%s" does not exist' % (path_files)
        entries = sorted(
            entry for entry in os.listdir(path_files)
            if os.path.isfile(os.path.join(path_files, entry))
        )
        if not entries:
            return "ERROR:: There aren't files in this directory"
        paths = [os.path.join(path_files, entry) for entry in entries]
    else:
        return 'ERROR:: "path_files" must be a str or list'
    names = [pathlib.Path(path).name for path in paths]

    # ---- per-file values (lists) and values shared by every file ------
    per_file = {'name': names}
    if 'format' not in kwargs:
        per_file['format'] = [
            ''.join(pathlib.Path(name).suffixes).upper()[1:] for name in names
        ]
    shared = {'package_id': dataset_id}

    if type(date_files) is list:
        per_file['file_date'] = date_files
    else:
        shared['file_date'] = date_files

    if type(type_files) is list:
        per_file['voc_file_type'] = type_files
    else:
        shared['voc_file_type'] = type_files

    for key, value in kwargs.items():
        # "upload" is always the opened file; "others" is handled below.
        if key in ('upload', 'others') or key in per_file or key in shared:
            continue
        if type(value) is list:
            per_file[key] = value
        else:
            shared[key] = value

    if 'others' not in kwargs:
        shared['others'] = ''
    elif isinstance(kwargs['others'], tuple):
        per_file['others'] = [json.dumps(entry) for entry in kwargs['others']]
    elif isinstance(kwargs['others'], list):
        shared['others'] = json.dumps(kwargs['others'])
    elif isinstance(kwargs['others'], str):
        shared['others'] = kwargs['others']
    else:
        return 'ERROR:: "others" must be a tuple, list or str'

    if len(set(len(values) for values in per_file.values())) > 1:
        return 'ERROR:: All lists must be the same length: %s' % (len(names))

    # ---- upload --------------------------------------------------------
    print('"{}" file(s) found >> uploading'.format(len(names)))
    results = []
    for index, path in enumerate(paths):
        try:
            send = {key: values[index] for key, values in per_file.items()}
            send.update(shared)
            # Each file is opened only for the duration of its request.
            with open(path, 'rb') as handle:
                send['upload'] = handle
                results.append(self.ckan.action.resource_create(**send))
            print('File #{} :: "{}" was uploaded successfully'.format(index + 1, names[index]))
        except Exception as exc:
            results.append(exc)
            print('File #{} :: Error uploading "{}" file'.format(index + 1, names[index]))
    return results
393 400
def show(self, type_option, id, **kwargs):
    """Fetch one specific object from the repository.

    The accepted parameters are listed in "GUIA DE SCRIPT.pdf".

    type_option: kind of object; one of ``dataset``, ``resource``,
        ``project``, ``collaborator``, ``member``, ``vocabulary``,
        ``tag``, ``user`` or ``job``.
    id: identifier forwarded as ``id`` to the underlying action.
    kwargs: extra parameters forwarded unchanged.  For ``tag`` a notice
        is printed when ``vocabulary_id`` is not supplied.

    Returns the action result, an error string for an unsupported
    ``type_option``, or the exception object when the call fails.

    Structure:
        <access_name>.show(type_option=<str>, id=<str>, ...)
    """
    if type(type_option) is not str:
        return 'ERROR:: "type_option" must be a str'

    api_by_type = {
        'dataset': 'package_show',
        'resource': 'resource_show',
        'project': 'organization_show',
        'collaborator': 'package_collaborator_list_for_user',
        'member': 'organization_list_for_user',
        'vocabulary': 'vocabulary_show',
        'tag': 'tag_show',
        'user': 'user_show',
        'job': 'job_show',
    }
    api_name = api_by_type.get(type_option)
    if api_name is None:
        return 'ERROR:: "type_option = %s" is not accepted' % (type_option)

    if type_option == 'tag' and 'vocabulary_id' not in kwargs:
        print('Missing "vocabulary_id" value: assume it is a free tag')

    try:
        return getattr(self.ckan.action, api_name)(id=id, **kwargs)
    except Exception as exc:
        # Errors are reported to the caller as return values.
        return exc
434 441
def search(self, type_option, query=None, **kwargs):
    """Search datasets, resources or tags that satisfy some criteria.

    The accepted parameters are listed in "GUIA DE SCRIPT.pdf".

    type_option: ``'dataset'``, ``'resource'`` or ``'tag'``.
    query: optional dict with the search criteria.
        * dataset: every ``key: value`` pair whose value is not ``None``
          becomes an ``fq_list`` entry ``key:"value"``.
          ``dataset_start_date`` / ``dataset_end_date`` must be
          ``YYYY-MM-DD`` strings with start <= end.
        * resource: forwarded as ``query``.  ``file_date_min`` /
          ``file_date_max`` must be ``YYYY-MM-DD`` strings with
          min <= max.
        * tag: the value under ``search`` (a list or a str) is forwarded
          as ``query``.
    kwargs: extra parameters for the underlying action.  For datasets
        ``fq``, ``fq_list`` and ``include_private`` are ignored and the
        ``facet_*`` keys are renamed to their dotted form; for resources
        and tags ``fields`` is ignored.

    Every call builds its parameters from scratch, so nothing from an
    earlier call is sent again.

    Returns the action result, an error string when validation fails, or
    the exception object when the call fails.

    Structure:
        <access_name>.search(type_option=<str>, query=<dict>, ...)
    """
    if type(type_option) is not str:
        return 'ERROR:: "type_option" must be <str>'

    def date_error(first, last):
        # Validate the optional first/last date pair found in `query`;
        # return the error message, or None when both are acceptable.
        for field in (first, last):
            if field not in query:
                continue
            value = query[field]
            if type(value) is not str:
                return '"%s" must be <str>' % (field)
            try:
                datetime.strptime(value, '%Y-%m-%d')
            except ValueError:
                return '"%s" incorrect: "%s"' % (field, value)
            # strptime also accepts unpadded dates such as 2020-1-1.
            if len(value) != 10:
                return '"%s", must be: <YYYY-MM-DD>' % (field)
            if field == last and first in query and query[first] > value:
                return '"%s" must be greater than "%s"' % (last, first)
        return None

    try:
        if type_option == 'dataset':
            params = {}
            for key, value in kwargs.items():
                if key in ('fq', 'fq_list', 'include_private'):
                    continue
                if key in ('facet_mincount', 'facet_limit', 'facet_field'):
                    key = key.replace('_', '.')
                params[key] = value

            if query is not None:
                if type(query) is not dict:
                    return '"query" must be <dict>'
                date_fields = ('dataset_start_date', 'dataset_end_date')
                error = date_error(*date_fields)
                if error is not None:
                    return error
                # Validated dates come first, then the remaining criteria.
                filters = [
                    field + ':"' + query[field] + '"'
                    for field in date_fields if field in query
                ]
                for key, value in query.items():
                    if value is not None and key not in date_fields:
                        filters.append(str(key) + ':"' + str(value) + '"')
                params['fq_list'] = filters

            return self.ckan.action.package_search(include_private=True, **params)

        if type_option == 'resource':
            params = {key: value for key, value in kwargs.items() if key != 'fields'}

            if query is not None:
                if type(query) is not dict:
                    return '"query" must be <dict>'
                error = date_error('file_date_min', 'file_date_max')
                if error is not None:
                    return error
                params['query'] = query

            return self.ckan.action.resources_search(**params)

        if type_option == 'tag':
            params = {key: value for key, value in kwargs.items() if key != 'fields'}

            if 'vocabulary_id' not in kwargs:
                print('Missing "vocabulary_id" value: tags that don’t belong to any vocabulary')
            else:
                print('Only tags that belong to "{}" vocabulary'.format(kwargs['vocabulary_id']))

            if query is not None:
                if type(query) is not dict:
                    return '"query" must be <dict>'
                if 'search' in query:
                    if type(query['search']) is list or type(query['search']) is str:
                        params['query'] = query['search']
                    else:
                        return '"search" must be <list> or <str>'

            return self.ckan.action.tag_search(**params)

        return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
    except Exception as exc:
        # Errors are reported to the caller as return values.
        return exc
570 577
def create(self, type_option, select=None, **kwargs):
    """Create an object in the repository.

    The accepted parameters are listed in "GUIA DE SCRIPT.pdf".

    type_option: ``dataset``, ``project``, ``member``, ``collaborator``,
        ``vocabulary``, ``tag``, ``user`` or ``views``.
    select: only read for ``views``: ``'resource'`` creates the default
        views of one resource (the ``package`` keyword is not
        forwarded), ``'dataset'`` creates them for a whole dataset.
    kwargs: parameters forwarded to the underlying action.

    Returns the action result, an error string for an unsupported
    ``type_option``/``select``, or the exception object when the call
    fails.

    Structure:
        <access_name>.create(type_option=<str>, param_1=<value>, ...)
    """
    if type(type_option) is not str:
        return 'ERROR:: "type_option" must be <str>'

    api_by_type = {
        'dataset': 'package_create',
        'project': 'organization_create',
        'member': 'organization_member_create',
        'collaborator': 'package_collaborator_create',
        'vocabulary': 'vocabulary_create',
        'tag': 'tag_create',
        'user': 'user_create',
    }
    try:
        if type_option in api_by_type:
            return getattr(self.ckan.action, api_by_type[type_option])(**kwargs)

        if type_option == 'views':
            if 'resource' == select:
                # Built per call so no parameter of an earlier call is sent.
                params = {key: value for key, value in kwargs.items() if key != 'package'}
                return self.ckan.action.resource_create_default_resource_views(**params)
            if 'dataset' == select:
                return self.ckan.action.package_create_default_resource_views(**kwargs)
            return 'ERROR:: "select = %s" is not accepted' % (select)

        return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
    except Exception as exc:
        # Errors are reported to the caller as return values.
        return exc
616 623
def patch(self, type_option, **kwargs):
    """Update an existing object in the repository.

    The accepted parameters are listed in "GUIA DE SCRIPT.pdf".

    type_option: ``dataset``, ``project``, ``resource``, ``member`` or
        ``collaborator``.  ``member`` and ``collaborator`` call the same
        ``*_create`` actions that ``create`` uses.
    kwargs: parameters forwarded to the underlying action.

    Returns the action result, an error string for an unsupported
    ``type_option``, or the exception object when the call fails.

    Structure:
        <access_name>.patch(type_option=<str>, param_1=<value>, ...)
    """
    if type(type_option) is not str:
        return 'ERROR:: "type_option" must be <str>'

    api_by_type = {
        'dataset': 'package_patch',
        'project': 'organization_patch',
        'resource': 'resource_patch',
        'member': 'organization_member_create',
        'collaborator': 'package_collaborator_create',
    }
    api_name = api_by_type.get(type_option)
    if api_name is None:
        return 'ERROR:: "type_option = %s" is not accepted' % (type_option)

    try:
        return getattr(self.ckan.action, api_name)(**kwargs)
    except Exception as exc:
        # Errors are reported to the caller as return values.
        return exc
647 654
def delete(self, type_option, select=None, **kwargs):
    """Delete and/or purge an object of the repository.

    The accepted parameters are listed in "GUIA DE SCRIPT.pdf".

    type_option: ``dataset``, ``project``, ``resource``, ``vocabulary``,
        ``tag`` or ``user``.
    select: required for ``dataset`` and ``project``: ``'delete'`` or
        ``'purge'`` chooses between the ``*_delete`` and ``*_purge``
        actions.  It is not read for the other types.
    kwargs: parameters forwarded to the underlying action.

    Returns the action result, an error string for an unsupported
    ``type_option``/``select``, or the exception object when the call
    fails.

    Structure:
        <access_name>.delete(type_option=<str>, param_1=<value>, ...)
    """
    if type(type_option) is not str:
        return 'ERROR:: "type_option" must be <str>'

    # type -> (action for select='delete', action for select='purge')
    delete_or_purge = {
        'dataset': ('package_delete', 'dataset_purge'),
        'project': ('organization_delete', 'organization_purge'),
    }
    delete_only = {
        'resource': 'resource_delete',
        'vocabulary': 'vocabulary_delete',
        'tag': 'tag_delete',
        'user': 'user_delete',
    }
    try:
        if type_option in delete_or_purge:
            if select is None:
                return 'ERROR:: "select" must not be "None"'
            delete_api, purge_api = delete_or_purge[type_option]
            if 'delete' == select:
                api_name = delete_api
            elif 'purge' == select:
                api_name = purge_api
            else:
                return 'ERROR:: "select = %s" is not accepted' % (select)
        elif type_option in delete_only:
            api_name = delete_only[type_option]
        else:
            return 'ERROR:: "type_option = %s" is not accepted' % (type_option)

        return getattr(self.ckan.action, api_name)(**kwargs)
    except Exception as exc:
        # Errors are reported to the caller as return values.
        return exc
696 703
def f_status_note(self, total, result, path):
    """Write ``status_note.txt`` summarising a download run.

    total: dict whose ``'name'`` list holds every file that was requested.
    result: dict whose ``'name'`` list holds the files actually downloaded.
    path: directory prefix; ``'status_note.txt'`` is appended to it as
        is, so it must already end with a path separator.

    The note lists the downloaded files, then the files of ``total``
    that are missing from ``result`` (or ``"None"`` when the two lists
    have the same length).  An existing note is overwritten.
    """
    downloaded = result['name']
    failed_count = len(total['name']) - len(downloaded)

    lines = ['DOWNLOADED FILE(S): "%s"' % (len(downloaded))]
    lines.extend(' - ' + name for name in downloaded)
    lines.append('')

    lines.append('FAILED FILE(S): "%s"' % (failed_count))
    if failed_count != 0:
        lines.extend(' - ' + name for name in total['name'] if name not in downloaded)
    else:
        lines.append(' "None"')

    # Text mode already translates '\n' to the platform line ending;
    # writing os.linesep here would produce '\r\r\n' on Windows.
    with open(path + 'status_note.txt', 'w') as note:
        note.write('\n'.join(lines) + '\n')
715 722
def f_name(self, name_dataset, ext, tempdir):
    """Pick a file name that does not exist yet inside ``tempdir``.

    Tries ``<name_dataset><ext>`` first and then ``<name_dataset>(1)<ext>``,
    ``<name_dataset>(2)<ext>`` ... until a free name is found.  ``tempdir``
    is prefixed as is, so it must already end with a path separator.

    The search is driven by instance state: it only runs while
    ``self.check`` is truthy and starts from ``self.cont``.  Both are
    left modified (``self.check`` falsy, ``self.cont`` advanced) and the
    chosen name is kept in ``self.str``; a call made while ``self.check``
    is already falsy just returns the current ``self.str``.
    """
    while self.check:
        if self.cont == 0:
            candidate = name_dataset + ext
            # Provisional name kept while the plain name is taken.
            taken_value = name_dataset + '(' + str(self.cont + 1) + ')' + ext
        else:
            candidate = name_dataset + '(' + str(self.cont) + ')' + ext
            taken_value = ''

        if os.path.exists(tempdir + candidate):
            self.str = taken_value
        else:
            self.check = self.check * 0
            self.str = candidate
        self.cont = self.cont + 1
    return self.str
731 738
def f_zipdir(self, path, ziph, zip_name):
    """Add every file found under ``path`` to an open zip archive.

    path: directory to walk recursively.
    ziph: archive object; its ``write(filename, arcname)`` method is
        called once per file.
    zip_name: archive name, only used in the progress messages.

    Archive member names are relative to the parent of ``path``, so they
    start with the last component of ``path``.  Progress messages and a
    tqdm bar are printed once per directory visited.
    """
    parent = os.path.join(path, '..')
    for folder, _, filenames in os.walk(path):
        print('.....')
        print('Creating: "{}" >>'.format(zip_name))
        for filename in tqdm(iterable=filenames, total=len(filenames)):
            source = os.path.join(folder, filename)
            ziph.write(source, os.path.relpath(source, parent))
        print('Created >>')
740 747
def download_by_step(self, response, tempdir_name):
    """Stream one remote file to disk, best effort.

    response: dict with the ``'url'`` to fetch and the ``'name'`` of the
        file to create.
    tempdir_name: directory prefix for the target file; ``'name'`` is
        appended to it as is.

    The request carries ``self.Authorization`` in the ``Authorization``
    header and the body is read in ``self.chunk_size`` pieces.  Nothing
    is written unless the status code is 200, and request errors are
    ignored; the method returns ``None`` in every case.
    """
    auth_headers = {'Authorization': self.Authorization}
    try:
        with requests.get(response['url'], stream=True, headers=auth_headers) as resp:
            if resp.status_code != 200:
                return
            with open(tempdir_name + response['name'], 'wb') as target:
                for chunk in resp.iter_content(chunk_size=self.chunk_size):
                    if not chunk:
                        continue
                    target.write(chunk)
    except requests.exceptions.RequestException:
        pass
751 758
def download_files(self, **kwargs):
    '''
    PURPOSE:
        Custom function to download the existing files of a dataset.

    AVAILABLE PARAMETERS:
        SEE: "GUIA DE SCRIPT.pdf"
        Handled locally by this function:
            - zip (bool, default False): download into a temporary folder
              and pack the result into a ".zip" file created in "path".
            - status_note (bool, default False): call self.f_status_note
              with the requested and the downloaded file names.
            - path (str, default ''): existing and writable directory where
              the result is stored.
        Every other keyword (for example "id") is copied into self.dict and
        sent to the "url_resources" action.

    STRUCTURE:
        <access_name>.download_files(id = <class 'str'>, param_1 = <class 'param_1'>, ...)

    RETURNS:
        - A string starting with 'ERROR::' when a local parameter is invalid.
        - The exception object raised by the "url_resources" action.
        - 'FILES NOT FOUND', 'NO FILES WERE DOWNLOADED' or 'DOWNLOAD FINISHED'.
    '''
    dict_local = {}
    #----------------------------------------------#
    # Boolean switches share the same validation and the same default.
    for flag in ('zip', 'status_note'):
        if flag in kwargs:
            if not isinstance(kwargs[flag], bool):
                return 'ERROR:: "%s" must be: <class "bool">' % flag
            dict_local[flag] = kwargs[flag]
        else:
            dict_local[flag] = False
    #----------------------------------------------#
    if 'path' in kwargs:
        if not isinstance(kwargs['path'], str):
            return 'ERROR:: "path" must be: <class "str">'
        if not os.path.isdir(kwargs['path']):
            return 'ERROR:: "path" does not exist'
        dict_local['path'] = kwargs['path']
        if dict_local['path'][-1:] != self.separator:
            dict_local['path'] += self.separator

        # Write probe: create and delete a uniquely named file to confirm
        # that the directory is writable before any download starts.
        txt = dict_local['path']+datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f")+'.txt'
        # PermissionError does not exist in Python 2; the name is only
        # evaluated when running on Python 3.
        denied = PermissionError if sys.version_info[0] >= 3 else Exception
        try:
            with open(txt, 'w'):
                pass
            os.remove(txt)
        except denied:
            return 'ERROR:: Access denied, you are not authorized to write files: "%s"' % (dict_local['path'])
    else:
        dict_local['path'] = ''
    #----------------------------------------------#
    # Everything that is not a local option is forwarded to the API call.
    for key, value in kwargs.items():
        if key not in dict_local:
            self.dict[key] = value
    try:
        response = self.ckan.action.url_resources(**self.dict)
    except Exception as exc:
        # The error is returned (not raised) to the caller.
        return exc

    if len(response) == 0:
        return 'FILES NOT FOUND'

    #--------------TEMP PATH---------------#
    tempdir = None
    if dict_local['zip']:
        tempdir = tempfile.mkdtemp(prefix=kwargs['id']+'-')+self.separator
    try:
        if tempdir is not None:
            os.mkdir(tempdir+kwargs['id'])
            dir_name = tempdir + kwargs['id'] + self.separator
        else:
            folder = self.f_name(kwargs['id'], '', dict_local['path'])
            os.mkdir(dict_local['path'] + folder)
            dir_name = dict_local['path'] + folder + self.separator
        #-----------DOWNLOAD FILES-------------#
        print('.....')
        print('Downloading "{}" file(s) >>'.format(len(response)))
        name_total = {'name': []}
        # Leaving the "with" block waits for every submitted download.
        with concurrent.futures.ThreadPoolExecutor() as executor:
            for u in tqdm(iterable=response, total=len(response)):
                name_total['name'].append(u['name'])
                executor.submit(self.download_by_step, u, dir_name)
        # Success is measured by the files that actually exist on disk.
        name_check = {}
        name_check['name'] = [f for f in os.listdir(dir_name) if os.path.isfile(os.path.join(dir_name, f))]
        print('"{}" downloaded file(s) successfully >>'.format(len(name_check['name'])))
        #--------------------------------------#
        if len(name_check['name']) == 0:
            # Nothing was stored: drop the empty target folder (the
            # temporary folder of the zip mode is removed in "finally").
            if tempdir is None and os.path.exists(dir_name[:-1]):
                shutil.rmtree(dir_name[:-1])
            return 'NO FILES WERE DOWNLOADED'
        #----------Status Note---------#
        if dict_local['status_note']:
            print('.....')
            print('Creating: "status_note.txt" >>')
            self.f_status_note(name_total, name_check, dir_name)
            print('Created>>')
        #----------ZIP CREATE----------#
        if tempdir is not None:
            zip_name = self.f_name(kwargs['id'], '.zip', dict_local['path'])
            # The context manager closes the archive even if packing fails.
            with zipfile.ZipFile(dict_local['path'] + zip_name, 'w', zipfile.ZIP_DEFLATED, allowZip64=True) as ziph:
                self.f_zipdir(dir_name, ziph, zip_name)
        #------------------------------#
        print('.....')
        return 'DOWNLOAD FINISHED'
    finally:
        # Delete the temporary path on every exit, including errors.
        if tempdir is not None and os.path.exists(tempdir[:-1]):
            shutil.rmtree(tempdir[:-1])
General Comments 0
You need to be logged in to leave comments. Login now