##// END OF EJS Templates
v2.9.2 :: Add 'validator name' upload file and multiple_file
eynilupu -
r8:19d5924fa8b4
parent child
Show More
1 NO CONTENT: modified file, binary diff hidden
@@ -1,878 +1,916
1 1 from ckanapi import RemoteCKAN
2 2 from datetime import datetime
3 3 from tqdm import tqdm
4 4 #from ckanapi.errors import NotAuthorized, NotFound, ValidationError, SearchQueryError, SearchError, CKANAPIError, ServerIncompatibleError
5 5 import sys
6 6 import platform
7 7 import os
8 8 import tempfile
9 9 import shutil
10 10 import zipfile
11 11 import concurrent.futures
12 12 import requests
13 13 import json
14 14 #import pathlib
15 15 import uuid
16 16
17 17 class JROAPI():
18 18 """
19 19 FINALIDAD:
20 20 Script para administrar y obtener la data del repositorio por medio de APIs.
21 21
22 22 REQUISITOS PREVIOS:
23 23 - Paso 1: Tener "pip [Python 2]" o "pip3 [Python 3]" instalado:
24 24 - Paso 2: Instalar lo siguiente como administrador:
25 25 En Python 2
26 26 - pip install ckanapi==4.5
27 27 - pip install requests
28 28 - pip install futures
29 29 - pip install tqdm
30 30 En Python > 3
31 31 - pip3 install ckanapi==4.5
32 32 - pip3 install requests
33 33 - pip3 install tqdm
34 34
35 35 FUNCIONES DISPONIBLES:
36 36 - action
37 37 - upload_file
38 38 - upload_multiple_files
39 39 - upload_multiple_files_advance
40 40 - show
41 41 - search
42 42 - create
43 43 - patch
44 44 - delete
45 45 - download_files
46 46
47 47 EJEMPLOS:
48 48 #1:
49 49 with JROAPI('http://demo.example.com', Authorization='#########') as <access_name>:
50 50 ... some operation(s) ...
51 51 #2:
52 52 <access_name> = JROAPI('http://example.com', Authorization='#########')
53 53 ... some operation(s) ...
54 54 <access_name>.ckan.close()
55 55
56 56 REPORTAR ALGUN PROBLEMA:
57 57 Debe enviar un correo a eynilupu@igp.gob.pe detallando los siguientes pasos:
58 58 1) Correo para contactarlo
59 59 2) Descripcion del problema
60 60 3) ¿En que paso o seccion encontro el problema?
61 61 4) ¿Cual era el resultado que usted esperaba?
62 62 """
def __init__(self, url, Authorization=None):
    '''
    Open a connection to the CKAN repository and set up the scratch state.

    Parameters:
        url: base address of the CKAN site; it is also embedded in the
            User-Agent string sent with every request.
        Authorization: API key handed to RemoteCKAN and reused as the
            "Authorization" header by download_by_step (may be None).
    '''
    user_agent = 'CKAN_JRO/1.1 (+' + str(url) + ')'
    self.ckan = RemoteCKAN(url, apikey=Authorization, user_agent=user_agent)
    self.Authorization = Authorization

    # Path separator of the host platform (the original author notes that
    # os.sep could replace this check).
    self.separator = '\\' if platform.system() == 'Windows' else '/'

    # Size, in bytes, of the chunks read by download_by_step.
    self.chunk_size = 1024

    # Scratch attributes shared by several methods of the class.
    self.list = []
    self.dict = {}
    self.str = ''
    # Loop flag and counter consumed by f_name.
    self.check = 1
    self.cont = 0
81 81
def __enter__(self):
    '''Context-manager entry: hand back this same object for the "as" target.'''
    return self
84 84
def __exit__(self, *args):
    '''Context-manager exit: close the underlying CKAN session.'''
    # The exception details in *args are ignored; the implicit None return
    # lets any exception raised inside the "with" body keep propagating.
    self.ckan.close()
87 87
def action(self, action, **kwargs):
    '''
    Call any CKAN API action by name.

    Parameters:
        action: name of the CKAN action, e.g. 'package_search'.
        **kwargs: parameters forwarded to the action. For 'package_search'
            the keys facet_mincount, facet_limit and facet_field are renamed
            to their dotted form (facet.mincount, ...), which cannot be
            written directly as Python keyword arguments.

    Returns:
        The value returned by the action, or the exception instance when
        the call fails (errors are returned, not raised).

    Example:
        <access_name>.action(<consuming API>, param_1 = <class 'param_1'>, ...)
    '''
    if action == 'package_search':
        # A local tuple is used so the shared self.list scratch attribute,
        # which other methods read, is not overwritten by this call.
        for facet in ('facet_mincount', 'facet_limit', 'facet_field'):
            if facet in kwargs:
                kwargs[facet.replace('_', '.')] = kwargs.pop(facet)

    try:
        return getattr(self.ckan.action, action)(**kwargs)
    except Exception as exc_value:
        # KeyboardInterrupt / SystemExit are deliberately not captured.
        return exc_value
113 113
def upload_file(self, dataset_id, file_path, file_date, file_type, **kwargs):
    '''
    Upload a single file to a dataset as a new resource.

    Parameters:
        dataset_id: id or name of the target dataset.
        file_path: path of the local file; its base name becomes the
            resource name.
        file_date: value sent as the resource "file_date" field.
        file_type: value sent as the resource "voc_file_type" field.
        **kwargs: extra resource fields. The keys package_id, upload,
            voc_file_type and name are ignored because they are set here.
            "others" defaults to '' and is JSON-encoded when it is a list.

    Returns:
        The result of "resource_create"; an error string when the file does
        not exist or a resource with the same name (case-insensitive) is
        already in the dataset; or the exception instance when a CKAN call
        fails.

    Structure:
        <access_name>.upload_file(dataset_id = <class 'str'>, file_date = <class 'str'>, file_path = <class 'str'>, file_type = <class 'str'>, param_1 = <class 'param_1'>, ...)
    '''
    # Extra fields live in a local dict so they cannot leak into later calls
    # through the shared self.dict attribute.
    reserved = ('package_id', 'upload', 'voc_file_type', 'name')
    extras = dict((key, value) for key, value in kwargs.items() if key not in reserved)
    if 'others' not in kwargs:
        extras['others'] = ''
    elif isinstance(kwargs['others'], list):
        extras['others'] = json.dumps(kwargs['others'])

    if not os.path.isfile(file_path):
        return 'File "%s" not exist' % (file_path)

    #-------------------------PACKAGE SHOW-----------------------#
    try:
        resources = getattr(self.ckan.action, 'package_show')(id=dataset_id)['resources']
    except Exception as exc_value:
        print('ERROR obtaining metadata dataset:: Use the "print" for more information')
        return exc_value

    file_name = os.path.basename(file_path)
    # Resources without a name are treated as having an empty name.
    existing_names = set((resource.get('name') or '').lower() for resource in resources)
    if file_name.lower() in existing_names:
        return 'ERROR:: "%s" file already exist in this dataset' % (file_name)
    #------------------------------------------------------------#

    try:
        # The handle is closed as soon as the request finishes.
        with open(file_path, 'rb') as handle:
            return getattr(self.ckan.action, 'resource_create')(
                package_id=dataset_id,
                file_date=file_date,
                upload=handle,
                voc_file_type=file_type,
                name=file_name,
                **extras
            )
    except Exception as exc_value:
        return exc_value
152 168
def upload_multiple_files_advance(self, dataset_id, path_files, file_date, file_type, max_size=100, max_count=500, ignore_repetition=False, **kwargs):
    '''
    Upload several files to a dataset in blocks, using "package_revise".

    Parameters:
        dataset_id: id (UUID) or name of the target dataset.
        path_files: a directory path (every regular file directly inside it
            is used) or a list of file paths. Files are handled in sorted
            order; a list passed by the caller is not modified.
        file_date: value stored as "file_date" on every new resource.
        file_type: value stored as "voc_file_type" on every new resource.
        max_size: upper bound, in MB, for the summed file sizes of one block
            and for any single file.
        max_count: upper bound for the number of files in one block; must be
            between 1 and 999.
        ignore_repetition: when False, a file whose name already exists in
            the dataset (case-insensitive) aborts the call with an error
            string; when True it is skipped with a printed warning.
        **kwargs: extra fields copied onto every resource. The keys
            package_id, upload, voc_file_type and name are ignored. "others"
            defaults to '' and is JSON-encoded when it is a list.

    Returns:
        The result of the last "package_revise" call, an error string when
        validation fails, or the exception instance when a CKAN call fails.

    Structure:
        <access_name>.upload_multiple_files_advance(dataset_id = <class 'str'>, path_files = <class 'list of strings'>, file_date = <class 'str'>, file_type = <class 'str'>, param_1 = <class 'param_1'>, ...)
    '''
    #-------------------------PACKAGE SHOW-----------------------#
    try:
        resources = getattr(self.ckan.action, 'package_show')(id=dataset_id)['resources']
    except Exception as exc_value:
        print('ERROR obtaining metadata dataset:: Use the "print" for more information')
        return exc_value
    # Resources without a name are treated as having an empty name.
    existing_names = set((resource.get('name') or '').lower() for resource in resources)

    #----------------------EXTRA FIELDS--------------------------#
    # Kept in a local dict so values from earlier calls (previously stored in
    # the shared self.dict) cannot end up on these resources.
    reserved = ('package_id', 'upload', 'voc_file_type', 'name')
    extras = dict((key, value) for key, value in kwargs.items() if key not in reserved)
    if 'others' not in kwargs:
        extras['others'] = ''
    elif isinstance(kwargs['others'], list):
        extras['others'] = json.dumps(kwargs['others'])

    #---------------CASE : "path" or "path_list"-----------------#
    if isinstance(path_files, list):
        if not path_files:
            return 'ERROR:: "path_list is empty"'
        candidates = [(os.path.basename(path), path) for path in sorted(path_files)]
    elif isinstance(path_files, str):
        if not os.path.isdir(path_files):
            return 'ERROR:: Directory "%s" does not exist' % (path_files)
        names = sorted(
            name for name in os.listdir(path_files)
            if os.path.isfile(os.path.join(path_files, name))
        )
        if not names:
            return "ERROR:: There aren't files in this directory"
        candidates = [(name, os.path.join(path_files, name)) for name in names]
    else:
        return 'ERROR:: "path_files" must be a str or list'

    # Only paths and sizes are recorded here; files are opened block by block
    # right before they are sent, so no handle is left open on an early return.
    selected = []
    for name, path in candidates:
        if not os.path.isfile(path):
            return 'File "%s" does not exist' % (path)
        if name.lower() in existing_names:
            if not ignore_repetition:
                return 'ERROR:: "%s" file already exist in this dataset' % (name)
            print('WARRING:: "'+ str(name) +'" file was ignored because already exist in this dataset')
            continue
        selected.append({'name': name, 'path': path, 'size': os.stat(path).st_size})

    if not selected:
        return "ERROR:: No file(s) found to upload"

    #----------------------SPLIT IN BLOCKS-----------------------#
    if not 1 <= int(max_count) <= 999:
        return 'ERROR:: The count of the number of files must be between 1 and 999, please change "max_count" value'
    size_limit = 1024 * 1024 * float(max_size)
    count_limit = int(max_count)

    blocks = [[]]
    block_size = 0
    for item in selected:
        if item['size'] > size_limit:
            # Float division so the reported size is also right on Python 2.
            size_mb = round(item['size'] / (1024.0 * 1024.0), 2)
            return 'ERROR:: The size of the "%s" file is %sMB aprox, please change "max_size" value' % (item['name'], str(size_mb))
        if block_size + item['size'] <= size_limit and len(blocks[-1]) < count_limit:
            blocks[-1].append(item)
            block_size = block_size + item['size']
        else:
            blocks.append([item])
            block_size = item['size']

    #------------------------------------------------------------#
    # A UUID selects the dataset by id; anything else is taken as its name.
    try:
        uuid.UUID(str(dataset_id), version=4)
        match = json.dumps({'id': str(dataset_id)})
    except ValueError:
        match = json.dumps({'name': str(dataset_id)})

    #--------------------------UPLOAD----------------------------#
    print('BLOCK(S) IN TOTAL:: {}'.format(len(blocks)))
    result = None
    for number, block in enumerate(blocks, 1):
        print('---- BLOCK N°{} ----'.format(number))
        resource_extend = []
        files_dict = {}
        handles = []
        try:
            for position, item in enumerate(block):
                handle = open(item['path'], 'rb')
                handles.append(handle)

                resource = {'name': item['name'], 'file_date': file_date, 'voc_file_type': file_type}
                resource.update(extras)
                resource_extend.append(resource)
                # Negative indexes address the resources appended by
                # "update__resources__extend", counted from the end.
                files_dict['update__resources__-'+ str(len(block)-position) +'__upload'] = (item['name'], handle)

            print('BLOCK N°{} :: "{}" file(s) found >> uploading'.format(number, len(block)))
            try:
                result = self.ckan.call_action(
                    'package_revise',
                    {'match': match, 'update__resources__extend': json.dumps(resource_extend)},
                    files=files_dict
                )
            except Exception as exc_value:
                print('ERROR :: Use the "print" for more information')
                return exc_value
            print('BLOCK N°{} :: Uploaded file(s) successfully'.format(number))
        finally:
            for handle in handles:
                handle.close()
    return result
290 306
def upload_multiple_files(self, dataset_id, path_files, date_files, type_files, ignore_repetition=False, **kwargs):
    '''
    Upload several files to a dataset, one "resource_create" call per file.

    Parameters:
        dataset_id: id or name of the target dataset.
        path_files: a directory path (every regular file directly inside it
            is used) or a list of file paths. Files are handled in sorted
            order; a list passed by the caller is not modified.
        date_files: "file_date" value; a single value shared by all files or
            a list with one value per file.
        type_files: "voc_file_type" value; single value or one per file.
        ignore_repetition: when False, a file whose name already exists in
            the dataset (case-insensitive) aborts the call with an error
            string; when True it is skipped with a printed warning.
        **kwargs: extra resource fields. A list value is read as one value
            per file, any other value is shared by all files. "others" is
            special: a tuple holds one entry per file (each JSON-encoded),
            a list is JSON-encoded and shared, a str is shared as given.

    Returns:
        A new list with one entry per uploaded file: the "resource_create"
        result, or the exception instance when that upload failed. An error
        string is returned when validation fails, and the exception instance
        when the dataset metadata cannot be read.

    Structure:
        <access_name>.upload_multiple_files(dataset_id = <class 'str'>, path_files = <class 'str'> or <class 'list of strings'>, date_files = <class 'str'> or <class 'list of strings'>, type_files = <class 'str'> or <class 'list of strings'>, param_1 = <class 'param_1'>, ...)
    '''
    #-------------------------PACKAGE SHOW-----------------------#
    try:
        resources = getattr(self.ckan.action, 'package_show')(id=dataset_id)['resources']
    except Exception as exc_value:
        print('ERROR obtaining metadata dataset:: Use the "print" for more information')
        return exc_value
    # Resources without a name are treated as having an empty name.
    existing_names = set((resource.get('name') or '').lower() for resource in resources)

    #---------------CASE : "path" or "path_list"-----------------#
    if isinstance(path_files, list):
        if not path_files:
            return 'ERROR:: "path_list is empty"'
        candidates = [(os.path.basename(path), path) for path in sorted(path_files)]
    elif isinstance(path_files, str):
        if not os.path.isdir(path_files):
            return 'ERROR:: Directory "%s" does not exist' % (path_files)
        names = sorted(
            name for name in os.listdir(path_files)
            if os.path.isfile(os.path.join(path_files, name))
        )
        if not names:
            return "ERROR:: There aren't files in this directory"
        candidates = [(name, os.path.join(path_files, name)) for name in names]
    else:
        return 'ERROR:: "path_files" must be a str or list'

    # Values that differ per file. 'upload' holds paths at this stage; each
    # file is opened only while it is being sent, so nothing stays open when
    # validation fails or after the upload ends.
    per_file = {'upload': [], 'name': []}
    for name, path in candidates:
        if not os.path.isfile(path):
            return 'File "%s" does not exist' % (path)
        if name.lower() in existing_names:
            if not ignore_repetition:
                return 'ERROR:: "%s" file already exist in this dataset' % (name)
            print('WARRING:: "'+ str(name) +'" file was ignored because already exist in this dataset')
            continue
        per_file['upload'].append(path)
        per_file['name'].append(name)

    #------------------------------------------------------------#
    # Values shared by every file.
    shared = {'package_id': dataset_id}
    if isinstance(date_files, list):
        per_file['file_date'] = date_files
    else:
        shared['file_date'] = date_files

    if isinstance(type_files, list):
        per_file['voc_file_type'] = type_files
    else:
        shared['voc_file_type'] = type_files

    for key, value in kwargs.items():
        if key in per_file or key in shared or key == 'others':
            continue
        if isinstance(value, list):
            per_file[key] = value
        else:
            shared[key] = value
    #------------------------------------------#
    if 'others' not in kwargs:
        shared['others'] = ''
    elif isinstance(kwargs['others'], tuple):
        per_file['others'] = [json.dumps(entry) for entry in kwargs['others']]
    elif isinstance(kwargs['others'], list):
        shared['others'] = json.dumps(kwargs['others'])
    elif isinstance(kwargs['others'], str):
        shared['others'] = kwargs['others']
    else:
        return 'ERROR:: "others" must be a tuple, list or str'
    #------------------------------------------#
    if len(set(len(values) for values in per_file.values())) > 1:
        return 'ERROR:: All lists must be the same length: %s' % (len(per_file['name']))

    #------------------------------------------------------------#
    print('"{}" file(s) found >> uploading'.format(len(per_file['name'])))
    # A fresh list is returned; the shared self.list attribute may still hold
    # leftovers written by other methods.
    results = []
    for index, name in enumerate(per_file['name']):
        send = dict(shared)
        for key, values in per_file.items():
            send[key] = values[index]
        try:
            with open(send['upload'], 'rb') as handle:
                send['upload'] = handle
                results.append(getattr(self.ckan.action, 'resource_create')(**send))
            print('File #{} :: "{}" was uploaded successfully'.format(index+1, name))
        except Exception as exc_value:
            # One failed upload does not stop the remaining files.
            results.append(exc_value)
            print('File #{} :: Error uploading "{}" file'.format(index+1, name))
    return results
400 438
def show(self, type_option, id, **kwargs):
    '''
    Fetch one specific object from the repository.

    Parameters:
        type_option: kind of object to fetch. Accepted values: 'dataset',
            'resource', 'project', 'collaborator', 'member', 'vocabulary',
            'tag', 'user' and 'job'.
        id: identifier forwarded as the "id" argument of the CKAN action.
        **kwargs: additional arguments forwarded unchanged.

    Returns:
        The action result, an error string for an unsupported or non-str
        "type_option", or the exception instance when the call fails.

    Structure:
        <access_name>.show(type_option = <class 'str'>, id = <class 'str'>, param_1 = <class 'param_1'>, ...)
    '''
    if type(type_option) is not str:
        return 'ERROR:: "type_option" must be a str'

    # Maps each accepted option to the CKAN action that serves it.
    endpoints = {
        'dataset': 'package_show',
        'resource': 'resource_show',
        'project': 'organization_show',
        'collaborator': 'package_collaborator_list_for_user',
        'member': 'organization_list_for_user',
        'vocabulary': 'vocabulary_show',
        'tag': 'tag_show',
        'user': 'user_show',
        'job': 'job_show',
    }
    try:
        if type_option not in endpoints:
            return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
        if type_option == 'tag' and 'vocabulary_id' not in kwargs:
            print('Missing "vocabulary_id" value: assume it is a free tag')
        return getattr(self.ckan.action, endpoints[type_option])(id=id, **kwargs)
    except:
        # Failures are handed back to the caller instead of being raised.
        return sys.exc_info()[1]
441 479
def search(self, type_option, query=None, **kwargs):
    '''
    Search datasets, resources or tags that satisfy some criteria.

    Parameters:
        type_option: 'dataset' (package_search), 'resource'
            (resources_search) or 'tag' (tag_search).
        query: optional dict with the search criteria.
            - 'dataset': every key whose value is not None becomes an
              fq_list filter of the form key:"value". The keys
              dataset_start_date / dataset_end_date must be YYYY-MM-DD
              strings and the start must not be later than the end.
            - 'resource': forwarded as the "query" argument after
              file_date_min / file_date_max get the same date validation.
            - 'tag': the value under the key 'search' (str or list) is
              forwarded as the "query" argument.
        **kwargs: additional arguments for the action. For 'dataset' the
            keys fq, fq_list and include_private are discarded
            (include_private is always sent as True) and the facet_* keys
            are renamed to their dotted form; for 'resource' and 'tag' the
            key "fields" is discarded.

    Returns:
        The action result, an error string when validation fails, or the
        exception instance when the call fails.

    Structure:
        <access_name>.search(type_option = <class 'str'>, query = <class 'dict'>, param_1 = <class 'param_1'>, ...)
    '''
    if type(type_option) is not str:
        return 'ERROR:: "type_option" must be <str>'

    def _date_error(low_key, high_key):
        # Validate the optional date pair in "query"; return the error
        # message to hand back to the caller, or None when both are fine.
        for key in (low_key, high_key):
            if key not in query:
                continue
            value = query[key]
            if type(value) is not str:
                return '"%s" must be <str>' % (key)
            try:
                datetime.strptime(value, '%Y-%m-%d')
            except ValueError:
                return '"%s" incorrect: "%s"' % (key, value)
            # strptime also accepts non zero-padded fields such as 2020-1-5.
            if len(value) != 10:
                return '"%s", must be: <YYYY-MM-DD>' % (key)
            # ISO dates of equal length compare correctly as plain strings.
            if key == high_key and low_key in query and query[low_key] > value:
                return '"%s" must be greater than "%s"' % (high_key, low_key)
        return None

    # Request parameters are built in locals: the shared self.dict/self.list
    # attributes would carry values over from earlier calls.
    try:
        if type_option == 'dataset':
            dropped = ('fq', 'fq_list', 'include_private')
            dotted = ('facet_mincount', 'facet_limit', 'facet_field')
            params = {}
            for key, value in kwargs.items():
                if key in dropped:
                    continue
                if key in dotted:
                    params[key.replace('_', '.')] = value
                else:
                    params[key] = value

            if query is not None:
                if type(query) is not dict:
                    return '"query" must be <dict>'
                error = _date_error('dataset_start_date', 'dataset_end_date')
                if error is not None:
                    return error

                date_keys = ('dataset_start_date', 'dataset_end_date')
                filters = [key + ':"' + query[key] + '"' for key in date_keys if key in query]
                for key, value in query.items():
                    if value is not None and key not in date_keys:
                        filters.append(str(key)+':"'+str(value)+'"')
                params['fq_list'] = filters

            return getattr(self.ckan.action, 'package_search')(include_private=True, **params)

        if type_option == 'resource':
            params = dict((key, value) for key, value in kwargs.items() if key != 'fields')
            if query is not None:
                if type(query) is not dict:
                    return '"query" must be <dict>'
                error = _date_error('file_date_min', 'file_date_max')
                if error is not None:
                    return error
                params['query'] = query
            return getattr(self.ckan.action, 'resources_search')(**params)

        if type_option == 'tag':
            params = dict((key, value) for key, value in kwargs.items() if key != 'fields')
            if 'vocabulary_id' not in kwargs:
                print('Missing "vocabulary_id" value: tags that don’t belong to any vocabulary')
            else:
                print('Only tags that belong to "{}" vocabulary'.format(kwargs['vocabulary_id']))

            if query is not None:
                if type(query) is not dict:
                    return '"query" must be <dict>'
                if 'search' in query:
                    if type(query['search']) is list or type(query['search']) is str:
                        params['query'] = query['search']
                    else:
                        return '"search" must be <list> or <str>'
            return getattr(self.ckan.action, 'tag_search')(**params)

        return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
    except Exception as exc_value:
        return exc_value
577 615
def create(self, type_option, select=None, **kwargs):
    '''
    Create an object in the repository.

    Parameters:
        type_option: kind of object to create. Accepted values: 'dataset',
            'project', 'member', 'collaborator', 'vocabulary', 'tag',
            'user' and 'views'.
        select: only used with type_option='views'; 'resource' creates the
            default views of one resource (the "package" argument is not
            forwarded), 'dataset' creates them for a whole dataset.
        **kwargs: arguments forwarded to the CKAN action.

    Returns:
        The action result, an error string for unsupported options, or the
        exception instance when the call fails.

    Structure:
        <access_name>.create(type_option = <class 'str'>, param_1 = <class 'param_1'>, ...)
    '''
    if type(type_option) is not str:
        return 'ERROR:: "type_option" must be <str>'

    # Options that forward their keyword arguments to one action unchanged.
    endpoints = {
        'dataset': 'package_create',
        'project': 'organization_create',
        'member': 'organization_member_create',
        'collaborator': 'package_collaborator_create',
        'vocabulary': 'vocabulary_create',
        'tag': 'tag_create',
        'user': 'user_create',
    }
    try:
        if type_option in endpoints:
            return getattr(self.ckan.action, endpoints[type_option])(**kwargs)

        if type_option == 'views':
            if select == 'resource':
                # Built locally: the shared self.dict attribute may still
                # hold arguments written by earlier calls of other methods.
                params = dict((key, value) for key, value in kwargs.items() if key != 'package')
                return getattr(self.ckan.action, 'resource_create_default_resource_views')(**params)
            if select == 'dataset':
                return getattr(self.ckan.action, 'package_create_default_resource_views')(**kwargs)
            return 'ERROR:: "select = %s" is not accepted' % (select)

        return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
    except Exception as exc_value:
        return exc_value
623 661
def patch(self, type_option, **kwargs):
    '''
    Update an existing object of the repository.

    Parameters:
        type_option: kind of object to update. Accepted values: 'dataset',
            'project', 'resource', 'member' and 'collaborator'. The last
            two are sent to the corresponding "*_create" actions.
        **kwargs: arguments forwarded unchanged to the CKAN action.

    Returns:
        The action result, an error string for an unsupported or non-str
        "type_option", or the exception instance when the call fails.

    Structure:
        <access_name>.patch(type_option = <class 'str'>, param_1 = <class 'param_1'>, ...)
    '''
    if type(type_option) is not str:
        return 'ERROR:: "type_option" must be <str>'

    endpoints = {
        'dataset': 'package_patch',
        'project': 'organization_patch',
        'resource': 'resource_patch',
        'member': 'organization_member_create',
        'collaborator': 'package_collaborator_create',
    }
    try:
        endpoint = endpoints.get(type_option)
        if endpoint is None:
            return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
        return getattr(self.ckan.action, endpoint)(**kwargs)
    except:
        # Failures are handed back to the caller instead of being raised.
        return sys.exc_info()[1]
654 692
def delete(self, type_option, select=None, **kwargs):
    '''
    Delete and/or purge an object of the repository.

    Parameters:
        type_option: kind of object. Accepted values: 'dataset', 'project',
            'resource', 'vocabulary', 'tag' and 'user'.
        select: required for 'dataset' and 'project', where it must be
            'delete' or 'purge'; not consulted for the other options.
        **kwargs: arguments forwarded unchanged to the CKAN action.

    Returns:
        The action result, an error string for unsupported options, or the
        exception instance when the call fails.

    Structure:
        <access_name>.delete(type_option = <class 'str'>, param_1 = <class 'param_1'>, ...)
    '''
    if type(type_option) is not str:
        return 'ERROR:: "type_option" must be <str>'

    # (delete action, purge action) for the options that support both.
    two_modes = {
        'dataset': ('package_delete', 'dataset_purge'),
        'project': ('organization_delete', 'organization_purge'),
    }
    # Options with a single delete action.
    one_mode = {
        'resource': 'resource_delete',
        'vocabulary': 'vocabulary_delete',
        'tag': 'tag_delete',
        'user': 'user_delete',
    }
    try:
        if type_option in two_modes:
            if select is None:
                return 'ERROR:: "select" must not be "None"'
            delete_action, purge_action = two_modes[type_option]
            if 'delete' == select:
                endpoint = delete_action
            elif 'purge' == select:
                endpoint = purge_action
            else:
                return 'ERROR:: "select = %s" is not accepted' % (select)
        elif type_option in one_mode:
            endpoint = one_mode[type_option]
        else:
            return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
        return getattr(self.ckan.action, endpoint)(**kwargs)
    except:
        # Failures are handed back to the caller instead of being raised.
        return sys.exc_info()[1]
703 741
def f_status_note(self, total, result, path):
    '''
    Write "status_note.txt" listing the downloaded and the failed files.

    Parameters:
        total: dict whose 'name' entry lists every file that was requested.
        result: dict whose 'name' entry lists the files actually downloaded.
        path: prefix the file name is appended to as-is, so it has to end
            with a path separator to designate a directory.

    An existing "status_note.txt" at that location is overwritten.
    '''
    downloaded = result['name']
    failed_count = len(total['name']) - len(downloaded)

    # Lines end with '\n': the file is opened in text mode, which already
    # converts it to the platform line ending (writing os.linesep there
    # would double the carriage return on Windows).
    pieces = ['DOWNLOADED FILE(S): "%s"' % (len(downloaded)), '\n']
    for name in downloaded:
        pieces.append(' - '+ name + '\n')
    pieces.append('\n')

    pieces.append('FAILED FILE(S): "%s"' % (failed_count))
    pieces.append('\n')
    if failed_count != 0:
        done = set(downloaded)
        for name in total['name']:
            if name not in done:
                pieces.append(' - '+ name + '\n')
    else:
        pieces.append(' "None"'+ '\n')

    # The context manager guarantees the note is flushed and closed.
    with open(path+'status_note.txt', 'w') as file_txt:
        file_txt.write(''.join(pieces))
722 760
def f_name(self, name_dataset, ext, tempdir):
    """Return the first unused file/directory name inside ``tempdir``.

    Candidates are tried in order: ``name_dataset + ext``, then
    ``name_dataset(1) + ext``, ``name_dataset(2) + ext``, and so on.

    Parameters:
        name_dataset: base name (without extension).
        ext:          extension including the dot, or ``''`` for a directory.
        tempdir:      directory prefix; concatenated directly with the
                      candidate, so it must end with a path separator
                      (or be ``''`` for the current directory).

    Returns:
        The first candidate for which ``tempdir + candidate`` does not exist.
    """
    # The counter is kept in locals on purpose: storing it on the instance
    # would make every call after the first skip the search and return the
    # name computed by the previous call.
    candidate = name_dataset + ext
    counter = 0
    while os.path.exists(tempdir + candidate):
        counter += 1
        candidate = name_dataset + '(' + str(counter) + ')' + ext
    return candidate
738 776
def f_zipdir(self, path, ziph, zip_name):
    """Add every file found under ``path`` to the open archive ``ziph``.

    Parameters:
        path:     directory to walk.
        ziph:     object exposing ``write(filename, arcname)``; the caller
                  passes an open ``zipfile.ZipFile``.
        zip_name: archive name, used only in the progress messages.

    Archive member names are relative to the parent of ``path``, so the
    last component of ``path`` becomes the top-level folder in the archive.
    """
    archive_root = os.path.join(path, '..')
    for current_dir, _, file_names in os.walk(path):
        print('.....')
        print('Creating: "{}" >>'.format(zip_name))
        for file_name in tqdm(iterable=file_names, total=len(file_names)):
            source = os.path.join(current_dir, file_name)
            member = os.path.relpath(source, archive_root)
            ziph.write(source, member)
        print('Created >>')
747 785
def download_by_step(self, response, tempdir_name):
    """Stream one resource to disk in chunks.

    Parameters:
        response:     mapping with the keys ``'url'`` (address to fetch)
                      and ``'name'`` (file name to store it under).
        tempdir_name: directory prefix; concatenated directly with the
                      file name, so it must end with a path separator.

    The request carries ``self.Authorization`` in the ``Authorization``
    header and is read ``self.chunk_size`` bytes at a time. Nothing is
    written unless the server answers with HTTP 200. Request errors are
    swallowed (best effort); nothing is returned.
    """
    target = tempdir_name+response['name']
    created = False
    try:
        with requests.get(response['url'], stream=True, headers={'Authorization': self.Authorization}) as resp:
            if resp.status_code == 200:
                with open(target, 'wb') as file:
                    created = True
                    for chunk in resp.iter_content(chunk_size = self.chunk_size):
                        if chunk:
                            file.write(chunk)
    except requests.exceptions.RequestException:
        # The stream broke after the file was created: drop the truncated
        # file so that callers that count the files present in the
        # directory do not report it as a successful download.
        if created:
            try:
                os.remove(target)
            except OSError:
                pass
758 796
def download_files(self, **kwargs):
    '''
    PURPOSE:
        Custom function that downloads the existing files of a dataset.

    AVAILABLE PARAMETERS:
        SEE: "GUIA DE SCRIPT.pdf"
        Handled locally by this method:
            - zip <bool>: pack the downloaded files into a ".zip" stored in "path" (default: False).
            - status_note <bool>: write a "status_note.txt" next to the downloaded files (default: False).
            - path <str>: existing, writable directory where the result is stored (default: '').
        Every other keyword (including "id") is forwarded to the "url_resources" action.

    RETURNS:
        A status/error string, or the exception raised by the "url_resources" call.

    STRUCTURE:
        <access_name>.download_files(id = <class 'str'>, param_1 = <class 'param_1'>, ...)
    '''
    dict_local = {}
    #----------------------------------------------#
    for flag in ('zip', 'status_note'):
        dict_local[flag] = kwargs.get(flag, False)
        if not isinstance(dict_local[flag], bool):
            return 'ERROR:: "%s" must be: <class "bool">' % (flag)
    #----------------------------------------------#
    if 'path' in kwargs:
        if not isinstance(kwargs['path'], str):
            return 'ERROR:: "path" must be: <class "str">'
        if not os.path.isdir(kwargs['path']):
            return 'ERROR:: "path" does not exist'

        dict_local['path'] = kwargs['path']
        if dict_local['path'][-1:] != self.separator:
            dict_local['path'] += self.separator

        # Probe write access by creating and removing a uniquely named file.
        # IOError/OSError covers PermissionError on Python 3 and the
        # equivalent failures on Python 2, so one branch serves both.
        txt = dict_local['path']+datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f")+'.txt'
        try:
            with open(txt, 'w'):
                pass
            os.remove(txt)
        except (IOError, OSError):
            return 'ERROR:: Access denied, you are not authorized to write files: "%s"' % (dict_local['path'])
    else:
        dict_local['path'] = ''
    #----------------------------------------------#
    # NOTE(review): self.dict is instance state created outside this method;
    # keys stored by earlier calls may still be present - confirm it is reset.
    for key, value in kwargs.items():
        if key not in dict_local:
            self.dict[key] = value
    try:
        response = getattr(self.ckan.action, 'url_resources')(**self.dict)
    except Exception as exc_value:
        return exc_value

    if len(response) == 0:
        return 'FILES NOT FOUND'

    tempdir = None
    try:
        #--------------TEMP PATH---------------#
        if dict_local['zip']:
            # Files are staged in a temporary directory and only the
            # archive ends up in "path".
            tempdir = tempfile.mkdtemp(prefix=kwargs['id']+'-')+self.separator
            os.mkdir(tempdir+kwargs['id'])
            dir_name = tempdir + kwargs['id'] + self.separator
        else:
            target_dir = self.f_name(kwargs['id'], '', dict_local['path'])
            os.mkdir(dict_local['path'] + target_dir)
            dir_name = dict_local['path'] + target_dir + self.separator
        #-----------DOWNLOAD FILES-------------#
        print('.....')
        print('Downloading "{}" file(s) >>'.format(len(response)))
        name_total = {'name': []}
        # Leaving the "with" block waits for every submitted download.
        with concurrent.futures.ThreadPoolExecutor() as executor:
            for u in tqdm(iterable=response, total=len(response)):
                name_total['name'].append(u['name'])
                executor.submit(self.download_by_step, u, dir_name)
        # Whatever is present in the directory now counts as downloaded.
        name_check = {}
        name_check['name'] = [f for f in os.listdir(dir_name) if os.path.isfile(os.path.join(dir_name, f))]
        print('"{}" downloaded file(s) successfully >>'.format(len(name_check['name'])))
        #--------------------------------------#
        if len(name_check['name']) == 0:
            # Nothing arrived: remove the empty output directory (the
            # temporary one, if any, is removed in "finally").
            if not dict_local['zip']:
                if os.path.exists(dir_name[:-1]):
                    shutil.rmtree(dir_name[:-1])
            return 'NO FILES WERE DOWNLOADED'
        #----------Status Note---------#
        if dict_local['status_note']:
            print('.....')
            print('Creating: "status_note.txt" >>')
            self.f_status_note(name_total, name_check, dir_name)
            print('Created>>')
        #----------ZIP CREATE----------#
        if dict_local['zip']:
            zip_name = self.f_name(kwargs['id'], '.zip', dict_local['path'])
            # The context manager closes the archive even if adding a file fails.
            with zipfile.ZipFile(dict_local['path'] + zip_name, 'w', zipfile.ZIP_DEFLATED, allowZip64=True) as ziph:
                self.f_zipdir(dir_name, ziph, zip_name)
        #------------------------------#
        print('.....')
        return 'DOWNLOAD FINISHED'
    finally:
        #Delete Temporal Path (also when an error interrupts the process)
        if tempdir is not None and os.path.exists(tempdir[:-1]):
            shutil.rmtree(tempdir[:-1])
General Comments 0
You need to be logged in to leave comments. Login now