##// END OF EJS Templates
v2.9.2 :: Update 'upload_multiple_files_advance' api and Add views resource
eynilupu -
r4:27e924dd52b0
parent child
Show More
1 NO CONTENT: modified file, binary diff hidden
@@ -1,825 +1,871
1 1 from ckanapi import RemoteCKAN
2 from datetime import datetime
3 from tqdm import tqdm
2 4 #from ckanapi.errors import NotAuthorized, NotFound, ValidationError, SearchQueryError, SearchError, CKANAPIError, ServerIncompatibleError
3 5 import sys
4 6 import platform
5 7 import os
6 8 import tempfile
7 9 import shutil
8 10 import zipfile
9 11 import concurrent.futures
10 12 import requests
11 13 import json
12 14 import pathlib
13 15 import uuid
14 from datetime import datetime
15 from tqdm import tqdm
16 16
17 17 class JROAPI():
18 18 """
19 19 FINALIDAD:
20 20 Script para administrar y obtener la data del repositorio por medio de APIs.
21 21
22 22 REQUISITIOS PREVIOS:
23 23 - Paso 1: Tener "pip [Python 2]" o "pip3 [Python 3]" instalado:
24 24 - Paso 2: Instalar lo siguiente como admininstrador:
25 25 En Python 2
26 26 - pip install ckanapi==4.5
27 27 - pip install requests
28 - pip install pathlib
29 - pip install futures
28 30 - pip install tqdm
29 En Python 3
31 En Python > 3
30 32 - pip3 install ckanapi==4.5
31 33 - pip3 install requests
32 34 - pip3 install tqdm
33 35
34 36 FUNCIONES DISPONIBLES:
35 37 - action
36 38 - upload_file
37 39 - upload_multiple_files
38 40 - upload_multiple_files_advance
39 41 - show
40 42 - search
41 43 - create
42 44 - patch
43 45 - delete
44 46 - download_files
45 47
46 48 EJEMPLOS:
47 49 #1:
48 50 with JROAPI('http://demo.example.com', Authorization='#########') as <access_name>:
49 51 ... some operation(s) ...
50 52 #2:
51 53 <access_name> = JROAPI('http://example.com', Authorization='#########')
52 54 ... some operation(s) ...
53 55 <access_name>.ckan.close()
54 56
55 57 REPORTAR ALGUN PROBLEMA:
56 58 Debe enviar un correo a eynilupu@igp.gob.pe detallando los siguientes pasos:
57 1) Identifiquese
58 2) Describir el problema
59 3) ¿En que funcion esta el problema?
60 4) ¿Que esperaba que hiciera la funcion sin el problema?
59 1) Correo para contactarlo
60 2) Descripcion del problema
61 3) ¿En que paso o seccion encontro el problema?
62 4) ¿Cual era el resultado que usted esperaba?
61 63 """
def __init__(self, url, Authorization=None):
    """Create a client bound to one CKAN site.

    url: address of the CKAN site; it is also embedded in the user agent.
    Authorization: optional API key. It is handed to RemoteCKAN and kept on
        the instance (download_by_step sends it as the ``Authorization``
        header).
    """
    user_agent = 'CKAN_JRO/1.1 (+{})'.format(str(url))
    self.ckan = RemoteCKAN(url, apikey=Authorization, user_agent=user_agent)
    self.Authorization = Authorization
    # Path separator appended to user supplied directories.
    self.separator = '\\' if platform.system() == 'Windows' else '/'

    # Size, in bytes, of the pieces requested when streaming a download.
    self.chunk_size = 1024
    # Scratch attributes used by the other methods of the class.
    self.list, self.dict, self.str = [], {}, ''
    self.check, self.cont = 1, 0
79 81
def __enter__(self):
    """Start a ``with`` block; the client itself is the managed object."""
    client = self
    return client
82 84
def __exit__(self, *args):
    """Leave a ``with`` block: close the underlying RemoteCKAN session.

    The exception details received in *args* are ignored and nothing is
    returned, so an exception raised inside the block keeps propagating.
    """
    session = self.ckan
    session.close()
85 87
def action(self, action, **kwargs):
    """Call a CKAN API action by name.

    The available actions are listed in "GUIA DE SCRIPT.pdf".

    action: name of the API action to call.
    kwargs: parameters forwarded to the action. For ``package_search`` the
        keywords ``facet_mincount``, ``facet_limit`` and ``facet_field`` are
        renamed to their dotted form (``facet.mincount``, ...), which cannot
        be written as Python keyword names.

    Returns whatever the action returns. If the call raises, the exception
    object is returned instead of being raised. KeyboardInterrupt and
    SystemExit are not intercepted.

    Example:
        <access_name>.action(<consuming API>, param_1=<value>, ...)
    """
    if action == 'package_search':
        for facet in ('facet_mincount', 'facet_limit', 'facet_field'):
            if facet in kwargs:
                kwargs[facet.replace('_', '.')] = kwargs.pop(facet)
    try:
        return getattr(self.ckan.action, action)(**kwargs)
    except Exception as exc:
        return exc
111 113
def upload_file(self, dataset_id, file_path, file_date, file_type, **kwargs):
    """Upload a single file to a dataset as a new resource.

    The accepted extra parameters are listed in "GUIA DE SCRIPT.pdf".

    dataset_id: sent as ``package_id``.
    file_path: path of the file to upload; its base name becomes the
        resource ``name``.
    file_date: sent as ``file_date``.
    file_type: sent as ``voc_file_type``.
    kwargs: extra resource fields. ``package_id``, ``upload``,
        ``voc_file_type`` and ``name`` are ignored because they are derived
        from the positional arguments. ``others`` defaults to '' and a list
        is JSON-encoded. ``format`` defaults to the upper-cased file
        suffixes (for example 'TAR.GZ').

    Returns the ``resource_create`` result, an error string when the file
    does not exist, or the exception object when the upload fails.

    Usage:
        <access_name>.upload_file(dataset_id=<str>, file_date=<str>,
                                  file_path=<str>, file_type=<str>, ...)
    """
    # TODO (from the original author): handle keyboard interruption.
    if not os.path.isfile(file_path):
        return 'File "%s" not exist' % (file_path)

    # Built per call so nothing from a previous upload leaks into this one.
    reserved = ('package_id', 'upload', 'voc_file_type', 'name')
    extras = dict((key, value) for key, value in kwargs.items() if key not in reserved)

    if 'others' not in kwargs:
        extras['others'] = ''
    elif isinstance(kwargs['others'], list):
        extras['others'] = json.dumps(kwargs['others'])

    if 'format' not in extras:
        suffixes = ''.join(pathlib.Path(file_path).suffixes)
        if suffixes:
            extras['format'] = suffixes.upper()[1:]  # drop the leading dot

    try:
        # The handle only needs to live for the duration of the request.
        with open(file_path, 'rb') as stream:
            return self.ckan.action.resource_create(
                package_id=dataset_id,
                file_date=file_date,
                upload=stream,
                voc_file_type=file_type,
                name=pathlib.Path(file_path).name,
                **extras
            )
    except Exception as exc:
        return exc
149 152
150
def upload_multiple_files_advance(self, dataset_id, path_files, file_date, file_type, max_size=100, ignore_repetition=False, **kwargs):
    """Upload several files to a dataset in size-limited batches.

    The files are sent with the ``package_revise`` action, one request per
    batch. The accepted extra parameters are listed in "GUIA DE SCRIPT.pdf".

    dataset_id: dataset id or name. A value that parses as a UUID is matched
        as ``id``, anything else as ``name``.
    path_files: a directory (every regular file directly inside it is used)
        or a list of file paths. Files are processed in sorted order; the
        caller's list is not modified.
    file_date: ``file_date`` stored on every new resource.
    file_type: ``voc_file_type`` stored on every new resource.
    max_size: maximum accumulated size of one batch, in units of 1048576
        bytes. A single file larger than this aborts the call.
    ignore_repetition: when False, a file whose name (case-insensitive)
        already exists in the dataset aborts the call; when True it is
        skipped with a printed warning.
    kwargs: extra fields copied to every resource. ``package_id``,
        ``upload``, ``voc_file_type`` and ``name`` are ignored. ``others``
        defaults to '' and a list is JSON-encoded. ``format`` defaults to
        the upper-cased suffixes of each file.

    Returns the result of the last ``package_revise`` call, an error string
    when validation fails, or the exception object when a request fails
    (batches already sent are not rolled back).

    Usage:
        <access_name>.upload_multiple_files_advance(dataset_id=<str>,
            path_files=<str or list of str>, file_date=<str>,
            file_type=<str>, ...)
    """
    # TODO (from the original author): handle keyboard interruption.
    try:
        existing = self.ckan.action.package_show(id=dataset_id)['resources']
    except Exception as exc:
        print('ERROR obtaining metadata dataset:: Use the "print" for more information')
        return exc
    taken_names = set(resource['name'].lower() for resource in existing)

    # Built per call so nothing from a previous upload leaks into this one.
    reserved = ('package_id', 'upload', 'voc_file_type', 'name')
    extras = dict((key, value) for key, value in kwargs.items() if key not in reserved)
    if 'others' not in kwargs:
        extras['others'] = ''
    elif isinstance(kwargs['others'], list):
        extras['others'] = json.dumps(kwargs['others'])

    pending = []  # (name, path, size) of every file that will be uploaded

    def _queue(name, path):
        """Queue one file; return an error message for a rejected duplicate."""
        if name.lower() in taken_names:
            if not ignore_repetition:
                return 'ERROR:: "%s" file already exist in this dataset' % (name)
            print('WARRING:: "' + str(name) + '" file was ignored because already exist in this dataset')
            return None
        pending.append((name, path, os.stat(path).st_size))
        return None

    #---------------CASE: "path" or "path_list"-----------------#
    if type(path_files) is list:
        if len(path_files) == 0:
            return 'ERROR:: "path_list is empty"'
        for item in sorted(path_files):
            if not os.path.isfile(item):
                return 'File "%s" does not exist' % (item)
            error = _queue(pathlib.Path(item).name, item)
            if error is not None:
                return error
    elif type(path_files) is str:
        if not os.path.isdir(path_files):
            return 'ERROR:: Directory "%s" does not exist' % (path_files)
        names = sorted(f for f in os.listdir(path_files) if os.path.isfile(os.path.join(path_files, f)))
        if not names:
            return "ERROR:: There aren't files in this directory"
        for name in names:
            error = _queue(name, os.path.join(path_files, name))
            if error is not None:
                return error
    else:
        return 'ERROR:: "path_files" must be a str or list'

    #------------------------------------------------------------#
    try:
        uuid.UUID(str(dataset_id), version=4)
        match = {'id': str(dataset_id)}
    except ValueError:
        match = {'name': str(dataset_id)}

    #---------------------- BATCHING ----------------------------#
    limit = 1048576 * float(max_size)
    batches = []
    current, current_size = [], 0
    for name, path, size in pending:
        if size > limit:
            return 'ERROR:: The size of the "%s" file is %sMB, please change "max_size" value' % (name, str(round(size / 1048576.0, 1)))
        if current and current_size + size > limit:
            # Start a new batch and restart its size counter.
            batches.append(current)
            current, current_size = [], 0
        current.append((name, path))
        current_size += size
    if current:
        batches.append(current)
    if not batches:
        return "ERROR:: No file(s) found to upload"

    #---------------------- UPLOAD ------------------------------#
    print('BLOCK(S) IN TOTAL:: {}'.format(len(batches)))
    result = None
    for number, batch in enumerate(batches, 1):
        print('---- BLOCK N°{} ----'.format(number))
        resource_extend = []
        files_dict = {}
        handles = []
        try:
            for position, (name, path) in enumerate(batch):
                metadata = {'name': name, 'file_date': file_date, 'voc_file_type': file_type}
                metadata.update(extras)
                if 'format' not in metadata:
                    suffixes = ''.join(pathlib.Path(name).suffixes)
                    if suffixes:
                        metadata['format'] = suffixes.upper()[1:]  # drop the leading dot
                resource_extend.append(metadata)

                # Files are opened only for the batch being sent.
                handle = open(path, 'rb')
                handles.append(handle)
                # The upload key addresses the new resource by its negative
                # index: the first file of a batch of N is resource -N.
                files_dict['update__resources__-' + str(len(batch) - position) + '__upload'] = (name, handle)

            print('BLOCK N°{} :: "{}" file(s) found >> uploading'.format(number, len(batch)))
            try:
                result = self.ckan.call_action(
                    'package_revise',
                    {'match': json.dumps(match), 'update__resources__extend': json.dumps(resource_extend)},
                    files=files_dict
                )
            except Exception as exc:
                print('ERROR :: Use the "print" for more information')
                return exc
            print('BLOCK N°{} :: Uploaded file(s) successfully'.format(number))
        finally:
            for handle in handles:
                handle.close()
    return result
238 283
def upload_multiple_files(self, dataset_id, path_files, date_files, type_files, **kwargs):
    """Upload several files to a dataset, one ``resource_create`` call each.

    The accepted extra parameters are listed in "GUIA DE SCRIPT.pdf".

    dataset_id: sent as ``package_id`` with every file.
    path_files: a directory (every regular file directly inside it is used)
        or a list of file paths. Files are processed in sorted order; the
        caller's list is not modified.
        NOTE(review): per-file lists such as date_files are paired with the
        files in that sorted order - confirm callers pass them accordingly.
    date_files: ``file_date`` for all files (single value) or one value per
        file (list).
    type_files: ``voc_file_type`` for all files (single value) or one value
        per file (list).
    kwargs: extra resource fields; a list value is applied per file, any
        other value to every file. ``others`` may be a tuple (one
        JSON-encoded entry per file), a list (JSON-encoded, shared) or a
        str (shared); it defaults to ''. When ``format`` is not given it is
        derived from the upper-cased suffixes of each file.

    Returns a list with one entry per file, in upload order: the
    ``resource_create`` result, or the exception object when that upload
    failed. Returns an error string when validation fails.

    Usage:
        <access_name>.upload_multiple_files(dataset_id=<str>,
            path_files=<str or list of str>,
            date_files=<str or list of str>,
            type_files=<str or list of str>, ...)
    """
    # TODO (from the original author): handle keyboard interruption.
    #---------------CASE: "path" or "path_list"-----------------#
    if type(path_files) is list:
        if len(path_files) == 0:
            return 'ERROR:: "path_list is empty"'
        paths = sorted(path_files)
        for path in paths:
            if not os.path.isfile(path):
                return 'File "%s" does not exist' % (path)
        names = [pathlib.Path(path).name for path in paths]
    elif type(path_files) is str:
        if not os.path.isdir(path_files):
            return 'ERROR:: Directory "%s" does not exist' % (path_files)
        names = sorted(f for f in os.listdir(path_files) if os.path.isfile(os.path.join(path_files, f)))
        if not names:
            return "ERROR:: There aren't files in this directory"
        paths = [os.path.join(path_files, name) for name in names]
    else:
        return 'ERROR:: "path_files" must be a str or list'

    # per_file: one value per file; common: the same value for every file.
    # 'upload' holds paths here; each file is opened only while it is sent.
    per_file = {'upload': paths, 'name': names}
    if 'format' not in kwargs:
        # Upper-cased suffixes without the leading dot ('' when there are none).
        per_file['format'] = [''.join(pathlib.Path(name).suffixes).upper()[1:] for name in names]
    #------------------------------------------------------------#
    common = {'package_id': dataset_id}
    if type(date_files) is list:
        per_file['file_date'] = date_files
    else:
        common['file_date'] = date_files

    if type(type_files) is list:
        per_file['voc_file_type'] = type_files
    else:
        common['voc_file_type'] = type_files

    for key, value in kwargs.items():
        if key not in per_file and key not in common and key != 'others':
            if type(value) is list:
                per_file[key] = value
            else:
                common[key] = value
    #------------------------------------------#
    if 'others' not in kwargs:
        common['others'] = ''
    elif isinstance(kwargs['others'], tuple):
        per_file['others'] = [json.dumps(entry) for entry in kwargs['others']]
    elif isinstance(kwargs['others'], list):
        common['others'] = json.dumps(kwargs['others'])
    elif isinstance(kwargs['others'], str):
        common['others'] = kwargs['others']
    else:
        return 'ERROR:: "others" must be a tuple, list or str'
    #------------------------------------------#
    if len(set(len(values) for values in per_file.values())) > 1:
        return 'ERROR:: All lists must be the same length: %s' % (len(names))
    #------------------------------------------------------------#
    print('"{}" file(s) found >> uploading'.format(len(names)))
    results = []  # fresh for every call, never shared with other methods
    for index, name in enumerate(names):
        try:
            payload = dict((key, values[index]) for key, values in per_file.items())
            payload.update(common)
            with open(payload['upload'], 'rb') as stream:
                payload['upload'] = stream
                results.append(self.ckan.action.resource_create(**payload))
            print('File #{} :: "{}" was uploaded successfully'.format(index + 1, name))
        except Exception as exc:
            results.append(exc)
            print('File #{} :: Error uploading "{}" file'.format(index + 1, name))
    return results
347 393
def show(self, type_option, id, **kwargs):
    """Fetch one specific object from the repository.

    The accepted parameters are listed in "GUIA DE SCRIPT.pdf".

    type_option: kind of object: 'dataset', 'resource', 'project',
        'collaborator', 'member', 'vocabulary', 'tag', 'user' or 'job'.
    id: identifier forwarded to the action as ``id``.
    kwargs: extra parameters forwarded to the action.

    Returns the action result, an error string for an invalid
    ``type_option``, or the exception object when the call fails
    (KeyboardInterrupt and SystemExit are not intercepted).

    Usage:
        <access_name>.show(type_option=<str>, id=<str>, ...)
    """
    if not isinstance(type_option, str):
        return 'ERROR:: "type_option" must be a str'

    actions = {
        'dataset': 'package_show',
        'resource': 'resource_show',
        'project': 'organization_show',
        'collaborator': 'package_collaborator_list_for_user',
        'member': 'organization_list_for_user',
        'vocabulary': 'vocabulary_show',
        'tag': 'tag_show',
        'user': 'user_show',
        'job': 'job_show',
    }
    if type_option not in actions:
        return 'ERROR:: "type_option = %s" is not accepted' % (type_option)

    if type_option == 'tag' and 'vocabulary_id' not in kwargs:
        print('Missing "vocabulary_id" value: assume it is a free tag')
    try:
        return getattr(self.ckan.action, actions[type_option])(id=id, **kwargs)
    except Exception as exc:
        return exc
388 434
def search(self, type_option, query=None, **kwargs):
    """Search for objects that satisfy some criteria.

    The accepted parameters are listed in "GUIA DE SCRIPT.pdf".

    type_option: 'dataset', 'resource' or 'tag'.
    query: optional dict of search criteria.
        - 'dataset': every entry whose value is not None becomes a
          ``key:"value"`` filter in ``fq_list``. ``dataset_start_date`` and
          ``dataset_end_date`` must be 'YYYY-MM-DD' strings and the end date
          must not precede the start date.
        - 'resource': forwarded as ``query``. ``file_date_min`` and
          ``file_date_max`` are validated in the same way.
        - 'tag': the ``search`` entry (list or str) is forwarded as ``query``.
    kwargs: extra parameters forwarded to the action. For 'dataset', ``fq``,
        ``fq_list`` and ``include_private`` are ignored and the ``facet_*``
        keywords are renamed to their dotted form; for 'resource' and 'tag',
        ``fields`` is ignored.

    Returns the action result, an error string when validation fails, or
    the exception object when the call fails.

    Usage:
        <access_name>.search(type_option=<str>, query=<dict>, ...)
    """
    if not isinstance(type_option, str):
        return 'ERROR:: "type_option" must be <str>'

    def _date_error(field):
        """Return an error message unless query[field] is a 'YYYY-MM-DD' str."""
        value = query[field]
        if type(value) is not str:
            return '"%s" must be <str>' % (field)
        try:
            datetime.strptime(value, '%Y-%m-%d')
        except ValueError:
            return '"%s" incorrect: "%s"' % (field, value)
        # strptime also accepts non-padded dates such as '2020-1-5'.
        if len(value) != 10:
            return '"%s", must be: <YYYY-MM-DD>' % (field)
        return None

    def _range_error(low, high):
        """Validate an optional pair of date fields and their order."""
        for field in (low, high):
            if field in query:
                error = _date_error(field)
                if error is not None:
                    return error
        # Both are zero-padded ISO dates here, so string order is date order.
        if low in query and high in query and query[low] > query[high]:
            return '"%s" must be greater than "%s"' % (high, low)
        return None

    try:
        if type_option == 'dataset':
            ignored = ('fq', 'fq_list', 'include_private')
            dotted = ('facet_mincount', 'facet_limit', 'facet_field')
            # Parameters are built per call; nothing is kept on the instance.
            params = {}
            for key, value in kwargs.items():
                if key in ignored:
                    continue
                params[key.replace('_', '.') if key in dotted else key] = value

            if query is not None:
                if type(query) is not dict:
                    return '"query" must be <dict>'
                date_keys = ('dataset_start_date', 'dataset_end_date')
                error = _range_error(*date_keys)
                if error is not None:
                    return error
                filters = [key + ':"' + query[key] + '"' for key in date_keys if key in query]
                for key, value in query.items():
                    if value is not None and key not in date_keys:
                        filters.append(str(key) + ':"' + str(value) + '"')
                params['fq_list'] = filters

            return self.ckan.action.package_search(include_private=True, **params)

        elif type_option == 'resource':
            params = dict((key, value) for key, value in kwargs.items() if key != 'fields')
            if query is not None:
                if type(query) is not dict:
                    return '"query" must be <dict>'
                error = _range_error('file_date_min', 'file_date_max')
                if error is not None:
                    return error
                params['query'] = query
            return self.ckan.action.resources_search(**params)

        elif type_option == 'tag':
            params = dict((key, value) for key, value in kwargs.items() if key != 'fields')
            if 'vocabulary_id' not in kwargs:
                print('Missing "vocabulary_id" value: tags that don’t belong to any vocabulary')
            else:
                print('Only tags that belong to "{}" vocabulary'.format(kwargs['vocabulary_id']))

            if query is not None:
                if type(query) is not dict:
                    return '"query" must be <dict>'
                if 'search' in query:
                    if type(query['search']) is list or type(query['search']) is str:
                        params['query'] = query['search']
                    else:
                        return '"search" must be <list> or <str>'
            return self.ckan.action.tag_search(**params)

        else:
            return 'ERROR:: "type_option = %s" is not accepted' % (type_option)

    except Exception as exc:
        return exc
524 570
def create(self, type_option, select=None, **kwargs):
    """Create an object in the repository.

    The accepted parameters are listed in "GUIA DE SCRIPT.pdf".

    type_option: 'dataset', 'project', 'member', 'collaborator',
        'vocabulary', 'tag', 'user' or 'views'.
    select: only used with 'views': 'resource' creates the default views of
        one resource (a ``package`` keyword is dropped before the call),
        'dataset' creates the default views of a dataset.
    kwargs: parameters forwarded to the action.

    Returns the action result, an error string for an invalid
    ``type_option``/``select``, or the exception object when the call fails.

    Usage:
        <access_name>.create(type_option=<str>, param_1=<value>, ...)
    """
    if not isinstance(type_option, str):
        return 'ERROR:: "type_option" must be <str>'

    actions = {
        'dataset': 'package_create',
        'project': 'organization_create',
        'member': 'organization_member_create',
        'collaborator': 'package_collaborator_create',
        'vocabulary': 'vocabulary_create',
        'tag': 'tag_create',
        'user': 'user_create',
    }
    try:
        if type_option in actions:
            return getattr(self.ckan.action, actions[type_option])(**kwargs)
        if type_option == 'views':
            if select == 'resource':
                # Built per call so no parameter of an earlier call is sent.
                params = dict((key, value) for key, value in kwargs.items() if key != 'package')
                return self.ckan.action.resource_create_default_resource_views(**params)
            if select == 'dataset':
                return self.ckan.action.package_create_default_resource_views(**kwargs)
            return 'ERROR:: "select = %s" is not accepted' % (select)
        return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
    except Exception as exc:
        return exc
570 616
def patch(self, type_option, **kwargs):
    """Update an existing object of the repository.

    The accepted parameters are listed in "GUIA DE SCRIPT.pdf".

    type_option: 'dataset', 'project', 'resource', 'member' or
        'collaborator'. 'member' and 'collaborator' call the corresponding
        ``*_create`` action.
    kwargs: parameters forwarded to the action.

    Returns the action result, an error string for an invalid
    ``type_option``, or the exception object when the call fails.

    Usage:
        <access_name>.patch(type_option=<str>, param_1=<value>, ...)
    """
    if not isinstance(type_option, str):
        return 'ERROR:: "type_option" must be <str>'

    actions = {
        'dataset': 'package_patch',
        'project': 'organization_patch',
        'resource': 'resource_patch',
        'member': 'organization_member_create',
        'collaborator': 'package_collaborator_create',
    }
    if type_option not in actions:
        return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
    try:
        return getattr(self.ckan.action, actions[type_option])(**kwargs)
    except Exception as exc:
        return exc
601 647
def delete(self, type_option, select=None, **kwargs):
    """Delete and/or purge an object of the repository.

    The accepted parameters are listed in "GUIA DE SCRIPT.pdf".

    type_option: 'dataset', 'project', 'resource', 'vocabulary', 'tag' or
        'user'.
    select: required for 'dataset' and 'project': 'delete' or 'purge'.
        Ignored for the other types.
    kwargs: parameters forwarded to the action.

    Returns the action result, an error string for an invalid
    ``type_option``/``select``, or the exception object when the call fails.

    Usage:
        <access_name>.delete(type_option=<str>, param_1=<value>, ...)
    """
    if not isinstance(type_option, str):
        return 'ERROR:: "type_option" must be <str>'

    # Types that offer both a delete and a purge action.
    selectable = {
        'dataset': {'delete': 'package_delete', 'purge': 'dataset_purge'},
        'project': {'delete': 'organization_delete', 'purge': 'organization_purge'},
    }
    direct = {
        'resource': 'resource_delete',
        'vocabulary': 'vocabulary_delete',
        'tag': 'tag_delete',
        'user': 'user_delete',
    }
    try:
        if type_option in selectable:
            if select is None:
                return 'ERROR:: "select" must not be "None"'
            if select not in ('delete', 'purge'):
                return 'ERROR:: "select = %s" is not accepted' % (select)
            name = selectable[type_option][select]
        elif type_option in direct:
            name = direct[type_option]
        else:
            return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
        return getattr(self.ckan.action, name)(**kwargs)
    except Exception as exc:
        return exc
650 696
def f_status_note(self, total, result, path):
    """Write ``status_note.txt`` summarising which files were downloaded.

    total: dict whose 'name' entry lists every file that was requested.
    result: dict whose 'name' entry lists the files that were downloaded.
    path: prefix the file name is appended to (no separator is added).

    An existing note is overwritten. Names of ``total`` that are missing
    from ``result`` are listed as failed.
    """
    downloaded = result['name']
    failed_count = len(total['name']) - len(downloaded)

    lines = ['DOWNLOADED FILE(S): "%s"' % (len(downloaded))]
    lines.extend(' - ' + name for name in downloaded)
    lines.append('')
    lines.append('FAILED FILE(S): "%s"' % (failed_count))
    if failed_count != 0:
        lines.extend(' - ' + name for name in total['name'] if name not in downloaded)
    else:
        lines.append(' "None"')

    # Text mode already translates '\n' to the platform line ending; writing
    # os.linesep here would produce '\r\r\n' on Windows.
    with open(path + 'status_note.txt', 'w') as note:
        note.write('\n'.join(lines) + '\n')
669 715
def f_name(self, name_dataset, ext, tempdir):
    """Return a file name that does not exist yet inside ``tempdir``.

    name_dataset: base name without extension.
    ext: extension, including the leading dot.
    tempdir: directory prefix, joined by plain concatenation (it must end
        with a path separator).

    Returns ``name_dataset + ext`` when that path is free, otherwise the
    first free ``name_dataset(N)ext`` with N = 1, 2, ...

    The search uses local state only, so the method gives a correct answer
    on every call, not just the first one per instance.
    """
    candidate = name_dataset + ext
    counter = 0
    while os.path.exists(tempdir + candidate):
        counter += 1
        candidate = name_dataset + '(' + str(counter) + ')' + ext
    return candidate
685 731
def f_zipdir(self, path, ziph, zip_name):
    """Add every file found under ``path`` to the open archive ``ziph``.

    path: directory that is walked recursively.
    ziph: archive object; its ``write(filename, arcname)`` method is called
        once per file.
    zip_name: archive name, only used in the progress messages.

    Archive member names are relative to the parent of ``path``, so they
    start with the name of the ``path`` directory itself.
    """
    # NOTE(review): the messages are printed once per walked directory;
    # confirm the final one belongs inside the walk loop.
    for folder, _subdirs, filenames in os.walk(path):
        print('.....')
        print('Creating: "{}" >>'.format(zip_name))
        progress = tqdm(iterable=filenames, total=len(filenames))
        for filename in progress:
            source = os.path.join(folder, filename)
            member = os.path.relpath(source, os.path.join(path, '..'))
            ziph.write(source, member)
        print('Created >>')
694 740
def download_by_step(self, response, tempdir_name):
    """Stream one resource to disk, piece by piece.

    response: mapping with the 'url' to fetch and the 'name' of the file to
        create.
    tempdir_name: directory prefix the file name is appended to.

    The request carries the stored API key in the ``Authorization`` header.
    The file is only written when the server answers 200. Errors raised by
    ``requests`` are swallowed on purpose (best effort); nothing is returned.
    """
    auth_header = {'Authorization': self.Authorization}
    try:
        with requests.get(response['url'], stream=True, headers=auth_header) as reply:
            if reply.status_code != 200:
                return
            with open(tempdir_name + response['name'], 'wb') as sink:
                for piece in reply.iter_content(chunk_size=self.chunk_size):
                    # Empty pieces carry no data and are skipped.
                    if piece:
                        sink.write(piece)
    except requests.exceptions.RequestException:
        pass
705 751
def download_files(self, **kwargs):
    '''
    PURPOSE:
        Custom function that downloads the existing files of a dataset,
        either into a new folder or packed into a ZIP archive.

    AVAILABLE PARAMETERS:
        SEE: "GUIA DE SCRIPT.pdf"
        Options handled locally (not forwarded to the server):
            - zip <bool>: pack the downloaded files into a ZIP. Default: False
            - status_note <bool>: also write a status note through
              "f_status_note". Default: False
            - path <str>: existing, writable directory that receives the
              result. Default: '' (names are then relative to the current
              working directory)
        Every other keyword is forwarded to the "url_resources" action.
        "id" is required: it is used to name the output folder / ZIP.

    STRUCTURE:
        <access_name>.download_files(id = <class 'str'>, param_1 = <class 'param_1'>, ...)

    RETURNS:
        - 'DOWNLOAD FINISHED', 'NO FILES WERE DOWNLOADED' or 'FILES NOT FOUND'
        - an 'ERROR:: ...' string when a local option is invalid
        - the exception object when the "url_resources" action fails
    '''
    dict_local = {}
    #----------------------------------------------#
    for flag in ('zip', 'status_note'):
        flag_value = kwargs.get(flag, False)
        if not isinstance(flag_value, bool):
            return 'ERROR:: "%s" must be: <class "bool">' % flag
        dict_local[flag] = flag_value
    #----------------------------------------------#
    if 'path' in kwargs:
        if not isinstance(kwargs['path'], str):
            return 'ERROR:: "path" must be: <class "str">'
        if not os.path.isdir(kwargs['path']):
            return 'ERROR:: "path" does not exist'
        dict_local['path'] = kwargs['path']
        if dict_local['path'][-1:] != self.separator:
            dict_local['path'] += self.separator

        # Probe write access by creating and deleting a uniquely named file.
        # IOError/OSError covers the failure on both Python 2 and Python 3
        # (PermissionError is an OSError subclass), so one branch is enough.
        probe = dict_local['path'] + datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f") + '.txt'
        try:
            with open(probe, 'w'):
                pass
            os.remove(probe)
        except (IOError, OSError):
            return 'ERROR:: Access denied, you are not authorized to write files: "%s"' % (dict_local['path'])
    else:
        dict_local['path'] = ''
    #----------------------------------------------#
    # NOTE(review): keys are added to self.dict and nothing in this method
    # clears it, so parameters from earlier calls may still be present -
    # confirm where self.dict is reset.
    for key, value in kwargs.items():
        if key not in dict_local:
            self.dict[key] = value
    try:
        response = self.ckan.action.url_resources(**self.dict)
    except Exception as exc_value:
        # The error is handed back to the caller instead of being raised.
        return exc_value

    if len(response) == 0:
        return 'FILES NOT FOUND'

    tempdir = None
    try:
        #--------------TEMP PATH---------------#
        if dict_local['zip']:
            # Files are staged in a temporary folder and zipped afterwards.
            tempdir = tempfile.mkdtemp(prefix=kwargs['id'] + '-') + self.separator
            os.mkdir(tempdir + kwargs['id'])
            dir_name = tempdir + kwargs['id'] + self.separator
        else:
            folder = self.f_name(kwargs['id'], '', dict_local['path'])
            os.mkdir(dict_local['path'] + folder)
            dir_name = dict_local['path'] + folder + self.separator
        #-----------DOWNLOAD FILES-------------#
        print('.....')
        print('Downloading "{}" file(s) >>'.format(len(response)))
        name_total = {'name': [resource['name'] for resource in response]}
        with concurrent.futures.ThreadPoolExecutor() as executor:
            pending = [executor.submit(self.download_by_step, resource, dir_name) for resource in response]
            # Advance the bar when a download finishes, not when it is queued.
            for _ in tqdm(iterable=concurrent.futures.as_completed(pending), total=len(pending)):
                pass
        # Success is measured by what actually exists on disk.
        name_check = {'name': [f for f in os.listdir(dir_name) if os.path.isfile(os.path.join(dir_name, f))]}
        print('"{}" downloaded file(s) successfully >>'.format(len(name_check['name'])))
        #--------------------------------------#
        if len(name_check['name']) == 0:
            # Nothing arrived: drop the empty output folder (the temporary
            # folder of the zip case is removed in the "finally" below).
            if not dict_local['zip'] and os.path.exists(dir_name[:-1]):
                shutil.rmtree(dir_name[:-1])
            return 'NO FILES WERE DOWNLOADED'
        #----------Status Note---------#
        if dict_local['status_note']:
            print('.....')
            print('Creating: "status_note.txt" >>')
            self.f_status_note(name_total, name_check, dir_name)
            print('Created>>')
        #----------ZIP CREATE----------#
        if dict_local['zip']:
            zip_name = self.f_name(kwargs['id'], '.zip', dict_local['path'])
            # "with" closes the archive even if adding a file fails.
            with zipfile.ZipFile(dict_local['path'] + zip_name, 'w', zipfile.ZIP_DEFLATED, allowZip64=True) as ziph:
                self.f_zipdir(dir_name, ziph, zip_name)
        #------------------------------#
        print('.....')
        return 'DOWNLOAD FINISHED'
    finally:
        # Delete the temporary path on every exit, including errors.
        if tempdir is not None and os.path.exists(tempdir[:-1]):
            shutil.rmtree(tempdir[:-1])
General Comments 0
You need to be logged in to leave comments. Login now