##// END OF EJS Templates
v2.9.2 :: Delete 'old functions' create and download resources
eynilupu -
r18:4695b8d73d71
parent child
Show More
1 NO CONTENT: modified file chmod 100755 => 100644, binary diff hidden
1 NO CONTENT: modified file, binary diff hidden
This diff has been collapsed as it changes many lines, (556 lines changed) Show them Hide them
@@ -1,1044 +1,500
1 1 from ckanapi import RemoteCKAN
2 2 from datetime import datetime
3 from tqdm import tqdm
4 3 from CKAN_JRO import logic_download
5 4 from CKAN_JRO import resource
6 5 #from ckanapi.errors import NotAuthorized, NotFound, ValidationError, SearchQueryError, SearchError, CKANAPIError, ServerIncompatibleError
7 6 import sys
8 7 import platform
9 8 import os
10 import tempfile
11 import shutil
12 import zipfile
13 import concurrent.futures
14 9 import requests
15 import json
16 #import pathlib
17 import uuid
18
19 if sys.version_info.major == 3:
20 from urllib.parse import urlparse
21 else:
22 import urlparse
23 10
24 11 class JROAPI():
25 12 """
26 13 FINALIDAD:
27 14 Script para administrar y obtener la data del repositorio por medio de APIs.
28 15
29 16 REQUISITIOS PREVIOS:
30 17 - Paso 1: Tener "pip [Python 2]" o "pip3 [Python 3]" instalado:
31 - Paso 2: Instalar lo siguiente como admininstrador:
32 En Python 2
33 - pip install ckanapi==4.5
34 - pip install requests
35 - pip install futures
36 - pip install tqdm
37 En Python > 3
38 - pip3 install ckanapi==4.5
39 - pip3 install requests
40 - pip3 install tqdm
18 - Paso 2: Instalar los siguientes paquetes:
19 ckanapi==4.7
20 requests
41 21
42 22 FUNCIONES DISPONIBLES:
43 23 - action
44 - upload_file
45 - upload_multiple_files
46 - upload_multiple_files_advance
47 24 - show
48 25 - search
49 26 - create
50 27 - patch
51 28 - delete
52 29 - download_files
53 30
54 31 EJEMPLOS:
55 32 #1:
56 33 with JROAPI('http://demo.example.com', Authorization='#########') as <access_name>:
57 34 ... some operation(s) ...
58 35 #2:
59 36 <access_name> = JROAPI('http://example.com', Authorization='#########')
60 37 ... some operation(s) ...
61 38 <access_name>.ckan.close()
62 39
63 40 REPORTAR ALGUN PROBLEMA:
64 41 Debe enviar un correo a eynilupu@igp.gob.pe detallando los siguientes pasos:
65 42 1) Correo para contactarlo
66 43 2) Descripcion del problema
67 44 3) ¿En que paso o seccion encontro el problema?
68 45 4) ¿Cual era el resultado que usted esperaba?
69 46 """
def __init__(self, url, Authorization=None, secure=True):
    """Open a connection to a CKAN repository.

    Args:
        url: Base URL of the CKAN site.
        Authorization: API key sent with every request, or None for
            anonymous access.
        secure: TLS verification setting. Only the literal ``False``
            disables certificate verification; any other value leaves the
            default session behaviour of ``RemoteCKAN`` untouched.
    """
    self.verify = secure
    if secure is False:
        # A custom session is the only way to make ckanapi skip TLS checks.
        session = requests.Session()
        session.verify = False
    else:
        session = None

    self.url = url
    ua = 'CKAN_JRO/2.9.2 (+' + str(self.url) + ')'
    self.ckan = RemoteCKAN(self.url, apikey=Authorization, user_agent=ua, session=session)
    self.Authorization = Authorization
    # Path separator of the running platform ('\\' on Windows, '/' elsewhere).
    self.separator = os.sep

    # Chunk size (bytes) used when streaming downloads.
    self.chunk_size = 1024
    # Scratch attributes kept for backward compatibility with helpers that
    # read them from the instance.
    self.list = []
    self.dict = {}
    self.str = ''
    self.check = 1
    self.cont = 0
97 74
def __enter__(self):
    """Return the instance itself so it can be used in a ``with`` block."""
    return self
100 77
def __exit__(self, *args):
    """Close the underlying CKAN connection when leaving a ``with`` block.

    Exceptions raised inside the block are not suppressed (returns None).
    """
    self.ckan.close()
103 80
def action(self, action, **kwargs):
    """Call any CKAN API action by name.

    For ``package_search`` the keyword arguments ``facet_mincount``,
    ``facet_limit`` and ``facet_field`` are renamed to their dotted CKAN
    form (``facet.mincount`` ...), since dotted names cannot be written as
    Python keyword arguments.

    Example:
        <access_name>.action(<API name>, param_1=<value>, ...)

    Args:
        action: Name of the CKAN action to invoke.
        **kwargs: Parameters forwarded to the action.

    Returns:
        The action result, or the exception instance if the call failed
        (errors are returned, not raised).
    """
    if action == 'package_search':
        for facet in ('facet_mincount', 'facet_limit', 'facet_field'):
            if facet in kwargs:
                kwargs[facet.replace('_', '.')] = kwargs.pop(facet)

    try:
        return getattr(self.ckan.action, action)(**kwargs)
    except Exception as exc:
        # Only ordinary errors are converted to return values; Ctrl-C and
        # interpreter shutdown must still propagate.
        return exc
129
def upload_file(self, dataset_id, file_date, file_type, file_path=False, url_or_path=False, ignore_repetition=False, **kwargs):
    """Create a single resource (optionally with an attached file) in a dataset.

    Exactly one of ``file_path`` (a local file to upload) or ``url_or_path``
    (a link stored as the resource URL) must be given as a string.

    Args:
        dataset_id: Id or name of the target dataset.
        file_date: Value stored in the resource ``file_date`` field.
        file_type: Value stored in the resource ``voc_file_type`` field.
        file_path: Path of the local file to upload.
        url_or_path: URL (or remote path) to register instead of uploading.
        ignore_repetition: When False, refuse to create a resource whose
            name already exists in the dataset (case-insensitive); when
            True only a warning is printed.
        **kwargs: Extra resource fields. ``package_id``, ``upload`` and
            ``voc_file_type`` are ignored; a list passed as ``others`` is
            JSON-encoded.

    Returns:
        The created resource, an error message string, or the exception
        instance raised by the CKAN call.
    """
    reserved = ('package_id', 'upload', 'voc_file_type')
    # Built locally on every call so fields never leak between uploads.
    params = {key: value for key, value in kwargs.items() if key not in reserved}

    if 'others' not in kwargs:
        params['others'] = ''
    elif isinstance(kwargs['others'], list):
        params['others'] = json.dumps(kwargs['others'])

    has_file = isinstance(file_path, str)
    has_url = isinstance(url_or_path, str)
    if has_file and has_url:
        return 'ERROR:: Choose one: "file_path" or "url_or_path" parameters'

    if has_file:
        if not os.path.isfile(file_path):
            return 'File "%s" not exist' % (file_path)
        params['name'] = os.path.basename(file_path)
    elif has_url:
        params['url'] = url_or_path
        if 'name' not in params:
            params['name'] = os.path.basename(url_or_path)
    else:
        return 'ERROR: Verify "file_path" or "url_or_path" parameters: <class "str"> or choose one'

    try:
        resources = getattr(self.ckan.action, 'package_show')(id=dataset_id)['resources']
    except Exception as exc:
        print('ERROR obtaining metadata dataset:: Use the "print" for more information')
        return exc

    existing = {item['name'].lower() for item in resources}
    if params['name'].lower() in existing:
        if not ignore_repetition:
            return 'ERROR:: "%s" resource already exist in this dataset' % (params['name'])
        print('WARRING:: "' + str(params['name']) + '" resource already exist in this dataset')

    create = getattr(self.ckan.action, 'resource_create')
    try:
        if has_file:
            # The file is opened only for the duration of the request so
            # the handle is always released.
            with open(file_path, 'rb') as handle:
                return create(package_id=dataset_id, file_date=file_date, voc_file_type=file_type, upload=handle, **params)
        return create(package_id=dataset_id, file_date=file_date, voc_file_type=file_type, **params)
    except Exception as exc:
        return exc
199
def upload_multiple_files_advance(self, dataset_id, path_files, file_date, file_type, max_size=100, max_count=500, ignore_repetition=False, **kwargs):
    """Upload many files to a dataset in batches using ``package_revise``.

    Files are grouped into consecutive blocks; each block holds at most
    ``max_count`` files and at most ``max_size`` megabytes, and is sent in a
    single ``package_revise`` call.

    Args:
        dataset_id: Id (UUID) or name of the target dataset.
        path_files: Either a directory path (all regular files directly
            inside it are uploaded) or a list of file paths.
        file_date: Value stored in every resource ``file_date`` field.
        file_type: Value stored in every resource ``voc_file_type`` field.
        max_size: Maximum size of a block, in MB. A single file larger than
            this is rejected.
        max_count: Maximum number of files per block (1 to 999).
        ignore_repetition: When False, abort if a file name already exists
            in the dataset (case-insensitive); when True such files are
            skipped with a warning.
        **kwargs: Extra fields applied to every resource. ``package_id``,
            ``upload``, ``voc_file_type`` and ``name`` are ignored; a list
            passed as ``others`` is JSON-encoded.

    Returns:
        The result of the last ``package_revise`` call, an error message
        string, or the exception instance raised while uploading a block.
    """
    try:
        resources = getattr(self.ckan.action, 'package_show')(id=dataset_id)['resources']
    except Exception as exc:
        print('ERROR obtaining metadata dataset:: Use the "print" for more information')
        return exc
    existing = {item['name'].lower() for item in resources}

    reserved = ('package_id', 'upload', 'voc_file_type', 'name')
    # Built locally on every call so fields never leak between uploads.
    extra = {key: value for key, value in kwargs.items() if key not in reserved}
    if 'others' not in kwargs:
        extra['others'] = ''
    elif isinstance(kwargs['others'], list):
        extra['others'] = json.dumps(kwargs['others'])

    # ---- Resolve the candidate paths (sorted, caller's list untouched) ----
    if isinstance(path_files, list):
        if not path_files:
            return 'ERROR:: "path_list is empty"'
        candidates = sorted(path_files)
    elif isinstance(path_files, str):
        if not os.path.isdir(path_files):
            return 'ERROR:: Directory "%s" does not exist' % (path_files)
        names = sorted(
            entry for entry in os.listdir(path_files)
            if os.path.isfile(os.path.join(path_files, entry))
        )
        if not names:
            return "ERROR:: There aren't files in this directory"
        candidates = [os.path.join(path_files, entry) for entry in names]
    else:
        return 'ERROR:: "path_files" must be a str or list'

    # ---- Keep only files that are not already present in the dataset ----
    selected = []
    for path in candidates:
        if not os.path.isfile(path):
            return 'File "%s" does not exist' % (path)
        name = os.path.basename(path)
        if name.lower() in existing:
            if not ignore_repetition:
                return 'ERROR:: "%s" file already exist in this dataset' % (name)
            print('WARRING:: "' + str(name) + '" file was ignored because already exist in this dataset')
            continue
        selected.append((name, path, os.stat(path).st_size))

    if not selected:
        return "ERROR:: No file(s) found to upload"

    # ---- package_revise "match" clause: by id when it is a UUID, else by name ----
    try:
        uuid.UUID(str(dataset_id), version=4)
        match = json.dumps({'id': str(dataset_id)})
    except ValueError:
        match = json.dumps({'name': str(dataset_id)})

    # ---- Partition into blocks bounded by size and by file count ----
    limit_bytes = 1024 * 1024 * float(max_size)
    limit_count = int(max_count)
    blocks = [[]]
    block_bytes = 0
    for name, path, size in selected:
        if size > limit_bytes:
            return 'ERROR:: The size of the "%s" file is %sMB aprox, please change "max_size" value' % (name, str(round(size / (1024.0 * 1024), 2)))
        if not 1 <= limit_count <= 999:
            return 'ERROR:: The count of the number of files must be between 1 and 999, please change "max_count" value'
        if block_bytes + size > limit_bytes or len(blocks[-1]) + 1 > limit_count:
            blocks.append([])
            block_bytes = 0
        blocks[-1].append((name, path))
        block_bytes += size

    # ---- Upload block by block; files are open only while their block is sent ----
    print('BLOCK(S) IN TOTAL:: {}'.format(len(blocks)))
    result = None
    for number, block in enumerate(blocks, 1):
        print('---- BLOCK N°{} ----'.format(number))
        resource_extend = []
        files_dict = {}
        handles = []
        try:
            for position, (name, path) in enumerate(block):
                meta = {'name': name, 'file_date': file_date, 'voc_file_type': file_type}
                meta.update(extra)
                handle = open(path, 'rb')
                handles.append(handle)
                # Negative index: the resources appended by "extend" are the
                # last len(block) entries of the dataset resource list.
                files_dict['update__resources__-' + str(len(block) - position) + '__upload'] = (name, handle)
                resource_extend.append(meta)

            print('BLOCK N°{} :: "{}" file(s) found >> uploading'.format(number, len(block)))
            result = self.ckan.call_action(
                'package_revise',
                {'match': match, 'update__resources__extend': json.dumps(resource_extend)},
                files=files_dict
            )
            print('BLOCK N°{} :: Uploaded file(s) successfully'.format(number))
        except Exception as exc:
            print('ERROR :: Use the "print" for more information')
            return exc
        finally:
            for handle in handles:
                handle.close()
    return result
337
def upload_multiple_files(self, dataset_id, path_files, date_files, type_files, ignore_repetition=False, **kwargs):
    """Upload many files to a dataset, one ``resource_create`` call per file.

    Any parameter given as a list is applied element-wise (one value per
    uploaded file, in sorted file order); any other value is shared by all
    files. All lists must have as many elements as files being uploaded.

    Args:
        dataset_id: Id or name of the target dataset.
        path_files: Either a directory path (all regular files directly
            inside it are uploaded) or a list of file paths.
        date_files: ``file_date`` value, or a list with one value per file.
        type_files: ``voc_file_type`` value, or a list with one per file.
        ignore_repetition: When False, abort if a file name already exists
            in the dataset (case-insensitive); when True such files are
            skipped with a warning.
        **kwargs: Extra resource fields (list = per file, otherwise shared).
            ``others`` is special: a tuple holds one JSON-encodable value
            per file, a list is JSON-encoded and shared, a str is shared
            as-is.

    Returns:
        A list with one entry per file: the created resource, or the
        exception instance if that upload failed. An error message string
        is returned instead when validation fails.
    """
    try:
        resources = getattr(self.ckan.action, 'package_show')(id=dataset_id)['resources']
    except Exception as exc:
        print('ERROR obtaining metadata dataset:: Use the "print" for more information')
        return exc
    existing = {item['name'].lower() for item in resources}

    # ---- Resolve the candidate paths (sorted, caller's list untouched) ----
    if isinstance(path_files, list):
        if not path_files:
            return 'ERROR:: "path_list is empty"'
        candidates = sorted(path_files)
    elif isinstance(path_files, str):
        if not os.path.isdir(path_files):
            return 'ERROR:: Directory "%s" does not exist' % (path_files)
        entries = sorted(
            entry for entry in os.listdir(path_files)
            if os.path.isfile(os.path.join(path_files, entry))
        )
        if not entries:
            return "ERROR:: There aren't files in this directory"
        candidates = [os.path.join(path_files, entry) for entry in entries]
    else:
        return 'ERROR:: "path_files" must be a str or list'

    # ---- Keep only files that are not already present in the dataset ----
    names = []
    paths = []
    for path in candidates:
        if not os.path.isfile(path):
            return 'File "%s" does not exist' % (path)
        name = os.path.basename(path)
        if name.lower() in existing:
            if not ignore_repetition:
                return 'ERROR:: "%s" file already exist in this dataset' % (name)
            print('WARRING:: "' + str(name) + '" file was ignored because already exist in this dataset')
            continue
        names.append(name)
        paths.append(path)

    # ---- Split the parameters into per-file lists and shared values ----
    per_file = {'name': names}
    shared = {'package_id': dataset_id}
    if isinstance(date_files, list):
        per_file['file_date'] = date_files
    else:
        shared['file_date'] = date_files
    if isinstance(type_files, list):
        per_file['voc_file_type'] = type_files
    else:
        shared['voc_file_type'] = type_files

    for key, value in kwargs.items():
        if key in per_file or key in shared or key in ('others', 'upload'):
            continue
        if isinstance(value, list):
            per_file[key] = value
        else:
            shared[key] = value

    if 'others' not in kwargs:
        shared['others'] = ''
    elif isinstance(kwargs['others'], tuple):
        per_file['others'] = [json.dumps(item) for item in kwargs['others']]
    elif isinstance(kwargs['others'], list):
        shared['others'] = json.dumps(kwargs['others'])
    elif isinstance(kwargs['others'], str):
        shared['others'] = kwargs['others']
    else:
        return 'ERROR:: "others" must be a tuple, list or str'

    if any(len(values) != len(names) for values in per_file.values()):
        return 'ERROR:: All lists must be the same length: %s' % (len(names))

    # ---- Upload; each file is open only while its own request runs ----
    print('"{}" file(s) found >> uploading'.format(len(names)))
    results = []
    create = getattr(self.ckan.action, 'resource_create')
    for index, path in enumerate(paths):
        try:
            send = {key: values[index] for key, values in per_file.items()}
            send.update(shared)
            with open(path, 'rb') as handle:
                send['upload'] = handle
                results.append(create(**send))
            print('File #{} :: "{}" was uploaded successfully'.format(index + 1, names[index]))
        except Exception as exc:
            results.append(exc)
            print('File #{} :: Error uploading "{}" file'.format(index + 1, names[index]))
    return results
469
106
def show(self, type_option, id, **kwargs):
    """Fetch one specific object from the repository.

    Structure:
        <access_name>.show(type_option=<str>, id=<str>, param_1=<value>, ...)

    Args:
        type_option: Kind of object: 'dataset', 'resource', 'project',
            'collaborator', 'member', 'vocabulary', 'tag', 'user' or 'job'.
        id: Identifier forwarded as the ``id`` parameter of the CKAN action.
        **kwargs: Extra parameters forwarded to the CKAN action.

    Returns:
        The action result, an error message string for an unsupported
        ``type_option``, or the exception instance if the call failed.
    """
    if not isinstance(type_option, str):
        return 'ERROR:: "type_option" must be a str'

    actions = {
        'dataset': 'package_show',
        'resource': 'resource_show',
        'project': 'organization_show',
        'collaborator': 'package_collaborator_list_for_user',
        'member': 'organization_list_for_user',
        'vocabulary': 'vocabulary_show',
        'tag': 'tag_show',
        'user': 'user_show',
        'job': 'job_show',
    }
    if type_option not in actions:
        return 'ERROR:: "type_option = %s" is not accepted' % (type_option)

    if type_option == 'tag' and 'vocabulary_id' not in kwargs:
        print('Missing "vocabulary_id" value: assume it is a free tag')

    try:
        return getattr(self.ckan.action, actions[type_option])(id=id, **kwargs)
    except Exception as exc:
        return exc
510 147
def search(self, type_option, query=None, **kwargs):
    """Search datasets, resources or tags that satisfy some criteria.

    Structure:
        <access_name>.search(type_option=<str>, query=<dict>, param_1=<value>, ...)

    Args:
        type_option: 'dataset', 'resource' or 'tag'.
        query: Optional dict of criteria.
            * dataset: every ``key: value`` pair (value not None) becomes a
              ``key:"value"`` filter in ``fq_list``; ``dataset_start_date``
              and ``dataset_end_date`` must be 'YYYY-MM-DD' strings.
            * resource: forwarded as the ``query`` parameter;
              ``file_date_min`` / ``file_date_max`` must be 'YYYY-MM-DD'.
            * tag: only the ``search`` key (str or list) is used.
        **kwargs: Extra parameters forwarded to the CKAN action. For
            datasets ``fq``, ``fq_list`` and ``include_private`` are ignored
            and ``facet_*`` names are converted to ``facet.*``; for
            resources and tags ``fields`` is ignored.

    Returns:
        The action result, an error message string when validation fails,
        or the exception instance if the call failed.
    """
    if not isinstance(type_option, str):
        return 'ERROR:: "type_option" must be <str>'

    def _date_range_error(low, high):
        """Return an error message for an invalid date pair in *query*, else None."""
        for field in (low, high):
            if field not in query:
                continue
            value = query[field]
            if not isinstance(value, str):
                return '"%s" must be <str>' % (field)
            try:
                datetime.strptime(value, '%Y-%m-%d')
            except ValueError:
                return '"%s" incorrect: "%s"' % (field, value)
            # strptime also accepts non zero-padded dates such as 2020-1-1.
            if len(value) != 10:
                return '"%s", must be: <YYYY-MM-DD>' % (field)
        # ISO dates of equal length compare correctly as plain strings.
        if low in query and high in query and query[low] > query[high]:
            return '"%s" must be greater than "%s"' % (high, low)
        return None

    # Parameters are collected in a local dict so nothing leaks into the
    # next search issued through the same instance.
    params = {}
    try:
        if type_option == 'dataset':
            ignored = ('fq', 'fq_list', 'include_private')
            dotted = ('facet_mincount', 'facet_limit', 'facet_field')
            for key, value in kwargs.items():
                if key in ignored:
                    continue
                params[key.replace('_', '.') if key in dotted else key] = value

            if query is not None:
                if not isinstance(query, dict):
                    return '"query" must be <dict>'
                date_keys = ('dataset_start_date', 'dataset_end_date')
                error = _date_range_error(*date_keys)
                if error is not None:
                    return error
                filters = [key + ':"' + query[key] + '"' for key in date_keys if key in query]
                filters.extend(
                    str(key) + ':"' + str(value) + '"'
                    for key, value in query.items()
                    if value is not None and key not in date_keys
                )
                params['fq_list'] = filters

            return getattr(self.ckan.action, 'package_search')(include_private=True, **params)

        if type_option == 'resource':
            params.update((key, value) for key, value in kwargs.items() if key != 'fields')

            if query is not None:
                if not isinstance(query, dict):
                    return '"query" must be <dict>'
                error = _date_range_error('file_date_min', 'file_date_max')
                if error is not None:
                    return error
                params['query'] = query

            return getattr(self.ckan.action, 'resources_search')(**params)

        if type_option == 'tag':
            params.update((key, value) for key, value in kwargs.items() if key != 'fields')

            if 'vocabulary_id' not in kwargs:
                print('Missing "vocabulary_id" value: tags that don’t belong to any vocabulary')
            else:
                print('Only tags that belong to "{}" vocabulary'.format(kwargs['vocabulary_id']))

            if query is not None:
                if not isinstance(query, dict):
                    return '"query" must be <dict>'
                if 'search' in query:
                    if not isinstance(query['search'], (list, str)):
                        return '"search" must be <list> or <str>'
                    params['query'] = query['search']

            return getattr(self.ckan.action, 'tag_search')(**params)

        return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
    except Exception as exc:
        return exc
646 283
def create(self, type_option, select=None, **kwargs):
    """Create an object in the repository.

    Structure:
        <access_name>.create(type_option=<str>, param_1=<value>, ...)

    Args:
        type_option: 'dataset', 'resource', 'project', 'member',
            'collaborator', 'vocabulary', 'tag', 'user' or 'views'.
        select: Only used with 'views': 'resource' creates the default views
            of one resource (the ``package`` keyword is dropped), 'dataset'
            creates them for a whole dataset.
        **kwargs: Parameters forwarded to the underlying action.

    Returns:
        The action result, an error message string for an unsupported
        option, or the exception instance if the call failed.
    """
    if not isinstance(type_option, str):
        return 'ERROR:: "type_option" must be <str>'

    direct = {
        'dataset': 'package_create',
        'project': 'organization_create',
        'member': 'organization_member_create',
        'collaborator': 'package_collaborator_create',
        'vocabulary': 'vocabulary_create',
        'tag': 'tag_create',
        'user': 'user_create',
    }
    try:
        if type_option in direct:
            return getattr(self.ckan.action, direct[type_option])(**kwargs)
        if type_option == 'resource':
            # Resource creation is delegated to the project's resource module.
            return resource.resource_create(self, **kwargs)
        if type_option == 'views':
            if select == 'resource':
                # Built locally so no stale parameters from earlier calls
                # are sent along.
                params = {key: value for key, value in kwargs.items() if key != 'package'}
                return getattr(self.ckan.action, 'resource_create_default_resource_views')(**params)
            if select == 'dataset':
                return getattr(self.ckan.action, 'package_create_default_resource_views')(**kwargs)
            return 'ERROR:: "select = %s" is not accepted' % (select)
        return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
    except Exception as exc:
        return exc
694 331
def patch(self, type_option, **kwargs):
    """Update an existing object in the repository.

    Structure:
        <access_name>.patch(type_option=<str>, param_1=<value>, ...)

    Args:
        type_option: 'dataset', 'project', 'resource', 'member' or
            'collaborator'. Members and collaborators are updated through
            the corresponding ``*_create`` actions.
        **kwargs: Parameters forwarded to the underlying action.

    Returns:
        The action result, an error message string for an unsupported
        option, or the exception instance if the call failed.
    """
    if not isinstance(type_option, str):
        return 'ERROR:: "type_option" must be <str>'

    direct = {
        'dataset': 'package_patch',
        'project': 'organization_patch',
        'member': 'organization_member_create',
        'collaborator': 'package_collaborator_create',
    }
    try:
        if type_option in direct:
            return getattr(self.ckan.action, direct[type_option])(**kwargs)
        if type_option == 'resource':
            # Resource updates are delegated to the project's resource module.
            return resource.resource_patch(self, **kwargs)
        return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
    except Exception as exc:
        return exc
726 363
def delete(self, type_option, select=None, **kwargs):
    """Delete and/or purge an object from the repository.

    Structure:
        <access_name>.delete(type_option=<str>, param_1=<value>, ...)

    Args:
        type_option: 'dataset', 'project', 'resource', 'vocabulary', 'tag'
            or 'user'.
        select: Required for 'dataset' and 'project' ('delete' or 'purge')
            and for 'resource' (forwarded to the resource module).
        **kwargs: Parameters forwarded to the underlying action.

    Returns:
        The action result, an error message string for an unsupported
        option, or the exception instance if the call failed.
    """
    if not isinstance(type_option, str):
        return 'ERROR:: "type_option" must be <str>'

    # Objects that support both a soft delete and a permanent purge.
    by_mode = {
        'dataset': {'delete': 'package_delete', 'purge': 'dataset_purge'},
        'project': {'delete': 'organization_delete', 'purge': 'organization_purge'},
    }
    direct = {
        'vocabulary': 'vocabulary_delete',
        'tag': 'tag_delete',
        'user': 'user_delete',
    }
    try:
        if type_option in by_mode:
            if select is None:
                return 'ERROR:: "select" must not be "None"'
            if select not in ('delete', 'purge'):
                return 'ERROR:: "select = %s" is not accepted' % (select)
            return getattr(self.ckan.action, by_mode[type_option][select])(**kwargs)
        if type_option == 'resource':
            if select is None:
                return 'ERROR:: "select" must not be "None"'
            # Resource removal is delegated to the project's resource module.
            return resource.resource_delete(self, select, **kwargs)
        if type_option in direct:
            return getattr(self.ckan.action, direct[type_option])(**kwargs)
        return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
    except Exception as exc:
        return exc
778
def f_status_note(self, total, result, path):
    """Write ``status_note.txt`` summarising downloaded and failed files.

    Args:
        total: Dict whose ``'name'`` entry lists every file requested.
        result: Dict whose ``'name'`` entry lists the files downloaded.
        path: Directory prefix; the file name is appended to it verbatim, so
            it is expected to end with a path separator.
    """
    downloaded = result['name']
    failed_count = len(total['name']) - len(downloaded)
    done = set(downloaded)

    # Text mode already translates '\n' to the platform line ending, and the
    # context manager guarantees the report is flushed and closed.
    with open(path + 'status_note.txt', 'w') as note:
        note.write('DOWNLOADED FILE(S): "%s"' % (len(downloaded)))
        note.write('\n')
        for name in downloaded:
            note.write(' - ' + name + '\n')
        note.write('\n')

        note.write('FAILED FILE(S): "%s"' % (failed_count))
        note.write('\n')
        if failed_count != 0:
            for name in total['name']:
                if name not in done:
                    note.write(' - ' + name + '\n')
        else:
            note.write(' "None"' + '\n')
797
def f_name(self, name_dataset, ext, tempdir):
    """Return a file name that does not exist yet inside ``tempdir``.

    ``name_dataset + ext`` is used when free; otherwise ``name(1)ext``,
    ``name(2)ext`` ... are tried until an unused name is found.

    Args:
        name_dataset: Base name, without extension.
        ext: Extension, including the leading dot.
        tempdir: Directory prefix; names are appended to it verbatim, so it
            is expected to end with a path separator.

    Returns:
        The first unused file name (without the directory prefix).
    """
    # The search always starts from scratch, so repeated calls on the same
    # instance no longer return the name computed by a previous call.
    candidate = name_dataset + ext
    suffix = 0
    while os.path.exists(tempdir + candidate):
        suffix += 1
        candidate = name_dataset + '(' + str(suffix) + ')' + ext

    # Mirror the instance state the previous implementation left behind.
    # NOTE(review): confirm whether any caller still reads these attributes.
    self.str = candidate
    self.check = 0
    self.cont = suffix + 1
    return candidate
813
def f_zipdir(self, path, ziph, zip_name):
    """Add every file found under *path* to the open archive *ziph*.

    Each file is stored under its path relative to the parent of *path*.
    *zip_name* is only used in the progress messages, which are printed
    once per directory visited by ``os.walk``.
    """
    for current_dir, _, filenames in os.walk(path):
        print('.....')
        print('Creating: "{}" >>'.format(zip_name))
        for filename in tqdm(iterable=filenames, total=len(filenames)):
            source = os.path.join(current_dir, filename)
            arcname = os.path.relpath(source, os.path.join(path, '..'))
            ziph.write(source, arcname)
        print('Created >>')
822
def download_by_step(self, response, tempdir_name):
    """Stream one resource to ``tempdir_name + response['name']``.

    Parameters:
        response:     dict with at least ``'url'`` and ``'name'``. ``'url'``
                      is rewritten in place when it points at
                      ``www.igp.gob.pe`` while ``self.url`` does not.
        tempdir_name: directory prefix (joined by plain concatenation, so it
                      must already end with a separator).

    The file is only written when the server answers with status 200.
    ``requests`` errors are deliberately not propagated (best-effort
    download), but a file left half-written by such an error is removed so
    that it is not later mistaken for a completed download.
    """
    target = tempdir_name + response['name']
    started = False  # True once this call has opened the target for writing
    try:
        # ---------- REPLACE URL --------- #
        remote = urlparse(self.url)
        source = urlparse(response['url'])
        if remote.netloc != 'www.igp.gob.pe' and source.netloc == 'www.igp.gob.pe':
            response['url'] = response['url'].replace(
                source.scheme + '://' + source.netloc,
                remote.scheme + '://' + remote.netloc)
        #----------------------------------#
        with requests.get(response['url'], stream=True,
                          headers={'Authorization': self.Authorization},
                          verify=self.verify) as resp:
            if resp.status_code == 200:
                started = True
                with open(target, 'wb') as file:
                    for chunk in resp.iter_content(chunk_size=self.chunk_size):
                        if chunk:
                            file.write(chunk)
    except requests.exceptions.RequestException:
        # Only clean up a file this call created; an error raised before
        # the file was opened must not delete anything.
        if started and os.path.isfile(target):
            os.remove(target)
838 415
def download_files(self, **kwargs):
    '''
    PURPOSE:
    Custom function to download the existing files of a dataset.

    AVAILABLE PARAMETERS:
    SEE: "GUIA DE SCRIPT.pdf"

    Keyword arguments handled locally (not forwarded to the API):
    - zip:         bool, default False. Download into a temporary directory
                   and pack the result into a zip file under "path".
    - status_note: bool, default False. Write "status_note.txt" next to the
                   downloaded files.
    - path:        str, default ''. Must be an existing, writable directory.
    Every other keyword argument is copied into self.dict and passed to the
    "url_resources" action. kwargs['id'] is read directly to name the
    output directory / zip file.

    RETURNS:
    An 'ERROR:: ...' string on invalid arguments, the exception value if the
    API call raises, otherwise one of 'DOWNLOAD FINISHED',
    'NO FILES WERE DOWNLOADED' or 'FILES NOT FOUND'.

    STRUCTURE:
    <access_name>.download_files(id = <class 'str'>, param_1 = <class 'param_1'>, ...)
    '''
    # Options consumed by this method; their keys are excluded from the API call below.
    dict_local = {}
    #----------------------------------------------#
    if 'zip' in kwargs:
        if type(kwargs['zip']) is not bool:
            return 'ERROR:: "zip" must be: <class "bool">'
        else:
            dict_local['zip'] = kwargs['zip']
    else:
        dict_local['zip'] = False
    #----------------------------------------------#
    if 'status_note' in kwargs:
        if type(kwargs['status_note']) is not bool:
            return 'ERROR:: "status_note" must be: <class "bool">'
        else:
            dict_local['status_note'] = kwargs['status_note']
    else:
        dict_local['status_note'] = False
    #----------------------------------------------#
    if 'path' in kwargs:
        if type(kwargs['path']) is str:
            if os.path.isdir(kwargs['path']) == False:
                return 'ERROR:: "path" does not exist'
            else:
                # Normalise so later string concatenation yields valid paths.
                if kwargs['path'][-1:] != self.separator:
                    dict_local['path'] = kwargs['path']+self.separator
                else:
                    dict_local['path'] = kwargs['path']

                # Probe write permission by creating and removing a
                # timestamp-named scratch file.
                txt = dict_local['path']+datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f")+'.txt'
                if int(platform.python_version()[0]) == 3:
                    try:
                        file_txt = open(txt, 'w')
                        file_txt.close()
                        os.remove(txt)
                    except PermissionError:
                        return 'ERROR:: Access denied, you are not authorized to write files: "%s"' % (dict_local['path'])
                else:
                    # Non-Python-3 branch: PermissionError is not referenced here, any failure is reported.
                    try:
                        file_txt = open(txt, 'w')
                        file_txt.close()
                        os.remove(txt)
                    except:
                        return 'ERROR:: Access denied, you are not authorized to write files: "%s"' % (dict_local['path'])
        else:
            return 'ERROR:: "path" must be: <class "str">'
    else:
        dict_local['path'] = ''
    #----------------------------------------------#
    # Forward everything except the local options to the API call.
    # NOTE(review): self.dict is mutated in place and not cleared here, so
    # keys from earlier calls may persist - confirm where it is reset.
    for key, value in kwargs.items():
        if not key in dict_local:
            self.dict[key] = value
    try:
        response = getattr(self.ckan.action, 'url_resources')(**self.dict)
    except:
        # The exception value is returned to the caller instead of raised.
        _, exc_value, _ = sys.exc_info()
        return exc_value

    if len(response) != 0:
        #--------------TEMP PATH---------------#
        if dict_local['zip']:
            # Download into <tempdir>/<id>/ so the archive has a top-level folder.
            tempdir = tempfile.mkdtemp(prefix=kwargs['id']+'-')+self.separator
            os.mkdir(tempdir+kwargs['id'])
            dir_name = tempdir + kwargs['id'] + self.separator
        else:
            # f_name picks a directory name that does not already exist under "path".
            # NOTE(review): "dir" shadows the builtin of the same name.
            dir = self.f_name(kwargs['id'], '', dict_local['path'])
            os.mkdir(dict_local['path'] + dir)
            dir_name = dict_local['path'] + dir + self.separator
        #-----------DOWNLOAD FILES-------------#
        print('.....')
        print('Downloading "{}" file(s) >>'.format(len(response)))
        name_total = {'name': []}
        # Leaving the "with" block waits for all submitted downloads to finish.
        with concurrent.futures.ThreadPoolExecutor() as executor:
            for u in tqdm(iterable=response, total=len(response)):
                name_total['name'].append(u['name'])
                executor.submit(self.download_by_step, u, dir_name)
        # Success is judged by which files are present in the target directory.
        name_check = {}
        name_check['name'] = [f for f in os.listdir(dir_name) if os.path.isfile(os.path.join(dir_name, f))]
        print('"{}" downloaded file(s) successfully >>'.format(len(name_check['name'])))
        #--------------------------------------#
        if len(name_check['name']) != 0:
            #----------Status Note---------#
            if dict_local['status_note']:
                print('.....')
                print('Creating: "status_note.txt" >>')
                self.f_status_note(name_total, name_check, dir_name)
                print('Created>>')
            #----------ZIP CREATE----------#
            if dict_local['zip']:
                zip_name = self.f_name(kwargs['id'], '.zip', dict_local['path'])
                ziph = zipfile.ZipFile(dict_local['path'] + zip_name, 'w', zipfile.ZIP_DEFLATED, allowZip64=True)
                self.f_zipdir(dir_name, ziph, zip_name)
                ziph.close()
                #Delete Temporal Path
                # tempdir ends with the separator; [:-1] strips that one character.
                if os.path.exists(tempdir[:-1]):
                    shutil.rmtree(tempdir[:-1])
            #------------------------------#
            print('.....')
            return 'DOWNLOAD FINISHED'
        else:
            #Delete Temporal Path
            # Nothing was downloaded: remove the directory created above.
            if dict_local['zip']:
                if os.path.exists(tempdir[:-1]):
                    shutil.rmtree(tempdir[:-1])
            else:
                if os.path.exists(dir_name[:-1]):
                    shutil.rmtree(dir_name[:-1])
            return 'NO FILES WERE DOWNLOADED'
    else:
        return 'FILES NOT FOUND'
959
def download_files_advance(self, id_or_name, processes=1, path=os.path.expanduser("~"), **kwargs):
    '''
    PURPOSE:
    Advanced custom function to download the existing files of one or
    more datasets.

    AVAILABLE PARAMETERS:
    SEE: "GUIA DE SCRIPT.pdf"

    - id_or_name: str, or list whose items are converted with str().
    - processes:  int (bool is rejected), forwarded as a string.
    - path:       str, existing writable directory; defaults to the user's
                  home directory as resolved when the function is defined.
    - **kwargs:   forwarded unchanged to logic_download.dump_things_change.

    RETURNS:
    An 'ERROR:: ...' string when an argument is invalid, otherwise whatever
    logic_download.dump_things_change returns.

    STRUCTURE:
    <access_name>.download_files_advance(id_or_name= <class 'str' or 'list'>, param_1 = <class 'param_1'>, ...)
    '''
    #------------------ PATH ----------------------#
    if not isinstance(path, str):
        return 'ERROR:: "path" must be: <class "str">'
    if not os.path.isdir(path):
        return 'ERROR:: "path" does not exist'
    if not path.endswith(os.sep):
        path = path + os.sep
    # Probe write permission with a timestamp-named scratch file.
    test_txt = path + datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f") + '.txt'
    try:
        with open(test_txt, 'w'):
            pass
        os.remove(test_txt)
    except (IOError, OSError):
        # Only filesystem errors mean "not writable"; a bare except here
        # would also swallow KeyboardInterrupt and SystemExit.
        return 'ERROR:: Access denied, you are not authorized to write files: "%s"' % (path)

    #------------------ PROCESSES -----------------#
    # bool is a subclass of int; without the explicit check True/False would
    # be forwarded as the strings 'True'/'False'.
    if isinstance(processes, bool) or not isinstance(processes, int):
        return 'ERROR:: "processes" must be: <class "int">'

    #------------------ ID OR NAME ----------------#
    if isinstance(id_or_name, str):
        id_or_name = [id_or_name]
    elif isinstance(id_or_name, list):
        id_or_name = list(map(str, id_or_name))
    else:
        return 'ERROR:: dataset "id_or_name" must be: <class "str" or "list">'
    #----------------------------------------------#
    # Option mapping handed to the dump routine. Only these keys are set;
    # presumably they mirror the ckanapi command-line options for
    # "dump datasets" - TODO confirm against logic_download.
    arguments = {
        '--apikey': self.Authorization,
        '--ckan-user': None,
        '--config': None,
        '--datapackages': path,
        '--datastore-fields': False,
        '--get-request': False,
        '--insecure': not self.verify,
        '--processes': str(processes),
        '--quiet': False,
        '--remote': self.url,
        '--worker': False,
        'ID_OR_NAME': id_or_name,
        'datasets': True,
        'dump': True,
    }
    return logic_download.dump_things_change(self.ckan, 'datasets', arguments, **kwargs)
General Comments 0
You need to be logged in to leave comments. Login now