##// END OF EJS Templates
v2.9.2 :: Delete 'format' automatically
eynilupu -
r7:9ab1ac032168
parent child
Show More
1 NO CONTENT: modified file, binary diff hidden
@@ -1,878 +1,878
1 1 from ckanapi import RemoteCKAN
2 2 from datetime import datetime
3 3 from tqdm import tqdm
4 4 #from ckanapi.errors import NotAuthorized, NotFound, ValidationError, SearchQueryError, SearchError, CKANAPIError, ServerIncompatibleError
5 5 import sys
6 6 import platform
7 7 import os
8 8 import tempfile
9 9 import shutil
10 10 import zipfile
11 11 import concurrent.futures
12 12 import requests
13 13 import json
14 import pathlib
14 #import pathlib
15 15 import uuid
16 16
17 17 class JROAPI():
18 18 """
19 19 FINALIDAD:
20 20 Script para administrar y obtener la data del repositorio por medio de APIs.
21 21
22 22 REQUISITOS PREVIOS:
23 23 - Paso 1: Tener "pip [Python 2]" o "pip3 [Python 3]" instalado:
24 24 - Paso 2: Instalar lo siguiente como administrador:
25 25 En Python 2
26 26 - pip install ckanapi==4.5
27 27 - pip install requests
28 - pip install pathlib
29 28 - pip install futures
30 29 - pip install tqdm
31 30 En Python > 3
32 31 - pip3 install ckanapi==4.5
33 32 - pip3 install requests
34 33 - pip3 install tqdm
35 34
36 35 FUNCIONES DISPONIBLES:
37 36 - action
38 37 - upload_file
39 38 - upload_multiple_files
40 39 - upload_multiple_files_advance
41 40 - show
42 41 - search
43 42 - create
44 43 - patch
45 44 - delete
46 45 - download_files
47 46
48 47 EJEMPLOS:
49 48 #1:
50 49 with JROAPI('http://demo.example.com', Authorization='#########') as <access_name>:
51 50 ... some operation(s) ...
52 51 #2:
53 52 <access_name> = JROAPI('http://example.com', Authorization='#########')
54 53 ... some operation(s) ...
55 54 <access_name>.ckan.close()
56 55
57 56 REPORTAR ALGUN PROBLEMA:
58 57 Debe enviar un correo a eynilupu@igp.gob.pe detallando los siguientes pasos:
59 58 1) Correo para contactarlo
60 59 2) Descripcion del problema
61 60 3) ¿En que paso o seccion encontro el problema?
62 61 4) ¿Cual era el resultado que usted esperaba?
63 62 """
def __init__(self, url, Authorization=None):
    """Create a client bound to the CKAN site at ``url``.

    Args:
        url: Address of the CKAN site. It is passed to ``RemoteCKAN`` and
            embedded in the user-agent string.
        Authorization: API key forwarded to ``RemoteCKAN`` as ``apikey`` and
            kept on the instance for later requests. Optional.
    """
    user_agent = 'CKAN_JRO/1.1 (+' + str(url) + ')'
    self.ckan = RemoteCKAN(url, apikey=Authorization, user_agent=user_agent)
    self.Authorization = Authorization
    # os.sep is '\\' on Windows and '/' elsewhere, which is exactly what the
    # former platform.system() check produced by hand.
    self.separator = os.sep

    # Chunk size handed to iter_content() when streaming downloads.
    self.chunk_size = 1024
    # Scratch attributes shared by the other methods of the class.
    self.list = []
    self.dict = {}
    self.str = ''
    # Loop flag and counter driving f_name().
    self.check = 1
    self.cont = 0
81 81
def __enter__(self):
    """Enter a ``with`` block; the client itself is the managed object."""
    client = self
    return client
84 84
def __exit__(self, *exc_info):
    """Leave a ``with`` block, closing the underlying CKAN connection.

    The exception details are ignored and nothing truthy is returned, so an
    exception raised inside the block keeps propagating.
    """
    connection = self.ckan
    connection.close()
87 87
def action(self, action, **kwargs):
    """Call a CKAN API action by name.

    For ``package_search`` the options ``facet_mincount``, ``facet_limit``
    and ``facet_field`` are forwarded under their dotted names
    (``facet.mincount`` ...), which cannot be written as Python keywords.

    Example:
        <access_name>.action(<consuming API>, param_1=<value>, ...)

    Args:
        action: Name of the API action to call.
        **kwargs: Parameters forwarded to the action.

    Returns:
        The value returned by the action, or the exception instance when the
        call fails. ``KeyboardInterrupt`` and ``SystemExit`` are not captured
        and propagate to the caller.
    """
    if action == 'package_search':
        for facet in ('facet_mincount', 'facet_limit', 'facet_field'):
            if facet in kwargs:
                kwargs[facet.replace('_', '.')] = kwargs.pop(facet)
    try:
        return getattr(self.ckan.action, action)(**kwargs)
    except Exception as error:
        # Errors are handed back as values so callers can inspect them.
        return error
113 113
def upload_file(self, dataset_id, file_path, file_date, file_type, **kwargs):
    """Upload a single file as a new resource of a dataset.

    Structure:
        <access_name>.upload_file(dataset_id=<str>, file_date=<str>,
                                  file_path=<str>, file_type=<str>, ...)

    Args:
        dataset_id: Sent to ``resource_create`` as ``package_id``.
        file_path: Path of the file to upload; its base name becomes the
            resource ``name``.
        file_date: Sent as ``file_date``.
        file_type: Sent as ``voc_file_type``.
        **kwargs: Extra resource fields. ``package_id``, ``upload``,
            ``voc_file_type`` and ``name`` are ignored. ``others`` defaults
            to ``''`` and is JSON-encoded when given as a list.

    Returns:
        The value returned by ``resource_create``; a message string when
        ``file_path`` is not a file; or the exception instance when the call
        fails.
    """
    reserved = ('package_id', 'upload', 'voc_file_type', 'name')
    # Built per call: parameters must not leak from one upload to the next.
    params = {key: value for key, value in kwargs.items() if key not in reserved}

    if 'others' not in kwargs:
        params['others'] = ''
    elif isinstance(kwargs['others'], list):
        params['others'] = json.dumps(kwargs['others'])

    if not os.path.isfile(file_path):
        return 'File "%s" not exist' % (file_path)

    try:
        # The context manager closes the handle whether or not the call fails.
        with open(file_path, 'rb') as stream:
            return self.ckan.action.resource_create(
                package_id=dataset_id,
                file_date=file_date,
                upload=stream,
                voc_file_type=file_type,
                name=os.path.basename(file_path),
                **params
            )
    except Exception as error:
        return error
152 152
def upload_multiple_files_advance(self, dataset_id, path_files, file_date, file_type, max_size=100, max_count=500, ignore_repetition=False, **kwargs):
    """Upload several files to a dataset in blocks through ``package_revise``.

    Structure:
        <access_name>.upload_multiple_files_advance(dataset_id=<str>,
            path_files=<list of str> or <str>, file_date=<str>,
            file_type=<str>, ...)

    Args:
        dataset_id: Dataset id or name. A value that parses as a UUID is
            matched by ``id``, anything else by ``name``.
        path_files: Either a list of file paths (sorted in place) or the path
            of a directory whose regular files are uploaded in sorted order.
        file_date: Stored as ``file_date`` on every new resource.
        file_type: Stored as ``voc_file_type`` on every new resource.
        max_size: Upper bound, in MB, for a single file and for a block.
        max_count: Upper bound of files per block; must be within 1..999.
        ignore_repetition: When true, files whose name (case-insensitive)
            already exists in the dataset are skipped with a warning instead
            of aborting the call.
        **kwargs: Extra resource fields. ``package_id``, ``upload``,
            ``voc_file_type`` and ``name`` are ignored. ``others`` defaults
            to ``''`` and is JSON-encoded when given as a list.

    Returns:
        The result of the last ``package_revise`` call; an error message
        string when validation fails; or the exception instance when a
        request fails.
    """
    # ---- names already present in the dataset ---- #
    try:
        current_resources = self.ckan.action.package_show(id=dataset_id)['resources']
    except Exception as error:
        print('ERROR obtaining metadata dataset:: Use the "print" for more information')
        return error
    taken_names = set(resource['name'].lower() for resource in current_resources)

    # ---- per-call resource fields (never stored on the instance) ---- #
    reserved = ('package_id', 'upload', 'voc_file_type', 'name')
    params = {key: value for key, value in kwargs.items() if key not in reserved}
    if 'others' not in kwargs:
        params['others'] = ''
    elif isinstance(kwargs['others'], list):
        params['others'] = json.dumps(kwargs['others'])

    # ---- candidate files as (name, path) pairs ---- #
    if isinstance(path_files, list):
        if len(path_files) == 0:
            return 'ERROR:: "path_list is empty"'
        path_files.sort()
        candidates = [(os.path.basename(path), path) for path in path_files]
    elif isinstance(path_files, str):
        if not os.path.isdir(path_files):
            return 'ERROR:: Directory "%s" does not exist' % (path_files)
        names = sorted(
            entry for entry in os.listdir(path_files)
            if os.path.isfile(os.path.join(path_files, entry))
        )
        if not names:
            return "ERROR:: There aren't files in this directory"
        candidates = [(name, os.path.join(path_files, name)) for name in names]
    else:
        return 'ERROR:: "path_files" must be a str or list'

    # Only paths and sizes are collected here. Files are opened later, one
    # block at a time, so an early return cannot leave handles open.
    pending = []
    for name, path in candidates:
        if not os.path.isfile(path):
            return 'File "%s" does not exist' % (path)
        if name.lower() in taken_names:
            if not ignore_repetition:
                return 'ERROR:: "%s" file already exist in this dataset' % (name)
            print('WARRING:: "'+ str(name) +'" file was ignored because already exist in this dataset')
            continue
        pending.append((name, path, os.stat(path).st_size))

    if not pending:
        return "ERROR:: No file(s) found to upload"

    # ---- dataset selector; json.dumps takes care of quoting ---- #
    try:
        uuid.UUID(str(dataset_id), version=4)
        match = {'id': str(dataset_id)}
    except ValueError:
        match = {'name': str(dataset_id)}

    # ---- split into blocks bounded by total size and file count ---- #
    max_bytes = 1024 * 1024 * float(max_size)
    blocks = [[]]
    block_bytes = 0
    for name, path, size in pending:
        if size > max_bytes:
            return 'ERROR:: The size of the "%s" file is %sMB aprox, please change "max_size" value' % (name, str(round(size / (1024.0 * 1024), 2)))
        if not 1 <= int(max_count) <= 999:
            return 'ERROR:: The count of the number of files must be between 1 and 999, please change "max_count" value'
        if block_bytes + size > max_bytes or len(blocks[-1]) + 1 > int(max_count):
            blocks.append([])
            block_bytes = 0
        blocks[-1].append((name, path))
        block_bytes += size

    # ---- one package_revise request per block ---- #
    print('BLOCK(S) IN TOTAL:: {}'.format(len(blocks)))
    result = None
    for number, block in enumerate(blocks, 1):
        print('---- BLOCK N°{} ----'.format(number))
        resources = []
        files = {}
        streams = []
        try:
            for position, (name, path) in enumerate(block):
                resource = {'name': name, 'file_date': file_date, 'voc_file_type': file_type}
                resource.update(params)
                resources.append(resource)
                stream = open(path, 'rb')
                streams.append(stream)
                # Negative index counted from the end of the extended list.
                files['update__resources__-'+ str(len(block) - position) +'__upload'] = (name, stream)

            print('BLOCK N°{} :: "{}" file(s) found >> uploading'.format(number, len(block)))
            result = self.ckan.call_action(
                'package_revise',
                {'match': json.dumps(match), 'update__resources__extend': json.dumps(resources)},
                files=files
            )
            print('BLOCK N°{} :: Uploaded file(s) successfully'.format(number))
        except Exception as error:
            print('ERROR :: Use the "print" for more information')
            return error
        finally:
            for stream in streams:
                stream.close()
    return result
290 290
def upload_multiple_files(self, dataset_id, path_files, date_files, type_files, **kwargs):
    """Upload several files to a dataset, one ``resource_create`` call each.

    Structure:
        <access_name>.upload_multiple_files(dataset_id=<str>,
            path_files=<str> or <list of str>,
            date_files=<str> or <list of str>,
            type_files=<str> or <list of str>, ...)

    Args:
        dataset_id: Sent as ``package_id`` with every file.
        path_files: Either a list of file paths (sorted in place) or the path
            of a directory whose regular files are uploaded in sorted order.
        date_files: ``file_date`` value; a list supplies one value per file.
        type_files: ``voc_file_type`` value; a list supplies one per file.
        **kwargs: Extra resource fields. A list value supplies one entry per
            file, any other value is shared by all files. ``others`` may be a
            tuple (one JSON-encoded entry per file), a list (JSON-encoded and
            shared) or a str (shared as is).

    Returns:
        A list with one entry per file, holding the ``resource_create``
        result or the exception raised for that file, or an error message
        string when validation fails.
    """
    # 'upload' holds paths; each file is opened only while it is being sent.
    per_file = {'upload': [], 'name': []}

    if isinstance(path_files, list):
        if len(path_files) == 0:
            return 'ERROR:: "path_list is empty"'
        path_files.sort()
        for path in path_files:
            if not os.path.isfile(path):
                return 'File "%s" does not exist' % (path)
            per_file['upload'].append(path)
            per_file['name'].append(os.path.basename(path))
    elif isinstance(path_files, str):
        if not os.path.isdir(path_files):
            return 'ERROR:: Directory "%s" does not exist' % (path_files)
        names = sorted(
            entry for entry in os.listdir(path_files)
            if os.path.isfile(os.path.join(path_files, entry))
        )
        if not names:
            return "ERROR:: There aren't files in this directory"
        for name in names:
            per_file['upload'].append(os.path.join(path_files, name))
            per_file['name'].append(name)
    else:
        return 'ERROR:: "path_files" must be a str or list'

    # Values shared by every file of this call.
    shared = {'package_id': dataset_id}
    if isinstance(date_files, list):
        per_file['file_date'] = date_files
    else:
        shared['file_date'] = date_files

    if isinstance(type_files, list):
        per_file['voc_file_type'] = type_files
    else:
        shared['voc_file_type'] = type_files

    for key, value in kwargs.items():
        if key in per_file or key in shared or key == 'others':
            continue
        if isinstance(value, list):
            per_file[key] = value
        else:
            shared[key] = value

    if 'others' not in kwargs:
        shared['others'] = ''
    elif isinstance(kwargs['others'], tuple):
        per_file['others'] = [json.dumps(item) for item in kwargs['others']]
    elif isinstance(kwargs['others'], list):
        shared['others'] = json.dumps(kwargs['others'])
    elif isinstance(kwargs['others'], str):
        shared['others'] = kwargs['others']
    else:
        return 'ERROR:: "others" must be a tuple, list or str'

    if len(set(len(values) for values in per_file.values())) > 1:
        return 'ERROR:: All lists must be the same length: %s' % (len(per_file['name']))

    # Results are collected per call instead of on the shared instance list.
    results = []
    print('"{}" file(s) found >> uploading'.format(len(per_file['name'])))
    for index, name in enumerate(per_file['name']):
        try:
            send = dict((key, values[index]) for key, values in per_file.items())
            send.update(shared)
            with open(send['upload'], 'rb') as stream:
                send['upload'] = stream
                results.append(self.ckan.action.resource_create(**send))
            print('File #{} :: "{}" was uploaded successfully'.format(index + 1, name))
        except Exception as error:
            results.append(error)
            print('File #{} :: Error uploading "{}" file'.format(index + 1, name))
    return results
400 400
def show(self, type_option, id, **kwargs):
    """Fetch one object through the ``*_show`` / ``*_list_for_user`` actions.

    Structure:
        <access_name>.show(type_option=<str>, id=<str>, ...)

    Args:
        type_option: One of ``dataset``, ``resource``, ``project``,
            ``collaborator``, ``member``, ``vocabulary``, ``tag``, ``user``
            or ``job``.
        id: Identifier forwarded to the action as ``id``.
        **kwargs: Additional parameters forwarded to the action.

    Returns:
        The action result; an error message string for an unsupported
        ``type_option``; or the exception instance when the call fails.
    """
    if not isinstance(type_option, str):
        return 'ERROR:: "type_option" must be a str'

    action_names = {
        'dataset': 'package_show',
        'resource': 'resource_show',
        'project': 'organization_show',
        'collaborator': 'package_collaborator_list_for_user',
        'member': 'organization_list_for_user',
        'vocabulary': 'vocabulary_show',
        'tag': 'tag_show',
        'user': 'user_show',
        'job': 'job_show',
    }
    if type_option not in action_names:
        return 'ERROR:: "type_option = %s" is not accepted' % (type_option)

    if type_option == 'tag' and 'vocabulary_id' not in kwargs:
        print('Missing "vocabulary_id" value: assume it is a free tag')

    try:
        return getattr(self.ckan.action, action_names[type_option])(id=id, **kwargs)
    except Exception as error:
        # Errors are handed back as values; Ctrl-C still interrupts.
        return error
441 441
def search(self, type_option, query=None, **kwargs):
    """Search datasets, resources or tags that satisfy some criteria.

    Structure:
        <access_name>.search(type_option=<str>, query=<dict>, ...)

    Args:
        type_option: ``dataset`` (calls ``package_search``), ``resource``
            (calls ``resources_search``) or ``tag`` (calls ``tag_search``).
        query: Optional dict of criteria.
            * dataset: every non-``None`` item becomes a ``key:"value"``
              entry of ``fq_list``; ``dataset_start_date`` and
              ``dataset_end_date`` must be ``YYYY-MM-DD`` strings.
            * resource: sent as ``query``; ``file_date_min`` and
              ``file_date_max`` must be ``YYYY-MM-DD`` strings.
            * tag: the ``search`` item (list or str) is sent as ``query``.
        **kwargs: Extra parameters for the action. For datasets ``fq``,
            ``fq_list`` and ``include_private`` are dropped and the
            ``facet_*`` options are sent under their dotted names; for
            resources and tags ``fields`` is dropped.

    Returns:
        The action result; a message string when validation fails; or the
        exception instance when the call fails.
    """
    if not isinstance(type_option, str):
        return 'ERROR:: "type_option" must be <str>'

    def date_problem(field, lower_bound=None):
        """Return the error message for ``query[field]``, or None if valid."""
        value = query[field]
        if type(value) is not str:
            return '"%s" must be <str>' % (field)
        try:
            datetime.strptime(value, '%Y-%m-%d')
        except ValueError:
            return '"%s" incorrect: "%s"' % (field, value)
        # strptime also accepts un-padded parts such as "2020-1-1".
        if len(value) != 10:
            return '"%s", must be: <YYYY-MM-DD>' % (field)
        # Fixed-width ISO dates order correctly as plain strings.
        if lower_bound is not None and lower_bound in query and query[lower_bound] > value:
            return '"%s" must be greater than "%s"' % (field, lower_bound)
        return None

    try:
        if type_option == 'dataset':
            dropped = ('fq', 'fq_list', 'include_private')
            dotted = ('facet_mincount', 'facet_limit', 'facet_field')
            # Per-call parameters; nothing is kept on the instance.
            params = {}
            for key, value in kwargs.items():
                if key in dropped:
                    continue
                params[key.replace('_', '.') if key in dotted else key] = value

            if query is not None:
                if type(query) is not dict:
                    return '"query" must be <dict>'
                filters = []
                date_fields = (('dataset_start_date', None), ('dataset_end_date', 'dataset_start_date'))
                validated = []
                for field, lower_bound in date_fields:
                    if field in query:
                        problem = date_problem(field, lower_bound)
                        if problem is not None:
                            return problem
                        filters.append(field + ':"' + query[field] + '"')
                        validated.append(field)
                for key, value in query.items():
                    if value is not None and key not in validated:
                        filters.append(str(key) + ':"' + str(value) + '"')
                params['fq_list'] = filters

            return self.ckan.action.package_search(include_private=True, **params)

        elif type_option == 'resource':
            params = {key: value for key, value in kwargs.items() if key != 'fields'}

            if query is not None:
                if type(query) is not dict:
                    return '"query" must be <dict>'
                for field, lower_bound in (('file_date_min', None), ('file_date_max', 'file_date_min')):
                    if field in query:
                        problem = date_problem(field, lower_bound)
                        if problem is not None:
                            return problem
                params['query'] = query
            return self.ckan.action.resources_search(**params)

        elif type_option == 'tag':
            params = {key: value for key, value in kwargs.items() if key != 'fields'}

            if 'vocabulary_id' not in kwargs:
                print('Missing "vocabulary_id" value: tags that don’t belong to any vocabulary')
            else:
                print('Only tags that belong to "{}" vocabulary'.format(kwargs['vocabulary_id']))

            if query is not None:
                if type(query) is not dict:
                    return '"query" must be <dict>'
                if 'search' in query:
                    if type(query['search']) is list or type(query['search']) is str:
                        params['query'] = query['search']
                    else:
                        return '"search" must be <list> or <str>'
            return self.ckan.action.tag_search(**params)

        else:
            return 'ERROR:: "type_option = %s" is not accepted' % (type_option)

    except Exception as error:
        return error
577 577
def create(self, type_option, select=None, **kwargs):
    """Create an object through the matching ``*_create`` action.

    Structure:
        <access_name>.create(type_option=<str>, param_1=<value>, ...)

    Args:
        type_option: ``dataset``, ``project``, ``member``, ``collaborator``,
            ``vocabulary``, ``tag``, ``user`` or ``views``.
        select: Only used with ``views``: ``'resource'`` calls
            ``resource_create_default_resource_views`` (the ``package``
            keyword is dropped) and ``'dataset'`` calls
            ``package_create_default_resource_views``.
        **kwargs: Parameters forwarded to the action.

    Returns:
        The action result; an error message string for an unsupported
        ``type_option`` / ``select``; or the exception instance when the call
        fails.
    """
    if not isinstance(type_option, str):
        return 'ERROR:: "type_option" must be <str>'

    action_names = {
        'dataset': 'package_create',
        'project': 'organization_create',
        'member': 'organization_member_create',
        'collaborator': 'package_collaborator_create',
        'vocabulary': 'vocabulary_create',
        'tag': 'tag_create',
        'user': 'user_create',
    }
    try:
        if type_option in action_names:
            return getattr(self.ckan.action, action_names[type_option])(**kwargs)

        if type_option != 'views':
            return 'ERROR:: "type_option = %s" is not accepted' % (type_option)

        if select == 'resource':
            # Fresh dict per call so earlier parameters cannot leak in.
            params = {key: value for key, value in kwargs.items() if key != 'package'}
            return self.ckan.action.resource_create_default_resource_views(**params)
        if select == 'dataset':
            return self.ckan.action.package_create_default_resource_views(**kwargs)
        return 'ERROR:: "select = %s" is not accepted' % (select)
    except Exception as error:
        return error
623 623
def patch(self, type_option, **kwargs):
    """Update an object through the matching CKAN action.

    Structure:
        <access_name>.patch(type_option=<str>, param_1=<value>, ...)

    Args:
        type_option: ``dataset``, ``project`` or ``resource`` (``*_patch``
            actions), or ``member`` / ``collaborator``, which go through
            ``organization_member_create`` and
            ``package_collaborator_create`` respectively.
        **kwargs: Parameters forwarded to the action.

    Returns:
        The action result; an error message string for an unsupported
        ``type_option``; or the exception instance when the call fails.
    """
    if not isinstance(type_option, str):
        return 'ERROR:: "type_option" must be <str>'

    action_names = {
        'dataset': 'package_patch',
        'project': 'organization_patch',
        'resource': 'resource_patch',
        'member': 'organization_member_create',
        'collaborator': 'package_collaborator_create',
    }
    if type_option not in action_names:
        return 'ERROR:: "type_option = %s" is not accepted' % (type_option)

    try:
        return getattr(self.ckan.action, action_names[type_option])(**kwargs)
    except Exception as error:
        return error
654 654
def delete(self, type_option, select=None, **kwargs):
    """Delete or purge an object through the matching CKAN action.

    Structure:
        <access_name>.delete(type_option=<str>, param_1=<value>, ...)

    Args:
        type_option: ``dataset``, ``project``, ``resource``, ``vocabulary``,
            ``tag`` or ``user``.
        select: Required for ``dataset`` and ``project``: ``'delete'`` or
            ``'purge'``. Ignored for the other types.
        **kwargs: Parameters forwarded to the action.

    Returns:
        The action result; an error message string for an unsupported
        ``type_option`` / ``select``; or the exception instance when the call
        fails.
    """
    if not isinstance(type_option, str):
        return 'ERROR:: "type_option" must be <str>'

    # (delete action, purge action) for the types that support both.
    two_step = {
        'dataset': ('package_delete', 'dataset_purge'),
        'project': ('organization_delete', 'organization_purge'),
    }
    direct = {
        'resource': 'resource_delete',
        'vocabulary': 'vocabulary_delete',
        'tag': 'tag_delete',
        'user': 'user_delete',
    }
    try:
        if type_option in two_step:
            if select is None:
                return 'ERROR:: "select" must not be "None"'
            delete_name, purge_name = two_step[type_option]
            if select == 'delete':
                action_name = delete_name
            elif select == 'purge':
                action_name = purge_name
            else:
                return 'ERROR:: "select = %s" is not accepted' % (select)
        elif type_option in direct:
            action_name = direct[type_option]
        else:
            return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
        return getattr(self.ckan.action, action_name)(**kwargs)
    except Exception as error:
        return error
703 703
def f_status_note(self, total, result, path):
    """Write ``status_note.txt`` listing downloaded and failed files.

    Args:
        total: Dict whose ``'name'`` entry lists every file requested.
        result: Dict whose ``'name'`` entry lists the files downloaded.
        path: Prefix prepended to ``status_note.txt``; it is concatenated as
            is, so a directory must end with a path separator.
    """
    downloaded = result['name']
    requested = total['name']
    failed_count = len(requested) - len(downloaded)

    # Text mode already translates '\n' to the platform line ending; writing
    # os.linesep here would double the carriage return on Windows. The
    # context manager closes the file, which was previously left open twice.
    with open(path + 'status_note.txt', 'w') as note:
        note.write('DOWNLOADED FILE(S): "%s"' % (len(downloaded)))
        note.write('\n')
        for name in downloaded:
            note.write(' - ' + name + '\n')
        note.write('\n')

        note.write('FAILED FILE(S): "%s"' % (failed_count))
        note.write('\n')
        if failed_count != 0:
            for name in requested:
                if name not in downloaded:
                    note.write(' - ' + name + '\n')
        else:
            note.write(' "None"' + '\n')
722 722
def f_name(self, name_dataset, ext, tempdir):
    """Pick a file name under ``tempdir`` that does not exist yet.

    The search is driven by instance state: it only runs while ``self.check``
    is truthy and numbers its candidates with ``self.cont``. The base name is
    tried first, then ``name(1)ext``, ``name(2)ext`` and so on. When
    ``self.check`` is already falsy the previously stored ``self.str`` is
    returned unchanged.

    Args:
        name_dataset: Base name without extension.
        ext: Extension appended to the name.
        tempdir: Prefix concatenated in front of each candidate for the
            existence check.

    Returns:
        The chosen name (without ``tempdir``), also stored in ``self.str``.
    """
    while self.check:
        self.str = ''
        if self.cont != 0:
            numbered = name_dataset + '(' + str(self.cont) + ')' + ext
            if not os.path.exists(tempdir + numbered):
                self.check *= 0
                self.str = numbered
        elif os.path.exists(tempdir + name_dataset + ext):
            # Base name is taken: the numbered candidates are probed on the
            # following iterations.
            self.str = name_dataset + '(' + str(self.cont + 1) + ')' + ext
        else:
            self.check *= 0
            self.str = name_dataset + ext
        self.cont += 1
    return self.str
738 738
def f_zipdir(self, path, ziph, zip_name):
    """Add every file found under ``path`` to an open zip archive.

    Entries are stored relative to the parent of ``path``, so the archive
    contains the directory itself as its top-level folder. Progress is
    printed for each directory visited.

    Args:
        path: Directory to walk.
        ziph: Archive object providing ``write(filename, arcname)``.
        zip_name: Archive name, used only in the progress message.
    """
    parent = os.path.join(path, '..')
    for folder, _, filenames in os.walk(path):
        print('.....')
        print('Creating: "{}" >>'.format(zip_name))
        for filename in tqdm(iterable=filenames, total=len(filenames)):
            source = os.path.join(folder, filename)
            ziph.write(source, os.path.relpath(source, parent))
        print('Created >>')
747 747
def download_by_step(self, response, tempdir_name):
    """Stream one resource to disk, best effort.

    The file is written only when the server answers with status 200. Any
    other status, or a ``requests`` error, leaves the method silent.

    Args:
        response: Mapping with the resource ``url`` and ``name``.
        tempdir_name: Prefix concatenated in front of the resource name to
            form the output path.
    """
    try:
        with requests.get(response['url'], stream=True, headers={'Authorization': self.Authorization}) as reply:
            if reply.status_code != 200:
                return
            with open(tempdir_name + response['name'], 'wb') as target:
                for piece in reply.iter_content(chunk_size=self.chunk_size):
                    # Empty chunks are skipped.
                    if piece:
                        target.write(piece)
    except requests.exceptions.RequestException:
        # Deliberately ignored: a failed download simply produces no file.
        pass
758 758
def download_files(self, **kwargs):
    '''
    PURPOSE:
        Custom function to download the existing files of a dataset.

    AVAILABLE PARAMETERS:
        SEE: "GUIA DE SCRIPT.pdf"
        Handled locally (never forwarded to the API):
            zip         <bool>: pack the downloaded files into "<id>.zip"
                                inside "path" (default False).
            status_note <bool>: also write "status_note.txt" (default False).
            path        <str> : existing, writable destination directory
                                (default '' = current directory).
        Every other keyword (e.g. "id") is stored in self.dict and sent to
        the "url_resources" action.

    STRUCTURE:
        <access_name>.download_files(id = <class 'str'>, param_1 = <class 'param_1'>, ...)

    RETURNS:
        'DOWNLOAD FINISHED', 'NO FILES WERE DOWNLOADED', 'FILES NOT FOUND',
        an 'ERROR:: ...' message for invalid arguments, or the exception
        instance raised by the API call.
    '''
    dict_local = {}
    #----------------------------------------------#
    for flag in ('zip', 'status_note'):
        dict_local[flag] = kwargs.get(flag, False)
        if not isinstance(dict_local[flag], bool):
            return 'ERROR:: "%s" must be: <class "bool">' % (flag)
    #----------------------------------------------#
    if 'path' in kwargs:
        if not isinstance(kwargs['path'], str):
            return 'ERROR:: "path" must be: <class "str">'
        if not os.path.isdir(kwargs['path']):
            return 'ERROR:: "path" does not exist'

        if kwargs['path'][-1:] != self.separator:
            dict_local['path'] = kwargs['path']+self.separator
        else:
            dict_local['path'] = kwargs['path']

        # Probe write permission by creating and removing a uniquely named file.
        try:
            write_denied = PermissionError
        except NameError:
            # Python 2 has no PermissionError; a denied open() raises IOError.
            write_denied = (IOError, OSError)
        txt = dict_local['path']+datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f")+'.txt'
        try:
            open(txt, 'w').close()
            os.remove(txt)
        except write_denied:
            return 'ERROR:: Access denied, you are not authorized to write files: "%s"' % (dict_local['path'])
    else:
        dict_local['path'] = ''
    #----------------------------------------------#
    # Anything that is not a local option is an argument for the API call.
    for key, value in kwargs.items():
        if key not in dict_local:
            self.dict[key] = value
    try:
        response = self.ckan.action.url_resources(**self.dict)
    except Exception as exc_value:
        # API errors are handed back to the caller instead of raised;
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        return exc_value

    if len(response) == 0:
        return 'FILES NOT FOUND'

    tempdir = None
    try:
        #--------------TEMP PATH---------------#
        if dict_local['zip']:
            # Files are staged in a temporary folder and zipped afterwards.
            tempdir = tempfile.mkdtemp(prefix=kwargs['id']+'-')+self.separator
            os.mkdir(tempdir+kwargs['id'])
            dir_name = tempdir + kwargs['id'] + self.separator
        else:
            folder = self.f_name(kwargs['id'], '', dict_local['path'])
            os.mkdir(dict_local['path'] + folder)
            dir_name = dict_local['path'] + folder + self.separator
        #-----------DOWNLOAD FILES-------------#
        print('.....')
        print('Downloading "{}" file(s) >>'.format(len(response)))
        name_total = {'name': []}
        with concurrent.futures.ThreadPoolExecutor() as executor:
            for u in tqdm(iterable=response, total=len(response)):
                name_total['name'].append(u['name'])
                executor.submit(self.download_by_step, u, dir_name)
        # The executor has been shut down here, so every download has ended;
        # whatever is on disk is what was downloaded.
        name_check = {}
        name_check['name'] = [f for f in os.listdir(dir_name) if os.path.isfile(os.path.join(dir_name, f))]
        print('"{}" downloaded file(s) successfully >>'.format(len(name_check['name'])))
        #--------------------------------------#
        if len(name_check['name']) == 0:
            # Nothing arrived: drop the empty destination folder
            # (the temporary one is removed in "finally").
            if not dict_local['zip'] and os.path.exists(dir_name[:-1]):
                shutil.rmtree(dir_name[:-1])
            return 'NO FILES WERE DOWNLOADED'
        #----------Status Note---------#
        if dict_local['status_note']:
            print('.....')
            print('Creating: "status_note.txt" >>')
            self.f_status_note(name_total, name_check, dir_name)
            print('Created>>')
        #----------ZIP CREATE----------#
        if dict_local['zip']:
            zip_name = self.f_name(kwargs['id'], '.zip', dict_local['path'])
            # The context manager closes the archive even if zipping fails.
            with zipfile.ZipFile(dict_local['path'] + zip_name, 'w', zipfile.ZIP_DEFLATED, allowZip64=True) as ziph:
                self.f_zipdir(dir_name, ziph, zip_name)
        #------------------------------#
        print('.....')
        return 'DOWNLOAD FINISHED'
    finally:
        # Delete the temporary path on every exit, including errors.
        if tempdir is not None and os.path.exists(tempdir[:-1]):
            shutil.rmtree(tempdir[:-1])
General Comments 0
You need to be logged in to leave comments. Login now