##// END OF EJS Templates
v2.9.2 :: Update 'isfile' create resource and download [bugs]
eynilupu -
r17:187bc9eb8a7d
parent child
Show More
1 NO CONTENT: modified file, binary diff hidden
@@ -1,1044 +1,1044
1 1 from ckanapi import RemoteCKAN
2 2 from datetime import datetime
3 3 from tqdm import tqdm
4 4 from CKAN_JRO import logic_download
5 5 from CKAN_JRO import resource
6 6 #from ckanapi.errors import NotAuthorized, NotFound, ValidationError, SearchQueryError, SearchError, CKANAPIError, ServerIncompatibleError
7 7 import sys
8 8 import platform
9 9 import os
10 10 import tempfile
11 11 import shutil
12 12 import zipfile
13 13 import concurrent.futures
14 14 import requests
15 15 import json
16 16 #import pathlib
17 17 import uuid
18 18
19 19 if sys.version_info.major == 3:
20 20 from urllib.parse import urlparse
21 21 else:
22 22 import urlparse
23 23
24 24 class JROAPI():
25 25 """
26 26 FINALIDAD:
27 27 Script para administrar y obtener la data del repositorio por medio de APIs.
28 28
29 29 REQUISITIOS PREVIOS:
30 30 - Paso 1: Tener "pip [Python 2]" o "pip3 [Python 3]" instalado:
31 31 - Paso 2: Instalar lo siguiente como admininstrador:
32 32 En Python 2
33 33 - pip install ckanapi==4.5
34 34 - pip install requests
35 35 - pip install futures
36 36 - pip install tqdm
37 37 En Python > 3
38 38 - pip3 install ckanapi==4.5
39 39 - pip3 install requests
40 40 - pip3 install tqdm
41 41
42 42 FUNCIONES DISPONIBLES:
43 43 - action
44 44 - upload_file
45 45 - upload_multiple_files
46 46 - upload_multiple_files_advance
47 47 - show
48 48 - search
49 49 - create
50 50 - patch
51 51 - delete
52 52 - download_files
53 53
54 54 EJEMPLOS:
55 55 #1:
56 56 with JROAPI('http://demo.example.com', Authorization='#########') as <access_name>:
57 57 ... some operation(s) ...
58 58 #2:
59 59 <access_name> = JROAPI('http://example.com', Authorization='#########')
60 60 ... some operation(s) ...
61 61 <access_name>.ckan.close()
62 62
63 63 REPORTAR ALGUN PROBLEMA:
64 64 Debe enviar un correo a eynilupu@igp.gob.pe detallando los siguientes pasos:
65 65 1) Correo para contactarlo
66 66 2) Descripcion del problema
67 67 3) ¿En que paso o seccion encontro el problema?
68 68 4) ¿Cual era el resultado que usted esperaba?
69 69 """
70 70 def __init__(self, url, Authorization=None, secure=True):
71 71 #-------- Check Secure -------#
72 72 self.verify = secure
73 73 if not secure and isinstance(secure, bool):
74 74 session = requests.Session()
75 75 session.verify = False
76 76 else:
77 77 session = None
78 78 #------------------------------#
79 79 self.url = url
80 80 ua = 'CKAN_JRO/2.9.2 (+'+str(self.url)+')'
81 81 #ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
82 82 self.ckan = RemoteCKAN(self.url, apikey=Authorization, user_agent=ua, session=session)
83 83 #self.ckan = RemoteCKAN(self.url, apikey=Authorization)
84 84 self.Authorization = Authorization
85 85 # Change for --> self.separator = os.sep
86 86 if platform.system() == 'Windows':
87 87 self.separator = '\\'
88 88 else:
89 89 self.separator = '/'
90 90
91 91 self.chunk_size = 1024
92 92 self.list = []
93 93 self.dict = {}
94 94 self.str = ''
95 95 self.check = 1
96 96 self.cont = 0
97 97
98 98 def __enter__(self):
99 99 return self
100 100
101 101 def __exit__(self, *args):
102 102 self.ckan.close()
103 103
104 104 def action(self, action, **kwargs):
105 105 """
106 106 FINALIDAD:
107 107 Funcion para llamar a las APIs disponibles
108 108
109 109 APIs DISPONIBLES:
110 110 CONSULTAR: "GUIA DE SCRIPT.pdf"
111 111
112 112 EJEMPLO:
113 113 <access_name>.action(<consuming API>, param_1 = <class 'param_1'>, ...)
114 114 """
115 115 #--------------- CASE: PACKAGE SEARCH ---------------#
116 116 if kwargs is not None:
117 117 if action == 'package_search':
118 118 self.list = ['facet_mincount', 'facet_limit', 'facet_field']
119 119 for facet in self.list:
120 120 if facet in kwargs:
121 121 kwargs[facet.replace('_', '.')] = kwargs[facet]
122 122 kwargs.pop(facet)
123 123 #----------------------------------------------------#
124 124 try:
125 125 return getattr(self.ckan.action, action)(**kwargs)
126 126 except:
127 127 _, exc_value, _ = sys.exc_info()
128 128 return exc_value
129 129
130 130 def upload_file(self, dataset_id, file_date, file_type, file_path=False, url_or_path=False, ignore_repetition=False, **kwargs):
131 131 # Agregar si es interruptido por teclado
132 132 '''
133 133 FINALIDAD:
134 134 Funcion para crear un unico recurso (puede incluir un archivo asociado) al repositorio del ROJ.
135 135
136 136 PARAMETROS DISPONIBLES:
137 137 CONSULTAR: "GUIA DE SCRIPT.pdf"
138 138
139 139 ESTRUCTURA:
140 140 <access_name>.upload_file(dataset_id = <class 'str'>, file_date = <class 'str'>, file_type = <class 'str'>, file_path = <class 'str'>, url_or_path = <class 'str'>, param_1 = <class 'param_1'>, ...)
141 141 '''
142 142 #self.list = ['package_id', 'upload', 'voc_file_type', 'name'] #file_date
143 143 self.list = ['package_id', 'upload', 'voc_file_type'] #file_date
144 144 for key1, value1 in kwargs.items():
145 145 if not key1 in self.list:
146 146 self.dict[key1] = value1
147 147
148 148 #---------------------------#
149 149 if not 'others' in kwargs:
150 150 self.dict['others'] = ''
151 151 else:
152 152 if isinstance(kwargs['others'], list):
153 153 self.dict['others'] = json.dumps(kwargs['others'])
154 154 #---------------------------#
155 155
156 156 if isinstance(file_path, str) and isinstance(url_or_path, str):
157 157 return 'ERROR:: Choose one: "file_path" or "url_or_path" parameters'
158 158
159 159 if isinstance(file_path, str):
160 160 if not os.path.isfile(file_path):
161 161 return 'File "%s" not exist' % (file_path)
162 162
163 163 self.dict['upload'] = open(file_path, 'rb')
164 164 self.dict['name'] = os.path.basename(file_path)
165 165 elif isinstance(url_or_path, str):
166 166 self.dict['url'] = url_or_path
167 167 if not 'name' in self.dict:
168 168 self.dict['name'] = os.path.basename(url_or_path)
169 169 else:
170 170 return 'ERROR: Verify "file_path" or "url_or_path" parameters: <class "str"> or choose one'
171 171
172 172 #if not 'format' in self.dict:
173 173 # self.str = ''.join(pathlib.Path(file_path).suffixes)
174 174 # if len(self.str) > 0:
175 175 # self.dict['format'] = self.str.upper()[1:]
176 176
177 177 #-------------------------PACKAGE SHOW-----------------------#
178 178 try:
179 179 dataset_show = getattr(self.ckan.action, 'package_show')(id=dataset_id)['resources']
180 180 except:
181 181 _, exc_value, _ = sys.exc_info()
182 182 print('ERROR obtaining metadata dataset:: Use the "print" for more information')
183 183 return exc_value
184 184
185 185 resources_name = []
186 186 for u in dataset_show:
187 187 resources_name.append(u['name'].lower())
188 188
189 189 if self.dict['name'].lower() in resources_name:
190 190 if not ignore_repetition:
191 191 return 'ERROR:: "%s" resource already exist in this dataset' % (self.dict['name'])
192 192 print('WARRING:: "'+ str(self.dict['name']) +'" resource already exist in this dataset')
193 193 #------------------------------------------------------------#
194 194 try:
195 195 return getattr(self.ckan.action, 'resource_create')(package_id=dataset_id, file_date=file_date, voc_file_type=file_type, **self.dict)
196 196 except:
197 197 _, exc_value, _ = sys.exc_info()
198 198 return exc_value
199 199
200 200 def upload_multiple_files_advance(self, dataset_id, path_files, file_date, file_type, max_size=100, max_count=500, ignore_repetition=False, **kwargs):
201 201 # Agregar si es interruptido por teclado
202 202 '''
203 203 FINALIDAD:
204 204 Funcion para subir multiples archivos al repositorio del ROJ.
205 205
206 206 PARAMETROS DISPONIBLES:
207 207 CONSULTAR: "GUIA DE SCRIPT.pdf"
208 208
209 209 ESTRUCTURA:
210 210 <access_name>.upload_multiple_files_advance(dataset_id = <class 'str'>, path_files = <class 'list of strings'>, file_date = <class 'str'>, file_type = <class 'str'>, param_1 = <class 'param_1'>, ...)
211 211 '''
212 212 #-------------------------PACKAGE SHOW-----------------------#
213 213 try:
214 214 dataset_show = getattr(self.ckan.action, 'package_show')(id=dataset_id)['resources']
215 215 except:
216 216 _, exc_value, _ = sys.exc_info()
217 217 print('ERROR obtaining metadata dataset:: Use the "print" for more information')
218 218 return exc_value
219 219 #------------------------------------------------------------#
220 220 resources_name = []
221 221 for u in dataset_show:
222 222 resources_name.append(u['name'].lower())
223 223 #------------------------------------------------------------#
224 224 self.list = ['package_id', 'upload', 'voc_file_type', 'name']
225 225 for key1, value1 in kwargs.items():
226 226 if not key1 in self.list:
227 227 self.dict[key1] = value1
228 228 #------------------------------------------------------------#
229 229 if not 'others' in kwargs:
230 230 self.dict['others'] = ''
231 231 else:
232 232 if isinstance(kwargs['others'], list):
233 233 self.dict['others'] = json.dumps(kwargs['others'])
234 234 #------------------------------------------------------------#
235 235 total_list = []
236 236 #---------------CASO : "path" or "path_list"-----------------#
237 237 if type(path_files) is list:
238 238 if len(path_files) != 0:
239 239 path_files.sort()
240 240 for u in path_files:
241 241 if os.path.isfile(u):
242 242 if os.path.basename(u).lower() in resources_name:
243 243 if not ignore_repetition:
244 244 return 'ERROR:: "%s" file already exist in this dataset' % (os.path.basename(u))
245 245 print('WARRING:: "'+ str(os.path.basename(u)) +'" file was ignored because already exist in this dataset')
246 246 else:
247 247 total_list.append({'name':os.path.basename(u), 'size': os.stat(u).st_size, 'upload':open(u, 'rb')})
248 248 else:
249 249 return 'File "%s" does not exist' % (u)
250 250 else:
251 251 return 'ERROR:: "path_list is empty"'
252 252
253 253 elif type(path_files) is str:
254 254 if os.path.isdir(path_files):
255 255 path_order = [f for f in os.listdir(path_files) if os.path.isfile(os.path.join(path_files, f))]
256 256 path_order.sort()
257 257 if path_order:
258 258 for name in path_order:
259 259 if name.lower() in resources_name:
260 260 if not ignore_repetition:
261 261 return 'ERROR:: "%s" file already exist in this dataset' % (name)
262 262 print('WARRING:: "'+ name +'" file was ignored because already exist in this dataset')
263 263 else:
264 264 total_list.append({'name':name, 'size': os.stat(os.path.join(path_files, name)).st_size, 'upload':open(os.path.join(path_files, name), 'rb')})
265 265 else:
266 266 return "ERROR:: There aren't files in this directory"
267 267 else:
268 268 return 'ERROR:: Directory "%s" does not exist' % (path_files)
269 269 else:
270 270 return 'ERROR:: "path_files" must be a str or list'
271 271 #------------------------------------------------------------#
272 272 try:
273 273 uuid.UUID(str(dataset_id), version=4)
274 274 package_id_or_name = '"id": "' + str(dataset_id) + '"'
275 275 except ValueError:
276 276 package_id_or_name = '"name": "' + str(dataset_id) + '"'
277 277 #------------------------------------------------------------#
278 278 blocks = [[]]
279 279 size_file = 0
280 280 count_file = 0
281 281 inter_num = 0
282 282 for value in total_list:
283 283 if value['size'] > 1024 * 1024 * float(max_size):
284 284 return 'ERROR:: The size of the "%s" file is %sMB aprox, please change "max_size" value' % (value['name'], str(round(value['size']/(1024 * 1024), 2)))
285 285 if not 1 <= int(max_count) <= 999:
286 286 return 'ERROR:: The count of the number of files must be between 1 and 999, please change "max_count" value'
287 287
288 288 size_file = size_file + value['size']
289 289 count_file = count_file + 1
290 290 if size_file <= 1024 * 1024 * float(max_size) and count_file <= int(max_count):
291 291 del value['size']
292 292 blocks[inter_num].append(value)
293 293 else:
294 294 inter_num = inter_num + 1
295 295 size_file = value['size']
296 296 count_file = 1
297 297 blocks.append([])
298 298 del value['size']
299 299 blocks[inter_num].append(value)
300 300 #------------------------------------------------------------#
301 301 if len(blocks[0]) > 0:
302 302 print('BLOCK(S) IN TOTAL:: {}'.format(len(blocks)))
303 303 for count1, block in enumerate(blocks):
304 304 print('---- BLOCK N°{} ----'.format(count1 + 1))
305 305 resource_extend = []
306 306 files_dict = {}
307 307 for count2, value2 in enumerate(block):
308 308 value2['file_date'] = file_date
309 309 value2['voc_file_type'] = file_type
310 310 value2.update(self.dict)
311 311
312 312 #if not 'format' in value2:
313 313 # format = ''.join(pathlib.Path(value2['name']).suffixes)
314 314 # if len(format) > 0:
315 315 # value2['format'] = format.upper()[1:]
316 316
317 317 files_dict['update__resources__-'+ str(len(block)-count2) +'__upload'] = (value2['name'], value2['upload'])
318 318 del value2['upload']
319 319 resource_extend.append(value2)
320 320
321 321 print('BLOCK N°{} :: "{}" file(s) found >> uploading'.format(count1 + 1, len(block)))
322 322 try:
323 323 result = self.ckan.call_action(
324 324 'package_revise',
325 325 {'match': '{'+ str(package_id_or_name) +'}', 'update__resources__extend': json.dumps(resource_extend)},
326 326 files=files_dict
327 327 )
328 328 print('BLOCK N°{} :: Uploaded file(s) successfully'.format(count1 + 1))
329 329 if len(blocks) == count1 + 1:
330 330 return result
331 331 except:
332 332 print('ERROR :: Use the "print" for more information')
333 333 _, exc_value, _ = sys.exc_info()
334 334 return exc_value
335 335 else:
336 336 return "ERROR:: No file(s) found to upload"
337 337
338 338 def upload_multiple_files(self, dataset_id, path_files, date_files, type_files, ignore_repetition=False, **kwargs):
339 339 # Agregar si es interruptido por teclado
340 340 '''
341 341 FINALIDAD:
342 342 Funcion para subir multiples archivos al repositorio del ROJ.
343 343
344 344 PARAMETROS DISPONIBLES:
345 345 CONSULTAR: "GUIA DE SCRIPT.pdf"
346 346
347 347 ESTRUCTURA:
348 348 <access_name>.upload_multiple_files(dataset_id = <class 'str'>, path_files = <class 'str'> or <class 'list of strings'>, date_files = <class 'str'> or <class 'list of strings'>, type_files = <class 'str'> or <class 'list of strings'>, param_1 = <class 'param_1'>, ...)
349 349 '''
350 350 #-------------------------PACKAGE SHOW-----------------------#
351 351 try:
352 352 dataset_show = getattr(self.ckan.action, 'package_show')(id=dataset_id)['resources']
353 353 except:
354 354 _, exc_value, _ = sys.exc_info()
355 355 print('ERROR obtaining metadata dataset:: Use the "print" for more information')
356 356 return exc_value
357 357 #------------------------------------------------------------#
358 358 resources_name = []
359 359 for u in dataset_show:
360 360 resources_name.append(u['name'].lower())
361 361 #------------------------------------------------------------#
362 362
363 363 params_dict = {'upload':[], 'name':[]}
364 364 #if not 'format' in kwargs:
365 365 # params_dict.update({'format':[]})
366 366 #---------------CASO : "path" or "path_list"-----------------#
367 367 if type(path_files) is list:
368 368 if len(path_files) != 0:
369 369 path_files.sort()
370 370 for u in path_files:
371 371 if os.path.isfile(u):
372 372 if os.path.basename(u).lower() in resources_name:
373 373 if not ignore_repetition:
374 374 return 'ERROR:: "%s" file already exist in this dataset' % (os.path.basename(u))
375 375 print('WARRING:: "'+ str(os.path.basename(u)) +'" file was ignored because already exist in this dataset')
376 376 else:
377 377 params_dict['upload'].append(open(u, 'rb'))
378 378 params_dict['name'].append(os.path.basename(u))
379 379 #if not 'format' in kwargs:
380 380 # format = ''.join(pathlib.Path(u).suffixes)
381 381 # if len(format) > 0:
382 382 # params_dict['format'].append(format.upper()[1:])
383 383 # else:
384 384 # params_dict['format'].append('')
385 385 else:
386 386 return 'File "%s" does not exist' % (u)
387 387 else:
388 388 return 'ERROR:: "path_list is empty"'
389 389 elif type(path_files) is str:
390 390 if os.path.isdir(path_files):
391 391 path_order = [f for f in os.listdir(path_files) if os.path.isfile(os.path.join(path_files, f))]
392 392 path_order.sort()
393 393 if path_order:
394 394 for name in path_order:
395 395 if name.lower() in resources_name:
396 396 if not ignore_repetition:
397 397 return 'ERROR:: "%s" file already exist in this dataset' % (name)
398 398 print('WARRING:: "'+ str(name) +'" file was ignored because already exist in this dataset')
399 399 else:
400 400 params_dict['upload'].append(open(os.path.join(path_files, name), 'rb'))
401 401 params_dict['name'].append(name)
402 402 #if not 'format' in kwargs:
403 403 # format = ''.join(pathlib.Path(name).suffixes)
404 404 # if len(format) > 0:
405 405 # params_dict['format'].append(format.upper()[1:])
406 406 # else:
407 407 # params_dict['format'].append('')
408 408 else:
409 409 return "ERROR:: There aren't files in this directory"
410 410 else:
411 411 return 'ERROR:: Directory "%s" does not exist' % (path_files)
412 412 else:
413 413 return 'ERROR:: "path_files" must be a str or list'
414 414 #------------------------------------------------------------#
415 415 params_no_dict = {'package_id': dataset_id}
416 416 if type(date_files) is list:
417 417 params_dict['file_date'] = date_files
418 418 else:
419 419 params_no_dict['file_date'] = date_files
420 420
421 421 if type(type_files) is list:
422 422 params_dict['voc_file_type'] = type_files
423 423 else:
424 424 params_no_dict['voc_file_type'] = type_files
425 425
426 426 for key1, value1 in kwargs.items():
427 427 if not key1 in params_dict and not key1 in params_no_dict and key1 != 'others':
428 428 if type(value1) is list:
429 429 params_dict[key1] = value1
430 430 else:
431 431 params_no_dict[key1] = value1
432 432 #------------------------------------------#
433 433 if not 'others' in kwargs:
434 434 params_no_dict['others'] = ''
435 435 else:
436 436 if isinstance(kwargs['others'], tuple):
437 437 params_dict['others'] = [json.dumps(w) for w in kwargs['others']]
438 438 elif isinstance(kwargs['others'], list):
439 439 params_no_dict['others'] = json.dumps(kwargs['others'])
440 440 elif isinstance(kwargs['others'], str):
441 441 params_no_dict['others'] = kwargs['others']
442 442 else:
443 443 return 'ERROR:: "others" must be a tuple, list or str'
444 444 #------------------------------------------#
445 445 len_params_dict = []
446 446 for value2 in params_dict.values():
447 447 len_params_dict.append(len(value2))
448 448
449 449 if len(list(set(len_params_dict))) > 1:
450 450 return 'ERROR:: All lists must be the same length: %s' % (len(params_dict['name']))
451 451 #------------------------------------------------------------#
452 452 print('"{}" file(s) found >> uploading'.format(len(params_dict['name'])))
453 453 for v in range(len(params_dict['name'])):
454 454 try:
455 455 send = {}
456 456 for key_dict, value_dict in params_dict.items():
457 457 send[key_dict] = value_dict[v]
458 458 for key_no_dict, value_no_dict in params_no_dict.items():
459 459 send[key_no_dict] = value_no_dict
460 460
461 461 self.list.append(getattr(self.ckan.action, 'resource_create')(**send))
462 462 print('File #{} :: "{}" was uploaded successfully'.format(v+1, params_dict['name'][v]))
463 463 except:
464 464 _, exc_value, _ = sys.exc_info()
465 465 self.list.append(exc_value)
466 466 print('File #{} :: Error uploading "{}" file'.format(v+1, params_dict['name'][v]))
467 467 return self.list
468 468 #------------------------------------------------------------#
469 469
470 470 def show(self, type_option, id, **kwargs):
471 471 '''
472 472 FINALIDAD:
473 473 Funcion personalizada para una busqueda en especifico.
474 474
475 475 PARAMETROS DISPONIBLES:
476 476 CONSULTAR: "GUIA DE SCRIPT.pdf"
477 477
478 478 ESTRUCTURA:
479 479 <access_name>.show(type_option = <class 'str'>, id = <class 'str'>, param_1 = <class 'param_1'>, ...)
480 480 '''
481 481 if type(type_option) is str:
482 482 try:
483 483 if type_option == 'dataset':
484 484 return getattr(self.ckan.action, 'package_show')(id=id, **kwargs)
485 485 elif type_option == 'resource':
486 486 return getattr(self.ckan.action, 'resource_show')(id=id, **kwargs)
487 487 elif type_option == 'project':
488 488 return getattr(self.ckan.action, 'organization_show')(id=id, **kwargs)
489 489 elif type_option == 'collaborator':
490 490 return getattr(self.ckan.action, 'package_collaborator_list_for_user')(id=id, **kwargs)
491 491 elif type_option == 'member':
492 492 return getattr(self.ckan.action, 'organization_list_for_user')(id=id, **kwargs)
493 493 elif type_option == 'vocabulary':
494 494 return getattr(self.ckan.action, 'vocabulary_show')(id=id, **kwargs)
495 495 elif type_option == 'tag':
496 496 if not 'vocabulary_id' in kwargs:
497 497 print('Missing "vocabulary_id" value: assume it is a free tag')
498 498 return getattr(self.ckan.action, 'tag_show')(id=id, **kwargs)
499 499 elif type_option == 'user':
500 500 return getattr(self.ckan.action, 'user_show')(id=id, **kwargs)
501 501 elif type_option == 'job':
502 502 return getattr(self.ckan.action, 'job_show')(id=id, **kwargs)
503 503 else:
504 504 return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
505 505 except:
506 506 _, exc_value, _ = sys.exc_info()
507 507 return exc_value
508 508 else:
509 509 return 'ERROR:: "type_option" must be a str'
510 510
511 511 def search(self, type_option, query=None, **kwargs):
512 512 '''
513 513 FINALIDAD:
514 514 Funcion personalizada para busquedas que satisfagan algun criterio.
515 515
516 516 PARAMETROS DISPONIBLES:
517 517 CONSULTAR: "GUIA DE SCRIPT.pdf"
518 518
519 519 ESTRUCTURA:
520 520 <access_name>.search(type_option = <class 'str'>, query = <class 'dict'>, param_1 = <class 'param_1'>, ...)
521 521 '''
522 522 if type(type_option) is str:
523 523 try:
524 524 if type_option == 'dataset':
525 525 key_replace = ['fq', 'fq_list', 'include_private']
526 526 key_point = ['facet_mincount', 'facet_limit', 'facet_field']
527 527 for key1, value1 in kwargs.items():
528 528 if not key1 in key_replace:
529 529 if key1 in key_point:
530 530 self.dict[key1.replace('_', '.')] = value1
531 531 else:
532 532 self.dict[key1] = value1
533 533
534 534 if query is not None:
535 535 if type(query) is dict:
536 536 self.dict['fq_list'] = []
537 537 #NUM_RESOURCES_MIN / NUM_RESOURCES_MAX
538 538 #----------------------------------------------------#
539 539 if 'dataset_start_date' in query:
540 540 if type(query['dataset_start_date']) is str:
541 541 try:
542 542 datetime.strptime(query['dataset_start_date'], '%Y-%m-%d')
543 543 if len(query['dataset_start_date']) != 10:
544 544 return '"dataset_start_date", must be: <YYYY-MM-DD>'
545 545 self.dict['fq_list'].append('dataset_start_date:"'+query['dataset_start_date']+'"')
546 546 self.list.append('dataset_start_date')
547 547 except:
548 548 return '"dataset_start_date" incorrect: "%s"' % (query['dataset_start_date'])
549 549 else:
550 550 return '"dataset_start_date" must be <str>'
551 551 #----------------------------------------------------#
552 552 if 'dataset_end_date' in query:
553 553 if type(query['dataset_end_date']) is str:
554 554 try:
555 555 datetime.strptime(query['dataset_end_date'], '%Y-%m-%d')
556 556 if len(query['dataset_end_date']) != 10:
557 557 return '"dataset_end_date", must be: <YYYY-MM-DD>'
558 558
559 559 if 'dataset_start_date' in query:
560 560 if query['dataset_start_date'] > query['dataset_end_date']:
561 561 return '"dataset_end_date" must be greater than "dataset_start_date"'
562 562
563 563 self.dict['fq_list'].append('dataset_end_date:"'+query['dataset_end_date']+'"')
564 564 self.list.append('dataset_end_date')
565 565 except:
566 566 return '"dataset_end_date" incorrect: "%s"' % (query['dataset_end_date'])
567 567 else:
568 568 return '"dataset_end_date" must be <str>'
569 569 #----------------------------------------------------#
570 570 for key, value in query.items():
571 571 if value is not None and not key in self.list:
572 572 self.dict['fq_list'].append(str(key)+':"'+str(value)+'"')
573 573 else:
574 574 return '"query" must be <dict>'
575 575
576 576 return getattr(self.ckan.action, 'package_search')(include_private=True, **self.dict)
577 577
578 578 elif type_option == 'resource':
579 579 for key1, value1 in kwargs.items():
580 580 if key1 != 'fields':
581 581 self.dict[key1] = value1
582 582
583 583 if query is not None:
584 584 if type(query) is dict:
585 585 #----------------------------------------------------#
586 586 if 'file_date_min' in query:
587 587 if type(query['file_date_min']) is str:
588 588 try:
589 589 datetime.strptime(query['file_date_min'], '%Y-%m-%d')
590 590 if len(query['file_date_min']) != 10:
591 591 return '"file_date_min", must be: <YYYY-MM-DD>'
592 592 except:
593 593 return '"file_date_min" incorrect: "%s"' % (query['file_date_min'])
594 594 else:
595 595 return '"file_date_min" must be <str>'
596 596 #----------------------------------------------------#
597 597 if 'file_date_max' in query:
598 598 if type(query['file_date_max']) is str:
599 599 try:
600 600 datetime.strptime(query['file_date_max'], '%Y-%m-%d')
601 601 if len(query['file_date_max']) != 10:
602 602 return '"file_date_max", must be: <YYYY-MM-DD>'
603 603
604 604 if 'file_date_min' in query:
605 605 if query['file_date_min'] > query['file_date_max']:
606 606 return '"file_date_max" must be greater than "file_date_min"'
607 607 except:
608 608 return '"file_date_max" incorrect: "%s"' % (query['file_date_max'])
609 609 else:
610 610 return '"file_date_max" must be <str>'
611 611 #----------------------------------------------------#
612 612 self.dict['query'] = query
613 613 else:
614 614 return '"query" must be <dict>'
615 615 return getattr(self.ckan.action, 'resources_search')(**self.dict)
616 616
617 617 elif type_option == 'tag':
618 618 for key1, value1 in kwargs.items():
619 619 if key1 != 'fields':
620 620 self.dict[key1] = value1
621 621
622 622 if not 'vocabulary_id' in kwargs:
623 623 print('Missing "vocabulary_id" value: tags that don’t belong to any vocabulary')
624 624 else:
625 625 print('Only tags that belong to "{}" vocabulary'.format(kwargs['vocabulary_id']))
626 626
627 627 if query is not None:
628 628 if type(query) is dict:
629 629 if 'search' in query:
630 630 if type(query['search']) is list or type(query['search']) is str:
631 631 self.dict['query'] = query['search']
632 632 else:
633 633 return '"search" must be <list> or <str>'
634 634 else:
635 635 return '"query" must be <dict>'
636 636 return getattr(self.ckan.action, 'tag_search')(**self.dict)
637 637
638 638 else:
639 639 return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
640 640
641 641 except:
642 642 _, exc_value, _ = sys.exc_info()
643 643 return exc_value
644 644 else:
645 645 return 'ERROR:: "type_option" must be <str>'
646 646
647 647 def create(self, type_option, select=None, **kwargs):
648 648 '''
649 649 FINALIDAD:
650 650 Funcion personalizada para crear.
651 651
652 652 PARAMETROS DISPONIBLES:
653 653 CONSULTAR: "GUIA DE SCRIPT.pdf"
654 654
655 655 ESTRUCTURA:
656 656 <access_name>.create(type_option = <class 'str'>, param_1 = <class 'param_1'>, ...)
657 657 '''
658 658 if type(type_option) is str:
659 659 try:
660 660 if type_option == 'dataset':
661 661 return getattr(self.ckan.action, 'package_create')(**kwargs)
662 662 if type_option == 'resource':
663 663 return resource.resource_create(self, **kwargs)
664 664 elif type_option == 'project':
665 665 return getattr(self.ckan.action, 'organization_create')(**kwargs)
666 666 elif type_option == 'member':
667 667 return getattr(self.ckan.action, 'organization_member_create')(**kwargs)
668 668 elif type_option == 'collaborator':
669 669 return getattr(self.ckan.action, 'package_collaborator_create')(**kwargs)
670 670 elif type_option == 'vocabulary':
671 671 return getattr(self.ckan.action, 'vocabulary_create')(**kwargs)
672 672 elif type_option == 'tag':
673 673 return getattr(self.ckan.action, 'tag_create')(**kwargs)
674 674 elif type_option == 'user':
675 675 return getattr(self.ckan.action, 'user_create')(**kwargs)
676 676 elif type_option == 'views':
677 677 if 'resource' == select:
678 678 self.list = ['package']
679 679 for key1, value1 in kwargs.items():
680 680 if not key1 in self.list:
681 681 self.dict[key1] = value1
682 682 return getattr(self.ckan.action, 'resource_create_default_resource_views')(**self.dict)
683 683 elif 'dataset' == select:
684 684 return getattr(self.ckan.action, 'package_create_default_resource_views')(**kwargs)
685 685 else:
686 686 return 'ERROR:: "select = %s" is not accepted' % (select)
687 687 else:
688 688 return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
689 689 except:
690 690 _, exc_value, _ = sys.exc_info()
691 691 return exc_value
692 692 else:
693 693 return 'ERROR:: "type_option" must be <str>'
694 694
695 695 def patch(self, type_option, **kwargs):
696 696 '''
697 697 FINALIDAD:
698 698 Funciones personalizadas para actualizar
699 699
700 700 PARAMETROS DISPONIBLES:
701 701 CONSULTAR: "GUIA DE SCRIPT.pdf"
702 702
703 703 ESTRUCTURA:
704 704 <access_name>.patch(type_option = <class 'str'>, param_1 = <class 'param_1'>, ...)
705 705 '''
706 706 if type(type_option) is str:
707 707 try:
708 708 if type_option == 'dataset':
709 709 #Agregar que solo se debe modificar parámetros del Dataset y que no incluya Resources
710 710 return getattr(self.ckan.action, 'package_patch')(**kwargs)
711 711 elif type_option == 'project':
712 712 return getattr(self.ckan.action, 'organization_patch')(**kwargs)
713 713 elif type_option == 'resource':
714 714 return resource.resource_patch(self, **kwargs)
715 715 elif type_option == 'member':
716 716 return getattr(self.ckan.action, 'organization_member_create')(**kwargs)
717 717 elif type_option == 'collaborator':
718 718 return getattr(self.ckan.action, 'package_collaborator_create')(**kwargs)
719 719 else:
720 720 return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
721 721 except:
722 722 _, exc_value, _ = sys.exc_info()
723 723 return exc_value
724 724 else:
725 725 return 'ERROR:: "type_option" must be <str>'
726 726
727 727 def delete(self, type_option, select=None, **kwargs):
728 728 '''
729 729 FINALIDAD:
730 730 Función personalizada para eliminar y/o purgar.
731 731
732 732 PARAMETROS DISPONIBLES:
733 733 CONSULTAR: "GUIA DE SCRIPT.pdf"
734 734
735 735 ESTRUCTURA:
736 736 <access_name>.delete(type_option = <class 'str'>, param_1 = <class 'param_1'>, ...)
737 737 '''
738 738 if type(type_option) is str:
739 739 try:
740 740 if type_option == 'dataset':
741 741 if select is None:
742 742 return 'ERROR:: "select" must not be "None"'
743 743 else:
744 744 if 'delete' == select:
745 745 return getattr(self.ckan.action, 'package_delete')(**kwargs)
746 746 elif 'purge' == select:
747 747 return getattr(self.ckan.action, 'dataset_purge')(**kwargs)
748 748 else:
749 749 return 'ERROR:: "select = %s" is not accepted' % (select)
750 750 elif type_option == 'project':
751 751 if select is None:
752 752 return 'ERROR:: "select" must not be "None"'
753 753 else:
754 754 if 'delete' == select:
755 755 return getattr(self.ckan.action, 'organization_delete')(**kwargs)
756 756 elif 'purge' == select:
757 757 return getattr(self.ckan.action, 'organization_purge')(**kwargs)
758 758 else:
759 759 return 'ERROR:: "select = %s" is not accepted' % (select)
760 760 elif type_option == 'resource':
761 761 if select is None:
762 762 return 'ERROR:: "select" must not be "None"'
763 763 else:
764 764 return resource.resource_delete(self, select, **kwargs)
765 765 elif type_option == 'vocabulary':
766 766 return getattr(self.ckan.action, 'vocabulary_delete')(**kwargs)
767 767 elif type_option == 'tag':
768 768 return getattr(self.ckan.action, 'tag_delete')(**kwargs)
769 769 elif type_option == 'user':
770 770 return getattr(self.ckan.action, 'user_delete')(**kwargs)
771 771 else:
772 772 return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
773 773 except:
774 774 _, exc_value, _ = sys.exc_info()
775 775 return exc_value
776 776 else:
777 777 return 'ERROR:: "type_option" must be <str>'
778 778
779 779 def f_status_note(self, total, result, path):
780 780 file_txt = open(path+'status_note.txt', 'w')
781 781 file_txt = open(path+'status_note.txt', 'a')
782 782
783 783 file_txt.write('DOWNLOADED FILE(S): "%s"' % (len(result['name'])))
784 784 file_txt.write(''+ os.linesep)
785 785 for u in result['name']:
786 786 file_txt.write(' - '+ u + os.linesep)
787 787 file_txt.write(''+ os.linesep)
788 788
789 789 file_txt.write('FAILED FILE(S): "%s"' % (len(total['name'])-len(result['name'])))
790 790 file_txt.write(''+ os.linesep)
791 791 if len(total['name'])-len(result['name']) != 0:
792 792 for u in total['name']:
793 793 if not u in result['name']:
794 794 file_txt.write(' - '+ u + os.linesep)
795 795 else:
796 796 file_txt.write(' "None"'+ os.linesep)
797 797
798 798 def f_name(self, name_dataset, ext, tempdir):
799 799 while self.check:
800 800 self.str = ''
801 801 if self.cont == 0:
802 802 if os.path.exists(tempdir + name_dataset + ext):
803 803 self.str = name_dataset+'('+str(self.cont+1)+')'+ext
804 804 else:
805 805 self.check = self.check * 0
806 806 self.str = name_dataset + ext
807 807 else:
808 808 if not os.path.exists(tempdir + name_dataset+'('+str(self.cont)+')'+ext):
809 809 self.check = self.check * 0
810 810 self.str = name_dataset+'('+str(self.cont)+')'+ ext
811 811 self.cont = self.cont+1
812 812 return self.str
813 813
814 814 def f_zipdir(self, path, ziph, zip_name):
815 815 for root, _, files in os.walk(path):
816 816 print('.....')
817 817 print('Creating: "{}" >>'.format(zip_name))
818 818 for __file in tqdm(iterable=files, total=len(files)):
819 819 new_dir = os.path.relpath(os.path.join(root, __file), os.path.join(path, '..'))
820 820 ziph.write(os.path.join(root, __file), new_dir)
821 821 print('Created >>')
822 822
823 823 def download_by_step(self, response, tempdir_name):
824 824 try:
825 825 # ---------- REPLACE URL --------- #
826 826 if urlparse(self.url).netloc != 'www.igp.gob.pe' and urlparse(response['url']).netloc == 'www.igp.gob.pe':
827 827 response['url'] = response['url'].replace(urlparse(response['url']).scheme + '://' + urlparse(response['url']).netloc,
828 828 urlparse(self.url).scheme + '://' + urlparse(self.url).netloc)
829 829 #----------------------------------#
830 830 with requests.get(response['url'], stream=True, headers={'Authorization': self.Authorization}, verify=self.verify) as resp:
831 831 if resp.status_code == 200:
832 832 with open(tempdir_name+response['name'], 'wb') as file:
833 833 for chunk in resp.iter_content(chunk_size = self.chunk_size):
834 834 if chunk:
835 835 file.write(chunk)
836 836 except requests.exceptions.RequestException:
837 837 pass
838 838
839 839 def download_files(self, **kwargs):
840 840 '''
841 841 FINALIDAD:
842 842 Funcion personalizada para la descarga de archivos existentes de un dataset.
843 843
844 844 PARAMETROS DISPONIBLES:
845 845 CONSULTAR: "GUIA DE SCRIPT.pdf"
846 846
847 847 ESTRUCTURA:
848 848 <access_name>.download_files(id = <class 'str'>, param_1 = <class 'param_1'>, ...)
849 849 '''
850 850 dict_local = {}
851 851 #----------------------------------------------#
852 852 if 'zip' in kwargs:
853 853 if type(kwargs['zip']) is not bool:
854 854 return 'ERROR:: "zip" must be: <class "bool">'
855 855 else:
856 856 dict_local['zip'] = kwargs['zip']
857 857 else:
858 858 dict_local['zip'] = False
859 859 #----------------------------------------------#
860 860 if 'status_note' in kwargs:
861 861 if type(kwargs['status_note']) is not bool:
862 862 return 'ERROR:: "status_note" must be: <class "bool">'
863 863 else:
864 864 dict_local['status_note'] = kwargs['status_note']
865 865 else:
866 866 dict_local['status_note'] = False
867 867 #----------------------------------------------#
868 868 if 'path' in kwargs:
869 869 if type(kwargs['path']) is str:
870 870 if os.path.isdir(kwargs['path']) == False:
871 871 return 'ERROR:: "path" does not exist'
872 872 else:
873 873 if kwargs['path'][-1:] != self.separator:
874 874 dict_local['path'] = kwargs['path']+self.separator
875 875 else:
876 876 dict_local['path'] = kwargs['path']
877 877
878 878 txt = dict_local['path']+datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f")+'.txt'
879 879 if int(platform.python_version()[0]) == 3:
880 880 try:
881 881 file_txt = open(txt, 'w')
882 882 file_txt.close()
883 883 os.remove(txt)
884 884 except PermissionError:
885 885 return 'ERROR:: Access denied, you are not authorized to write files: "%s"' % (dict_local['path'])
886 886 else:
887 887 try:
888 888 file_txt = open(txt, 'w')
889 889 file_txt.close()
890 890 os.remove(txt)
891 891 except:
892 892 return 'ERROR:: Access denied, you are not authorized to write files: "%s"' % (dict_local['path'])
893 893 else:
894 894 return 'ERROR:: "path" must be: <class "str">'
895 895 else:
896 896 dict_local['path'] = ''
897 897 #----------------------------------------------#
898 898 for key, value in kwargs.items():
899 899 if not key in dict_local:
900 900 self.dict[key] = value
901 901 try:
902 902 response = getattr(self.ckan.action, 'url_resources')(**self.dict)
903 903 except:
904 904 _, exc_value, _ = sys.exc_info()
905 905 return exc_value
906 906
907 907 if len(response) != 0:
908 908 #--------------TEMP PATH---------------#
909 909 if dict_local['zip']:
910 910 tempdir = tempfile.mkdtemp(prefix=kwargs['id']+'-')+self.separator
911 911 os.mkdir(tempdir+kwargs['id'])
912 912 dir_name = tempdir + kwargs['id'] + self.separator
913 913 else:
914 914 dir = self.f_name(kwargs['id'], '', dict_local['path'])
915 915 os.mkdir(dict_local['path'] + dir)
916 916 dir_name = dict_local['path'] + dir + self.separator
917 917 #-----------DOWNLOAD FILES-------------#
918 918 print('.....')
919 919 print('Downloading "{}" file(s) >>'.format(len(response)))
920 920 name_total = {'name': []}
921 921 with concurrent.futures.ThreadPoolExecutor() as executor:
922 922 for u in tqdm(iterable=response, total=len(response)):
923 923 name_total['name'].append(u['name'])
924 924 executor.submit(self.download_by_step, u, dir_name)
925 925 name_check = {}
926 926 name_check['name'] = [f for f in os.listdir(dir_name) if os.path.isfile(os.path.join(dir_name, f))]
927 927 print('"{}" downloaded file(s) successfully >>'.format(len(name_check['name'])))
928 928 #--------------------------------------#
929 929 if len(name_check['name']) != 0:
930 930 #----------Status Note---------#
931 931 if dict_local['status_note']:
932 932 print('.....')
933 933 print('Creating: "status_note.txt" >>')
934 934 self.f_status_note(name_total, name_check, dir_name)
935 935 print('Created>>')
936 936 #----------ZIP CREATE----------#
937 937 if dict_local['zip']:
938 938 zip_name = self.f_name(kwargs['id'], '.zip', dict_local['path'])
939 939 ziph = zipfile.ZipFile(dict_local['path'] + zip_name, 'w', zipfile.ZIP_DEFLATED, allowZip64=True)
940 940 self.f_zipdir(dir_name, ziph, zip_name)
941 941 ziph.close()
942 942 #Delete Temporal Path
943 943 if os.path.exists(tempdir[:-1]):
944 944 shutil.rmtree(tempdir[:-1])
945 945 #------------------------------#
946 946 print('.....')
947 947 return 'DOWNLOAD FINISHED'
948 948 else:
949 949 #Delete Temporal Path
950 950 if dict_local['zip']:
951 951 if os.path.exists(tempdir[:-1]):
952 952 shutil.rmtree(tempdir[:-1])
953 953 else:
954 954 if os.path.exists(dir_name[:-1]):
955 955 shutil.rmtree(dir_name[:-1])
956 956 return 'NO FILES WERE DOWNLOADED'
957 957 else:
958 958 return 'FILES NOT FOUND'
959 959
960 960 def download_files_advance(self, id_or_name, processes=1, path=os.path.expanduser("~"), **kwargs):
961 961 '''
962 962 FINALIDAD:
963 963 Funcion personalizada avanzada para la descarga de archivos existentes de un(os) dataset(s).
964 964
965 965 PARAMETROS DISPONIBLES:
966 966 CONSULTAR: "GUIA DE SCRIPT.pdf"
967 967
968 968 ESTRUCTURA:
969 969 <access_name>.download_files_advance(id_or_name= <class 'str' or 'list'>, param_1 = <class 'param_1'>, ...)
970 970 '''
971 971 #------------------ PATH ----------------------#
972 972 if isinstance(path, str):
973 973 if os.path.isdir(path):
974 974 if not path.endswith(os.sep):
975 975 path = path + os.sep
976 976 test_txt = path + datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f")+'.txt'
977 977 try:
978 978 file_txt = open(test_txt, 'w')
979 979 file_txt.close()
980 980 os.remove(test_txt)
981 981 except:
982 982 return 'ERROR:: Access denied, you are not authorized to write files: "%s"' % (path)
983 983 else:
984 984 return 'ERROR:: "path" does not exist'
985 985 else:
986 986 return 'ERROR:: "path" must be: <class "str">'
987 987
988 988 #------------------ PROCESSES -----------------#
989 989 if not isinstance(processes, int):
990 990 return 'ERROR:: "processes" must be: <class "int">'
991 991
992 992 #------------------ ID OR NAME ----------------#
993 993 if isinstance(id_or_name, str):
994 994 id_or_name = [id_or_name]
995 995 elif isinstance(id_or_name, list):
996 996 id_or_name = list(map(str, id_or_name))
997 997 else:
998 998 return 'ERROR:: dataset "id_or_name" must be: <class "str" or "list">'
999 999 #----------------------------------------------#
1000 1000 arguments = {
1001 1001 '--apikey': self.Authorization,
1002 1002 '--ckan-user': None,
1003 1003 '--config': None,
1004 1004 '--datapackages': path,
1005 1005 '--datastore-fields': False,
1006 1006 '--get-request': False,
1007 1007 '--insecure': not self.verify,
1008 '--log': '/home/soporte/DUMP/download.txt',
1009 1008 '--processes': str(processes),
1010 1009 '--quiet': False,
1011 1010 '--remote': self.url,
1012 1011 '--worker': False,
1012 #'--log': 'log.txt',
1013 1013 #'--all': False,
1014 1014 #'--gzip': False,
1015 1015 #'--output': None,
1016 1016 #'--max-records': None,
1017 1017 #'--output-json': False,
1018 1018 #'--output-jsonl': False,
1019 1019 #'--create-only': False,
1020 1020 #'--help': False,
1021 1021 #'--input': None,
1022 1022 #'--input-json': False,
1023 1023 #'--start-record': '1',
1024 1024 #'--update-only': False,
1025 1025 #'--upload-logo': False,
1026 1026 #'--upload-resources': False,
1027 1027 #'--version': False,
1028 1028 'ID_OR_NAME': id_or_name,
1029 1029 'datasets': True,
1030 1030 'dump': True,
1031 1031 #'ACTION_NAME': None,
1032 1032 #'KEY:JSON': [],
1033 1033 #'KEY=STRING': [],
1034 1034 #'KEY@FILE': [],
1035 1035 #'action': False,
1036 1036 #'delete': False,
1037 1037 #'groups': False,
1038 1038 #'load': False,
1039 1039 #'organizations': False,
1040 1040 #'related': False,
1041 1041 #'search': False,
1042 1042 #'users': False
1043 1043 }
1044 1044 return logic_download.dump_things_change(self.ckan, 'datasets', arguments, **kwargs) No newline at end of file
@@ -1,226 +1,234
1 1 #from ckanapi.datapackage import populate_schema_from_datastore
2 2 from ckanapi.cli import workers, dump
3 3 from ckanapi.cli.utils import pretty_json, completion_stats, compact_json, quiet_int_pipe
4 4 from datetime import datetime
5 5 import sys
6 6 import json
7 7 import os
8 8 import requests
9 9 import six
10 10
11 11 if sys.version_info.major == 3:
12 12 from urllib.parse import urlparse
13 13 else:
14 14 import urlparse
15 15
16 16 DL_CHUNK_SIZE = 100 * 1024
17 17
18 18 def dump_things_change(ckan, thing, arguments, worker_pool=None, stdout=None, stderr=None, **kwargs):
19 19 if worker_pool is None:
20 20 worker_pool = workers.worker_pool
21 21 if stdout is None:
22 22 stdout = getattr(sys.__stdout__, 'buffer', sys.__stdout__)
23 23 if stderr is None:
24 24 stderr = getattr(sys.stderr, 'buffer', sys.stderr)
25 25
26 26 if arguments['--worker']:
27 27 return dump.dump_things_worker(ckan, thing, arguments)
28 28 '''
29 29 log = None
30 30 if arguments['--log']:
31 31 log = open(arguments['--log'], 'a')
32 32 '''
33 33 jsonl_output = stdout
34 34 if arguments['--datapackages']:
35 35 jsonl_output = open(os.devnull, 'wb')
36 36
37 37 names = arguments['ID_OR_NAME']
38 38
39 39 if names and isinstance(names[0], dict):
40 40 names = [rec.get('name',rec.get('id')) for rec in names]
41 41 '''
42 42 if arguments['--datapackages']:
43 43 arguments['--datastore-fields'] = True
44 44 '''
45 45 #----------------------------#
46 46 filtered_urls = {}
47 for name in names:
47 for val in names:
48 48 try:
49 response = getattr(ckan.action, 'url_resources')(id=name, **kwargs)
49 filtered_urls[val] = getattr(ckan.action, 'url_resources')(id=val, **kwargs)
50 50 except:
51 51 _, exc_value, _ = sys.exc_info()
52 52 return exc_value
53 filtered_urls[name] = response
54 53 #----------------------------#
55 54
56 55 cmd = dump._worker_command_line(thing, arguments)
57 56 processes = int(arguments['--processes'])
58 57 if hasattr(ckan, 'parallel_limit'):
59 58 processes = min(processes, ckan.parallel_limit)
60 59 stats = completion_stats(processes)
61 60 pool = worker_pool(cmd, processes, enumerate(compact_json(n) + b'\n' for n in names))
62 61
63 62 results = {}
64 63 expecting_number = 0
65 64 with quiet_int_pipe() as errors:
66 65 for job_ids, finished, result in pool:
67 66 if not result:
68 67 return 1
69 68 timestamp, error, record = json.loads(result.decode('utf-8'))
70 69 results[finished] = record
71 70
71 #----------------------------------------#
72 datapackages_path = arguments['--datapackages']
73 datapackage_dir = name_no_repetition(record.get('name', ''), datapackages_path)
74 #----------------------------------------#
72 75 if not arguments['--quiet']:
73 stderr.write('** Finished: {0} | Job IDs: {1} | Next Report: {2} | Error: {3} | Dataset Name: {4}\n'.format(
76 stderr.write('** Finished: {0} | Job IDs: {1} | Next Report: {2} | Error: {3} | Path: {4} | Dataset Name: {5}\n'.format(
74 77 finished,
75 78 job_ids,
76 79 next(stats),
77 80 error,
81 datapackage_dir,
78 82 record.get('name', '') if record else '',
79 83 ).encode('utf-8'))
80 84 '''
81 85 if log:
82 86 log.write(compact_json([
83 87 timestamp,
84 88 finished,
85 89 error,
86 90 record.get('name', '') if record else None,
87 91 ]) + b'\n')
88 92 '''
89 datapackages_path = arguments['--datapackages']
90 93 if datapackages_path:
91 create_datapackage_change(record, filtered_urls[record.get('name', '')], datapackages_path, stderr, arguments['--apikey'], arguments['--remote'], arguments['--insecure'])
94 try:
95 filter_url = filtered_urls[record.get('name', '')]
96 except:
97 filter_url = filtered_urls[record.get('id', '')]
98 create_datapackage_change(record, filter_url, datapackage_dir, stderr, arguments['--apikey'], arguments['--remote'], arguments['--insecure'])
99
92 100 while expecting_number in results:
93 101 record = results.pop(expecting_number)
94 102 if record:
95 103 jsonl_output.write(compact_json(record, sort_keys=True) + b'\n')
96 104 expecting_number += 1
97 105 if 'pipe' in errors:
98 106 return 1
99 107 if 'interrupt' in errors:
100 108 return 2
101 109
102 def create_datapackage_change(record, filtered_url, base_path, stderr, apikey, host_url, insecure):
110 def create_datapackage_change(record, filtered_url, datapackage_dir, stderr, apikey, host_url, insecure):
103 111 resource_formats_to_ignore = ['API', 'api']
104 #----------------------------------------#
105 datapackage_dir = name_no_repetition(record.get('name', ''), base_path)
106 #----------------------------------------#
112
107 113 os.makedirs(os.path.join(datapackage_dir, 'data'))
108 114 record['path'] = datapackage_dir
109 115
110 116 ckan_resources = []
111 117 for resource in record.get('resources', []):
112 118 if resource['format'] in resource_formats_to_ignore:
113 119 continue
114 120
115 121 if not {'name': resource['name'], 'url': resource['url']} in filtered_url:
116 122 continue
117 123
118 124 if len(resource['url']) == 0:
119 125 continue
120 126
121 127 filename = name_no_repetition(resource['name'], os.path.join(datapackage_dir, 'data'), 'resource')
122 128 resource['path'] = os.path.join(datapackage_dir, 'data', filename)
123 129
124 130 cres = create_resource_change(resource, stderr, apikey, host_url, insecure)
125 131 if not cres:
126 132 continue
127 133 '''
128 134 #----------------------------------------#
129 135 dres = {'path': os.path.join('data', filename),
130 136 'description': cres.get('description', ''),
131 137 'format': cres.get('format', ''),
132 138 'name': cres.get('name', ''),
133 139 'title': cres.get('name', '').title()}
134 140 #----------------------------------------#
135 141 populate_schema_from_datastore(cres, dres)
136 142 '''
137 143 ckan_resources.append(resource)
138 144
139 145 dataset = dict(record, resources=ckan_resources)
140 146 datapackage = dataset_to_datapackage_change(dataset)
141 147
142 148 json_path = os.path.join(datapackage_dir, 'datapackage.json')
143 149 with open(json_path, 'wb') as out:
144 150 out.write(pretty_json(datapackage))
145 151
146 152 return datapackage_dir, datapackage, json_path
147 153
148 154 def create_resource_change(resource, stderr, apikey, host_url, insecure):
149 155 # ---------- REPLACE URL --------- #
150 156 if urlparse(host_url).netloc != 'www.igp.gob.pe' and urlparse(resource['url']).netloc == 'www.igp.gob.pe':
151 157 resource['url'] = resource['url'].replace(urlparse(resource['url']).scheme + '://' + urlparse(resource['url']).netloc,
152 158 urlparse(host_url).scheme + '://' + urlparse(host_url).netloc)
153 159 #----------------------------------#
154 160 try:
155 161 r = requests.get(resource['url'], headers={'Authorization': apikey}, stream=True, verify=not insecure)
156 162 #---------------------------------------#
157 163 try:
158 164 r.raise_for_status()
159 165 except requests.exceptions.HTTPError as e:
160 166 return False
161 167 #---------------------------------------#
162 168 with open(resource['path'], 'wb') as f:
163 169 for chunk in r.iter_content(chunk_size=DL_CHUNK_SIZE):
164 170 if chunk:
165 171 f.write(chunk)
166 172
167 173 except requests.ConnectionError:
168 174 stderr.write('URL {0} refused connection. The resource will not be downloaded\n'.format(resource['url']).encode('utf-8'))
169 175 except requests.exceptions.RequestException as e:
170 176 stderr.write('{0}\n'.format(str(e.args[0]) if len(e.args) > 0 else '').encode('utf-8'))
171 177 except Exception as e:
172 178 stderr.write('{0}'.format(str(e.args[0]) if len(e.args) > 0 else '').encode('utf-8'))
173 179 return resource
174 180
175 181 def dataset_to_datapackage_change(dataset_dict):
176 182 dp = {'name': dataset_dict['name'],
177 183 'id': dataset_dict['id'],
178 184 'path': dataset_dict['path'],
179 185 'last_update': datetime.strptime(dataset_dict['metadata_modified'], "%Y-%m-%dT%H:%M:%S.%f").strftime("%d-%b-%Y %I.%M %p")}
180 186
181 187 resources = dataset_dict.get('resources')
182 188 if resources:
183 189 dp['resources'] = [convert_to_datapackage_resource_change(r)
184 190 for r in resources]
185 191 return dp
186 192
187 193 def convert_to_datapackage_resource_change(resource_dict):
188 194 resource = {}
189 195
190 196 if resource_dict.get('id'):
191 197 resource['id'] = resource_dict['id']
192 198
193 199 if resource_dict.get('name'):
194 200 resource['name'] = resource_dict['name']
195 201
196 202 if resource_dict.get('path'):
197 resource['path'] = resource_dict['path']
203 if os.path.isfile(resource_dict['path']):
204 resource['path'] = resource_dict['path']
205 else:
206 resource['url'] = resource_dict['url']
198 207
199 208 schema = resource_dict.get('schema')
200 209 if isinstance(schema, six.string_types):
201 210 try:
202 211 resource['schema'] = json.loads(schema)
203 212 except ValueError:
204 213 resource['schema'] = schema
205 214 elif isinstance(schema, dict):
206 215 resource['schema'] = schema
207
208 216 return resource
209 217
210 218 def name_no_repetition(name, dir, option=''):
211 219 count = 0
212 220 while True:
213 221 count = count + 1
214 222 if not os.path.exists(os.path.join(dir, name)):
215 223 if option == 'resource':
216 224 return name
217 225 else:
218 226 return os.path.join(dir, name)
219 227
220 228 elif not os.path.exists(os.path.join(dir, '('+str(count)+')'+name)):
221 229 if option == 'resource':
222 230 return '('+str(count)+')'+name
223 231 else:
224 232 return os.path.join(dir, '('+str(count)+')'+name)
225 233 else:
226 234 pass No newline at end of file
@@ -1,343 +1,345
1 1 import json
2 2 import uuid
3 3 import os
4 4 import sys
5 5
6 6 def resource_create(self, package_id, file_type, others='', max_size=100, max_count=500, ignore_repetition=False, **kwargs):
7 7 #---------------------------------------------------------------------#
8 8 kwargs['voc_file_type'] = file_type
9 9 kwargs['others'] = others
10 10
11 11 if 'clear_upload' in kwargs:
12 12 del kwargs['clear_upload']
13 13 #---------------------------------------------------------------------#
14 14
15 15 url_upload = {}
16 16 if 'upload' in kwargs and 'url' in kwargs:
17 17 return 'ERROR:: Choose one: "upload" or "url" parameters'
18 18 elif 'upload' in kwargs:
19 19 url_upload['upload'] = kwargs['upload']
20 20 elif 'url' in kwargs:
21 21 url_upload['url'] = kwargs['url']
22 22 else:
23 23 return 'ERROR:: Missing value: "upload" or "url" parameters'
24 24
25 25 value_u = list(url_upload.values())[0]
26 26 key_u = list(url_upload.keys())[0]
27 27
28 28 if not isinstance(value_u, list) and not isinstance(value_u, str):
29 29 return 'ERROR:: "%s" must be <str> or <list>' % (key_u)
30 30 #---------------------------------------------------------------------#
31 31
32 32 if isinstance(value_u, str):
33 33 if len(value_u) != 0:
34 34 if key_u == 'upload':
35 35 if os.path.isdir(value_u):
36 36 path_order = [f for f in os.listdir(value_u) if os.path.isfile(os.path.join(value_u, f))]
37 37 path_order.sort()
38 38 if path_order:
39 39 kwargs['upload'] = []
40 40 for name in path_order:
41 41 kwargs['upload'].append(os.path.join(value_u, name))
42 42 else:
43 43 return "ERROR:: There aren't files in this directory"
44 elif os.path.isfile(value_u):
45 pass
44 46 else:
45 47 return 'ERROR:: Directory or File does not exist'
46 48 else:
47 49 return 'ERROR:: "path_list is empty"'
48 50 #---------------------------------------------------------------------#
49 51
50 52 if not isinstance(kwargs[key_u], list):
51 53 kwargs[key_u] = [kwargs[key_u]]
52 54
53 55 if kwargs.get('upload', None):
54 56 if len(kwargs['upload']) != len(set(kwargs['upload'])):
55 57 return 'Duplicate files found in "upload" parameter'
56 58 #---------------------------------------------------------------------#
57 59
58 60 change_kwargs = {}
59 61 for key1, value1 in kwargs.items():
60 62 if key1 == 'others':
61 63 if isinstance(value1, tuple):
62 64 if len(value1) != len(kwargs[key_u]):
63 65 return 'ERROR:: "%s" value(s) must be same length as "%s" value(s)' % (key1, key_u)
64 66 else:
65 67 change_kwargs[key1] = value1
66 68 else:
67 69 change_kwargs[key1] = (value1,) * len(kwargs[key_u])
68 70 else:
69 71 if isinstance(value1, list):
70 72 if len(value1) != len(kwargs[key_u]):
71 73 return 'ERROR:: "%s" value(s) must be same length as "%s" value(s)' % (key1, key_u)
72 74 else:
73 75 change_kwargs[key1] = value1
74 76 else:
75 77 change_kwargs[key1] = [value1] * len(kwargs[key_u])
76 78 #---------------------------------------------------------------------#
77 79
78 80 try:
79 81 dataset_show = getattr(self.ckan.action, 'package_show')(id=package_id)['resources']
80 82 resources_name = []
81 83 for u in dataset_show:
82 84 resources_name.append(u['name'].lower())
83 85 except:
84 86 _, exc_value, _ = sys.exc_info()
85 87 print('ERROR obtaining metadata dataset:: Use the "print" for more information')
86 88 return exc_value
87 89 #---------------------------------------------------------------------#
88 90
89 91 for c1 in range(len(kwargs[key_u])):
90 92 new_kwargs = {}
91 93 for k2, v2 in change_kwargs.items():
92 94 #-------------------------------------------------------------#
93 95 if k2 == 'upload':
94 96 if not os.path.isfile(v2[c1]):
95 97 return 'File "%s" does not exist' % (v2[c1])
96 98
97 99 if not kwargs.get('size', None):
98 100 new_kwargs['size'] = os.stat(v2[c1]).st_size
99 101 #-------------------------------------------------------------#
100 102 new_kwargs[k2] = v2[c1]
101 103 #-----------------------------------------------------------------#
102 104
103 105 if not kwargs.get('name', None):
104 106 new_kwargs['name'] = os.path.basename(new_kwargs[key_u])
105 107
106 108 if new_kwargs['name'].lower() in resources_name:
107 109 if not ignore_repetition:
108 110 return 'ERROR:: "%s" resource already exist in this dataset' % (new_kwargs['name'])
109 111 print('WARRING:: "'+ new_kwargs['name'] +'" resource was ignored because already exist in this dataset')
110 112 else:
111 113 self.list.append(new_kwargs)
112 114 #---------------------------------------------------------------------#
113 115
114 116 try:
115 117 uuid.UUID(package_id, version=4)
116 118 self.dict['match'] = json.dumps({'id': package_id})
117 119 except ValueError:
118 120 self.dict['match'] = json.dumps({'name': package_id})
119 121 #---------------------------------------------------------------------#
120 122
121 123 if kwargs.get('upload', None):
122 124 blocks = [[]]
123 125 size_file = 0
124 126 count_file = 0
125 127 inter_num = 0
126 128 for value in self.list:
127 129 if value['size'] > 1024 * 1024 * float(max_size):
128 130 return 'ERROR:: The size of the "%s" file is %sMB aprox, please change "max_size" value' % (value['name'], str(round(value['size']/(1024 * 1024), 2)))
129 131 if not 1 <= int(max_count) <= 999:
130 132 return 'ERROR:: The count of the number of files must be between 1 and 999, please change "max_count" value'
131 133
132 134 size_file = size_file + value['size']
133 135 count_file = count_file + 1
134 136 if size_file > 1024 * 1024 * float(max_size) or count_file > int(max_count):
135 137 inter_num = inter_num + 1
136 138 size_file = value['size']
137 139 count_file = 1
138 140 blocks.append([])
139 141
140 142 del value['size']
141 143 blocks[inter_num].append(value)
142 144 #------------------------------------------------------------#
143 145
144 146 if len(blocks[0]) > 0:
145 147 print('BLOCK(S) IN TOTAL:: {}'.format(len(blocks)))
146 148
147 149 for count1, block in enumerate(blocks):
148 150 upload_files = {}
149 151 for count2, value2 in enumerate(block):
150 152 upload_files['update__resources__-'+ str(len(block)-count2) +'__upload'] = (value2['name'], open(value2['upload'], 'rb'))
151 153 del value2['upload']
152 154
153 155 print('---- BLOCK N°{} ----'.format(count1 + 1))
154 156 print('BLOCK N°{} :: "{}" file(s) found >> uploading'.format(count1 + 1, len(block)))
155 157
156 158 try:
157 159 result = self.ckan.call_action('package_revise', {'match': self.dict['match'], 'update__resources__extend': json.dumps(block)}, files=upload_files)
158 160 print('BLOCK N°{} :: Uploaded file(s) successfully'.format(count1 + 1))
159 161 if len(blocks) == count1 + 1:
160 162 return result
161 163 except:
162 164 print('ERROR :: Use the "print" for more information')
163 165 _, exc_value, _ = sys.exc_info()
164 166 return exc_value
165 167 else:
166 168 return "ERROR:: No file(s) found to upload"
167 169 else:
168 170 if len(self.list) > 0:
169 171 return self.ckan.call_action('package_revise', {'match': self.dict['match'], 'update__resources__extend': json.dumps(self.list)})
170 172 else:
171 173 return "ERROR:: No resource(s) found to create"
172 174
173 175
174 176 def resource_patch(self, id, package_id, max_size=100, max_count=500, **kwargs):
175 177 #Cambiar el nombre al actualizar con un nuevo archivo
176 178
177 179 if 'file_type' in kwargs:
178 180 kwargs['voc_file_type'] = kwargs['file_type']
179 181 del kwargs['file_type']
180 182
181 183 if 'upload' in kwargs and 'url' in kwargs:
182 184 return 'ERROR:: Choose one: "upload" or "url" parameters'
183 185 #---------------------------------------------------------------------#
184 186
185 187 if not isinstance(id, list) and not isinstance(id, str):
186 188 return 'ERROR:: "id" must be <str> or <list>'
187 189
188 190 if isinstance(id, str):
189 191 id = [id]
190 192
191 193 change_kwargs = {}
192 194 for key1, value1 in kwargs.items():
193 195 if key1 == 'others':
194 196 if isinstance(value1, tuple):
195 197 if len(value1) != len(id):
196 198 return 'ERROR:: "%s" value(s) must be same length as "id" value(s)' % (key1)
197 199 else:
198 200 change_kwargs[key1] = value1
199 201 else:
200 202 change_kwargs[key1] = (value1,) * len(id)
201 203 else:
202 204 if isinstance(value1, list):
203 205 if len(value1) != len(id):
204 206 return 'ERROR:: "%s" value(s) must be same length as "id" value(s)' % (key1)
205 207 else:
206 208 change_kwargs[key1] = value1
207 209 else:
208 210 change_kwargs[key1] = [value1] * len(id)
209 211 #---------------------------------------------------------------------#
210 212
211 213 for c1, v1 in enumerate(id):
212 214 new_kwargs = {}
213 215 for k2, v2 in change_kwargs.items():
214 216 #-------------------------------------------------------------#
215 217 if k2 == 'upload':
216 218 if not os.path.isfile(v2[c1]):
217 219 return 'File "%s" does not exist' % (v2[c1])
218 220
219 221 new_kwargs['size'] = os.stat(v2[c1]).st_size
220 222
221 223 if k2 == 'url':
222 224 new_kwargs['clear_upload'] = True
223 225 new_kwargs['size'] = 0
224 226 new_kwargs['mimetype'] = None
225 227 #-------------------------------------------------------------#
226 228 new_kwargs[k2] = v2[c1]
227 229
228 230 if new_kwargs.get('upload', None):
229 231 #-------------------------------------------------------------#
230 232 if new_kwargs.get('clear_upload', None):
231 233 del new_kwargs['clear_upload']
232 234 #-------------------------------------------------------------#
233 235 self.dict['update__resources__'+v1[:6]] = new_kwargs
234 236 else:
235 237 self.dict['update__resources__'+v1[:6]] = json.dumps(new_kwargs)
236 238 #---------------------------------------------------------------------#
237 239
238 240 try:
239 241 uuid.UUID(package_id, version=4)
240 242 package_id = json.dumps({'id': package_id})
241 243 except ValueError:
242 244 package_id = json.dumps({'name': package_id})
243 245 #---------------------------------------------------------------------#
244 246
245 247 if kwargs.get('upload', None):
246 248 blocks = [{}]
247 249 upload_files = [{}]
248 250 size_file = 0
249 251 count_file = 0
250 252 inter_num = 0
251 253
252 254 for dict_key, dict_value in self.dict.items():
253 255 if dict_value['size'] > 1024 * 1024 * float(max_size):
254 256 return 'ERROR:: "%s" size out of limit' % (dict_value['upload'])
255 257
256 258 if not 1 <= int(max_count) <= 999:
257 259 return 'ERROR:: The count of the number of files must be between 1 and 999, please change "max_count" value'
258 260
259 261 size_file = size_file + dict_value['size']
260 262 count_file = count_file + 1
261 263 if size_file <= 1024 * 1024 * float(max_size) and count_file <= int(max_count):
262 264 upload_files[inter_num][dict_key+'__upload'] = (os.path.basename(dict_value['upload']), open(dict_value['upload'], 'rb'))
263 265
264 266 else:
265 267 inter_num = inter_num + 1
266 268 size_file = dict_value['size']
267 269 count_file = 1
268 270 upload_files.append({dict_key+'__upload': (os.path.basename(dict_value['upload']), open(dict_value['upload'], 'rb'))})
269 271 blocks.append({})
270 272
271 273 del dict_value['upload']
272 274 del dict_value['size']
273 275 blocks[inter_num]['match'] = package_id
274 276 blocks[inter_num][dict_key] = json.dumps(dict_value)
275 277
276 278 #------------------------------------------------------------#
277 279 if len(blocks[0]) > 0:
278 280 print('BLOCK(S) IN TOTAL:: {}'.format(len(blocks)))
279 281 for count1, block in enumerate(blocks):
280 282 print('---- BLOCK N°{} ----'.format(count1 + 1))
281 283 print('BLOCK N°{} :: "{}" file(s) found >> uploading'.format(count1 + 1, len(block)-1))
282 284 try:
283 285 result = self.ckan.call_action('package_revise', block, files=upload_files[count1])
284 286
285 287 print('BLOCK N°{} :: Uploaded file(s) successfully'.format(count1 + 1))
286 288 if len(blocks) == count1 + 1:
287 289 return result
288 290 except:
289 291 print('ERROR :: Use the "print" for more information')
290 292 _, exc_value, _ = sys.exc_info()
291 293 return exc_value
292 294 else:
293 295 return "ERROR:: No file(s) found to upload"
294 296 else:
295 297 self.dict['match'] = package_id
296 298 return self.ckan.call_action('package_revise', self.dict)
297 299
298 300
299 301 def resource_delete(self, select, id, **kwargs):
300 302
301 303 if not isinstance(id, list) and not isinstance(id, str):
302 304 return 'ERROR:: "id" must be <str> or <list>'
303 305
304 306 if isinstance(id, list):
305 307 if not 'package_id' in kwargs:
306 308 return "ERROR:: 'package_id' parameter is empty"
307 309 #---------------------------------------------------------------------#
308 310
309 311 if 'delete' == select:
310 312 if kwargs.get('package_id', None):
311 313 pkg_dict = getattr(self.ckan.action, 'package_show')(id=kwargs['package_id'])
312 314
313 315 if pkg_dict.get('resources'):
314 316 pkg_dict['resources'] = [res for res in pkg_dict['resources'] if not
315 317 res['id'] in id]
316 318 if pkg_dict['num_resources'] - len(pkg_dict['resources']) == len(id):
317 319 return getattr(self.ckan.action, 'package_update')(**pkg_dict)
318 320 else:
319 321 return "ERROR:: No changes have been applied"
320 322 else:
321 323 return getattr(self.ckan.action, 'resource_delete')(id=id)
322 324
323 325 elif 'purge' == select:
324 326 if kwargs.get('package_id', None):
325 327 pkg_dict = getattr(self.ckan.action, 'package_show')(id=kwargs['package_id'])
326 328 if pkg_dict.get('resources'):
327 329 pkg_dict['resources'] = [res for res in pkg_dict['resources'] if not
328 330 res['id'] in id]
329 331 if pkg_dict['num_resources'] - len(pkg_dict['resources']) == len(id):
330 332 print('[DELETING FILES]')
331 333 resource_patch(self=self, id=id, package_id=kwargs['package_id'], clear_upload=True)
332 334 print('[DELETING RESOURCES]')
333 335 return getattr(self.ckan.action, 'package_update')(**pkg_dict)
334 336 else:
335 337 return "ERROR:: No changes have been applied, please check 'id' parameter"
336 338
337 339 else:
338 340 print('[DELETING FILE]')
339 341 getattr(self.ckan.action, 'resource_patch')(id=id, clear_upload=True)
340 342 print('[DELETING RESOURCE]')
341 343 return getattr(self.ckan.action, 'resource_delete')(id=id)
342 344 else:
343 345 return 'ERROR:: "select = %s" is not accepted' % (select) No newline at end of file
General Comments 0
You need to be logged in to leave comments. Login now