##// END OF EJS Templates
v2.9.2 :: Fixed 'download' bugs
eynilupu -
r19:1886da7a44e6
parent child
Show More
1 NO CONTENT: modified file, binary diff hidden
1 NO CONTENT: modified file, binary diff hidden
@@ -1,500 +1,500
1 1 from ckanapi import RemoteCKAN
2 2 from datetime import datetime
3 3 from CKAN_JRO import logic_download
4 4 from CKAN_JRO import resource
5 5 #from ckanapi.errors import NotAuthorized, NotFound, ValidationError, SearchQueryError, SearchError, CKANAPIError, ServerIncompatibleError
6 6 import sys
7 7 import platform
8 8 import os
9 9 import requests
10 10
class JROAPI():
    """
    Client to manage and retrieve repository data through the CKAN APIs.

    PREREQUISITES:
        - Step 1: Have "pip [Python 2]" or "pip3 [Python 3]" installed.
        - Step 2: Install the following packages:
            ckanapi==4.7
            requests

    AVAILABLE FUNCTIONS:
        - action
        - show
        - search
        - create
        - patch
        - delete
        - download_files

    EXAMPLES:
        #1:
            with JROAPI('http://demo.example.com', Authorization='#########') as <access_name>:
                ... some operation(s) ...
        #2:
            <access_name> = JROAPI('http://example.com', Authorization='#########')
            ... some operation(s) ...
            <access_name>.ckan.close()

    ERROR CONVENTION:
        The public methods do not raise. Validation problems are returned as
        a message string, and any exception raised while calling the API is
        returned (not raised) as the exception object.

    REPORTING A PROBLEM:
        Send an e-mail to eynilupu@igp.gob.pe detailing:
            1) An e-mail address to contact you
            2) A description of the problem
            3) In which step or section the problem was found
            4) The result you were expecting
    """

    # Keyword-friendly spellings of the dotted "facet.*" search parameters.
    _FACET_KEYS = ('facet_mincount', 'facet_limit', 'facet_field')

    # type_option -> API action name, for calls that are a plain pass-through.
    _SHOW_ACTIONS = {
        'dataset': 'package_show',
        'resource': 'resource_show',
        'project': 'organization_show',
        'collaborator': 'package_collaborator_list_for_user',
        'member': 'organization_list_for_user',
        'vocabulary': 'vocabulary_show',
        'tag': 'tag_show',
        'user': 'user_show',
        'job': 'job_show',
    }
    _CREATE_ACTIONS = {
        'dataset': 'package_create',
        'project': 'organization_create',
        'member': 'organization_member_create',
        'collaborator': 'package_collaborator_create',
        'vocabulary': 'vocabulary_create',
        'tag': 'tag_create',
        'user': 'user_create',
    }
    # 'member' and 'collaborator' intentionally map to the *_create actions,
    # exactly as in the previous implementation.
    _PATCH_ACTIONS = {
        'dataset': 'package_patch',
        'project': 'organization_patch',
        'member': 'organization_member_create',
        'collaborator': 'package_collaborator_create',
    }
    # type_option -> (action for select='delete', action for select='purge')
    _DELETE_OR_PURGE_ACTIONS = {
        'dataset': ('package_delete', 'dataset_purge'),
        'project': ('organization_delete', 'organization_purge'),
    }
    _DELETE_ACTIONS = {
        'vocabulary': 'vocabulary_delete',
        'tag': 'tag_delete',
        'user': 'user_delete',
    }

    def __init__(self, url, Authorization=None, secure=True):
        """
        Open a RemoteCKAN connection to ``url``.

        PARAMETERS:
            url           -- base URL of the CKAN site.
            Authorization -- API key sent with every request (optional).
            secure        -- when exactly ``False``, TLS certificate
                             verification is disabled for the session.
        """
        #-------- Check Secure -------#
        self.verify = secure
        session = None
        if isinstance(secure, bool) and not secure:
            session = requests.Session()
            session.verify = False
        #------------------------------#
        self.url = url
        ua = 'CKAN_JRO/2.9.2 (+' + str(self.url) + ')'
        self.ckan = RemoteCKAN(self.url, apikey=Authorization, user_agent=ua, session=session)
        self.Authorization = Authorization
        self.separator = os.sep

        # NOTE(review): the attributes below are not read by this class any
        # more; they are kept because the CKAN_JRO helper modules receive
        # this instance and may rely on them -- confirm before removing.
        self.chunk_size = 1024
        self.list = []
        self.dict = {}
        self.str = ''
        self.check = 1
        self.cont = 0

    def __enter__(self):
        """Return the instance itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, *args):
        """Close the underlying RemoteCKAN connection."""
        self.ckan.close()

    def _call(self, api_name, params):
        """Invoke the API action ``api_name`` with the ``params`` dict as keyword arguments."""
        return getattr(self.ckan.action, api_name)(**params)

    @staticmethod
    def _check_date(field, value):
        """Return an error message if ``value`` is not a 'YYYY-MM-DD' string, else None."""
        if not isinstance(value, str):
            return '"%s" must be <str>' % field
        try:
            datetime.strptime(value, '%Y-%m-%d')
        except ValueError:
            return '"%s" incorrect: "%s"' % (field, value)
        # strptime also accepts non-padded values such as '2020-1-5'.
        if len(value) != 10:
            return '"%s", must be: <YYYY-MM-DD>' % field
        return None

    @classmethod
    def _check_date_range(cls, query, start_key, end_key):
        """Validate the optional start/end dates of ``query``; return an error message or None."""
        for key in (start_key, end_key):
            if key in query:
                error = cls._check_date(key, query[key])
                if error is not None:
                    return error
        # Both values are zero-padded ISO dates here, so comparing the
        # strings is the same as comparing the dates.
        if start_key in query and end_key in query and query[start_key] > query[end_key]:
            return '"%s" must be greater than "%s"' % (end_key, start_key)
        return None

    def action(self, action, **kwargs):
        """
        Call any available API action by name.

        AVAILABLE APIs:
            SEE: "GUIA DE SCRIPT.pdf"

        EXAMPLE:
            <access_name>.action(<consuming API>, param_1 = <class 'param_1'>, ...)

        RETURNS:
            The API result, or the exception object if the call failed.
        """
        #--------------- CASE: PACKAGE SEARCH ---------------#
        # "facet.limit" is not a valid Python keyword, so the underscore
        # spelling is accepted and renamed before the call.
        if action == 'package_search':
            for facet in self._FACET_KEYS:
                if facet in kwargs:
                    kwargs[facet.replace('_', '.')] = kwargs.pop(facet)
        #----------------------------------------------------#
        try:
            return self._call(action, kwargs)
        except Exception as exc:
            return exc

    def show(self, type_option, id, **kwargs):
        """
        Retrieve one specific object.

        AVAILABLE PARAMETERS:
            SEE: "GUIA DE SCRIPT.pdf"

        STRUCTURE:
            <access_name>.show(type_option = <class 'str'>, id = <class 'str'>, param_1 = <class 'param_1'>, ...)

        RETURNS:
            The API result, an 'ERROR:: ...' message for an invalid
            ``type_option``, or the exception object if the call failed.
        """
        if not isinstance(type_option, str):
            return 'ERROR:: "type_option" must be a str'
        try:
            api_name = self._SHOW_ACTIONS.get(type_option)
            if api_name is None:
                return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
            if type_option == 'tag' and 'vocabulary_id' not in kwargs:
                print('Missing "vocabulary_id" value: assume it is a free tag')
            return self._call(api_name, dict(kwargs, id=id))
        except Exception as exc:
            return exc

    def search(self, type_option, query=None, **kwargs):
        """
        Search for objects that satisfy some criteria.

        AVAILABLE PARAMETERS:
            SEE: "GUIA DE SCRIPT.pdf"

        STRUCTURE:
            <access_name>.search(type_option = <class 'str'>, query = <class 'dict'>, param_1 = <class 'param_1'>, ...)

        RETURNS:
            The API result, a message string describing an invalid argument,
            or the exception object if the call failed.
        """
        if not isinstance(type_option, str):
            return 'ERROR:: "type_option" must be <str>'
        try:
            if type_option == 'dataset':
                return self._search_dataset(query, kwargs)
            if type_option == 'resource':
                return self._search_resource(query, kwargs)
            if type_option == 'tag':
                return self._search_tag(query, kwargs)
            return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
        except Exception as exc:
            return exc

    def _search_dataset(self, query, kwargs):
        """Build the 'package_search' parameters from ``query``/``kwargs`` and run the search."""
        # These keys are managed by this method and must not come from the caller.
        reserved = ('fq', 'fq_list', 'include_private')
        params = {}
        for key, value in kwargs.items():
            if key in reserved:
                continue
            if key in self._FACET_KEYS:
                key = key.replace('_', '.')
            params[key] = value

        if query is not None:
            if not isinstance(query, dict):
                return '"query" must be <dict>'
            date_keys = ('dataset_start_date', 'dataset_end_date')
            error = self._check_date_range(query, *date_keys)
            if error is not None:
                return error
            # Validated dates go first, then every other non-None criterion.
            filters = ['%s:"%s"' % (key, query[key]) for key in date_keys if key in query]
            filters.extend(
                '%s:"%s"' % (key, value)
                for key, value in query.items()
                if value is not None and key not in date_keys
            )
            params['fq_list'] = filters

        return self._call('package_search', dict(params, include_private=True))

    def _search_resource(self, query, kwargs):
        """Validate the optional file-date range and run 'resources_search'."""
        params = dict((key, value) for key, value in kwargs.items() if key != 'fields')
        if query is not None:
            if not isinstance(query, dict):
                return '"query" must be <dict>'
            error = self._check_date_range(query, 'file_date_min', 'file_date_max')
            if error is not None:
                return error
            params['query'] = query
        return self._call('resources_search', params)

    def _search_tag(self, query, kwargs):
        """Run 'tag_search', optionally restricted to one vocabulary."""
        params = dict((key, value) for key, value in kwargs.items() if key != 'fields')

        if 'vocabulary_id' not in kwargs:
            print('Missing "vocabulary_id" value: tags that don’t belong to any vocabulary')
        else:
            print('Only tags that belong to "{}" vocabulary'.format(kwargs['vocabulary_id']))

        if query is not None:
            if not isinstance(query, dict):
                return '"query" must be <dict>'
            if 'search' in query:
                if not isinstance(query['search'], (list, str)):
                    return '"search" must be <list> or <str>'
                params['query'] = query['search']
        return self._call('tag_search', params)

    def create(self, type_option, select=None, **kwargs):
        """
        Create an object.

        AVAILABLE PARAMETERS:
            SEE: "GUIA DE SCRIPT.pdf"

        STRUCTURE:
            <access_name>.create(type_option = <class 'str'>, param_1 = <class 'param_1'>, ...)

        ``select`` is only used with type_option='views', where it must be
        'resource' or 'dataset'.

        RETURNS:
            The API result, an 'ERROR:: ...' message for an invalid argument,
            or the exception object if the call failed.
        """
        if not isinstance(type_option, str):
            return 'ERROR:: "type_option" must be <str>'
        try:
            if type_option in self._CREATE_ACTIONS:
                return self._call(self._CREATE_ACTIONS[type_option], kwargs)
            if type_option == 'resource':
                return resource.resource_create(self, **kwargs)
            if type_option == 'views':
                if 'resource' == select:
                    # The resource-level action does not take 'package'.
                    params = dict((key, value) for key, value in kwargs.items() if key != 'package')
                    return self._call('resource_create_default_resource_views', params)
                if 'dataset' == select:
                    return self._call('package_create_default_resource_views', kwargs)
                return 'ERROR:: "select = %s" is not accepted' % (select)
            return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
        except Exception as exc:
            return exc

    def patch(self, type_option, **kwargs):
        """
        Update an object.

        AVAILABLE PARAMETERS:
            SEE: "GUIA DE SCRIPT.pdf"

        STRUCTURE:
            <access_name>.patch(type_option = <class 'str'>, param_1 = <class 'param_1'>, ...)

        RETURNS:
            The API result, an 'ERROR:: ...' message for an invalid
            ``type_option``, or the exception object if the call failed.
        """
        if not isinstance(type_option, str):
            return 'ERROR:: "type_option" must be <str>'
        try:
            # TODO: for 'dataset', restrict the update to dataset parameters
            # only, so that resources are not included.
            if type_option in self._PATCH_ACTIONS:
                return self._call(self._PATCH_ACTIONS[type_option], kwargs)
            if type_option == 'resource':
                return resource.resource_patch(self, **kwargs)
            return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
        except Exception as exc:
            return exc

    def delete(self, type_option, select=None, **kwargs):
        """
        Delete and/or purge an object.

        AVAILABLE PARAMETERS:
            SEE: "GUIA DE SCRIPT.pdf"

        STRUCTURE:
            <access_name>.delete(type_option = <class 'str'>, param_1 = <class 'param_1'>, ...)

        ``select`` is required for 'dataset' and 'project' ('delete' or
        'purge') and for 'resource' (forwarded to the resource helper).

        RETURNS:
            The API result, an 'ERROR:: ...' message for an invalid argument,
            or the exception object if the call failed.
        """
        if not isinstance(type_option, str):
            return 'ERROR:: "type_option" must be <str>'
        try:
            if type_option in self._DELETE_OR_PURGE_ACTIONS:
                if select is None:
                    return 'ERROR:: "select" must not be "None"'
                delete_name, purge_name = self._DELETE_OR_PURGE_ACTIONS[type_option]
                if 'delete' == select:
                    return self._call(delete_name, kwargs)
                if 'purge' == select:
                    return self._call(purge_name, kwargs)
                return 'ERROR:: "select = %s" is not accepted' % (select)
            if type_option == 'resource':
                if select is None:
                    return 'ERROR:: "select" must not be "None"'
                return resource.resource_delete(self, select, **kwargs)
            if type_option in self._DELETE_ACTIONS:
                return self._call(self._DELETE_ACTIONS[type_option], kwargs)
            return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
        except Exception as exc:
            return exc

    def download_files(self, id, processes=1, path=os.path.expanduser("~"), **kwargs):
        """
        Download the existing files of one or more datasets.

        AVAILABLE PARAMETERS:
            SEE: "GUIA DE SCRIPT.pdf"

        STRUCTURE:
            <access_name>.download_files(id = <class 'str' or 'list'>, param_1 = <class 'param_1'>, ...)

        PARAMETERS:
            id        -- dataset id/name, or a list of them.
            processes -- number of processes handed to the download logic.
            path      -- existing, writable directory to download into
                         (defaults to the user's home directory).
            kwargs    -- forwarded unchanged to the download logic.

        RETURNS:
            Whatever ``logic_download.dump_things_change`` returns, or an
            'ERROR:: ...' message when an argument is invalid.
        """
        #------------------ PATH ----------------------#
        if not isinstance(path, str):
            return 'ERROR:: "path" must be: <class "str">'
        if not os.path.isdir(path):
            return 'ERROR:: "path" does not exist'
        if not path.endswith(os.sep):
            path = path + os.sep
        # Prove the directory is writable by creating and removing a
        # uniquely named probe file.
        probe = path + datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f") + '.txt'
        try:
            with open(probe, 'w'):
                pass
            os.remove(probe)
        except (IOError, OSError):
            return 'ERROR:: Access denied, you are not authorized to write files: "%s"' % (path)

        #------------------ PROCESSES -----------------#
        if not isinstance(processes, int):
            return 'ERROR:: "processes" must be: <class "int">'

        #------------------ ID OR NAME ----------------#
        if isinstance(id, str):
            id = [id]
        elif isinstance(id, list):
            id = [str(item) for item in id]
        else:
            return 'ERROR:: dataset "id" must be: <class "str" or "list">'
        #----------------------------------------------#
        # Option mapping consumed by the download logic; the keys are
        # spelled like command-line options of a "dump datasets" command.
        arguments = {
            '--apikey': self.Authorization,
            '--ckan-user': None,
            '--config': None,
            '--datapackages': path,
            '--datastore-fields': False,
            '--get-request': False,
            '--insecure': not self.verify,
            '--processes': str(processes),
            '--quiet': False,
            '--remote': self.url,
            '--worker': False,
            'ID_OR_NAME': id,
            'datasets': True,
            'dump': True,
        }
        return logic_download.dump_things_change(self.ckan, 'datasets', arguments, **kwargs)
General Comments 0
You need to be logged in to leave comments. Login now