@@ -1,514 +1,515
|
1 | 1 | from ckanapi import RemoteCKAN |
|
2 | 2 | from datetime import datetime |
|
3 | 3 | from jrodb import download |
|
4 | 4 | from jrodb import resource |
|
5 | 5 | #from ckanapi.errors import NotAuthorized, NotFound, ValidationError, SearchQueryError, SearchError, CKANAPIError, ServerIncompatibleError |
|
6 | 6 | import sys |
|
7 | 7 | import platform |
|
8 | 8 | import os |
|
9 | 9 | import requests |
|
10 | 10 | |
|
11 | 11 | class Api(): |
|
12 | 12 | """ |
|
13 | 13 | PURPOSE: |
|
14 | 14 | Script to manage and retrieve the repository's data through its APIs. |
|
15 | 15 | |
|
16 | 16 | PREREQUISITES: |
|
17 | 17 | - Step 1: Have "pip [Python 2]" or "pip3 [Python 3]" installed: |
|
18 | 18 | - Step 2: Install the following packages: |
|
19 | 19 | In Python 2 |
|
20 | 20 | - pip install -e git+http://intranet.igp.gob.pe:8082/DATABASES/ckanext-jro/api-cliente#egg=jrodb |
|
21 | 21 | In Python 3 |
|
22 | 22 | - pip3 install -e git+http://intranet.igp.gob.pe:8082/DATABASES/ckanext-jro/api-cliente#egg=jrodb |
|
23 | 23 | |
|
24 | 24 | AVAILABLE FUNCTIONS: |
|
25 | 25 | - action |
|
26 | 26 | - show |
|
27 | 27 | - search |
|
28 | 28 | - create |
|
29 | 29 | - patch |
|
30 | 30 | - delete |
|
31 | 31 | - download |
|
32 | 32 | |
|
33 | 33 | EXAMPLES: |
|
34 | 34 | #1: |
|
35 | 35 | with Api('http://demo.example.com', Authorization='#########') as <access_name>: |
|
36 | 36 | ... some operation(s) ... |
|
37 | 37 | #2: |
|
38 | 38 | <access_name> = Api('http://example.com', Authorization='#########') |
|
39 | 39 | ... some operation(s) ... |
|
40 | 40 | <access_name>.ckan.close() |
|
41 | 41 | |
|
42 | 42 | REPORTING A PROBLEM: |
|
43 | 43 | Send an email to eynilupu@igp.gob.pe with the following details: |
|
44 | 44 | 1) A contact email address |
|
45 | 45 | 2) A description of the problem |
|
46 | 46 | 3) In which step or section did you find the problem? |
|
47 | 47 | 4) What result were you expecting? |
|
48 | 48 | """ |
|
49 | 49 | def __init__(self, url, Authorization=None, secure=True): |
|
50 | 50 | #-------- Check Secure -------# |
|
51 | 51 | self.verify = secure |
|
52 | 52 | if not secure and isinstance(secure, bool): |
|
53 | 53 | session = requests.Session() |
|
54 | 54 | session.verify = False |
|
55 | 55 | else: |
|
56 | 56 | session = None |
|
57 | 57 | #------------------------------# |
|
58 | 58 | self.url = url |
|
59 | ua = 'CKAN_JRO/2.9.2 (+'+str(self.url)+')' | |
|
59 | #ua = 'CKAN_JRO/2.9.2 (+'+str(self.url)+')' | |
|
60 | self.ua = 'CKAN_JRO/2.9.2 (+http://intranet.igp.gob.pe:8082/DATABASES/ckanext-jro/api-cliente)' | |
|
60 | 61 | #ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36' |
|
61 | self.ckan = RemoteCKAN(self.url, apikey=Authorization, user_agent=ua, session=session) | |
|
62 | self.ckan = RemoteCKAN(self.url, apikey=Authorization, user_agent=self.ua, session=session) | |
|
62 | 63 | #self.ckan = RemoteCKAN(self.url, apikey=Authorization) |
|
63 | 64 | self.Authorization = Authorization |
|
64 | 65 | # Change for --> self.separator = os.sep |
|
65 | 66 | if platform.system() == 'Windows': |
|
66 | 67 | self.separator = '\\' |
|
67 | 68 | else: |
|
68 | 69 | self.separator = '/' |
|
69 | 70 | |
|
70 | 71 | self.chunk_size = 1024 |
|
71 | 72 | self.list = [] |
|
72 | 73 | self.dict = {} |
|
73 | 74 | self.str = '' |
|
74 | 75 | self.check = 1 |
|
75 | 76 | self.cont = 0 |
|
76 | 77 | |
|
77 | 78 | def __enter__(self): |
|
78 | 79 | return self |
|
79 | 80 | |
|
80 | 81 | def __exit__(self, *args): |
|
81 | 82 | self.ckan.close() |
|
82 | 83 | |
|
83 | 84 | def action(self, action, **kwargs): |
|
84 | 85 | """ |
|
85 | 86 | PURPOSE: |
|
86 | 87 | Function to call any of the available APIs. |
|
87 | 88 | |
|
88 | 89 | AVAILABLE APIs: |
|
89 | 90 | SEE: "GUIA DE SCRIPT.pdf" |
|
90 | 91 | |
|
91 | 92 | EXAMPLE: |
|
92 | 93 | <access_name>.action(<consuming API>, param_1 = <class 'param_1'>, ...) |
|
93 | 94 | """ |
|
94 | 95 | #--------------- CASE: PACKAGE SEARCH ---------------# |
|
95 | 96 | if kwargs is not None: |
|
96 | 97 | if action == 'package_search': |
|
97 | 98 | self.list = ['facet_mincount', 'facet_limit', 'facet_field'] |
|
98 | 99 | for facet in self.list: |
|
99 | 100 | if facet in kwargs: |
|
100 | 101 | kwargs[facet.replace('_', '.')] = kwargs[facet] |
|
101 | 102 | kwargs.pop(facet) |
|
102 | 103 | #----------------------------------------------------# |
|
103 | 104 | try: |
|
104 | 105 | return getattr(self.ckan.action, action)(**kwargs) |
|
105 | 106 | except: |
|
106 | 107 | _, exc_value, _ = sys.exc_info() |
|
107 | 108 | return exc_value |
|
108 | 109 | |
|
109 | 110 | def show(self, type_option, id, **kwargs): |
|
110 | 111 | ''' |
|
111 | 112 | PURPOSE: |
|
112 | 113 | Custom function for a specific lookup. |
|
113 | 114 | |
|
114 | 115 | AVAILABLE PARAMETERS: |
|
115 | 116 | SEE: "GUIA DE SCRIPT.pdf" |
|
116 | 117 | |
|
117 | 118 | STRUCTURE: |
|
118 | 119 | <access_name>.show(type_option = <class 'str'>, id = <class 'str'>, param_1 = <class 'param_1'>, ...) |
|
119 | 120 | ''' |
|
120 | 121 | if type(type_option) is str: |
|
121 | 122 | try: |
|
122 | 123 | if type_option == 'dataset': |
|
123 | 124 | return getattr(self.ckan.action, 'package_show')(id=id, **kwargs) |
|
124 | 125 | elif type_option == 'resource': |
|
125 | 126 | return getattr(self.ckan.action, 'resource_show')(id=id, **kwargs) |
|
126 | 127 | elif type_option == 'project': |
|
127 | 128 | return getattr(self.ckan.action, 'organization_show')(id=id, **kwargs) |
|
128 | 129 | elif type_option == 'collaborator': |
|
129 | 130 | return getattr(self.ckan.action, 'package_collaborator_list_for_user')(id=id, **kwargs) |
|
130 | 131 | elif type_option == 'member': |
|
131 | 132 | return getattr(self.ckan.action, 'organization_list_for_user')(id=id, **kwargs) |
|
132 | 133 | elif type_option == 'vocabulary': |
|
133 | 134 | return getattr(self.ckan.action, 'vocabulary_show')(id=id, **kwargs) |
|
134 | 135 | elif type_option == 'tag': |
|
135 | 136 | if not 'vocabulary_id' in kwargs: |
|
136 | 137 | print('Missing "vocabulary_id" value: assume it is a free tag') |
|
137 | 138 | return getattr(self.ckan.action, 'tag_show')(id=id, **kwargs) |
|
138 | 139 | elif type_option == 'user': |
|
139 | 140 | return getattr(self.ckan.action, 'user_show')(id=id, **kwargs) |
|
140 | 141 | elif type_option == 'job': |
|
141 | 142 | return getattr(self.ckan.action, 'job_show')(id=id, **kwargs) |
|
142 | 143 | else: |
|
143 | 144 | return 'ERROR:: "type_option = %s" is not accepted' % (type_option) |
|
144 | 145 | except: |
|
145 | 146 | _, exc_value, _ = sys.exc_info() |
|
146 | 147 | return exc_value |
|
147 | 148 | else: |
|
148 | 149 | return 'ERROR:: "type_option" must be a str' |
|
149 | 150 | |
|
150 | 151 | def search(self, type_option, query=None, **kwargs): |
|
151 | 152 | ''' |
|
152 | 153 | PURPOSE: |
|
153 | 154 | Custom function for searches that match a given criterion. |
|
154 | 155 | |
|
155 | 156 | AVAILABLE PARAMETERS: |
|
156 | 157 | SEE: "GUIA DE SCRIPT.pdf" |
|
157 | 158 | |
|
158 | 159 | STRUCTURE: |
|
159 | 160 | <access_name>.search(type_option = <class 'str'>, query = <class 'dict'>, param_1 = <class 'param_1'>, ...) |
|
160 | 161 | ''' |
|
161 | 162 | if type(type_option) is str: |
|
162 | 163 | try: |
|
163 | 164 | if type_option == 'dataset': |
|
164 | 165 | key_replace = ['fq', 'fq_list', 'include_private'] |
|
165 | 166 | key_point = ['facet_mincount', 'facet_limit', 'facet_field'] |
|
166 | 167 | for key1, value1 in kwargs.items(): |
|
167 | 168 | if not key1 in key_replace: |
|
168 | 169 | if key1 in key_point: |
|
169 | 170 | self.dict[key1.replace('_', '.')] = value1 |
|
170 | 171 | else: |
|
171 | 172 | self.dict[key1] = value1 |
|
172 | 173 | |
|
173 | 174 | if query is not None: |
|
174 | 175 | if type(query) is dict: |
|
175 | 176 | self.dict['fq_list'] = [] |
|
176 | 177 | #NUM_RESOURCES_MIN / NUM_RESOURCES_MAX |
|
177 | 178 | #----------------------------------------------------# |
|
178 | 179 | if 'dataset_start_date' in query: |
|
179 | 180 | if type(query['dataset_start_date']) is str: |
|
180 | 181 | try: |
|
181 | 182 | datetime.strptime(query['dataset_start_date'], '%Y-%m-%d') |
|
182 | 183 | if len(query['dataset_start_date']) != 10: |
|
183 | 184 | return '"dataset_start_date", must be: <YYYY-MM-DD>' |
|
184 | 185 | self.dict['fq_list'].append('dataset_start_date:"'+query['dataset_start_date']+'"') |
|
185 | 186 | self.list.append('dataset_start_date') |
|
186 | 187 | except: |
|
187 | 188 | return '"dataset_start_date" incorrect: "%s"' % (query['dataset_start_date']) |
|
188 | 189 | else: |
|
189 | 190 | return '"dataset_start_date" must be <str>' |
|
190 | 191 | #----------------------------------------------------# |
|
191 | 192 | if 'dataset_end_date' in query: |
|
192 | 193 | if type(query['dataset_end_date']) is str: |
|
193 | 194 | try: |
|
194 | 195 | datetime.strptime(query['dataset_end_date'], '%Y-%m-%d') |
|
195 | 196 | if len(query['dataset_end_date']) != 10: |
|
196 | 197 | return '"dataset_end_date", must be: <YYYY-MM-DD>' |
|
197 | 198 | |
|
198 | 199 | if 'dataset_start_date' in query: |
|
199 | 200 | if query['dataset_start_date'] > query['dataset_end_date']: |
|
200 | 201 | return '"dataset_end_date" must be greater than "dataset_start_date"' |
|
201 | 202 | |
|
202 | 203 | self.dict['fq_list'].append('dataset_end_date:"'+query['dataset_end_date']+'"') |
|
203 | 204 | self.list.append('dataset_end_date') |
|
204 | 205 | except: |
|
205 | 206 | return '"dataset_end_date" incorrect: "%s"' % (query['dataset_end_date']) |
|
206 | 207 | else: |
|
207 | 208 | return '"dataset_end_date" must be <str>' |
|
208 | 209 | #----------------------------------------------------# |
|
209 | 210 | if 'tags' in query: |
|
210 | 211 | if isinstance(query['tags'], (int, float, str, list)): |
|
211 | 212 | if type(query['tags']) is list: |
|
212 | 213 | for u in query['tags']: |
|
213 | 214 | self.dict['fq_list'].append('tags:"'+str(u)+'"') |
|
214 | 215 | else: |
|
215 | 216 | self.dict['fq_list'].append('tags:"'+str(query['tags'])+'"') |
|
216 | 217 | |
|
217 | 218 | self.list.append('tags') |
|
218 | 219 | else: |
|
219 | 220 | return '"tags" must be <list> or <float> or <int> or <str>' |
|
220 | 221 | #----------------------------------------------------# |
|
221 | 222 | for key, value in query.items(): |
|
222 | 223 | if value is not None and not key in self.list: |
|
223 | 224 | self.dict['fq_list'].append(str(key)+':"'+str(value)+'"') |
|
224 | 225 | else: |
|
225 | 226 | return '"query" must be <dict>' |
|
226 | 227 | |
|
227 | 228 | return getattr(self.ckan.action, 'package_search')(include_private=True, **self.dict) |
|
228 | 229 | |
|
229 | 230 | elif type_option == 'resource': |
|
230 | 231 | for key1, value1 in kwargs.items(): |
|
231 | 232 | if key1 != 'fields': |
|
232 | 233 | self.dict[key1] = value1 |
|
233 | 234 | |
|
234 | 235 | if query is not None: |
|
235 | 236 | if type(query) is dict: |
|
236 | 237 | #----------------------------------------------------# |
|
237 | 238 | if 'file_date_min' in query: |
|
238 | 239 | if type(query['file_date_min']) is str: |
|
239 | 240 | try: |
|
240 | 241 | datetime.strptime(query['file_date_min'], '%Y-%m-%d') |
|
241 | 242 | if len(query['file_date_min']) != 10: |
|
242 | 243 | return '"file_date_min", must be: <YYYY-MM-DD>' |
|
243 | 244 | except: |
|
244 | 245 | return '"file_date_min" incorrect: "%s"' % (query['file_date_min']) |
|
245 | 246 | else: |
|
246 | 247 | return '"file_date_min" must be <str>' |
|
247 | 248 | #----------------------------------------------------# |
|
248 | 249 | if 'file_date_max' in query: |
|
249 | 250 | if type(query['file_date_max']) is str: |
|
250 | 251 | try: |
|
251 | 252 | datetime.strptime(query['file_date_max'], '%Y-%m-%d') |
|
252 | 253 | if len(query['file_date_max']) != 10: |
|
253 | 254 | return '"file_date_max", must be: <YYYY-MM-DD>' |
|
254 | 255 | |
|
255 | 256 | if 'file_date_min' in query: |
|
256 | 257 | if query['file_date_min'] > query['file_date_max']: |
|
257 | 258 | return '"file_date_max" must be greater than "file_date_min"' |
|
258 | 259 | except: |
|
259 | 260 | return '"file_date_max" incorrect: "%s"' % (query['file_date_max']) |
|
260 | 261 | else: |
|
261 | 262 | return '"file_date_max" must be <str>' |
|
262 | 263 | #----------------------------------------------------# |
|
263 | 264 | self.dict['query'] = query |
|
264 | 265 | else: |
|
265 | 266 | return '"query" must be <dict>' |
|
266 | 267 | return getattr(self.ckan.action, 'resources_search')(**self.dict) |
|
267 | 268 | |
|
268 | 269 | elif type_option == 'tag': |
|
269 | 270 | for key1, value1 in kwargs.items(): |
|
270 | 271 | if key1 != 'fields': |
|
271 | 272 | self.dict[key1] = value1 |
|
272 | 273 | |
|
273 | 274 | if not 'vocabulary_id' in kwargs: |
|
274 | 275 | print('Missing "vocabulary_id" value: tags that don’t belong to any vocabulary') |
|
275 | 276 | else: |
|
276 | 277 | print('Only tags that belong to "{}" vocabulary'.format(kwargs['vocabulary_id'])) |
|
277 | 278 | |
|
278 | 279 | if query is not None: |
|
279 | 280 | if type(query) is dict: |
|
280 | 281 | if 'search' in query: |
|
281 | 282 | if type(query['search']) is list or type(query['search']) is str: |
|
282 | 283 | self.dict['query'] = query['search'] |
|
283 | 284 | else: |
|
284 | 285 | return '"search" must be <list> or <str>' |
|
285 | 286 | else: |
|
286 | 287 | return '"query" must be <dict>' |
|
287 | 288 | return getattr(self.ckan.action, 'tag_search')(**self.dict) |
|
288 | 289 | |
|
289 | 290 | else: |
|
290 | 291 | return 'ERROR:: "type_option = %s" is not accepted' % (type_option) |
|
291 | 292 | |
|
292 | 293 | except: |
|
293 | 294 | _, exc_value, _ = sys.exc_info() |
|
294 | 295 | return exc_value |
|
295 | 296 | else: |
|
296 | 297 | return 'ERROR:: "type_option" must be <str>' |
|
297 | 298 | |
|
298 | 299 | def create(self, type_option, select=None, **kwargs): |
|
299 | 300 | ''' |
|
300 | 301 | PURPOSE: |
|
301 | 302 | Custom function for creating objects. |
|
302 | 303 | |
|
303 | 304 | AVAILABLE PARAMETERS: |
|
304 | 305 | SEE: "GUIA DE SCRIPT.pdf" |
|
305 | 306 | |
|
306 | 307 | STRUCTURE: |
|
307 | 308 | <access_name>.create(type_option = <class 'str'>, param_1 = <class 'param_1'>, ...) |
|
308 | 309 | ''' |
|
309 | 310 | if type(type_option) is str: |
|
310 | 311 | try: |
|
311 | 312 | if type_option == 'dataset': |
|
312 | 313 | return getattr(self.ckan.action, 'package_create')(**kwargs) |
|
313 | 314 | if type_option == 'resource': |
|
314 | 315 | return resource.resource_create(self, **kwargs) |
|
315 | 316 | elif type_option == 'project': |
|
316 | 317 | return getattr(self.ckan.action, 'organization_create')(**kwargs) |
|
317 | 318 | elif type_option == 'member': |
|
318 | 319 | return getattr(self.ckan.action, 'organization_member_create')(**kwargs) |
|
319 | 320 | elif type_option == 'collaborator': |
|
320 | 321 | return getattr(self.ckan.action, 'package_collaborator_create')(**kwargs) |
|
321 | 322 | elif type_option == 'vocabulary': |
|
322 | 323 | return getattr(self.ckan.action, 'vocabulary_create')(**kwargs) |
|
323 | 324 | elif type_option == 'tag': |
|
324 | 325 | return getattr(self.ckan.action, 'tag_create')(**kwargs) |
|
325 | 326 | elif type_option == 'user': |
|
326 | 327 | return getattr(self.ckan.action, 'user_create')(**kwargs) |
|
327 | 328 | elif type_option == 'views': |
|
328 | 329 | if 'resource' == select: |
|
329 | 330 | self.list = ['package'] |
|
330 | 331 | for key1, value1 in kwargs.items(): |
|
331 | 332 | if not key1 in self.list: |
|
332 | 333 | self.dict[key1] = value1 |
|
333 | 334 | return getattr(self.ckan.action, 'resource_create_default_resource_views')(**self.dict) |
|
334 | 335 | elif 'dataset' == select: |
|
335 | 336 | return getattr(self.ckan.action, 'package_create_default_resource_views')(**kwargs) |
|
336 | 337 | else: |
|
337 | 338 | return 'ERROR:: "select = %s" is not accepted' % (select) |
|
338 | 339 | else: |
|
339 | 340 | return 'ERROR:: "type_option = %s" is not accepted' % (type_option) |
|
340 | 341 | except: |
|
341 | 342 | _, exc_value, _ = sys.exc_info() |
|
342 | 343 | return exc_value |
|
343 | 344 | else: |
|
344 | 345 | return 'ERROR:: "type_option" must be <str>' |
|
345 | 346 | |
|
346 | 347 | def patch(self, type_option, **kwargs): |
|
347 | 348 | ''' |
|
348 | 349 | PURPOSE: |
|
349 | 350 | Custom functions for updating objects |
|
350 | 351 | |
|
351 | 352 | AVAILABLE PARAMETERS: |
|
352 | 353 | SEE: "GUIA DE SCRIPT.pdf" |
|
353 | 354 | |
|
354 | 355 | STRUCTURE: |
|
355 | 356 | <access_name>.patch(type_option = <class 'str'>, param_1 = <class 'param_1'>, ...) |
|
356 | 357 | ''' |
|
357 | 358 | if type(type_option) is str: |
|
358 | 359 | try: |
|
359 | 360 | if type_option == 'dataset': |
|
360 | 361 | #Note: only the Dataset's own parameters should be modified here; Resources must not be included |
|
361 | 362 | return getattr(self.ckan.action, 'package_patch')(**kwargs) |
|
362 | 363 | elif type_option == 'project': |
|
363 | 364 | return getattr(self.ckan.action, 'organization_patch')(**kwargs) |
|
364 | 365 | elif type_option == 'resource': |
|
365 | 366 | return resource.resource_patch(self, **kwargs) |
|
366 | 367 | elif type_option == 'member': |
|
367 | 368 | return getattr(self.ckan.action, 'organization_member_create')(**kwargs) |
|
368 | 369 | elif type_option == 'collaborator': |
|
369 | 370 | return getattr(self.ckan.action, 'package_collaborator_create')(**kwargs) |
|
370 | 371 | else: |
|
371 | 372 | return 'ERROR:: "type_option = %s" is not accepted' % (type_option) |
|
372 | 373 | except: |
|
373 | 374 | _, exc_value, _ = sys.exc_info() |
|
374 | 375 | return exc_value |
|
375 | 376 | else: |
|
376 | 377 | return 'ERROR:: "type_option" must be <str>' |
|
377 | 378 | |
|
378 | 379 | def delete(self, type_option, select=None, **kwargs): |
|
379 | 380 | ''' |
|
380 | 381 | PURPOSE: |
|
381 | 382 | Custom function for deleting and/or purging objects. |
|
382 | 383 | |
|
383 | 384 | AVAILABLE PARAMETERS: |
|
384 | 385 | SEE: "GUIA DE SCRIPT.pdf" |
|
385 | 386 | |
|
386 | 387 | STRUCTURE: |
|
387 | 388 | <access_name>.delete(type_option = <class 'str'>, param_1 = <class 'param_1'>, ...) |
|
388 | 389 | ''' |
|
389 | 390 | if type(type_option) is str: |
|
390 | 391 | try: |
|
391 | 392 | if type_option == 'dataset': |
|
392 | 393 | if select is None: |
|
393 | 394 | return 'ERROR:: "select" must not be "None"' |
|
394 | 395 | else: |
|
395 | 396 | if 'delete' == select: |
|
396 | 397 | return getattr(self.ckan.action, 'package_delete')(**kwargs) |
|
397 | 398 | elif 'purge' == select: |
|
398 | 399 | return getattr(self.ckan.action, 'dataset_purge')(**kwargs) |
|
399 | 400 | else: |
|
400 | 401 | return 'ERROR:: "select = %s" is not accepted' % (select) |
|
401 | 402 | elif type_option == 'project': |
|
402 | 403 | if select is None: |
|
403 | 404 | return 'ERROR:: "select" must not be "None"' |
|
404 | 405 | else: |
|
405 | 406 | if 'delete' == select: |
|
406 | 407 | return getattr(self.ckan.action, 'organization_delete')(**kwargs) |
|
407 | 408 | elif 'purge' == select: |
|
408 | 409 | return getattr(self.ckan.action, 'organization_purge')(**kwargs) |
|
409 | 410 | else: |
|
410 | 411 | return 'ERROR:: "select = %s" is not accepted' % (select) |
|
411 | 412 | elif type_option == 'resource': |
|
412 | 413 | if select is None: |
|
413 | 414 | return 'ERROR:: "select" must not be "None"' |
|
414 | 415 | else: |
|
415 | 416 | return resource.resource_delete(self, select, **kwargs) |
|
416 | 417 | elif type_option == 'vocabulary': |
|
417 | 418 | return getattr(self.ckan.action, 'vocabulary_delete')(**kwargs) |
|
418 | 419 | elif type_option == 'tag': |
|
419 | 420 | return getattr(self.ckan.action, 'tag_delete')(**kwargs) |
|
420 | 421 | elif type_option == 'user': |
|
421 | 422 | return getattr(self.ckan.action, 'user_delete')(**kwargs) |
|
422 | 423 | else: |
|
423 | 424 | return 'ERROR:: "type_option = %s" is not accepted' % (type_option) |
|
424 | 425 | except: |
|
425 | 426 | _, exc_value, _ = sys.exc_info() |
|
426 | 427 | return exc_value |
|
427 | 428 | else: |
|
428 | 429 | return 'ERROR:: "type_option" must be <str>' |
|
429 | 430 | |
|
430 | 431 | def download(self, id, processes=1, path=os.path.expanduser("~"), **kwargs): |
|
431 | 432 | ''' |
|
432 | 433 | PURPOSE: |
|
433 | 434 | Advanced custom function for downloading the existing files of one or more datasets. |
|
434 | 435 | |
|
435 | 436 | AVAILABLE PARAMETERS: |
|
436 | 437 | SEE: "GUIA DE SCRIPT.pdf" |
|
437 | 438 | |
|
438 | 439 | STRUCTURE: |
|
439 | 440 | <access_name>.download(id = <class 'str' or 'list'>, param_1 = <class 'param_1'>, ...) |
|
440 | 441 | ''' |
|
441 | 442 | #------------------ PATH ----------------------# |
|
442 | 443 | if isinstance(path, str): |
|
443 | 444 | if os.path.isdir(path): |
|
444 | 445 | if not path.endswith(os.sep): |
|
445 | 446 | path = path + os.sep |
|
446 | 447 | test_txt = path + datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f")+'.txt' |
|
447 | 448 | try: |
|
448 | 449 | file_txt = open(test_txt, 'w') |
|
449 | 450 | file_txt.close() |
|
450 | 451 | os.remove(test_txt) |
|
451 | 452 | except: |
|
452 | 453 | return 'ERROR:: Access denied, you are not authorized to write files: "%s"' % (path) |
|
453 | 454 | else: |
|
454 | 455 | return 'ERROR:: "path" does not exist' |
|
455 | 456 | else: |
|
456 | 457 | return 'ERROR:: "path" must be: <class "str">' |
|
457 | 458 | |
|
458 | 459 | #------------------ PROCESSES -----------------# |
|
459 | 460 | if not isinstance(processes, int): |
|
460 | 461 | return 'ERROR:: "processes" must be: <class "int">' |
|
461 | 462 | |
|
462 | 463 | #------------------ ID OR NAME ----------------# |
|
463 | 464 | if isinstance(id, str): |
|
464 | 465 | id = [id] |
|
465 | 466 | elif isinstance(id, list): |
|
466 | 467 | id = list(map(str, id)) |
|
467 | 468 | else: |
|
468 | 469 | return 'ERROR:: dataset "id" must be: <class "str" or "list">' |
|
469 | 470 | #----------------------------------------------# |
|
470 | 471 | arguments = { |
|
471 | 472 | '--apikey': self.Authorization, |
|
472 | 473 | '--ckan-user': None, |
|
473 | 474 | '--config': None, |
|
474 | 475 | '--datapackages': path, |
|
475 | 476 | '--datastore-fields': False, |
|
476 | 477 | '--get-request': False, |
|
477 | 478 | '--insecure': not self.verify, |
|
478 | 479 | '--processes': str(processes), |
|
479 | 480 | '--quiet': False, |
|
480 | 481 | '--remote': self.url, |
|
481 | 482 | '--worker': False, |
|
482 | 483 | #'--log': 'log.txt', |
|
483 | 484 | #'--all': False, |
|
484 | 485 | #'--gzip': False, |
|
485 | 486 | #'--output': None, |
|
486 | 487 | #'--max-records': None, |
|
487 | 488 | #'--output-json': False, |
|
488 | 489 | #'--output-jsonl': False, |
|
489 | 490 | #'--create-only': False, |
|
490 | 491 | #'--help': False, |
|
491 | 492 | #'--input': None, |
|
492 | 493 | #'--input-json': False, |
|
493 | 494 | #'--start-record': '1', |
|
494 | 495 | #'--update-only': False, |
|
495 | 496 | #'--upload-logo': False, |
|
496 | 497 | #'--upload-resources': False, |
|
497 | 498 | #'--version': False, |
|
498 | 499 | 'ID_OR_NAME': id, |
|
499 | 500 | 'datasets': True, |
|
500 | 501 | 'dump': True, |
|
501 | 502 | #'ACTION_NAME': None, |
|
502 | 503 | #'KEY:JSON': [], |
|
503 | 504 | #'KEY=STRING': [], |
|
504 | 505 | #'KEY@FILE': [], |
|
505 | 506 | #'action': False, |
|
506 | 507 | #'delete': False, |
|
507 | 508 | #'groups': False, |
|
508 | 509 | #'load': False, |
|
509 | 510 | #'organizations': False, |
|
510 | 511 | #'related': False, |
|
511 | 512 | #'search': False, |
|
512 | 513 | #'users': False |
|
513 | 514 | } |
|
514 | return download.dump_things_change(self.ckan, 'datasets', arguments, **kwargs) |
\ No newline at end of file
|
515 | return download.dump_things_change(self.ckan, 'datasets', arguments, self.ua, **kwargs) |
\ No newline at end of file
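
For reference, a minimal usage sketch of the Api class defined above. This is not part of the changeset: the import path is assumed (the class lives somewhere inside the "jrodb" package), and the server URL, authorization token, tag and dataset id are placeholders.

    # Minimal sketch; import path, URL, token, tag and dataset id are placeholders.
    from jrodb.api import Api  # assumed module path within the "jrodb" package

    with Api('http://demo.example.com', Authorization='#########') as repo:
        # Generic call: facet_* keyword arguments are rewritten to facet.* for package_search
        found = repo.action('package_search', q='radar', rows=5)

        # Convenience wrappers over the CKAN actions
        dataset = repo.show('dataset', id='example-dataset-id')
        matches = repo.search('dataset', query={'tags': ['ionosphere']})

        # Download every file of a dataset into the home directory (the default path)
        repo.download(id='example-dataset-id', processes=2)
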
@@ -1,236 +1,236
|
1 | 1 | #from ckanapi.datapackage import populate_schema_from_datastore |
|
2 | 2 | from ckanapi.cli import workers, dump |
|
3 | 3 | from ckanapi.cli.utils import pretty_json, completion_stats, compact_json, quiet_int_pipe |
|
4 | 4 | from datetime import datetime |
|
5 | 5 | from tqdm import tqdm |
|
6 | 6 | import sys |
|
7 | 7 | import json |
|
8 | 8 | import os |
|
9 | 9 | import requests |
|
10 | 10 | import six |
|
11 | 11 | |
|
12 | 12 | if sys.version_info.major == 3: |
|
13 | 13 | from urllib.parse import urlparse |
|
14 | 14 | else: |
|
15 | 15 | import urlparse |
|
16 | 16 | |
|
17 | 17 | DL_CHUNK_SIZE = 100 * 1024 |
|
18 | 18 | |
|
19 | def dump_things_change(ckan, thing, arguments, worker_pool=None, stdout=None, stderr=None, **kwargs): | |
|
19 | def dump_things_change(ckan, thing, arguments, ua, worker_pool=None, stdout=None, stderr=None, **kwargs): | |
|
20 | 20 | if worker_pool is None: |
|
21 | 21 | worker_pool = workers.worker_pool |
|
22 | 22 | if stdout is None: |
|
23 | 23 | stdout = getattr(sys.__stdout__, 'buffer', sys.__stdout__) |
|
24 | 24 | if stderr is None: |
|
25 | 25 | stderr = getattr(sys.stderr, 'buffer', sys.stderr) |
|
26 | 26 | |
|
27 | 27 | if arguments['--worker']: |
|
28 | 28 | return dump.dump_things_worker(ckan, thing, arguments) |
|
29 | 29 | ''' |
|
30 | 30 | log = None |
|
31 | 31 | if arguments['--log']: |
|
32 | 32 | log = open(arguments['--log'], 'a') |
|
33 | 33 | ''' |
|
34 | 34 | jsonl_output = stdout |
|
35 | 35 | if arguments['--datapackages']: |
|
36 | 36 | jsonl_output = open(os.devnull, 'wb') |
|
37 | 37 | |
|
38 | 38 | names = arguments['ID_OR_NAME'] |
|
39 | 39 | |
|
40 | 40 | if names and isinstance(names[0], dict): |
|
41 | 41 | names = [rec.get('name',rec.get('id')) for rec in names] |
|
42 | 42 | ''' |
|
43 | 43 | if arguments['--datapackages']: |
|
44 | 44 | arguments['--datastore-fields'] = True |
|
45 | 45 | ''' |
|
46 | 46 | #----------------------------# |
|
47 | 47 | filtered_urls = {} |
|
48 | 48 | for val in names: |
|
49 | 49 | try: |
|
50 | 50 | filtered_urls[val] = getattr(ckan.action, 'url_resources')(id=val, **kwargs) |
|
51 | 51 | except: |
|
52 | 52 | _, exc_value, _ = sys.exc_info() |
|
53 | 53 | return exc_value |
|
54 | 54 | #----------------------------# |
|
55 | 55 | |
|
56 | 56 | cmd = dump._worker_command_line(thing, arguments) |
|
57 | 57 | processes = int(arguments['--processes']) |
|
58 | 58 | if hasattr(ckan, 'parallel_limit'): |
|
59 | 59 | processes = min(processes, ckan.parallel_limit) |
|
60 | 60 | stats = completion_stats(processes) |
|
61 | 61 | pool = worker_pool(cmd, processes, enumerate(compact_json(n) + b'\n' for n in names)) |
|
62 | 62 | |
|
63 | 63 | results = {} |
|
64 | 64 | expecting_number = 0 |
|
65 | 65 | with quiet_int_pipe() as errors: |
|
66 | 66 | for job_ids, finished, result in pool: |
|
67 | 67 | if not result: |
|
68 | 68 | return 1 |
|
69 | 69 | timestamp, error, record = json.loads(result.decode('utf-8')) |
|
70 | 70 | results[finished] = record |
|
71 | 71 | |
|
72 | 72 | #----------------------------------------# |
|
73 | 73 | datapackages_path = arguments['--datapackages'] |
|
74 | 74 | datapackage_dir = name_no_repetition(record.get('name', ''), datapackages_path) |
|
75 | 75 | #----------------------------------------# |
|
76 | 76 | if not arguments['--quiet']: |
|
77 | 77 | stderr.write('** Finished: {0} | Job IDs: {1} | Next Report: {2} | Error: {3} | Path: {4} | Dataset Name: {5}\n'.format( |
|
78 | 78 | finished, |
|
79 | 79 | job_ids, |
|
80 | 80 | next(stats), |
|
81 | 81 | error, |
|
82 | 82 | datapackage_dir, |
|
83 | 83 | record.get('name', '') if record else '', |
|
84 | 84 | ).encode('utf-8')) |
|
85 | 85 | ''' |
|
86 | 86 | if log: |
|
87 | 87 | log.write(compact_json([ |
|
88 | 88 | timestamp, |
|
89 | 89 | finished, |
|
90 | 90 | error, |
|
91 | 91 | record.get('name', '') if record else None, |
|
92 | 92 | ]) + b'\n') |
|
93 | 93 | ''' |
|
94 | 94 | if datapackages_path: |
|
95 | 95 | try: |
|
96 | 96 | filter_url = filtered_urls[record.get('name', '')] |
|
97 | 97 | except: |
|
98 | 98 | filter_url = filtered_urls[record.get('id', '')] |
|
99 | create_datapackage_change(record, filter_url, datapackage_dir, stderr, arguments['--apikey'], arguments['--remote'], arguments['--insecure']) | |
|
99 | create_datapackage_change(record, filter_url, datapackage_dir, stderr, arguments['--apikey'], arguments['--remote'], arguments['--insecure'], ua) | |
|
100 | 100 | |
|
101 | 101 | while expecting_number in results: |
|
102 | 102 | record = results.pop(expecting_number) |
|
103 | 103 | if record: |
|
104 | 104 | jsonl_output.write(compact_json(record, sort_keys=True) + b'\n') |
|
105 | 105 | expecting_number += 1 |
|
106 | 106 | if 'pipe' in errors: |
|
107 | 107 | return 1 |
|
108 | 108 | if 'interrupt' in errors: |
|
109 | 109 | return 2 |
|
110 | 110 | |
|
111 | def create_datapackage_change(record, filtered_url, datapackage_dir, stderr, apikey, host_url, insecure): | |
|
111 | def create_datapackage_change(record, filtered_url, datapackage_dir, stderr, apikey, host_url, insecure, ua): | |
|
112 | 112 | resource_formats_to_ignore = ['API', 'api'] |
|
113 | 113 | |
|
114 | 114 | os.makedirs(os.path.join(datapackage_dir, 'data')) |
|
115 | 115 | record['path'] = datapackage_dir |
|
116 | 116 | |
|
117 | 117 | ckan_resources = [] |
|
118 | 118 | for resource in tqdm(record.get('resources', []), unit_scale=True): |
|
119 | 119 | #for resource in record.get('resources', []): |
|
120 | 120 | if resource['format'] in resource_formats_to_ignore: |
|
121 | 121 | continue |
|
122 | 122 | |
|
123 | 123 | if not {'name': resource['name'], 'url': resource['url']} in filtered_url: |
|
124 | 124 | continue |
|
125 | 125 | |
|
126 | 126 | if len(resource['url']) == 0: |
|
127 | 127 | continue |
|
128 | 128 | |
|
129 | 129 | filename = name_no_repetition(resource['name'], os.path.join(datapackage_dir, 'data'), 'resource') |
|
130 | 130 | resource['path'] = os.path.join(datapackage_dir, 'data', filename) |
|
131 | 131 | |
|
132 | cres = create_resource_change(resource, stderr, apikey, host_url, insecure) | |
|
132 | cres = create_resource_change(resource, stderr, apikey, host_url, insecure, ua) | |
|
133 | 133 | if not cres: |
|
134 | 134 | continue |
|
135 | 135 | ''' |
|
136 | 136 | #----------------------------------------# |
|
137 | 137 | dres = {'path': os.path.join('data', filename), |
|
138 | 138 | 'description': cres.get('description', ''), |
|
139 | 139 | 'format': cres.get('format', ''), |
|
140 | 140 | 'name': cres.get('name', ''), |
|
141 | 141 | 'title': cres.get('name', '').title()} |
|
142 | 142 | #----------------------------------------# |
|
143 | 143 | populate_schema_from_datastore(cres, dres) |
|
144 | 144 | ''' |
|
145 | 145 | ckan_resources.append(resource) |
|
146 | 146 | |
|
147 | 147 | dataset = dict(record, resources=ckan_resources) |
|
148 | 148 | datapackage = dataset_to_datapackage_change(dataset) |
|
149 | 149 | |
|
150 | 150 | json_path = os.path.join(datapackage_dir, 'datapackage.json') |
|
151 | 151 | with open(json_path, 'wb') as out: |
|
152 | 152 | out.write(pretty_json(datapackage)) |
|
153 | 153 | |
|
154 | 154 | return datapackage_dir, datapackage, json_path |
|
155 | 155 | |
|
156 | def create_resource_change(resource, stderr, apikey, host_url, insecure): | |
|
156 | def create_resource_change(resource, stderr, apikey, host_url, insecure, ua): | |
|
157 | 157 | # ---------- REPLACE URL --------- # |
|
158 | 158 | if urlparse(host_url).netloc != 'www.igp.gob.pe' and urlparse(resource['url']).netloc == 'www.igp.gob.pe': |
|
159 | 159 | resource['url'] = resource['url'].replace(urlparse(resource['url']).scheme + '://' + urlparse(resource['url']).netloc, |
|
160 | 160 | urlparse(host_url).scheme + '://' + urlparse(host_url).netloc) |
|
161 | 161 | #----------------------------------# |
|
162 | 162 | try: |
|
163 | r = requests.get(resource['url'], headers={'Authorization': apikey}, stream=True, verify=not insecure) | |
|
163 | r = requests.get(resource['url'], headers={'Authorization': apikey, 'User-Agent': ua}, stream=True, verify=not insecure) | |
|
164 | 164 | #---------------------------------------# |
|
165 | 165 | try: |
|
166 | 166 | r.raise_for_status() |
|
167 | 167 | except requests.exceptions.HTTPError as e: |
|
168 | 168 | return False |
|
169 | 169 | #---------------------------------------# |
|
170 | 170 | with open(resource['path'], 'wb') as f: |
|
171 | 171 | for chunk in r.iter_content(chunk_size=DL_CHUNK_SIZE): |
|
172 | 172 | if chunk: |
|
173 | 173 | f.write(chunk) |
|
174 | 174 | |
|
175 | 175 | except requests.ConnectionError: |
|
176 | 176 | stderr.write('URL {0} refused connection. The resource will not be downloaded\n'.format(resource['url']).encode('utf-8')) |
|
177 | 177 | except requests.exceptions.RequestException as e: |
|
178 | 178 | stderr.write('{0}\n'.format(str(e.args[0]) if len(e.args) > 0 else '').encode('utf-8')) |
|
179 | 179 | except Exception as e: |
|
180 | 180 | stderr.write('{0}'.format(str(e.args[0]) if len(e.args) > 0 else '').encode('utf-8')) |
|
181 | 181 | return resource |
|
182 | 182 | |
|
183 | 183 | def dataset_to_datapackage_change(dataset_dict): |
|
184 | 184 | dp = {'name': dataset_dict['name'], |
|
185 | 185 | 'id': dataset_dict['id'], |
|
186 | 186 | 'path': dataset_dict['path'], |
|
187 | 187 | 'last_update': datetime.strptime(dataset_dict['metadata_modified'], "%Y-%m-%dT%H:%M:%S.%f").strftime("%d-%b-%Y %I.%M %p")} |
|
188 | 188 | |
|
189 | 189 | resources = dataset_dict.get('resources') |
|
190 | 190 | if resources: |
|
191 | 191 | dp['resources'] = [convert_to_datapackage_resource_change(r) |
|
192 | 192 | for r in resources] |
|
193 | 193 | return dp |
|
194 | 194 | |
|
195 | 195 | def convert_to_datapackage_resource_change(resource_dict): |
|
196 | 196 | resource = {} |
|
197 | 197 | |
|
198 | 198 | if resource_dict.get('id'): |
|
199 | 199 | resource['id'] = resource_dict['id'] |
|
200 | 200 | |
|
201 | 201 | if resource_dict.get('name'): |
|
202 | 202 | resource['name'] = resource_dict['name'] |
|
203 | 203 | |
|
204 | 204 | if resource_dict.get('path'): |
|
205 | 205 | if os.path.isfile(resource_dict['path']): |
|
206 | 206 | resource['path'] = resource_dict['path'] |
|
207 | 207 | else: |
|
208 | 208 | resource['url'] = resource_dict['url'] |
|
209 | 209 | |
|
210 | 210 | schema = resource_dict.get('schema') |
|
211 | 211 | if isinstance(schema, six.string_types): |
|
212 | 212 | try: |
|
213 | 213 | resource['schema'] = json.loads(schema) |
|
214 | 214 | except ValueError: |
|
215 | 215 | resource['schema'] = schema |
|
216 | 216 | elif isinstance(schema, dict): |
|
217 | 217 | resource['schema'] = schema |
|
218 | 218 | return resource |
|
219 | 219 | |
|
220 | 220 | def name_no_repetition(name, dir, option=''): |
|
221 | 221 | count = 0 |
|
222 | 222 | while True: |
|
223 | 223 | count = count + 1 |
|
224 | 224 | if not os.path.exists(os.path.join(dir, name)): |
|
225 | 225 | if option == 'resource': |
|
226 | 226 | return name |
|
227 | 227 | else: |
|
228 | 228 | return os.path.join(dir, name) |
|
229 | 229 | |
|
230 | 230 | elif not os.path.exists(os.path.join(dir, '('+str(count)+')'+name)): |
|
231 | 231 | if option == 'resource': |
|
232 | 232 | return '('+str(count)+')'+name |
|
233 | 233 | else: |
|
234 | 234 | return os.path.join(dir, '('+str(count)+')'+name) |
|
235 | 235 | else: |
|
236 | 236 | pass |
\ No newline at end of file
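
The substantive change in this file is that the client user agent ("ua") is now passed from Api.download through dump_things_change and create_datapackage_change into create_resource_change, so every per-resource request sends it. A rough sketch of the request that results, assuming placeholder values for the resource URL, API key and output filename:

    # Sketch of the per-resource download made by create_resource_change after this change.
    import requests

    ua = 'CKAN_JRO/2.9.2 (+http://intranet.igp.gob.pe:8082/DATABASES/ckanext-jro/api-cliente)'
    DL_CHUNK_SIZE = 100 * 1024  # same chunk size as above

    r = requests.get('http://demo.example.com/dataset/d/resource/r/download/data.bin',  # placeholder URL
                     headers={'Authorization': 'your-api-key', 'User-Agent': ua},
                     stream=True,
                     verify=True)  # Api(..., secure=False) arrives here as verify=False via --insecure
    r.raise_for_status()
    with open('data.bin', 'wb') as f:  # placeholder output path
        for chunk in r.iter_content(chunk_size=DL_CHUNK_SIZE):
            if chunk:
                f.write(chunk)
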
@@ -1,17 +1,17
|
1 | 1 | # encoding: utf-8 |
|
2 | 2 | from setuptools import setup |
|
3 | 3 | |
|
4 | 4 | setup( |
|
5 | 5 | name = "jrodb", |
|
6 | version = "2.9.2. |
|
6 | version = "2.9.2.1", |
|
7 | 7 | description = "Data Repository - JRO", |
|
8 | 8 | author = "Edson Ynilupu Mattos", |
|
9 | 9 | author_email = "eynilupu@igp.gob.pe", |
|
10 | 10 | url = "http://intranet.igp.gob.pe:8082/DATABASES/ckanext-jro/api-cliente", |
|
11 | 11 | packages = ["jrodb"], |
|
12 | 12 | install_requires = [ |
|
13 | 13 | "ckanapi==4.7", |
|
14 | 14 | "requests", |
|
15 | 15 | "tqdm" |
|
16 | 16 | ], |
|
17 | 17 | ) |
\ No newline at end of file
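
A quick optional check, not part of this changeset, that an environment picked up the bumped release after reinstalling (assumes setuptools' pkg_resources is available):

    # Print the installed client version; expected to show 2.9.2.1 after this change.
    import pkg_resources
    print(pkg_resources.get_distribution("jrodb").version)
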