##// END OF EJS Templates
v2.9.2 :: Update 'User-Agent' in URL parameters - download
eynilupu -
r23:7969fa062c2f master
parent child
Show More
@@ -1,514 +1,515
1 from ckanapi import RemoteCKAN
1 from ckanapi import RemoteCKAN
2 from datetime import datetime
2 from datetime import datetime
3 from jrodb import download
3 from jrodb import download
4 from jrodb import resource
4 from jrodb import resource
5 #from ckanapi.errors import NotAuthorized, NotFound, ValidationError, SearchQueryError, SearchError, CKANAPIError, ServerIncompatibleError
5 #from ckanapi.errors import NotAuthorized, NotFound, ValidationError, SearchQueryError, SearchError, CKANAPIError, ServerIncompatibleError
6 import sys
6 import sys
7 import platform
7 import platform
8 import os
8 import os
9 import requests
9 import requests
10
10
class Api(object):
    """
    Client for managing and retrieving repository data through the CKAN APIs.

    PREREQUISITES:
        - Step 1: Have "pip [Python 2]" or "pip3 [Python 3]" installed.
        - Step 2: Install the following package:
            On Python 2
                - pip install -e git+http://intranet.igp.gob.pe:8082/DATABASES/ckanext-jro/api-cliente#egg=jrodb
            On Python 3
                - pip3 install -e git+http://intranet.igp.gob.pe:8082/DATABASES/ckanext-jro/api-cliente#egg=jrodb

    AVAILABLE FUNCTIONS:
        - action
        - show
        - search
        - create
        - patch
        - delete
        - download

    EXAMPLES:
        #1:
            with Api('http://demo.example.com', Authorization='#########') as <access_name>:
                ... some operation(s) ...
        #2:
            <access_name> = Api('http://example.com', Authorization='#########')
            ... some operation(s) ...
            <access_name>.ckan.close()

    ERROR REPORTING CONVENTION:
        The public methods do not raise when a request fails. They return
        either an 'ERROR:: ...' / validation message (str) or the exception
        object that was caught, so callers must inspect the returned value.

    REPORTING A PROBLEM:
        Send an e-mail to eynilupu@igp.gob.pe with the following details:
            1) An e-mail address where you can be reached
            2) Description of the problem
            3) In which step or section did you find the problem?
            4) What result did you expect?
    """

    # Keyword arguments that are sent to 'package_search' with a dot instead
    # of an underscore (a dotted name cannot be written as a Python keyword).
    _FACET_KEYS = ('facet_mincount', 'facet_limit', 'facet_field')

    # type_option -> CKAN action name, one table per public method.
    _SHOW_ACTIONS = {
        'dataset': 'package_show',
        'resource': 'resource_show',
        'project': 'organization_show',
        'collaborator': 'package_collaborator_list_for_user',
        'member': 'organization_list_for_user',
        'vocabulary': 'vocabulary_show',
        'tag': 'tag_show',
        'user': 'user_show',
        'job': 'job_show',
    }
    _CREATE_ACTIONS = {
        'dataset': 'package_create',
        'project': 'organization_create',
        'member': 'organization_member_create',
        'collaborator': 'package_collaborator_create',
        'vocabulary': 'vocabulary_create',
        'tag': 'tag_create',
        'user': 'user_create',
    }
    _PATCH_ACTIONS = {
        'dataset': 'package_patch',
        'project': 'organization_patch',
        'member': 'organization_member_create',
        'collaborator': 'package_collaborator_create',
    }
    _DELETE_ACTIONS = {
        'vocabulary': 'vocabulary_delete',
        'tag': 'tag_delete',
        'user': 'user_delete',
    }
    # type_option -> {select -> CKAN action name} for deletions that need "select".
    _SELECT_DELETE_ACTIONS = {
        'dataset': {'delete': 'package_delete', 'purge': 'dataset_purge'},
        'project': {'delete': 'organization_delete', 'purge': 'organization_purge'},
    }

    def __init__(self, url, Authorization=None, secure=True):
        """
        Open a connection to the CKAN site at *url*.

        url:            base URL of the CKAN site.
        Authorization:  API key forwarded to RemoteCKAN as "apikey".
        secure:         pass exactly False to disable TLS certificate
                        verification; any other value leaves the default
                        session in place.
        """
        #-------- Check Secure -------#
        self.verify = secure
        if not secure and isinstance(secure, bool):
            session = requests.Session()
            session.verify = False
        else:
            session = None
        #------------------------------#
        self.url = url
        self.ua = 'CKAN_JRO/2.9.2 (+http://intranet.igp.gob.pe:8082/DATABASES/ckanext-jro/api-cliente)'
        self.ckan = RemoteCKAN(self.url, apikey=Authorization, user_agent=self.ua, session=session)
        self.Authorization = Authorization
        self.separator = '\\' if platform.system() == 'Windows' else '/'

        # The attributes below are not used by the methods of this class any
        # more; they are kept because the instance is handed to the helpers
        # in jrodb.resource, which may read them.
        self.chunk_size = 1024
        self.list = []
        self.dict = {}
        self.str = ''
        self.check = 1
        self.cont = 0

    def __enter__(self):
        """Return the instance so it can be used in a "with" statement."""
        return self

    def __exit__(self, *args):
        """Close the underlying CKAN connection when the "with" block ends."""
        self.ckan.close()

    def _call(self, action_name, params):
        """Invoke the CKAN action *action_name* with the dict *params* as keyword arguments."""
        return getattr(self.ckan.action, action_name)(**params)

    @staticmethod
    def _check_date(query, key, not_before=None):
        """
        Validate query[key] as a 'YYYY-MM-DD' string.

        When *not_before* names another key present in *query*, query[key]
        must not be earlier than query[not_before] (that value must already
        have been validated). Returns an error message, or None when valid.
        """
        value = query[key]
        if not isinstance(value, str):
            return '"%s" must be <str>' % (key)
        try:
            datetime.strptime(value, '%Y-%m-%d')
        except ValueError:
            return '"%s" incorrect: "%s"' % (key, value)
        # strptime also accepts non-padded fields such as '2020-1-5'.
        if len(value) != 10:
            return '"%s", must be: <YYYY-MM-DD>' % (key)
        # Zero-padded ISO dates compare correctly as plain strings.
        if not_before is not None and not_before in query and query[not_before] > value:
            return '"%s" must be greater than "%s"' % (key, not_before)
        return None

    def action(self, action, **kwargs):
        """
        Call any of the available APIs by name.

        AVAILABLE APIs:
            SEE: "GUIA DE SCRIPT.pdf"

        EXAMPLE:
            <access_name>.action(<consuming API>, param_1 = <class 'param_1'>, ...)

        Returns the API result, or the caught exception object on failure.
        """
        #--------------- CASE: PACKAGE SEARCH ---------------#
        if action == 'package_search':
            for facet in self._FACET_KEYS:
                if facet in kwargs:
                    kwargs[facet.replace('_', '.')] = kwargs.pop(facet)
        #----------------------------------------------------#
        try:
            return self._call(action, kwargs)
        except Exception as exc:
            return exc

    def show(self, type_option, id, **kwargs):
        """
        Look up one specific object.

        AVAILABLE PARAMETERS:
            SEE: "GUIA DE SCRIPT.pdf"

        STRUCTURE:
            <access_name>.show(type_option = <class 'str'>, id = <class 'str'>, param_1 = <class 'param_1'>, ...)

        Returns the API result, an 'ERROR:: ...' message, or the caught
        exception object on failure.
        """
        if not isinstance(type_option, str):
            return 'ERROR:: "type_option" must be a str'

        action_name = self._SHOW_ACTIONS.get(type_option)
        if action_name is None:
            return 'ERROR:: "type_option = %s" is not accepted' % (type_option)

        try:
            if type_option == 'tag' and 'vocabulary_id' not in kwargs:
                print('Missing "vocabulary_id" value: assume it is a free tag')
            return self._call(action_name, dict(kwargs, id=id))
        except Exception as exc:
            return exc

    def search(self, type_option, query=None, **kwargs):
        """
        Search for objects that satisfy some criteria.

        AVAILABLE PARAMETERS:
            SEE: "GUIA DE SCRIPT.pdf"

        STRUCTURE:
            <access_name>.search(type_option = <class 'str'>, query = <class 'dict'>, param_1 = <class 'param_1'>, ...)

        type_option is one of 'dataset', 'resource' or 'tag'. The request
        parameters are built from scratch on every call, so nothing from a
        previous search is carried over.

        Returns the API result, a validation / 'ERROR:: ...' message, or the
        caught exception object on failure.
        """
        if not isinstance(type_option, str):
            return 'ERROR:: "type_option" must be <str>'

        try:
            if type_option == 'dataset':
                return self._search_datasets(query, kwargs)
            if type_option == 'resource':
                return self._search_resources(query, kwargs)
            if type_option == 'tag':
                return self._search_tags(query, kwargs)
            return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
        except Exception as exc:
            return exc

    def _search_datasets(self, query, kwargs):
        """Build the 'package_search' request from *query* / *kwargs* and run it."""
        # These keys are controlled by this method and may not be overridden.
        reserved = ('fq', 'fq_list', 'include_private')
        params = {}
        for key, value in kwargs.items():
            if key in reserved:
                continue
            if key in self._FACET_KEYS:
                key = key.replace('_', '.')
            params[key] = value

        if query is not None:
            if not isinstance(query, dict):
                return '"query" must be <dict>'

            filters = []
            handled = set()  # query keys that already produced their filter(s)
            #----------------------------------------------------#
            date_rules = (('dataset_start_date', None),
                          ('dataset_end_date', 'dataset_start_date'))
            for key, not_before in date_rules:
                if key in query:
                    error = self._check_date(query, key, not_before)
                    if error is not None:
                        return error
                    filters.append(key + ':"' + query[key] + '"')
                    handled.add(key)
            #----------------------------------------------------#
            if 'tags' in query:
                tags = query['tags']
                if not isinstance(tags, (int, float, str, list)):
                    return '"tags" must be <list> or <float> or <int> or <str>'
                if not isinstance(tags, list):
                    tags = [tags]
                filters.extend('tags:"' + str(tag) + '"' for tag in tags)
                handled.add('tags')
            #----------------------------------------------------#
            # Every remaining non-None entry becomes an exact-match filter.
            for key, value in query.items():
                if value is not None and key not in handled:
                    filters.append(str(key) + ':"' + str(value) + '"')
            params['fq_list'] = filters

        return self._call('package_search', dict(params, include_private=True))

    def _search_resources(self, query, kwargs):
        """Build the 'resources_search' request from *query* / *kwargs* and run it."""
        params = {key: value for key, value in kwargs.items() if key != 'fields'}

        if query is not None:
            if not isinstance(query, dict):
                return '"query" must be <dict>'
            date_rules = (('file_date_min', None),
                          ('file_date_max', 'file_date_min'))
            for key, not_before in date_rules:
                if key in query:
                    error = self._check_date(query, key, not_before)
                    if error is not None:
                        return error
            params['query'] = query

        return self._call('resources_search', params)

    def _search_tags(self, query, kwargs):
        """Build the 'tag_search' request from *query* / *kwargs* and run it."""
        params = {key: value for key, value in kwargs.items() if key != 'fields'}

        if 'vocabulary_id' not in kwargs:
            print('Missing "vocabulary_id" value: tags that don’t belong to any vocabulary')
        else:
            print('Only tags that belong to "{}" vocabulary'.format(kwargs['vocabulary_id']))

        if query is not None:
            if not isinstance(query, dict):
                return '"query" must be <dict>'
            if 'search' in query:
                if not isinstance(query['search'], (list, str)):
                    return '"search" must be <list> or <str>'
                params['query'] = query['search']

        return self._call('tag_search', params)

    def create(self, type_option, select=None, **kwargs):
        """
        Create an object.

        AVAILABLE PARAMETERS:
            SEE: "GUIA DE SCRIPT.pdf"

        STRUCTURE:
            <access_name>.create(type_option = <class 'str'>, param_1 = <class 'param_1'>, ...)

        For type_option='views', *select* must be 'resource' or 'dataset'.

        Returns the API result, an 'ERROR:: ...' message, or the caught
        exception object on failure.
        """
        if not isinstance(type_option, str):
            return 'ERROR:: "type_option" must be <str>'

        try:
            if type_option == 'resource':
                return resource.resource_create(self, **kwargs)

            if type_option == 'views':
                if select == 'resource':
                    # 'package' is not forwarded for resource views.
                    params = {key: value for key, value in kwargs.items() if key != 'package'}
                    return self._call('resource_create_default_resource_views', params)
                if select == 'dataset':
                    return self._call('package_create_default_resource_views', kwargs)
                return 'ERROR:: "select = %s" is not accepted' % (select)

            action_name = self._CREATE_ACTIONS.get(type_option)
            if action_name is None:
                return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
            return self._call(action_name, kwargs)
        except Exception as exc:
            return exc

    def patch(self, type_option, **kwargs):
        """
        Update an existing object.

        AVAILABLE PARAMETERS:
            SEE: "GUIA DE SCRIPT.pdf"

        STRUCTURE:
            <access_name>.patch(type_option = <class 'str'>, param_1 = <class 'param_1'>, ...)

        Returns the API result, an 'ERROR:: ...' message, or the caught
        exception object on failure.
        """
        if not isinstance(type_option, str):
            return 'ERROR:: "type_option" must be <str>'

        try:
            if type_option == 'resource':
                return resource.resource_patch(self, **kwargs)

            # TODO: for 'dataset', restrict the update to dataset parameters
            # only (it should not include resources).
            action_name = self._PATCH_ACTIONS.get(type_option)
            if action_name is None:
                return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
            return self._call(action_name, kwargs)
        except Exception as exc:
            return exc

    def delete(self, type_option, select=None, **kwargs):
        """
        Delete and/or purge an object.

        AVAILABLE PARAMETERS:
            SEE: "GUIA DE SCRIPT.pdf"

        STRUCTURE:
            <access_name>.delete(type_option = <class 'str'>, param_1 = <class 'param_1'>, ...)

        For 'dataset' and 'project', *select* must be 'delete' or 'purge';
        for 'resource', *select* is required and forwarded to
        resource.resource_delete.

        Returns the API result, an 'ERROR:: ...' message, or the caught
        exception object on failure.
        """
        if not isinstance(type_option, str):
            return 'ERROR:: "type_option" must be <str>'

        try:
            if type_option in self._SELECT_DELETE_ACTIONS:
                if select is None:
                    return 'ERROR:: "select" must not be "None"'
                action_name = None
                if isinstance(select, str):
                    action_name = self._SELECT_DELETE_ACTIONS[type_option].get(select)
                if action_name is None:
                    return 'ERROR:: "select = %s" is not accepted' % (select)
                return self._call(action_name, kwargs)

            if type_option == 'resource':
                if select is None:
                    return 'ERROR:: "select" must not be "None"'
                return resource.resource_delete(self, select, **kwargs)

            action_name = self._DELETE_ACTIONS.get(type_option)
            if action_name is None:
                return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
            return self._call(action_name, kwargs)
        except Exception as exc:
            return exc

    def download(self, id, processes=1, path=os.path.expanduser("~"), **kwargs):
        """
        Download the existing files of one or more datasets.

        AVAILABLE PARAMETERS:
            SEE: "GUIA DE SCRIPT.pdf"

        STRUCTURE:
            <access_name>.download(id = <class 'str' or 'list'>, param_1 = <class 'param_1'>, ...)

        id:         dataset id/name, or a list of them.
        processes:  int, forwarded as the '--processes' option.
        path:       existing, writable directory that receives the files.

        Returns an 'ERROR:: ...' message when an argument is invalid,
        otherwise whatever download.dump_things_change returns.
        """
        #------------------ PATH ----------------------#
        if not isinstance(path, str):
            return 'ERROR:: "path" must be: <class "str">'
        if not os.path.isdir(path):
            return 'ERROR:: "path" does not exist'
        if not path.endswith(os.sep):
            path = path + os.sep
        # Create and remove a uniquely named file to prove the directory is writable.
        probe = path + datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f") + '.txt'
        try:
            with open(probe, 'w'):
                pass
            os.remove(probe)
        except (IOError, OSError):
            return 'ERROR:: Access denied, you are not authorized to write files: "%s"' % (path)

        #------------------ PROCESSES -----------------#
        if not isinstance(processes, int):
            return 'ERROR:: "processes" must be: <class "int">'

        #------------------ ID OR NAME ----------------#
        if isinstance(id, str):
            id = [id]
        elif isinstance(id, list):
            id = list(map(str, id))
        else:
            return 'ERROR:: dataset "id" must be: <class "str" or "list">'
        #----------------------------------------------#
        # Option dictionary consumed by download.dump_things_change.
        arguments = {
            '--apikey': self.Authorization,
            '--ckan-user': None,
            '--config': None,
            '--datapackages': path,
            '--datastore-fields': False,
            '--get-request': False,
            '--insecure': not self.verify,
            '--processes': str(processes),
            '--quiet': False,
            '--remote': self.url,
            '--worker': False,
            'ID_OR_NAME': id,
            'datasets': True,
            'dump': True,
        }
        return download.dump_things_change(self.ckan, 'datasets', arguments, self.ua, **kwargs)
@@ -1,236 +1,236
1 #from ckanapi.datapackage import populate_schema_from_datastore
1 #from ckanapi.datapackage import populate_schema_from_datastore
2 from ckanapi.cli import workers, dump
2 from ckanapi.cli import workers, dump
3 from ckanapi.cli.utils import pretty_json, completion_stats, compact_json, quiet_int_pipe
3 from ckanapi.cli.utils import pretty_json, completion_stats, compact_json, quiet_int_pipe
4 from datetime import datetime
4 from datetime import datetime
5 from tqdm import tqdm
5 from tqdm import tqdm
6 import sys
6 import sys
7 import json
7 import json
8 import os
8 import os
9 import requests
9 import requests
10 import six
10 import six
11
11
# Bind ``urlparse`` to the *function* on both major Python versions.
# A plain ``import urlparse`` on Python 2 would bind the module instead, and
# every later ``urlparse(url)`` call would fail with "module is not callable".
if sys.version_info.major == 3:
    from urllib.parse import urlparse
else:
    from urlparse import urlparse

# Number of bytes requested per chunk when streaming a resource to disk.
DL_CHUNK_SIZE = 100 * 1024
18
18
def dump_things_change(ckan, thing, arguments, ua, worker_pool=None, stdout=None, stderr=None, **kwargs):
    """Dump the requested datasets and optionally download them as datapackages.

    Parameters:
        ckan: client object; ``ckan.action.url_resources`` is called once per
            requested dataset and ``ckan.parallel_limit`` is honoured if present.
        thing: kind of object to dump, forwarded to the ckanapi dump helpers.
        arguments: docopt-style mapping. Keys read here: ``--worker``,
            ``--datapackages``, ``ID_OR_NAME``, ``--processes``, ``--quiet``,
            ``--apikey``, ``--remote`` and ``--insecure``.
        ua: value sent as the ``User-Agent`` header when downloading resources.
        worker_pool: callable creating the worker pool (defaults to
            ``workers.worker_pool``).
        stdout, stderr: binary streams for JSON-lines output and progress
            messages; default to the process streams.
        **kwargs: extra keyword arguments forwarded to ``url_resources``.

    Returns:
        The worker result when ``--worker`` is set; the exception instance if
        a ``url_resources`` call fails; ``1`` if a worker produced no result
        or a pipe error was recorded; ``2`` on interrupt; otherwise ``None``.
    """
    if worker_pool is None:
        worker_pool = workers.worker_pool
    if stdout is None:
        stdout = getattr(sys.__stdout__, 'buffer', sys.__stdout__)
    if stderr is None:
        stderr = getattr(sys.stderr, 'buffer', sys.stderr)

    if arguments['--worker']:
        return dump.dump_things_worker(ckan, thing, arguments)

    names = arguments['ID_OR_NAME']
    if names and isinstance(names[0], dict):
        names = [rec.get('name', rec.get('id')) for rec in names]

    # Ask the server which resource URLs are allowed for every dataset.
    # Callers receive the exception object as the return value on failure.
    filtered_urls = {}
    for val in names:
        try:
            filtered_urls[val] = getattr(ckan.action, 'url_resources')(id=val, **kwargs)
        except Exception as exc:
            return exc

    datapackages_path = arguments['--datapackages']
    # When datapackages are written, the JSON-lines stream is discarded.
    # The devnull handle is owned by this function and closed in ``finally``.
    devnull = open(os.devnull, 'wb') if datapackages_path else None
    jsonl_output = devnull if devnull is not None else stdout

    try:
        cmd = dump._worker_command_line(thing, arguments)
        processes = int(arguments['--processes'])
        if hasattr(ckan, 'parallel_limit'):
            processes = min(processes, ckan.parallel_limit)
        stats = completion_stats(processes)
        pool = worker_pool(cmd, processes, enumerate(compact_json(n) + b'\n' for n in names))

        results = {}
        expecting_number = 0
        with quiet_int_pipe() as errors:
            for job_ids, finished, result in pool:
                if not result:
                    # The worker returned nothing usable.
                    return 1
                _timestamp, error, record = json.loads(result.decode('utf-8'))
                results[finished] = record

                # ``record`` may be empty when the worker reported an error,
                # so never call ``.get`` on it unguarded.
                dataset_name = record.get('name', '') if record else ''
                datapackage_dir = ''
                if datapackages_path and record:
                    datapackage_dir = name_no_repetition(dataset_name, datapackages_path)

                if not arguments['--quiet']:
                    stderr.write('** Finished: {0} | Job IDs: {1} | Next Report: {2} | Error: {3} | Path: {4} | Dataset Name: {5}\n'.format(
                        finished,
                        job_ids,
                        next(stats),
                        error,
                        datapackage_dir,
                        dataset_name,
                    ).encode('utf-8'))

                if datapackages_path and record:
                    # The URL filter is keyed by whatever the caller passed
                    # (name or id); try the name first, then the id.
                    try:
                        filter_url = filtered_urls[dataset_name]
                    except KeyError:
                        filter_url = filtered_urls[record.get('id', '')]
                    create_datapackage_change(record, filter_url, datapackage_dir, stderr, arguments['--apikey'], arguments['--remote'], arguments['--insecure'], ua)

                # Emit records in the same order as ``names``.
                while expecting_number in results:
                    record = results.pop(expecting_number)
                    if record:
                        jsonl_output.write(compact_json(record, sort_keys=True) + b'\n')
                    expecting_number += 1
        if 'pipe' in errors:
            return 1
        if 'interrupt' in errors:
            return 2
    finally:
        if devnull is not None:
            devnull.close()
110
110
def create_datapackage_change(record, filtered_url, datapackage_dir, stderr, apikey, host_url, insecure, ua):
    """Download the allowed resources of a dataset and write ``datapackage.json``.

    Parameters:
        record: dataset dictionary; its ``resources`` list is iterated and its
            ``path`` key is set to ``datapackage_dir``.
        filtered_url: container of ``{'name': ..., 'url': ...}`` entries; only
            resources whose name/url pair is found in it are downloaded.
        datapackage_dir: directory to create; a ``data`` sub-directory inside
            it receives the downloaded files.
        stderr: binary stream handed to the resource downloader for messages.
        apikey, host_url, insecure, ua: forwarded unchanged to
            ``create_resource_change``.

    Returns:
        Tuple ``(datapackage_dir, datapackage, json_path)``.

    Raises:
        OSError: from ``os.makedirs`` when the ``data`` directory cannot be
            created (for instance because it already exists).
    """
    data_dir = os.path.join(datapackage_dir, 'data')
    os.makedirs(data_dir)
    record['path'] = datapackage_dir

    ckan_resources = []
    for resource in tqdm(record.get('resources', []), unit_scale=True):
        # Skip API-type resources regardless of how the format is capitalised;
        # a missing or null format is treated as "not API".
        if (resource.get('format') or '').lower() == 'api':
            continue

        url = resource.get('url')
        if not url:
            # Nothing to download.
            continue

        if {'name': resource['name'], 'url': url} not in filtered_url:
            continue

        filename = name_no_repetition(resource['name'], data_dir, 'resource')
        resource['path'] = os.path.join(data_dir, filename)

        # A falsy result means the resource must be left out of the package.
        if create_resource_change(resource, stderr, apikey, host_url, insecure, ua):
            ckan_resources.append(resource)

    dataset = dict(record, resources=ckan_resources)
    datapackage = dataset_to_datapackage_change(dataset)

    json_path = os.path.join(datapackage_dir, 'datapackage.json')
    with open(json_path, 'wb') as out:
        out.write(pretty_json(datapackage))

    return datapackage_dir, datapackage, json_path
155
155
def create_resource_change(resource, stderr, apikey, host_url, insecure, ua):
    """Stream one resource to ``resource['path']``.

    Parameters:
        resource: resource dictionary; ``url`` is read (and may be rewritten,
            see below) and ``path`` is the destination file.
        stderr: binary stream that receives error messages.
        apikey: value sent in the ``Authorization`` header.
        host_url: URL of the repository being queried.
        insecure: when true, TLS certificate verification is disabled.
        ua: value sent in the ``User-Agent`` header.

    Returns:
        ``False`` when the server answers with an HTTP error status, otherwise
        the (possibly updated) ``resource`` dictionary. Connection or I/O
        failures are reported on ``stderr`` and still return ``resource``; in
        that case no file is left at ``resource['path']``.
    """
    # ---------- REPLACE URL --------- #
    # Resources published under www.igp.gob.pe are fetched through the host
    # actually being queried when that host is a different one.
    host = urlparse(host_url)
    source = urlparse(resource['url'])
    if host.netloc != 'www.igp.gob.pe' and source.netloc == 'www.igp.gob.pe':
        resource['url'] = resource['url'].replace(
            source.scheme + '://' + source.netloc,
            host.scheme + '://' + host.netloc,
            1)  # only the leading scheme://host part, never a later match
    #----------------------------------#
    response = None
    partial_path = None  # set while a file is open but not fully written
    try:
        response = requests.get(resource['url'], headers={'Authorization': apikey, 'User-Agent': ua}, stream=True, verify=not insecure)
        try:
            response.raise_for_status()
        except requests.exceptions.HTTPError:
            return False

        with open(resource['path'], 'wb') as f:
            partial_path = resource['path']
            for chunk in response.iter_content(chunk_size=DL_CHUNK_SIZE):
                if chunk:
                    f.write(chunk)
        partial_path = None

    except requests.ConnectionError:
        stderr.write('URL {0} refused connection. The resource will not be downloaded\n'.format(resource['url']).encode('utf-8'))
    except requests.exceptions.RequestException as e:
        stderr.write('{0}\n'.format(str(e.args[0]) if len(e.args) > 0 else '').encode('utf-8'))
    except Exception as e:
        stderr.write('{0}\n'.format(str(e.args[0]) if len(e.args) > 0 else '').encode('utf-8'))
    finally:
        # A streamed response holds its connection until it is closed.
        if response is not None:
            response.close()
        # Never leave a truncated download behind: it would be mistaken for
        # a complete local copy later on.
        if partial_path is not None and os.path.isfile(partial_path):
            try:
                os.remove(partial_path)
            except OSError:
                # Best-effort cleanup; the download error was already reported.
                pass
    return resource
182
182
def _parse_metadata_modified(value):
    """Parse an ISO-8601 timestamp with or without fractional seconds."""
    # ``datetime.isoformat()`` drops the fractional part when microseconds
    # are zero, so both layouts have to be accepted.
    for fmt in ("%Y-%m-%dT%H:%M:%S.%f", "%Y-%m-%dT%H:%M:%S"):
        try:
            return datetime.strptime(value, fmt)
        except ValueError:
            continue
    raise ValueError('Unsupported metadata_modified value: {0!r}'.format(value))


def dataset_to_datapackage_change(dataset_dict):
    """Build the datapackage summary dictionary for a dataset.

    Parameters:
        dataset_dict: mapping with the keys ``name``, ``id``, ``path`` and
            ``metadata_modified`` (ISO-8601 text), plus an optional
            ``resources`` list.

    Returns:
        Dictionary with ``name``, ``id``, ``path`` and ``last_update``
        (formatted as ``"%d-%b-%Y %I.%M %p"``). A ``resources`` list of
        converted resources is added only when the dataset has any.

    Raises:
        KeyError: when a required key is missing.
        ValueError: when ``metadata_modified`` cannot be parsed.
    """
    modified = _parse_metadata_modified(dataset_dict['metadata_modified'])
    dp = {'name': dataset_dict['name'],
          'id': dataset_dict['id'],
          'path': dataset_dict['path'],
          'last_update': modified.strftime("%d-%b-%Y %I.%M %p")}

    resources = dataset_dict.get('resources')
    if resources:
        dp['resources'] = [convert_to_datapackage_resource_change(r)
                           for r in resources]
    return dp
194
194
def convert_to_datapackage_resource_change(resource_dict):
    """Reduce a resource dictionary to the fields stored in the datapackage.

    Parameters:
        resource_dict: mapping that may contain ``id``, ``name``, ``path``,
            ``url`` and ``schema``.

    Returns:
        Dictionary holding ``id`` and ``name`` when present, then ``path``
        when it points to an existing file or otherwise ``url`` (if any),
        and ``schema`` decoded from JSON when it is a JSON string.
    """
    resource = {}

    if resource_dict.get('id'):
        resource['id'] = resource_dict['id']

    if resource_dict.get('name'):
        resource['name'] = resource_dict['name']

    # Prefer the downloaded file; fall back to the remote URL when there is
    # no usable local copy. A missing ``url`` is tolerated, not a KeyError.
    path = resource_dict.get('path')
    if path and os.path.isfile(path):
        resource['path'] = path
    elif resource_dict.get('url'):
        resource['url'] = resource_dict['url']

    schema = resource_dict.get('schema')
    if isinstance(schema, six.string_types):
        try:
            resource['schema'] = json.loads(schema)
        except ValueError:
            # Not valid JSON: keep the raw text.
            resource['schema'] = schema
    elif isinstance(schema, dict):
        resource['schema'] = schema
    return resource
219
219
def name_no_repetition(name, dir, option=''):
    """Return a name that does not collide with an existing entry in ``dir``.

    The plain ``name`` is used when it is free; otherwise the first free
    candidate of the form ``(1)name``, ``(2)name``, ... is chosen.

    Parameters:
        name: desired file or directory name.
        dir: directory in which the name must be unique.
        option: pass ``'resource'`` to get only the chosen name; any other
            value returns the name joined to ``dir``.

    Returns:
        The chosen name, or its path inside ``dir`` (see ``option``).
    """
    candidate = name
    count = 0
    # The unprefixed name is checked once; after that only the numbered
    # variants need probing.
    while os.path.exists(os.path.join(dir, candidate)):
        count += 1
        candidate = '(' + str(count) + ')' + name

    if option == 'resource':
        return candidate
    return os.path.join(dir, candidate)
@@ -1,17 +1,17
# encoding: utf-8
"""Packaging script for the ``jrodb`` client library."""
from setuptools import setup

setup(
    name="jrodb",
    version="2.9.2.1",
    description="Data Repository - JRO",
    author="Edson Ynilupu Mattos",
    author_email="eynilupu@igp.gob.pe",
    url="http://intranet.igp.gob.pe:8082/DATABASES/ckanext-jro/api-cliente",
    packages=["jrodb"],
    install_requires=[
        "ckanapi==4.7",
        "requests",
        # The package code does ``import six`` itself, so declare it here
        # rather than relying on another dependency to pull it in.
        "six",
        "tqdm",
    ],
)
General Comments 0
You need to be logged in to leave comments. Login now