@@ -1,514 +1,515 @@
 from ckanapi import RemoteCKAN
 from datetime import datetime
 from jrodb import download
 from jrodb import resource
 #from ckanapi.errors import NotAuthorized, NotFound, ValidationError, SearchQueryError, SearchError, CKANAPIError, ServerIncompatibleError
 import sys
 import platform
 import os
 import requests

 class Api():
     """
     FINALIDAD:
         Script para administrar y obtener la data del repositorio por medio de APIs.

     REQUISITIOS PREVIOS:
         - Paso 1: Tener "pip [Python 2]" o "pip3 [Python 3]" instalado:
         - Paso 2: Instalar los siguientes paquetes:
             En Python 2
                 - pip install -e git+http://intranet.igp.gob.pe:8082/DATABASES/ckanext-jro/api-cliente#egg=jrodb
             En Python 3
                 - pip3 install -e git+http://intranet.igp.gob.pe:8082/DATABASES/ckanext-jro/api-cliente#egg=jrodb

     FUNCIONES DISPONIBLES:
         - action
         - show
         - search
         - create
         - patch
         - delete
         - download

     EJEMPLOS:
         #1:
         with Api('http://demo.example.com', Authorization='#########') as <access_name>:
             ... some operation(s) ...
         #2:
         <access_name> = Api('http://example.com', Authorization='#########')
         ... some operation(s) ...
         <access_name>.ckan.close()

     REPORTAR ALGUN PROBLEMA:
         Debe enviar un correo a eynilupu@igp.gob.pe detallando los siguientes pasos:
             1) Correo para contactarlo
             2) Descripcion del problema
             3) ¿En que paso o seccion encontro el problema?
             4) ¿Cual era el resultado que usted esperaba?
     """
     def __init__(self, url, Authorization=None, secure=True):
         #-------- Check Secure -------#
         self.verify = secure
         if not secure and isinstance(secure, bool):
             session = requests.Session()
             session.verify = False
         else:
             session = None
         #------------------------------#
         self.url = url
-        ua = 'CKAN_JRO/2.9.2 (+'+str(self.url)+')'
+        #ua = 'CKAN_JRO/2.9.2 (+'+str(self.url)+')'
+        self.ua = 'CKAN_JRO/2.9.2 (+http://intranet.igp.gob.pe:8082/DATABASES/ckanext-jro/api-cliente)'
         #ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
-        self.ckan = RemoteCKAN(self.url, apikey=Authorization, user_agent=ua, session=session)
+        self.ckan = RemoteCKAN(self.url, apikey=Authorization, user_agent=self.ua, session=session)
         #self.ckan = RemoteCKAN(self.url, apikey=Authorization)
         self.Authorization = Authorization
         # Change for --> self.separator = os.sep
         if platform.system() == 'Windows':
             self.separator = '\\'
         else:
             self.separator = '/'

         self.chunk_size = 1024
         self.list = []
         self.dict = {}
         self.str = ''
         self.check = 1
         self.cont = 0

     def __enter__(self):
         return self

     def __exit__(self, *args):
         self.ckan.close()

     def action(self, action, **kwargs):
         """
         FINALIDAD:
             Funcion para llamar a las APIs disponibles

         APIs DISPONIBLES:
             CONSULTAR: "GUIA DE SCRIPT.pdf"

         EJEMPLO:
             <access_name>.action(<consuming API>, param_1 = <class 'param_1'>, ...)
         """
         #--------------- CASE: PACKAGE SEARCH ---------------#
         if kwargs is not None:
             if action == 'package_search':
                 self.list = ['facet_mincount', 'facet_limit', 'facet_field']
                 for facet in self.list:
                     if facet in kwargs:
                         kwargs[facet.replace('_', '.')] = kwargs[facet]
                         kwargs.pop(facet)
         #----------------------------------------------------#
         try:
             return getattr(self.ckan.action, action)(**kwargs)
         except:
             _, exc_value, _ = sys.exc_info()
             return exc_value

     def show(self, type_option, id, **kwargs):
         '''
         FINALIDAD:
             Funcion personalizada para una busqueda en especifico.

         PARAMETROS DISPONIBLES:
             CONSULTAR: "GUIA DE SCRIPT.pdf"

         ESTRUCTURA:
             <access_name>.show(type_option = <class 'str'>, id = <class 'str'>, param_1 = <class 'param_1'>, ...)
         '''
         if type(type_option) is str:
             try:
                 if type_option == 'dataset':
                     return getattr(self.ckan.action, 'package_show')(id=id, **kwargs)
                 elif type_option == 'resource':
                     return getattr(self.ckan.action, 'resource_show')(id=id, **kwargs)
                 elif type_option == 'project':
                     return getattr(self.ckan.action, 'organization_show')(id=id, **kwargs)
                 elif type_option == 'collaborator':
                     return getattr(self.ckan.action, 'package_collaborator_list_for_user')(id=id, **kwargs)
                 elif type_option == 'member':
                     return getattr(self.ckan.action, 'organization_list_for_user')(id=id, **kwargs)
                 elif type_option == 'vocabulary':
                     return getattr(self.ckan.action, 'vocabulary_show')(id=id, **kwargs)
                 elif type_option == 'tag':
                     if not 'vocabulary_id' in kwargs:
                         print('Missing "vocabulary_id" value: assume it is a free tag')
                     return getattr(self.ckan.action, 'tag_show')(id=id, **kwargs)
                 elif type_option == 'user':
                     return getattr(self.ckan.action, 'user_show')(id=id, **kwargs)
                 elif type_option == 'job':
                     return getattr(self.ckan.action, 'job_show')(id=id, **kwargs)
                 else:
                     return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
             except:
                 _, exc_value, _ = sys.exc_info()
                 return exc_value
         else:
             return 'ERROR:: "type_option" must be a str'

     def search(self, type_option, query=None, **kwargs):
         '''
         FINALIDAD:
             Funcion personalizada para busquedas que satisfagan algun criterio.

         PARAMETROS DISPONIBLES:
             CONSULTAR: "GUIA DE SCRIPT.pdf"

         ESTRUCTURA:
             <access_name>.search(type_option = <class 'str'>, query = <class 'dict'>, param_1 = <class 'param_1'>, ...)
         '''
         if type(type_option) is str:
             try:
                 if type_option == 'dataset':
                     key_replace = ['fq', 'fq_list', 'include_private']
                     key_point = ['facet_mincount', 'facet_limit', 'facet_field']
                     for key1, value1 in kwargs.items():
                         if not key1 in key_replace:
                             if key1 in key_point:
                                 self.dict[key1.replace('_', '.')] = value1
                             else:
                                 self.dict[key1] = value1

                     if query is not None:
                         if type(query) is dict:
                             self.dict['fq_list'] = []
                             #NUM_RESOURCES_MIN / NUM_RESOURCES_MAX
                             #----------------------------------------------------#
                             if 'dataset_start_date' in query:
                                 if type(query['dataset_start_date']) is str:
                                     try:
                                         datetime.strptime(query['dataset_start_date'], '%Y-%m-%d')
                                         if len(query['dataset_start_date']) != 10:
                                             return '"dataset_start_date", must be: <YYYY-MM-DD>'
                                         self.dict['fq_list'].append('dataset_start_date:"'+query['dataset_start_date']+'"')
                                         self.list.append('dataset_start_date')
                                     except:
                                         return '"dataset_start_date" incorrect: "%s"' % (query['dataset_start_date'])
                                 else:
                                     return '"dataset_start_date" must be <str>'
                             #----------------------------------------------------#
                             if 'dataset_end_date' in query:
                                 if type(query['dataset_end_date']) is str:
                                     try:
                                         datetime.strptime(query['dataset_end_date'], '%Y-%m-%d')
                                         if len(query['dataset_end_date']) != 10:
                                             return '"dataset_end_date", must be: <YYYY-MM-DD>'

                                         if 'dataset_start_date' in query:
                                             if query['dataset_start_date'] > query['dataset_end_date']:
                                                 return '"dataset_end_date" must be greater than "dataset_start_date"'

                                         self.dict['fq_list'].append('dataset_end_date:"'+query['dataset_end_date']+'"')
                                         self.list.append('dataset_end_date')
                                     except:
                                         return '"dataset_end_date" incorrect: "%s"' % (query['dataset_end_date'])
                                 else:
                                     return '"dataset_end_date" must be <str>'
                             #----------------------------------------------------#
                             if 'tags' in query:
                                 if isinstance(query['tags'], (int, float, str, list)):
                                     if type(query['tags']) is list:
                                         for u in query['tags']:
                                             self.dict['fq_list'].append('tags:"'+str(u)+'"')
                                     else:
                                         self.dict['fq_list'].append('tags:"'+str(query['tags'])+'"')

                                     self.list.append('tags')
                                 else:
                                     return '"tags" must be <list> or <float> or <int> or <str>'
                             #----------------------------------------------------#
                             for key, value in query.items():
                                 if value is not None and not key in self.list:
                                     self.dict['fq_list'].append(str(key)+':"'+str(value)+'"')
                         else:
                             return '"query" must be <dict>'

                     return getattr(self.ckan.action, 'package_search')(include_private=True, **self.dict)

                 elif type_option == 'resource':
                     for key1, value1 in kwargs.items():
                         if key1 != 'fields':
                             self.dict[key1] = value1

                     if query is not None:
                         if type(query) is dict:
                             #----------------------------------------------------#
                             if 'file_date_min' in query:
                                 if type(query['file_date_min']) is str:
                                     try:
                                         datetime.strptime(query['file_date_min'], '%Y-%m-%d')
                                         if len(query['file_date_min']) != 10:
                                             return '"file_date_min", must be: <YYYY-MM-DD>'
                                     except:
                                         return '"file_date_min" incorrect: "%s"' % (query['file_date_min'])
                                 else:
                                     return '"file_date_min" must be <str>'
                             #----------------------------------------------------#
                             if 'file_date_max' in query:
                                 if type(query['file_date_max']) is str:
                                     try:
                                         datetime.strptime(query['file_date_max'], '%Y-%m-%d')
                                         if len(query['file_date_max']) != 10:
                                             return '"file_date_max", must be: <YYYY-MM-DD>'

                                         if 'file_date_min' in query:
                                             if query['file_date_min'] > query['file_date_max']:
                                                 return '"file_date_max" must be greater than "file_date_min"'
                                     except:
                                         return '"file_date_max" incorrect: "%s"' % (query['file_date_max'])
                                 else:
                                     return '"file_date_max" must be <str>'
                             #----------------------------------------------------#
                             self.dict['query'] = query
                         else:
                             return '"query" must be <dict>'
                     return getattr(self.ckan.action, 'resources_search')(**self.dict)

                 elif type_option == 'tag':
                     for key1, value1 in kwargs.items():
                         if key1 != 'fields':
                             self.dict[key1] = value1

                     if not 'vocabulary_id' in kwargs:
                         print('Missing "vocabulary_id" value: tags that don’t belong to any vocabulary')
                     else:
                         print('Only tags that belong to "{}" vocabulary'.format(kwargs['vocabulary_id']))

                     if query is not None:
                         if type(query) is dict:
                             if 'search' in query:
                                 if type(query['search']) is list or type(query['search']) is str:
                                     self.dict['query'] = query['search']
                                 else:
                                     return '"search" must be <list> or <str>'
                         else:
                             return '"query" must be <dict>'
                     return getattr(self.ckan.action, 'tag_search')(**self.dict)

                 else:
                     return 'ERROR:: "type_option = %s" is not accepted' % (type_option)

             except:
                 _, exc_value, _ = sys.exc_info()
                 return exc_value
         else:
             return 'ERROR:: "type_option" must be <str>'

     def create(self, type_option, select=None, **kwargs):
         '''
         FINALIDAD:
             Funcion personalizada para crear.

         PARAMETROS DISPONIBLES:
             CONSULTAR: "GUIA DE SCRIPT.pdf"

         ESTRUCTURA:
             <access_name>.create(type_option = <class 'str'>, param_1 = <class 'param_1'>, ...)
         '''
         if type(type_option) is str:
             try:
                 if type_option == 'dataset':
                     return getattr(self.ckan.action, 'package_create')(**kwargs)
                 if type_option == 'resource':
                     return resource.resource_create(self, **kwargs)
                 elif type_option == 'project':
                     return getattr(self.ckan.action, 'organization_create')(**kwargs)
                 elif type_option == 'member':
                     return getattr(self.ckan.action, 'organization_member_create')(**kwargs)
                 elif type_option == 'collaborator':
                     return getattr(self.ckan.action, 'package_collaborator_create')(**kwargs)
                 elif type_option == 'vocabulary':
                     return getattr(self.ckan.action, 'vocabulary_create')(**kwargs)
                 elif type_option == 'tag':
                     return getattr(self.ckan.action, 'tag_create')(**kwargs)
                 elif type_option == 'user':
                     return getattr(self.ckan.action, 'user_create')(**kwargs)
                 elif type_option == 'views':
                     if 'resource' == select:
                         self.list = ['package']
                         for key1, value1 in kwargs.items():
                             if not key1 in self.list:
                                 self.dict[key1] = value1
                         return getattr(self.ckan.action, 'resource_create_default_resource_views')(**self.dict)
                     elif 'dataset' == select:
                         return getattr(self.ckan.action, 'package_create_default_resource_views')(**kwargs)
                     else:
                         return 'ERROR:: "select = %s" is not accepted' % (select)
                 else:
                     return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
             except:
                 _, exc_value, _ = sys.exc_info()
                 return exc_value
         else:
             return 'ERROR:: "type_option" must be <str>'

     def patch(self, type_option, **kwargs):
         '''
         FINALIDAD:
             Funciones personalizadas para actualizar

         PARAMETROS DISPONIBLES:
             CONSULTAR: "GUIA DE SCRIPT.pdf"

         ESTRUCTURA:
             <access_name>.patch(type_option = <class 'str'>, param_1 = <class 'param_1'>, ...)
         '''
         if type(type_option) is str:
             try:
                 if type_option == 'dataset':
                     #Agregar que solo se debe modificar parámetros del Dataset y que no incluya Resources
                     return getattr(self.ckan.action, 'package_patch')(**kwargs)
                 elif type_option == 'project':
                     return getattr(self.ckan.action, 'organization_patch')(**kwargs)
                 elif type_option == 'resource':
                     return resource.resource_patch(self, **kwargs)
                 elif type_option == 'member':
                     return getattr(self.ckan.action, 'organization_member_create')(**kwargs)
                 elif type_option == 'collaborator':
                     return getattr(self.ckan.action, 'package_collaborator_create')(**kwargs)
                 else:
                     return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
             except:
                 _, exc_value, _ = sys.exc_info()
                 return exc_value
         else:
             return 'ERROR:: "type_option" must be <str>'

     def delete(self, type_option, select=None, **kwargs):
         '''
         FINALIDAD:
             Función personalizada para eliminar y/o purgar.

         PARAMETROS DISPONIBLES:
             CONSULTAR: "GUIA DE SCRIPT.pdf"

         ESTRUCTURA:
             <access_name>.delete(type_option = <class 'str'>, param_1 = <class 'param_1'>, ...)
         '''
         if type(type_option) is str:
             try:
                 if type_option == 'dataset':
                     if select is None:
                         return 'ERROR:: "select" must not be "None"'
                     else:
                         if 'delete' == select:
                             return getattr(self.ckan.action, 'package_delete')(**kwargs)
                         elif 'purge' == select:
                             return getattr(self.ckan.action, 'dataset_purge')(**kwargs)
                         else:
                             return 'ERROR:: "select = %s" is not accepted' % (select)
                 elif type_option == 'project':
                     if select is None:
                         return 'ERROR:: "select" must not be "None"'
                     else:
                         if 'delete' == select:
                             return getattr(self.ckan.action, 'organization_delete')(**kwargs)
                         elif 'purge' == select:
                             return getattr(self.ckan.action, 'organization_purge')(**kwargs)
                         else:
                             return 'ERROR:: "select = %s" is not accepted' % (select)
                 elif type_option == 'resource':
                     if select is None:
                         return 'ERROR:: "select" must not be "None"'
                     else:
                         return resource.resource_delete(self, select, **kwargs)
                 elif type_option == 'vocabulary':
                     return getattr(self.ckan.action, 'vocabulary_delete')(**kwargs)
                 elif type_option == 'tag':
                     return getattr(self.ckan.action, 'tag_delete')(**kwargs)
                 elif type_option == 'user':
                     return getattr(self.ckan.action, 'user_delete')(**kwargs)
                 else:
                     return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
             except:
                 _, exc_value, _ = sys.exc_info()
                 return exc_value
         else:
             return 'ERROR:: "type_option" must be <str>'

     def download(self, id, processes=1, path=os.path.expanduser("~"), **kwargs):
         '''
         FINALIDAD:
             Funcion personalizada avanzada para la descarga de archivos existentes de un(os) dataset(s).

         PARAMETROS DISPONIBLES:
             CONSULTAR: "GUIA DE SCRIPT.pdf"

         ESTRUCTURA:
             <access_name>.download(id = <class 'str' or 'list'>, param_1 = <class 'param_1'>, ...)
         '''
         #------------------ PATH ----------------------#
         if isinstance(path, str):
             if os.path.isdir(path):
                 if not path.endswith(os.sep):
                     path = path + os.sep
                 test_txt = path + datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f")+'.txt'
                 try:
                     file_txt = open(test_txt, 'w')
                     file_txt.close()
                     os.remove(test_txt)
                 except:
                     return 'ERROR:: Access denied, you are not authorized to write files: "%s"' % (path)
             else:
                 return 'ERROR:: "path" does not exist'
         else:
             return 'ERROR:: "path" must be: <class "str">'

         #------------------ PROCESSES -----------------#
         if not isinstance(processes, int):
             return 'ERROR:: "processes" must be: <class "int">'

         #------------------ ID OR NAME ----------------#
         if isinstance(id, str):
             id = [id]
         elif isinstance(id, list):
             id = list(map(str, id))
         else:
             return 'ERROR:: dataset "id" must be: <class "str" or "list">'
         #----------------------------------------------#
         arguments = {
             '--apikey': self.Authorization,
             '--ckan-user': None,
             '--config': None,
             '--datapackages': path,
             '--datastore-fields': False,
             '--get-request': False,
             '--insecure': not self.verify,
             '--processes': str(processes),
             '--quiet': False,
             '--remote': self.url,
             '--worker': False,
             #'--log': 'log.txt',
             #'--all': False,
             #'--gzip': False,
             #'--output': None,
             #'--max-records': None,
             #'--output-json': False,
             #'--output-jsonl': False,
             #'--create-only': False,
             #'--help': False,
             #'--input': None,
             #'--input-json': False,
             #'--start-record': '1',
             #'--update-only': False,
             #'--upload-logo': False,
             #'--upload-resources': False,
             #'--version': False,
             'ID_OR_NAME': id,
             'datasets': True,
             'dump': True,
             #'ACTION_NAME': None,
             #'KEY:JSON': [],
             #'KEY=STRING': [],
             #'KEY@FILE': [],
             #'action': False,
             #'delete': False,
             #'groups': False,
             #'load': False,
             #'organizations': False,
             #'related': False,
             #'search': False,
             #'users': False
         }
-        return download.dump_things_change(self.ckan, 'datasets', arguments, **kwargs)
\ No newline at end of file
+        return download.dump_things_change(self.ckan, 'datasets', arguments, self.ua, **kwargs)
\ No newline at end of file
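This first file pins a fixed User-Agent ('CKAN_JRO/2.9.2 (+http://intranet.igp.gob.pe:8082/DATABASES/ckanext-jro/api-cliente)') instead of one derived from the target URL, keeps it on self.ua, and now forwards it to the download helper as well, so the ckanapi session and the raw file downloads identify themselves the same way. A minimal usage sketch in the spirit of the EJEMPLOS section of the docstring; the import path of Api, the host URL, the token, and the dataset id are placeholders, not values taken from this repository:

    # Hypothetical import path; adjust to wherever the Api class lives in the jrodb package.
    from jrodb import Api

    with Api('http://demo.example.com', Authorization='#########') as repo:
        # Calls routed through repo.ckan send the User-Agent stored in repo.ua.
        dataset = repo.show('dataset', id='some-dataset-id')        # package_show
        results = repo.search('dataset', query={'tags': 'radar'})   # package_search
        # download() now passes repo.ua on to dump_things_change(),
        # so the per-file downloads below carry the same header.
        repo.download('some-dataset-id', processes=2, path='/tmp')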
@@ -1,236 +1,236 @@
 #from ckanapi.datapackage import populate_schema_from_datastore
 from ckanapi.cli import workers, dump
 from ckanapi.cli.utils import pretty_json, completion_stats, compact_json, quiet_int_pipe
 from datetime import datetime
 from tqdm import tqdm
 import sys
 import json
 import os
 import requests
 import six

 if sys.version_info.major == 3:
     from urllib.parse import urlparse
 else:
     import urlparse

 DL_CHUNK_SIZE = 100 * 1024

-def dump_things_change(ckan, thing, arguments, worker_pool=None, stdout=None, stderr=None, **kwargs):
+def dump_things_change(ckan, thing, arguments, ua, worker_pool=None, stdout=None, stderr=None, **kwargs):
     if worker_pool is None:
         worker_pool = workers.worker_pool
     if stdout is None:
         stdout = getattr(sys.__stdout__, 'buffer', sys.__stdout__)
     if stderr is None:
         stderr = getattr(sys.stderr, 'buffer', sys.stderr)

     if arguments['--worker']:
         return dump.dump_things_worker(ckan, thing, arguments)
     '''
     log = None
     if arguments['--log']:
         log = open(arguments['--log'], 'a')
     '''
     jsonl_output = stdout
     if arguments['--datapackages']:
         jsonl_output = open(os.devnull, 'wb')

     names = arguments['ID_OR_NAME']

     if names and isinstance(names[0], dict):
         names = [rec.get('name',rec.get('id')) for rec in names]
     '''
     if arguments['--datapackages']:
         arguments['--datastore-fields'] = True
     '''
     #----------------------------#
     filtered_urls = {}
     for val in names:
         try:
             filtered_urls[val] = getattr(ckan.action, 'url_resources')(id=val, **kwargs)
         except:
             _, exc_value, _ = sys.exc_info()
             return exc_value
     #----------------------------#

     cmd = dump._worker_command_line(thing, arguments)
     processes = int(arguments['--processes'])
     if hasattr(ckan, 'parallel_limit'):
         processes = min(processes, ckan.parallel_limit)
     stats = completion_stats(processes)
     pool = worker_pool(cmd, processes, enumerate(compact_json(n) + b'\n' for n in names))

     results = {}
     expecting_number = 0
     with quiet_int_pipe() as errors:
         for job_ids, finished, result in pool:
             if not result:
                 return 1
             timestamp, error, record = json.loads(result.decode('utf-8'))
             results[finished] = record

             #----------------------------------------#
             datapackages_path = arguments['--datapackages']
             datapackage_dir = name_no_repetition(record.get('name', ''), datapackages_path)
             #----------------------------------------#
             if not arguments['--quiet']:
                 stderr.write('** Finished: {0} | Job IDs: {1} | Next Report: {2} | Error: {3} | Path: {4} | Dataset Name: {5}\n'.format(
                     finished,
                     job_ids,
                     next(stats),
                     error,
                     datapackage_dir,
                     record.get('name', '') if record else '',
                 ).encode('utf-8'))
             '''
             if log:
                 log.write(compact_json([
                     timestamp,
                     finished,
                     error,
                     record.get('name', '') if record else None,
                 ]) + b'\n')
             '''
             if datapackages_path:
                 try:
                     filter_url = filtered_urls[record.get('name', '')]
                 except:
                     filter_url = filtered_urls[record.get('id', '')]
-                create_datapackage_change(record, filter_url, datapackage_dir, stderr, arguments['--apikey'], arguments['--remote'], arguments['--insecure'])
+                create_datapackage_change(record, filter_url, datapackage_dir, stderr, arguments['--apikey'], arguments['--remote'], arguments['--insecure'], ua)

             while expecting_number in results:
                 record = results.pop(expecting_number)
                 if record:
                     jsonl_output.write(compact_json(record, sort_keys=True) + b'\n')
                 expecting_number += 1
     if 'pipe' in errors:
         return 1
     if 'interrupt' in errors:
         return 2

-def create_datapackage_change(record, filtered_url, datapackage_dir, stderr, apikey, host_url, insecure):
+def create_datapackage_change(record, filtered_url, datapackage_dir, stderr, apikey, host_url, insecure, ua):
     resource_formats_to_ignore = ['API', 'api']

     os.makedirs(os.path.join(datapackage_dir, 'data'))
     record['path'] = datapackage_dir

     ckan_resources = []
     for resource in tqdm(record.get('resources', []), unit_scale=True):
     #for resource in record.get('resources', []):
         if resource['format'] in resource_formats_to_ignore:
             continue

         if not {'name': resource['name'], 'url': resource['url']} in filtered_url:
             continue

         if len(resource['url']) == 0:
             continue

         filename = name_no_repetition(resource['name'], os.path.join(datapackage_dir, 'data'), 'resource')
         resource['path'] = os.path.join(datapackage_dir, 'data', filename)

-        cres = create_resource_change(resource, stderr, apikey, host_url, insecure)
+        cres = create_resource_change(resource, stderr, apikey, host_url, insecure, ua)
         if not cres:
             continue
         '''
         #----------------------------------------#
         dres = {'path': os.path.join('data', filename),
                 'description': cres.get('description', ''),
                 'format': cres.get('format', ''),
                 'name': cres.get('name', ''),
                 'title': cres.get('name', '').title()}
         #----------------------------------------#
         populate_schema_from_datastore(cres, dres)
         '''
         ckan_resources.append(resource)

     dataset = dict(record, resources=ckan_resources)
     datapackage = dataset_to_datapackage_change(dataset)

     json_path = os.path.join(datapackage_dir, 'datapackage.json')
     with open(json_path, 'wb') as out:
         out.write(pretty_json(datapackage))

     return datapackage_dir, datapackage, json_path

-def create_resource_change(resource, stderr, apikey, host_url, insecure):
+def create_resource_change(resource, stderr, apikey, host_url, insecure, ua):
     # ---------- REPLACE URL --------- #
     if urlparse(host_url).netloc != 'www.igp.gob.pe' and urlparse(resource['url']).netloc == 'www.igp.gob.pe':
         resource['url'] = resource['url'].replace(urlparse(resource['url']).scheme + '://' + urlparse(resource['url']).netloc,
                                                   urlparse(host_url).scheme + '://' + urlparse(host_url).netloc)
     #----------------------------------#
     try:
-        r = requests.get(resource['url'], headers={'Authorization': apikey}, stream=True, verify=not insecure)
+        r = requests.get(resource['url'], headers={'Authorization': apikey, 'User-Agent': ua}, stream=True, verify=not insecure)
         #---------------------------------------#
         try:
             r.raise_for_status()
         except requests.exceptions.HTTPError as e:
             return False
         #---------------------------------------#
         with open(resource['path'], 'wb') as f:
             for chunk in r.iter_content(chunk_size=DL_CHUNK_SIZE):
                 if chunk:
                     f.write(chunk)

     except requests.ConnectionError:
         stderr.write('URL {0} refused connection. The resource will not be downloaded\n'.format(resource['url']).encode('utf-8'))
     except requests.exceptions.RequestException as e:
         stderr.write('{0}\n'.format(str(e.args[0]) if len(e.args) > 0 else '').encode('utf-8'))
     except Exception as e:
         stderr.write('{0}'.format(str(e.args[0]) if len(e.args) > 0 else '').encode('utf-8'))
     return resource

 def dataset_to_datapackage_change(dataset_dict):
     dp = {'name': dataset_dict['name'],
           'id': dataset_dict['id'],
           'path': dataset_dict['path'],
           'last_update': datetime.strptime(dataset_dict['metadata_modified'], "%Y-%m-%dT%H:%M:%S.%f").strftime("%d-%b-%Y %I.%M %p")}

     resources = dataset_dict.get('resources')
     if resources:
         dp['resources'] = [convert_to_datapackage_resource_change(r)
                            for r in resources]
     return dp

 def convert_to_datapackage_resource_change(resource_dict):
     resource = {}

     if resource_dict.get('id'):
         resource['id'] = resource_dict['id']

     if resource_dict.get('name'):
         resource['name'] = resource_dict['name']

     if resource_dict.get('path'):
         if os.path.isfile(resource_dict['path']):
             resource['path'] = resource_dict['path']
         else:
             resource['url'] = resource_dict['url']

     schema = resource_dict.get('schema')
     if isinstance(schema, six.string_types):
         try:
             resource['schema'] = json.loads(schema)
         except ValueError:
             resource['schema'] = schema
     elif isinstance(schema, dict):
         resource['schema'] = schema
     return resource

 def name_no_repetition(name, dir, option=''):
     count = 0
     while True:
         count = count + 1
         if not os.path.exists(os.path.join(dir, name)):
             if option == 'resource':
                 return name
             else:
                 return os.path.join(dir, name)

         elif not os.path.exists(os.path.join(dir, '('+str(count)+')'+name)):
             if option == 'resource':
                 return '('+str(count)+')'+name
             else:
                 return os.path.join(dir, '('+str(count)+')'+name)
         else:
-            pass
\ No newline at end of file
+            pass
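In this second file the new ua argument is only consumed at the end of the chain (dump_things_change → create_datapackage_change → create_resource_change), where each resource download now sends an explicit User-Agent alongside the Authorization header. A standalone sketch of that request pattern; the URL, API key, and output filename below are placeholders, not values from the repository:

    import requests

    DL_CHUNK_SIZE = 100 * 1024
    ua = 'CKAN_JRO/2.9.2 (+http://intranet.igp.gob.pe:8082/DATABASES/ckanext-jro/api-cliente)'

    # Streaming download with both headers, mirroring create_resource_change().
    r = requests.get('http://demo.example.com/dataset/x/resource/y/download/file.dat',
                     headers={'Authorization': '#########', 'User-Agent': ua},
                     stream=True, verify=True)
    r.raise_for_status()
    with open('file.dat', 'wb') as f:
        for chunk in r.iter_content(chunk_size=DL_CHUNK_SIZE):
            if chunk:
                f.write(chunk)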
@@ -1,17 +1,17 @@
 # encoding: utf-8
 from setuptools import setup

 setup(
     name = "jrodb",
-    version = "2.9.2.
+    version = "2.9.2.1",
     description = "Data Repository - JRO",
     author = "Edson Ynilupu Mattos",
     author_email = "eynilupu@igp.gob.pe",
     url = "http://intranet.igp.gob.pe:8082/DATABASES/ckanext-jro/api-cliente",
     packages = ["jrodb"],
     install_requires = [
         "ckanapi==4.7",
         "requests",
         "tqdm"
     ],
-)
\ No newline at end of file
+)
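With the package version bumped to 2.9.2.1, an existing editable install can be refreshed with the command already given in the Api docstring above, i.e. pip install -e git+http://intranet.igp.gob.pe:8082/DATABASES/ckanext-jro/api-cliente#egg=jrodb (pip3 for Python 3).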