##// END OF EJS Templates
v2.9.2 :: Add 'create, patch and delete' resource
eynilupu -
r15:7e5d53ddbf91
parent child
Show More
@@ -0,0 +1,343
1 import json
2 import uuid
3 import os
4 import sys
5
def resource_create(self, package_id, file_type, others='', max_size=100, max_count=500, ignore_repetition=False, **kwargs):
    """Create one or more resources in a dataset through ``package_revise``.

    Parameters:
        package_id: dataset id (UUID) or dataset name.
        file_type: value stored in the ``voc_file_type`` field of every resource.
        others: value stored in the ``others`` field. A tuple provides one
            value per resource; anything else is repeated for every resource.
        max_size: maximum size, in MB, of a single file and of one upload block.
        max_count: maximum number of files per upload block (1 to 999).
        ignore_repetition: when true, resources whose name already exists in
            the dataset are skipped with a warning instead of aborting.
        **kwargs: exactly one of ``upload`` (file path, directory path or list
            of file paths) or ``url`` (str or list of str), plus any other
            resource field. A list provides one value per resource; any other
            value is repeated for every resource.

    Returns:
        The result of the last ``package_revise`` call on success, an
        ``'ERROR:: ...'`` style string when validation fails, or the exception
        instance raised by the CKAN client.
    """
    kwargs['voc_file_type'] = file_type
    kwargs['others'] = others
    # "clear_upload" is only meaningful when patching an existing resource.
    kwargs.pop('clear_upload', None)

    if 'upload' in kwargs and 'url' in kwargs:
        return 'ERROR:: Choose one: "upload" or "url" parameters'
    if 'upload' in kwargs:
        key_u = 'upload'
    elif 'url' in kwargs:
        key_u = 'url'
    else:
        return 'ERROR:: Missing value: "upload" or "url" parameters'
    value_u = kwargs[key_u]

    if not isinstance(value_u, (list, str)):
        return 'ERROR:: "%s" must be <str> or <list>' % (key_u)

    if isinstance(value_u, str):
        if not value_u:
            return 'ERROR:: "path_list is empty"'
        if key_u == 'upload':
            if os.path.isdir(value_u):
                # A directory expands to its regular files, in sorted order.
                names = sorted(f for f in os.listdir(value_u) if os.path.isfile(os.path.join(value_u, f)))
                if not names:
                    return "ERROR:: There aren't files in this directory"
                kwargs['upload'] = [os.path.join(value_u, name) for name in names]
            elif not os.path.isfile(value_u):
                # A plain string may also name one existing file.
                return 'ERROR:: Directory or File does not exist'

    if not isinstance(kwargs[key_u], list):
        kwargs[key_u] = [kwargs[key_u]]
    total = len(kwargs[key_u])

    if kwargs.get('upload', None):
        if len(kwargs['upload']) != len(set(kwargs['upload'])):
            return 'Duplicate files found in "upload" parameter'

    # Expand every field to one value per resource. "others" uses a tuple as
    # its per-resource container because a list is a legitimate single value.
    change_kwargs = {}
    for key1, value1 in kwargs.items():
        container = tuple if key1 == 'others' else list
        if isinstance(value1, container):
            if len(value1) != total:
                return 'ERROR:: "%s" value(s) must be same length as "%s" value(s)' % (key1, key_u)
            change_kwargs[key1] = value1
        elif key1 == 'others':
            change_kwargs[key1] = (value1,) * total
        else:
            change_kwargs[key1] = [value1] * total

    try:
        dataset_show = self.ckan.action.package_show(id=package_id)['resources']
        resources_name = set(u['name'].lower() for u in dataset_show)
    except Exception as exc:
        print('ERROR obtaining metadata dataset:: Use the "print" for more information')
        return exc

    # Built locally on purpose: the instance-level scratch attributes are
    # shared with other methods and would leak entries between calls.
    resources = []
    for c1 in range(total):
        new_kwargs = {}
        for k2, v2 in change_kwargs.items():
            if k2 == 'upload':
                if not os.path.isfile(v2[c1]):
                    return 'File "%s" does not exist' % (v2[c1])
                if not kwargs.get('size', None):
                    new_kwargs['size'] = os.stat(v2[c1]).st_size
            new_kwargs[k2] = v2[c1]

        if not kwargs.get('name', None):
            new_kwargs['name'] = os.path.basename(new_kwargs[key_u])

        if new_kwargs['name'].lower() in resources_name:
            if not ignore_repetition:
                return 'ERROR:: "%s" resource already exist in this dataset' % (new_kwargs['name'])
            print('WARRING:: "' + new_kwargs['name'] + '" resource was ignored because already exist in this dataset')
        else:
            resources.append(new_kwargs)

    # package_revise selects the dataset by id when given a UUID, else by name.
    try:
        uuid.UUID(package_id, version=4)
        match = json.dumps({'id': package_id})
    except ValueError:
        match = json.dumps({'name': package_id})

    if not kwargs.get('upload', None):
        if resources:
            return self.ckan.call_action('package_revise', {'match': match, 'update__resources__extend': json.dumps(resources)})
        return "ERROR:: No resource(s) found to create"

    # Split the files into blocks that respect both the size and count limits.
    size_limit = 1024 * 1024 * float(max_size)
    blocks = [[]]
    size_file = 0
    count_file = 0
    for value in resources:
        if value['size'] > size_limit:
            return 'ERROR:: The size of the "%s" file is %sMB aprox, please change "max_size" value' % (value['name'], str(round(value['size'] / (1024.0 * 1024), 2)))
        if not 1 <= int(max_count) <= 999:
            return 'ERROR:: The count of the number of files must be between 1 and 999, please change "max_count" value'

        size_file = size_file + value['size']
        count_file = count_file + 1
        if size_file > size_limit or count_file > int(max_count):
            size_file = value['size']
            count_file = 1
            blocks.append([])

        del value['size']
        blocks[-1].append(value)

    if not blocks[0]:
        return "ERROR:: No file(s) found to upload"

    print('BLOCK(S) IN TOTAL:: {}'.format(len(blocks)))
    result = None
    for count1, block in enumerate(blocks):
        upload_files = {}
        try:
            for count2, value2 in enumerate(block):
                # Negative indexes address the resources appended by "extend".
                field = 'update__resources__-' + str(len(block) - count2) + '__upload'
                upload_files[field] = (value2['name'], open(value2.pop('upload'), 'rb'))

            print('---- BLOCK N°{} ----'.format(count1 + 1))
            print('BLOCK N°{} :: "{}" file(s) found >> uploading'.format(count1 + 1, len(block)))

            try:
                result = self.ckan.call_action('package_revise', {'match': match, 'update__resources__extend': json.dumps(block)}, files=upload_files)
                print('BLOCK N°{} :: Uploaded file(s) successfully'.format(count1 + 1))
            except Exception as exc:
                print('ERROR :: Use the "print" for more information')
                return exc
        finally:
            # Files are opened per block and always released afterwards.
            for _, handle in upload_files.values():
                handle.close()
    return result
172
173
def resource_patch(self, id, package_id, max_size=100, max_count=500, **kwargs):
    """Patch one or more existing resources of a dataset via ``package_revise``.

    Parameters:
        id: resource id (str) or list of resource ids.
        package_id: dataset id (UUID) or dataset name owning the resources.
        max_size: maximum size, in MB, of a single file and of one upload block.
        max_count: maximum number of files per upload block (1 to 999).
        **kwargs: fields to change. ``file_type`` is stored as
            ``voc_file_type``. ``upload`` (file path) and ``url`` are mutually
            exclusive. A list provides one value per resource (``others`` uses
            a tuple for that); any other value is repeated for every resource.

    Returns:
        The result of the last ``package_revise`` call on success, an error
        string when validation fails, or the exception instance raised by the
        CKAN client while uploading.
    """
    # TODO: rename the resource when it is updated with a new file.
    if 'file_type' in kwargs:
        kwargs['voc_file_type'] = kwargs.pop('file_type')

    if 'upload' in kwargs and 'url' in kwargs:
        return 'ERROR:: Choose one: "upload" or "url" parameters'

    if not isinstance(id, (list, str)):
        return 'ERROR:: "id" must be <str> or <list>'
    ids = [id] if isinstance(id, str) else id
    total = len(ids)

    # Expand every field to one value per resource id.
    change_kwargs = {}
    for key1, value1 in kwargs.items():
        container = tuple if key1 == 'others' else list
        if isinstance(value1, container):
            if len(value1) != total:
                return 'ERROR:: "%s" value(s) must be same length as "id" value(s)' % (key1)
            change_kwargs[key1] = value1
        elif key1 == 'others':
            change_kwargs[key1] = (value1,) * total
        else:
            change_kwargs[key1] = [value1] * total

    # Kept local on purpose: the instance-level scratch dict is shared with
    # other methods and stale entries would be sent as if they were resources.
    updates = {}
    for c1, resource_id in enumerate(ids):
        new_kwargs = {}
        for k2, v2 in change_kwargs.items():
            if k2 == 'upload':
                if not os.path.isfile(v2[c1]):
                    return 'File "%s" does not exist' % (v2[c1])
                new_kwargs['size'] = os.stat(v2[c1]).st_size

            if k2 == 'url':
                # Switching to a link drops any previously uploaded file.
                new_kwargs['clear_upload'] = True
                new_kwargs['size'] = 0
                new_kwargs['mimetype'] = None
            new_kwargs[k2] = v2[c1]

        if new_kwargs.get('upload', None) and new_kwargs.get('clear_upload', None):
            del new_kwargs['clear_upload']
        # The full id is used as the resource selector: a short prefix can be
        # shared by several resources of a large dataset.
        updates['update__resources__' + resource_id] = new_kwargs

    # package_revise selects the dataset by id when given a UUID, else by name.
    try:
        uuid.UUID(package_id, version=4)
        match = json.dumps({'id': package_id})
    except ValueError:
        match = json.dumps({'name': package_id})

    if not kwargs.get('upload', None):
        data = {'match': match}
        for dict_key, fields in updates.items():
            data[dict_key] = json.dumps(fields)
        return self.ckan.call_action('package_revise', data)

    # Split the uploads into blocks that respect the size and count limits.
    size_limit = 1024 * 1024 * float(max_size)
    blocks = [{}]
    block_paths = [{}]
    size_file = 0
    count_file = 0
    for dict_key, fields in updates.items():
        if fields['size'] > size_limit:
            return 'ERROR:: "%s" size out of limit' % (fields['upload'])

        if not 1 <= int(max_count) <= 999:
            return 'ERROR:: The count of the number of files must be between 1 and 999, please change "max_count" value'

        size_file = size_file + fields['size']
        count_file = count_file + 1
        if size_file > size_limit or count_file > int(max_count):
            size_file = fields['size']
            count_file = 1
            blocks.append({})
            block_paths.append({})

        # Only the path is remembered here; the file is opened right before
        # its block is sent so that few descriptors are open at a time.
        block_paths[-1][dict_key + '__upload'] = fields.pop('upload')
        del fields['size']
        blocks[-1]['match'] = match
        blocks[-1][dict_key] = json.dumps(fields)

    if not blocks[0]:
        return "ERROR:: No file(s) found to upload"

    print('BLOCK(S) IN TOTAL:: {}'.format(len(blocks)))
    result = None
    for count1, block in enumerate(blocks):
        print('---- BLOCK N°{} ----'.format(count1 + 1))
        # "match" is not a file, hence the "- 1".
        print('BLOCK N°{} :: "{}" file(s) found >> uploading'.format(count1 + 1, len(block) - 1))
        upload_files = {}
        try:
            for field, path in block_paths[count1].items():
                upload_files[field] = (os.path.basename(path), open(path, 'rb'))
            try:
                result = self.ckan.call_action('package_revise', block, files=upload_files)
                print('BLOCK N°{} :: Uploaded file(s) successfully'.format(count1 + 1))
            except Exception as exc:
                print('ERROR :: Use the "print" for more information')
                return exc
        finally:
            for _, handle in upload_files.values():
                handle.close()
    return result
297
298
def resource_delete(self, select, id, **kwargs):
    """Delete resources, optionally removing their uploaded files first.

    Parameters:
        select: ``'delete'`` removes the resource(s); ``'purge'`` first clears
            the uploaded file(s) and then removes the resource(s).
        id: resource id (str) or list of resource ids. A list requires
            ``package_id``.
        **kwargs: ``package_id`` - dataset that owns the resources. When it is
            given the resources are removed with a single ``package_update``;
            otherwise the single-resource API calls are used.

    Returns:
        The CKAN API result on success or an ``'ERROR:: ...'`` string when the
        arguments are invalid or the requested ids do not all belong to the
        dataset.
    """
    if not isinstance(id, (list, str)):
        return 'ERROR:: "id" must be <str> or <list>'

    package_id = kwargs.get('package_id', None)
    if isinstance(id, list) and not package_id:
        return "ERROR:: 'package_id' parameter is empty"

    if select not in ('delete', 'purge'):
        return 'ERROR:: "select = %s" is not accepted' % (select)

    if not package_id:
        # Single resource handled through the per-resource API.
        if select == 'purge':
            print('[DELETING FILE]')
            self.ckan.action.resource_patch(id=id, clear_upload=True)
            print('[DELETING RESOURCE]')
        return self.ckan.action.resource_delete(id=id)

    # Normalise to a list: membership tests against a bare string would match
    # substrings and its length is not the number of requested resources.
    id_list = [id] if isinstance(id, str) else list(id)
    wanted = set(id_list)

    pkg_dict = self.ckan.action.package_show(id=package_id)
    current = pkg_dict.get('resources') or []
    remaining = [res for res in current if res['id'] not in wanted]

    # Proceed only when every requested id was found in the dataset.
    if not wanted or len(current) - len(remaining) != len(wanted):
        if select == 'delete':
            return "ERROR:: No changes have been applied"
        return "ERROR:: No changes have been applied, please check 'id' parameter"

    pkg_dict['resources'] = remaining
    if select == 'purge':
        print('[DELETING FILES]')
        resource_patch(self=self, id=id_list, package_id=package_id, clear_upload=True)
        print('[DELETING RESOURCES]')
    return self.ckan.action.package_update(**pkg_dict)
1 NO CONTENT: modified file, binary diff hidden
@@ -1,1037 +1,1046
1 1 from ckanapi import RemoteCKAN
2 2 from datetime import datetime
3 3 from tqdm import tqdm
4 4 from CKAN_JRO import logic_download
5 from CKAN_JRO import resource
5 6 #from ckanapi.errors import NotAuthorized, NotFound, ValidationError, SearchQueryError, SearchError, CKANAPIError, ServerIncompatibleError
6 7 import sys
7 8 import platform
8 9 import os
9 10 import tempfile
10 11 import shutil
11 12 import zipfile
12 13 import concurrent.futures
13 14 import requests
14 15 import json
15 16 #import pathlib
16 17 import uuid
17 18
18 19 if sys.version_info.major == 3:
19 20 from urllib.parse import urlparse
20 21 else:
21 22 import urlparse
22 23
23 24 class JROAPI():
24 25 """
25 26 FINALIDAD:
26 27 Script para administrar y obtener la data del repositorio por medio de APIs.
27 28
28 29 REQUISITIOS PREVIOS:
29 30 - Paso 1: Tener "pip [Python 2]" o "pip3 [Python 3]" instalado:
30 31 - Paso 2: Instalar lo siguiente como admininstrador:
31 32 En Python 2
32 33 - pip install ckanapi==4.5
33 34 - pip install requests
34 35 - pip install futures
35 36 - pip install tqdm
36 37 En Python > 3
37 38 - pip3 install ckanapi==4.5
38 39 - pip3 install requests
39 40 - pip3 install tqdm
40 41
41 42 FUNCIONES DISPONIBLES:
42 43 - action
43 44 - upload_file
44 45 - upload_multiple_files
45 46 - upload_multiple_files_advance
46 47 - show
47 48 - search
48 49 - create
49 50 - patch
50 51 - delete
51 52 - download_files
52 53
53 54 EJEMPLOS:
54 55 #1:
55 56 with JROAPI('http://demo.example.com', Authorization='#########') as <access_name>:
56 57 ... some operation(s) ...
57 58 #2:
58 59 <access_name> = JROAPI('http://example.com', Authorization='#########')
59 60 ... some operation(s) ...
60 61 <access_name>.ckan.close()
61 62
62 63 REPORTAR ALGUN PROBLEMA:
63 64 Debe enviar un correo a eynilupu@igp.gob.pe detallando los siguientes pasos:
64 65 1) Correo para contactarlo
65 66 2) Descripcion del problema
66 67 3) ¿En que paso o seccion encontro el problema?
67 68 4) ¿Cual era el resultado que usted esperaba?
68 69 """
def __init__(self, url, Authorization=None, secure=True):
    '''
    Build the CKAN client and initialise the helper attributes.

    url: base address of the CKAN site.
    Authorization: API key handed to the client (optional).
    secure: passing the boolean False disables TLS certificate verification.
    '''
    self.verify = secure
    # Only an explicit boolean False switches certificate checks off.
    if secure or not isinstance(secure, bool):
        session = None
    else:
        session = requests.Session()
        session.verify = False

    self.url = url
    user_agent = 'CKAN_JRO/2.9.2 (+' + str(self.url) + ')'
    self.ckan = RemoteCKAN(self.url, apikey=Authorization, user_agent=user_agent, session=session)
    self.Authorization = Authorization

    # TODO: this could be replaced by os.sep.
    self.separator = '\\' if platform.system() == 'Windows' else '/'

    # Scratch attributes used by the other methods.
    self.chunk_size = 1024
    self.list = []
    self.dict = {}
    self.str = ''
    self.check = 1
    self.cont = 0
96 97
def __enter__(self):
    '''Enter the ``with`` block and hand back this same object.'''
    return self
99 100
def __exit__(self, *exc_info):
    '''Leave the ``with`` block by closing the CKAN client; the exception details are ignored.'''
    client = self.ckan
    client.close()
102 103
def action(self, action, **kwargs):
    """
    Call any CKAN API action by name.

    For ``package_search`` the keyword arguments ``facet_mincount``,
    ``facet_limit`` and ``facet_field`` are renamed to the dotted names
    (``facet.mincount`` ...), which cannot be written as Python keywords.

    Returns the API result, or the exception instance when the call fails.

    EXAMPLE:
        <access_name>.action(<consuming API>, param_1 = <class 'param_1'>, ...)
    """
    if action == 'package_search':
        # Local tuple on purpose: the instance-level scratch list is returned
        # by other methods and must not be overwritten here.
        for facet in ('facet_mincount', 'facet_limit', 'facet_field'):
            if facet in kwargs:
                kwargs[facet.replace('_', '.')] = kwargs.pop(facet)

    try:
        return getattr(self.ckan.action, action)(**kwargs)
    except Exception as exc:
        # Failures are reported as a return value; KeyboardInterrupt and
        # SystemExit are deliberately left to propagate.
        return exc
128 129
def upload_file(self, dataset_id, file_date, file_type, file_path=False, url_or_path=False, ignore_repetition=False, **kwargs):
    '''
    Create a single resource (optionally with an attached file) in a dataset.

    dataset_id: id or name of the dataset receiving the resource.
    file_date: value stored in the "file_date" field.
    file_type: value stored in the "voc_file_type" field.
    file_path: path of a local file to upload (str).
    url_or_path: link stored as the resource URL (str). Exactly one of
        "file_path" and "url_or_path" must be given.
    ignore_repetition: when true, an already existing resource name only
        produces a warning instead of aborting.
    **kwargs: extra resource fields; "package_id", "upload" and
        "voc_file_type" are ignored. A list passed as "others" is stored as JSON.

    Returns the API result, an error string when validation fails, or the
    exception instance raised by the CKAN client.

    STRUCTURE:
        <access_name>.upload_file(dataset_id = <class 'str'>, file_date = <class 'str'>, file_type = <class 'str'>, file_path = <class 'str'>, url_or_path = <class 'str'>, param_1 = <class 'param_1'>, ...)
    '''
    # TODO: handle keyboard interruption.
    # The payload is built locally so nothing leaks in from previous calls.
    reserved = ('package_id', 'upload', 'voc_file_type')
    params = dict((key1, value1) for key1, value1 in kwargs.items() if key1 not in reserved)

    if 'others' not in kwargs:
        params['others'] = ''
    elif isinstance(kwargs['others'], list):
        params['others'] = json.dumps(kwargs['others'])

    if isinstance(file_path, str) and isinstance(url_or_path, str):
        return 'ERROR:: Choose one: "file_path" or "url_or_path" parameters'

    is_upload = isinstance(file_path, str)
    if is_upload:
        if not os.path.isfile(file_path):
            return 'File "%s" not exist' % (file_path)
        params['name'] = os.path.basename(file_path)
    elif isinstance(url_or_path, str):
        params['url'] = url_or_path
        if 'name' not in params:
            params['name'] = os.path.basename(url_or_path)
    else:
        return 'ERROR: Verify "file_path" or "url_or_path" parameters: <class "str"> or choose one'

    #-------------------------PACKAGE SHOW-----------------------#
    try:
        dataset_show = self.ckan.action.package_show(id=dataset_id)['resources']
    except Exception as exc:
        print('ERROR obtaining metadata dataset:: Use the "print" for more information')
        return exc

    resources_name = set(u['name'].lower() for u in dataset_show)
    if params['name'].lower() in resources_name:
        if not ignore_repetition:
            return 'ERROR:: "%s" resource already exist in this dataset' % (params['name'])
        print('WARRING:: "' + str(params['name']) + '" resource already exist in this dataset')
    #------------------------------------------------------------#

    # The file is opened only once every check has passed and is always
    # closed after the request, whatever its outcome.
    handle = open(file_path, 'rb') if is_upload else None
    try:
        if handle is not None:
            params['upload'] = handle
        return self.ckan.action.resource_create(package_id=dataset_id, file_date=file_date, voc_file_type=file_type, **params)
    except Exception as exc:
        return exc
    finally:
        if handle is not None:
            handle.close()
198 199
def upload_multiple_files_advance(self, dataset_id, path_files, file_date, file_type, max_size=100, max_count=500, ignore_repetition=False, **kwargs):
    '''
    Upload several files to a dataset in blocks, using "package_revise".

    dataset_id: id (UUID) or name of the dataset.
    path_files: directory path (str) or list of file paths.
    file_date: value stored in the "file_date" field of every resource.
    file_type: value stored in the "voc_file_type" field of every resource.
    max_size: maximum size, in MB, of a single file and of one block.
    max_count: maximum number of files per block (1 to 999).
    ignore_repetition: when true, files whose name already exists in the
        dataset are skipped with a warning instead of aborting.
    **kwargs: extra fields applied to every resource; "package_id", "upload",
        "voc_file_type" and "name" are ignored. A list passed as "others" is
        stored as JSON.

    Returns the result of the last "package_revise" call, an error string
    when validation fails, or the exception instance raised by the client.

    STRUCTURE:
        <access_name>.upload_multiple_files_advance(dataset_id = <class 'str'>, path_files = <class 'list of strings'>, file_date = <class 'str'>, file_type = <class 'str'>, param_1 = <class 'param_1'>, ...)
    '''
    # TODO: handle keyboard interruption.
    #-------------------------PACKAGE SHOW-----------------------#
    try:
        dataset_show = self.ckan.action.package_show(id=dataset_id)['resources']
    except Exception as exc:
        print('ERROR obtaining metadata dataset:: Use the "print" for more information')
        return exc
    resources_name = set(u['name'].lower() for u in dataset_show)
    #------------------------------------------------------------#
    # Extra fields are kept local so nothing leaks in from previous calls.
    reserved = ('package_id', 'upload', 'voc_file_type', 'name')
    extras = dict((key1, value1) for key1, value1 in kwargs.items() if key1 not in reserved)
    if 'others' not in kwargs:
        extras['others'] = ''
    elif isinstance(kwargs['others'], list):
        extras['others'] = json.dumps(kwargs['others'])
    #---------------CASE : "path" or "path_list"------------------#
    if isinstance(path_files, list):
        if not path_files:
            return 'ERROR:: "path_list is empty"'
        # sorted() leaves the caller's list untouched.
        candidates = sorted(path_files)
    elif isinstance(path_files, str):
        if not os.path.isdir(path_files):
            return 'ERROR:: Directory "%s" does not exist' % (path_files)
        names = sorted(f for f in os.listdir(path_files) if os.path.isfile(os.path.join(path_files, f)))
        if not names:
            return "ERROR:: There aren't files in this directory"
        candidates = [os.path.join(path_files, name) for name in names]
    else:
        return 'ERROR:: "path_files" must be a str or list'

    total_list = []
    for path in candidates:
        if not os.path.isfile(path):
            return 'File "%s" does not exist' % (path)
        name = os.path.basename(path)
        if name.lower() in resources_name:
            if not ignore_repetition:
                return 'ERROR:: "%s" file already exist in this dataset' % (name)
            print('WARRING:: "' + str(name) + '" file was ignored because already exist in this dataset')
        else:
            # Only the path is stored; the file is opened right before its
            # block is sent so that few descriptors are open at a time.
            total_list.append({'name': name, 'size': os.stat(path).st_size, 'upload': path})
    #------------------------------------------------------------#
    # package_revise selects the dataset by id when given a UUID, else by name.
    try:
        uuid.UUID(str(dataset_id), version=4)
        match = json.dumps({'id': str(dataset_id)})
    except ValueError:
        match = json.dumps({'name': str(dataset_id)})
    #------------------------------------------------------------#
    size_limit = 1024 * 1024 * float(max_size)
    blocks = [[]]
    size_file = 0
    count_file = 0
    for value in total_list:
        if value['size'] > size_limit:
            return 'ERROR:: The size of the "%s" file is %sMB aprox, please change "max_size" value' % (value['name'], str(round(value['size'] / (1024.0 * 1024), 2)))
        if not 1 <= int(max_count) <= 999:
            return 'ERROR:: The count of the number of files must be between 1 and 999, please change "max_count" value'

        size_file = size_file + value['size']
        count_file = count_file + 1
        if size_file > size_limit or count_file > int(max_count):
            size_file = value['size']
            count_file = 1
            blocks.append([])
        del value['size']
        blocks[-1].append(value)
    #------------------------------------------------------------#
    if not blocks[0]:
        return "ERROR:: No file(s) found to upload"

    print('BLOCK(S) IN TOTAL:: {}'.format(len(blocks)))
    result = None
    for count1, block in enumerate(blocks):
        print('---- BLOCK N°{} ----'.format(count1 + 1))
        resource_extend = []
        files_dict = {}
        try:
            for count2, value2 in enumerate(block):
                value2['file_date'] = file_date
                value2['voc_file_type'] = file_type
                value2.update(extras)
                # Negative indexes address the resources appended by "extend".
                field = 'update__resources__-' + str(len(block) - count2) + '__upload'
                files_dict[field] = (value2['name'], open(value2.pop('upload'), 'rb'))
                resource_extend.append(value2)

            print('BLOCK N°{} :: "{}" file(s) found >> uploading'.format(count1 + 1, len(block)))
            try:
                result = self.ckan.call_action(
                    'package_revise',
                    {'match': match, 'update__resources__extend': json.dumps(resource_extend)},
                    files=files_dict
                )
                print('BLOCK N°{} :: Uploaded file(s) successfully'.format(count1 + 1))
            except Exception as exc:
                print('ERROR :: Use the "print" for more information')
                return exc
        finally:
            for _, handle in files_dict.values():
                handle.close()
    return result
336 339
def upload_multiple_files(self, dataset_id, path_files, date_files, type_files, ignore_repetition=False, **kwargs):
    '''
    Upload several files to a dataset, one "resource_create" call per file.

    dataset_id: id or name of the dataset.
    path_files: directory path (str) or list of file paths.
    date_files: "file_date" value; a list provides one value per file.
    type_files: "voc_file_type" value; a list provides one value per file.
    ignore_repetition: when true, files whose name already exists in the
        dataset are skipped with a warning instead of aborting.
    **kwargs: extra resource fields; a list provides one value per file.
        "others" accepts a tuple (one JSON-encoded value per file), a list
        (JSON-encoded, shared by all files) or a str (shared by all files).

    Returns a list with one entry per file - the API result or the exception
    raised for that file - or an error string when validation fails.

    STRUCTURE:
        <access_name>.upload_multiple_files(dataset_id = <class 'str'>, path_files = <class 'str'> or <class 'list of strings'>, date_files = <class 'str'> or <class 'list of strings'>, type_files = <class 'str'> or <class 'list of strings'>, param_1 = <class 'param_1'>, ...)
    '''
    # TODO: handle keyboard interruption.
    #-------------------------PACKAGE SHOW-----------------------#
    try:
        dataset_show = self.ckan.action.package_show(id=dataset_id)['resources']
    except Exception as exc:
        print('ERROR obtaining metadata dataset:: Use the "print" for more information')
        return exc
    resources_name = set(u['name'].lower() for u in dataset_show)
    #---------------CASE : "path" or "path_list"------------------#
    if isinstance(path_files, list):
        if not path_files:
            return 'ERROR:: "path_list is empty"'
        # sorted() leaves the caller's list untouched.
        candidates = sorted(path_files)
    elif isinstance(path_files, str):
        if not os.path.isdir(path_files):
            return 'ERROR:: Directory "%s" does not exist' % (path_files)
        names = sorted(f for f in os.listdir(path_files) if os.path.isfile(os.path.join(path_files, f)))
        if not names:
            return "ERROR:: There aren't files in this directory"
        candidates = [os.path.join(path_files, name) for name in names]
    else:
        return 'ERROR:: "path_files" must be a str or list'

    # Per-file values. "upload" holds paths; each file is opened only while
    # its own request is in flight.
    params_dict = {'upload': [], 'name': []}
    for path in candidates:
        if not os.path.isfile(path):
            return 'File "%s" does not exist' % (path)
        name = os.path.basename(path)
        if name.lower() in resources_name:
            if not ignore_repetition:
                return 'ERROR:: "%s" file already exist in this dataset' % (name)
            print('WARRING:: "' + str(name) + '" file was ignored because already exist in this dataset')
        else:
            params_dict['upload'].append(path)
            params_dict['name'].append(name)
    #------------------------------------------------------------#
    # Values shared by every file.
    params_no_dict = {'package_id': dataset_id}
    if isinstance(date_files, list):
        params_dict['file_date'] = date_files
    else:
        params_no_dict['file_date'] = date_files

    if isinstance(type_files, list):
        params_dict['voc_file_type'] = type_files
    else:
        params_no_dict['voc_file_type'] = type_files

    for key1, value1 in kwargs.items():
        if key1 not in params_dict and key1 not in params_no_dict and key1 != 'others':
            if isinstance(value1, list):
                params_dict[key1] = value1
            else:
                params_no_dict[key1] = value1
    #------------------------------------------#
    if 'others' not in kwargs:
        params_no_dict['others'] = ''
    elif isinstance(kwargs['others'], tuple):
        params_dict['others'] = [json.dumps(w) for w in kwargs['others']]
    elif isinstance(kwargs['others'], list):
        params_no_dict['others'] = json.dumps(kwargs['others'])
    elif isinstance(kwargs['others'], str):
        params_no_dict['others'] = kwargs['others']
    else:
        return 'ERROR:: "others" must be a tuple, list or str'
    #------------------------------------------#
    if len(set(len(values) for values in params_dict.values())) > 1:
        return 'ERROR:: All lists must be the same length: %s' % (len(params_dict['name']))
    #------------------------------------------------------------#
    print('"{}" file(s) found >> uploading'.format(len(params_dict['name'])))
    # Fresh list per call so earlier results are never returned again.
    results = []
    for v, name in enumerate(params_dict['name']):
        handle = None
        try:
            send = dict((key_dict, value_dict[v]) for key_dict, value_dict in params_dict.items())
            send.update(params_no_dict)
            handle = open(send['upload'], 'rb')
            send['upload'] = handle

            results.append(self.ckan.action.resource_create(**send))
            print('File #{} :: "{}" was uploaded successfully'.format(v + 1, name))
        except Exception as exc:
            results.append(exc)
            print('File #{} :: Error uploading "{}" file'.format(v + 1, name))
        finally:
            if handle is not None:
                handle.close()
    # Still mirrored on the instance, where earlier versions kept the results.
    self.list = results
    return results
    #------------------------------------------------------------#
468 471
def show(self, type_option, id, **kwargs):
    '''
    PURPOSE:
        Custom lookup of one specific object.

    AVAILABLE PARAMETERS:
        SEE: "GUIA DE SCRIPT.pdf"

    STRUCTURE:
        <access_name>.show(type_option = <class 'str'>, id = <class 'str'>, param_1 = <class 'param_1'>, ...)

    RETURNS:
        The value returned by the matching CKAN action, an 'ERROR:: ...'
        string when "type_option" is invalid, or the exception instance
        raised while calling the action (it is returned, not raised).
    '''
    if not isinstance(type_option, str):
        return 'ERROR:: "type_option" must be a str'

    # Maps every accepted "type_option" to the CKAN action it triggers.
    action_names = {
        'dataset': 'package_show',
        'resource': 'resource_show',
        'project': 'organization_show',
        'collaborator': 'package_collaborator_list_for_user',
        'member': 'organization_list_for_user',
        'vocabulary': 'vocabulary_show',
        'tag': 'tag_show',
        'user': 'user_show',
        'job': 'job_show',
    }
    action_name = action_names.get(type_option)
    if action_name is None:
        return 'ERROR:: "type_option = %s" is not accepted' % (type_option)

    if type_option == 'tag' and 'vocabulary_id' not in kwargs:
        print('Missing "vocabulary_id" value: assume it is a free tag')

    try:
        return getattr(self.ckan.action, action_name)(id=id, **kwargs)
    except Exception as exc:
        # Errors are handed back to the caller instead of being raised.
        # Only Exception is caught so Ctrl+C / SystemExit still propagate.
        return exc
509 512
def search(self, type_option, query=None, **kwargs):
    '''
    PURPOSE:
        Custom search for objects that satisfy some criteria.

    AVAILABLE PARAMETERS:
        SEE: "GUIA DE SCRIPT.pdf"

    STRUCTURE:
        <access_name>.search(type_option = <class 'str'>, query = <class 'dict'>, param_1 = <class 'param_1'>, ...)

    RETURNS:
        The value returned by the CKAN search action, a message string
        when an argument is invalid, or the exception instance raised
        while calling the action (it is returned, not raised).

    NOTE:
        Request parameters are accumulated in self.dict, and the date keys
        already handled for a dataset search are recorded in self.list.
    '''
    if not isinstance(type_option, str):
        return 'ERROR:: "type_option" must be <str>'

    def date_error(field, not_before=None):
        # Returns the error message for query[field], or None when it is a
        # valid <YYYY-MM-DD> date that does not precede query[not_before].
        value = query[field]
        if not isinstance(value, str):
            return '"%s" must be <str>' % (field)
        try:
            datetime.strptime(value, '%Y-%m-%d')
        except ValueError:
            return '"%s" incorrect: "%s"' % (field, value)
        # strptime also accepts non zero-padded dates such as "2020-1-1".
        if len(value) != 10:
            return '"%s", must be: <YYYY-MM-DD>' % (field)
        # Zero-padded ISO dates sort chronologically as plain strings.
        if not_before is not None and not_before in query:
            if query[not_before] > value:
                return '"%s" must be greater than "%s"' % (field, not_before)
        return None

    try:
        if type_option == 'dataset':
            # These keys are controlled by this method, never by the caller.
            reserved = ['fq', 'fq_list', 'include_private']
            # CKAN expects these facet options written with a dot.
            dotted = ['facet_mincount', 'facet_limit', 'facet_field']
            for key1, value1 in kwargs.items():
                if key1 in reserved:
                    continue
                if key1 in dotted:
                    self.dict[key1.replace('_', '.')] = value1
                else:
                    self.dict[key1] = value1

            if query is not None:
                if not isinstance(query, dict):
                    return '"query" must be <dict>'
                self.dict['fq_list'] = []
                #NUM_RESOURCES_MIN / NUM_RESOURCES_MAX
                if 'dataset_start_date' in query:
                    error = date_error('dataset_start_date')
                    if error is not None:
                        return error
                    self.dict['fq_list'].append('dataset_start_date:"'+query['dataset_start_date']+'"')
                    self.list.append('dataset_start_date')
                if 'dataset_end_date' in query:
                    error = date_error('dataset_end_date', 'dataset_start_date')
                    if error is not None:
                        return error
                    self.dict['fq_list'].append('dataset_end_date:"'+query['dataset_end_date']+'"')
                    self.list.append('dataset_end_date')
                # Every remaining criterion becomes a plain key:"value" filter.
                for key, value in query.items():
                    if value is not None and key not in self.list:
                        self.dict['fq_list'].append(str(key)+':"'+str(value)+'"')

            return getattr(self.ckan.action, 'package_search')(include_private=True, **self.dict)

        elif type_option == 'resource':
            for key1, value1 in kwargs.items():
                if key1 != 'fields':
                    self.dict[key1] = value1

            if query is not None:
                if not isinstance(query, dict):
                    return '"query" must be <dict>'
                if 'file_date_min' in query:
                    error = date_error('file_date_min')
                    if error is not None:
                        return error
                if 'file_date_max' in query:
                    error = date_error('file_date_max', 'file_date_min')
                    if error is not None:
                        return error
                self.dict['query'] = query
            return getattr(self.ckan.action, 'resources_search')(**self.dict)

        elif type_option == 'tag':
            for key1, value1 in kwargs.items():
                if key1 != 'fields':
                    self.dict[key1] = value1

            if 'vocabulary_id' not in kwargs:
                print('Missing "vocabulary_id" value: tags that don’t belong to any vocabulary')
            else:
                print('Only tags that belong to "{}" vocabulary'.format(kwargs['vocabulary_id']))

            if query is not None:
                if not isinstance(query, dict):
                    return '"query" must be <dict>'
                if 'search' in query:
                    if isinstance(query['search'], (list, str)):
                        self.dict['query'] = query['search']
                    else:
                        return '"search" must be <list> or <str>'
            return getattr(self.ckan.action, 'tag_search')(**self.dict)

        else:
            return 'ERROR:: "type_option = %s" is not accepted' % (type_option)

    except Exception as exc:
        # Errors are handed back to the caller instead of being raised.
        # Only Exception is caught so Ctrl+C / SystemExit still propagate.
        return exc
645 648
def create(self, type_option, select=None, **kwargs):
    '''
    PURPOSE:
        Custom function for creating objects.

    AVAILABLE PARAMETERS:
        SEE: "GUIA DE SCRIPT.pdf"

    STRUCTURE:
        <access_name>.create(type_option = <class 'str'>, param_1 = <class 'param_1'>, ...)

    RETURNS:
        The value returned by the underlying create call, an 'ERROR:: ...'
        string when "type_option" or "select" is invalid, or the exception
        instance raised by the call (it is returned, not raised).
    '''
    if not isinstance(type_option, str):
        return 'ERROR:: "type_option" must be <str>'

    # Options that map straight onto one CKAN action fed with **kwargs.
    action_names = {
        'dataset': 'package_create',
        'project': 'organization_create',
        'member': 'organization_member_create',
        'collaborator': 'package_collaborator_create',
        'vocabulary': 'vocabulary_create',
        'tag': 'tag_create',
        'user': 'user_create',
    }
    try:
        if type_option in action_names:
            return getattr(self.ckan.action, action_names[type_option])(**kwargs)
        if type_option == 'resource':
            # Resource uploads are delegated to the project's resource module.
            return resource.resource_create(self, **kwargs)
        if type_option == 'views':
            if select == 'resource':
                # "package" is not forwarded to the resource-level action.
                self.list = ['package']
                for key1, value1 in kwargs.items():
                    if key1 not in self.list:
                        self.dict[key1] = value1
                return getattr(self.ckan.action, 'resource_create_default_resource_views')(**self.dict)
            if select == 'dataset':
                return getattr(self.ckan.action, 'package_create_default_resource_views')(**kwargs)
            return 'ERROR:: "select = %s" is not accepted' % (select)
        return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
    except Exception as exc:
        # Errors are handed back to the caller instead of being raised.
        # Only Exception is caught so Ctrl+C / SystemExit still propagate.
        return exc
691 696
def patch(self, type_option, **kwargs):
    '''
    PURPOSE:
        Custom functions for updating objects.

    AVAILABLE PARAMETERS:
        SEE: "GUIA DE SCRIPT.pdf"

    STRUCTURE:
        <access_name>.patch(type_option = <class 'str'>, param_1 = <class 'param_1'>, ...)

    RETURNS:
        The value returned by the underlying patch call, an 'ERROR:: ...'
        string when "type_option" is invalid, or the exception instance
        raised by the call (it is returned, not raised).
    '''
    if not isinstance(type_option, str):
        return 'ERROR:: "type_option" must be <str>'

    # Options that map straight onto one CKAN action fed with **kwargs.
    # "member" and "collaborator" are updated through their *_create action.
    # TODO: restrict the "dataset" patch to dataset-level parameters only,
    #       so that it cannot include resources.
    action_names = {
        'dataset': 'package_patch',
        'project': 'organization_patch',
        'member': 'organization_member_create',
        'collaborator': 'package_collaborator_create',
    }
    try:
        if type_option in action_names:
            return getattr(self.ckan.action, action_names[type_option])(**kwargs)
        if type_option == 'resource':
            # Resource patches are delegated to the project's resource module.
            return resource.resource_patch(self, **kwargs)
        return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
    except Exception as exc:
        # Errors are handed back to the caller instead of being raised.
        # Only Exception is caught so Ctrl+C / SystemExit still propagate.
        return exc
722 728
def delete(self, type_option, select=None, **kwargs):
    '''
    PURPOSE:
        Custom function for deleting and/or purging objects.

    AVAILABLE PARAMETERS:
        SEE: "GUIA DE SCRIPT.pdf"

    STRUCTURE:
        <access_name>.delete(type_option = <class 'str'>, param_1 = <class 'param_1'>, ...)

    RETURNS:
        The value returned by the underlying delete call, an 'ERROR:: ...'
        string when "type_option" or "select" is invalid, or the exception
        instance raised by the call (it is returned, not raised).
    '''
    if not isinstance(type_option, str):
        return 'ERROR:: "type_option" must be <str>'

    # Options where "select" chooses between a delete and a purge action.
    selectable = {
        'dataset': {'delete': 'package_delete', 'purge': 'dataset_purge'},
        'project': {'delete': 'organization_delete', 'purge': 'organization_purge'},
    }
    # Options that map straight onto one CKAN action fed with **kwargs.
    direct = {
        'vocabulary': 'vocabulary_delete',
        'tag': 'tag_delete',
        'user': 'user_delete',
    }
    try:
        if type_option in selectable:
            if select is None:
                return 'ERROR:: "select" must not be "None"'
            if select not in ('delete', 'purge'):
                return 'ERROR:: "select = %s" is not accepted' % (select)
            return getattr(self.ckan.action, selectable[type_option][select])(**kwargs)
        if type_option == 'resource':
            if select is None:
                return 'ERROR:: "select" must not be "None"'
            # Resource deletion is delegated to the project's resource module.
            return resource.resource_delete(self, select, **kwargs)
        if type_option in direct:
            return getattr(self.ckan.action, direct[type_option])(**kwargs)
        return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
    except Exception as exc:
        # Errors are handed back to the caller instead of being raised.
        # Only Exception is caught so Ctrl+C / SystemExit still propagate.
        return exc
771 780
def f_status_note(self, total, result, path):
    '''
    Write "status_note.txt" inside "path", listing the downloaded and the
    failed files.

    total  -- dict whose 'name' list holds every file that was requested.
    result -- dict whose 'name' list holds the files actually downloaded.
    path   -- target directory; the file name is appended as-is, so it
              must already end with a path separator.
    '''
    requested = total['name']
    downloaded = result['name']
    failed_count = len(requested) - len(downloaded)

    lines = ['DOWNLOADED FILE(S): "%s"' % (len(downloaded)), os.linesep]
    lines.extend(' - ' + name + os.linesep for name in downloaded)
    lines.append(os.linesep)

    lines.append('FAILED FILE(S): "%s"' % (failed_count))
    lines.append(os.linesep)
    if failed_count != 0:
        # Set lookup keeps the report linear in the number of files.
        done = set(downloaded)
        lines.extend(' - ' + name + os.linesep for name in requested if name not in done)
    else:
        lines.append(' "None"' + os.linesep)

    # A single managed handle: the file is truncated, written and closed.
    with open(path + 'status_note.txt', 'w') as file_txt:
        file_txt.writelines(lines)
790 799
def f_name(self, name_dataset, ext, tempdir):
    '''
    Return a file or folder name based on "name_dataset" + "ext" that does
    not exist yet inside "tempdir", adding a "(n)" counter when needed.

    The search is driven by instance state: it runs while self.check is
    truthy, counts attempts in self.cont and leaves the chosen name in
    self.str, which is also the returned value.
    '''
    while self.check:
        self.str = ''
        attempt = self.cont
        if attempt == 0:
            plain = name_dataset + ext
            if os.path.exists(tempdir + plain):
                # Taken: provisional answer, confirmed or replaced by the next rounds.
                self.str = ''.join([name_dataset, '(', str(attempt + 1), ')', ext])
            else:
                self.check = self.check * 0
                self.str = plain
        else:
            numbered = ''.join([name_dataset, '(', str(attempt), ')', ext])
            if not os.path.exists(tempdir + numbered):
                self.check = self.check * 0
                self.str = numbered
        self.cont = attempt + 1
    return self.str
806 815
def f_zipdir(self, path, ziph, zip_name):
    '''
    Add every file found under "path" to the open zip handle "ziph".

    Archive names are computed relative to the parent of "path", so the
    entries keep the top-level folder name. "zip_name" is only used in the
    progress messages printed for each walked directory.
    '''
    parent = os.path.join(path, '..')
    for folder, _, filenames in os.walk(path):
        print('.....')
        print('Creating: "{}" >>'.format(zip_name))
        for filename in tqdm(iterable=filenames, total=len(filenames)):
            source = os.path.join(folder, filename)
            ziph.write(source, os.path.relpath(source, parent))
        print('Created >>')
815 824
def download_by_step(self, response, tempdir_name):
    '''
    Download one resource into "tempdir_name" (best effort).

    response     -- dict with the resource 'url' and 'name'; 'url' is
                    rewritten in place when the host has to be replaced.
    tempdir_name -- target directory; 'name' is appended as-is, so it must
                    already end with a path separator.

    Nothing is returned and request errors are not raised: the caller
    finds out which downloads succeeded by listing the target directory.
    For that reason a file left incomplete by a failed transfer is removed.
    '''
    target = None
    try:
        # ---------- REPLACE URL --------- #
        own = urlparse(self.url)
        remote = urlparse(response['url'])
        if own.netloc != 'www.igp.gob.pe' and remote.netloc == 'www.igp.gob.pe':
            response['url'] = response['url'].replace(remote.scheme + '://' + remote.netloc,
                                                      own.scheme + '://' + own.netloc)
        #----------------------------------#
        with requests.get(response['url'], stream=True, headers={'Authorization': self.Authorization}, verify=self.verify) as resp:
            if resp.status_code == 200:
                target = tempdir_name + response['name']
                with open(target, 'wb') as file:
                    for chunk in resp.iter_content(chunk_size = self.chunk_size):
                        if chunk:
                            file.write(chunk)
    except requests.exceptions.RequestException:
        # "target" is only set once writing was about to start, so this
        # removes just the truncated file of an interrupted transfer.
        if target is not None and os.path.exists(target):
            os.remove(target)
831 840
def download_files(self, **kwargs):
    '''
    PURPOSE:
        Custom function for downloading the existing files of a dataset.

    AVAILABLE PARAMETERS:
        SEE: "GUIA DE SCRIPT.pdf"

    STRUCTURE:
        <access_name>.download_files(id = <class 'str'>, param_1 = <class 'param_1'>, ...)

    LOCAL OPTIONS (not sent to the server):
        zip         -- bool, pack the downloaded files into a .zip (default False).
        status_note -- bool, write "status_note.txt" next to the files (default False).
        path        -- str, existing and writable target directory (default '').

    RETURNS:
        A status string ('DOWNLOAD FINISHED', 'NO FILES WERE DOWNLOADED',
        'FILES NOT FOUND' or 'ERROR:: ...'), or the exception instance
        raised by the "url_resources" action (it is returned, not raised).
    '''
    dict_local = {}
    #----------------------------------------------#
    for flag in ('zip', 'status_note'):
        if flag in kwargs:
            if type(kwargs[flag]) is not bool:
                return 'ERROR:: "%s" must be: <class "bool">' % (flag)
            dict_local[flag] = kwargs[flag]
        else:
            dict_local[flag] = False
    #----------------------------------------------#
    if 'path' in kwargs:
        if not isinstance(kwargs['path'], str):
            return 'ERROR:: "path" must be: <class "str">'
        if not os.path.isdir(kwargs['path']):
            return 'ERROR:: "path" does not exist'
        if kwargs['path'][-1:] != self.separator:
            dict_local['path'] = kwargs['path']+self.separator
        else:
            dict_local['path'] = kwargs['path']

        # Probe write permission by creating and removing a throw-away file.
        # Any I/O failure is reported the same way, matching the probe done
        # by download_files_advance.
        probe = dict_local['path']+datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f")+'.txt'
        try:
            with open(probe, 'w'):
                pass
            os.remove(probe)
        except (IOError, OSError):
            return 'ERROR:: Access denied, you are not authorized to write files: "%s"' % (dict_local['path'])
    else:
        dict_local['path'] = ''
    #----------------------------------------------#
    # Everything that is not a local option is forwarded to the server.
    for key, value in kwargs.items():
        if key not in dict_local:
            self.dict[key] = value
    try:
        response = getattr(self.ckan.action, 'url_resources')(**self.dict)
    except Exception as exc:
        # Only Exception is caught so Ctrl+C / SystemExit still propagate.
        return exc

    if len(response) == 0:
        return 'FILES NOT FOUND'

    #--------------TEMP PATH---------------#
    if dict_local['zip']:
        # Files are staged in a temporary folder and zipped from there.
        tempdir = tempfile.mkdtemp(prefix=kwargs['id']+'-')+self.separator
        os.mkdir(tempdir+kwargs['id'])
        dir_name = tempdir + kwargs['id'] + self.separator
    else:
        folder = self.f_name(kwargs['id'], '', dict_local['path'])
        os.mkdir(dict_local['path'] + folder)
        dir_name = dict_local['path'] + folder + self.separator
    #-----------DOWNLOAD FILES-------------#
    print('.....')
    print('Downloading "{}" file(s) >>'.format(len(response)))
    name_total = {'name': []}
    # Leaving the "with" block waits for every submitted download.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        for u in tqdm(iterable=response, total=len(response)):
            name_total['name'].append(u['name'])
            executor.submit(self.download_by_step, u, dir_name)
    # Whatever reached the folder is what counts as downloaded.
    name_check = {'name': [f for f in os.listdir(dir_name) if os.path.isfile(os.path.join(dir_name, f))]}
    print('"{}" downloaded file(s) successfully >>'.format(len(name_check['name'])))
    #--------------------------------------#
    if len(name_check['name']) == 0:
        # Delete the temporary / empty folder.
        leftover = tempdir[:-1] if dict_local['zip'] else dir_name[:-1]
        if os.path.exists(leftover):
            shutil.rmtree(leftover)
        return 'NO FILES WERE DOWNLOADED'

    #----------Status Note---------#
    if dict_local['status_note']:
        print('.....')
        print('Creating: "status_note.txt" >>')
        self.f_status_note(name_total, name_check, dir_name)
        print('Created>>')
    #----------ZIP CREATE----------#
    if dict_local['zip']:
        zip_name = self.f_name(kwargs['id'], '.zip', dict_local['path'])
        try:
            # The archive is closed even when zipping fails half-way.
            with zipfile.ZipFile(dict_local['path'] + zip_name, 'w', zipfile.ZIP_DEFLATED, allowZip64=True) as ziph:
                self.f_zipdir(dir_name, ziph, zip_name)
        finally:
            # Delete the temporary folder whether or not zipping succeeded.
            if os.path.exists(tempdir[:-1]):
                shutil.rmtree(tempdir[:-1])
    #------------------------------#
    print('.....')
    return 'DOWNLOAD FINISHED'
952 961
def download_files_advance(self, id_or_name, processes=1, path=os.path.expanduser("~"), **kwargs):
    '''
    PURPOSE:
        Advanced custom function for downloading the existing files of one
        or more datasets.

    AVAILABLE PARAMETERS:
        SEE: "GUIA DE SCRIPT.pdf"

    STRUCTURE:
        <access_name>.download_files_advance(id_or_name= <class 'str' or 'list'>, param_1 = <class 'param_1'>, ...)

    RETURNS:
        An 'ERROR:: ...' string when an argument is invalid; otherwise
        whatever logic_download.dump_things_change returns.
    '''
    #------------------ PATH ----------------------#
    if not isinstance(path, str):
        return 'ERROR:: "path" must be: <class "str">'
    if not os.path.isdir(path):
        return 'ERROR:: "path" does not exist'
    if not path.endswith(os.sep):
        path = path + os.sep
    # Probe write permission by creating and removing a throw-away file.
    test_txt = path + datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f")+'.txt'
    try:
        with open(test_txt, 'w'):
            pass
        os.remove(test_txt)
    except (IOError, OSError):
        return 'ERROR:: Access denied, you are not authorized to write files: "%s"' % (path)

    #------------------ PROCESSES -----------------#
    if not isinstance(processes, int):
        return 'ERROR:: "processes" must be: <class "int">'

    #------------------ ID OR NAME ----------------#
    if isinstance(id_or_name, str):
        id_or_name = [id_or_name]
    elif isinstance(id_or_name, list):
        id_or_name = list(map(str, id_or_name))
    else:
        return 'ERROR:: dataset "id_or_name" must be: <class "str" or "list">'
    #----------------------------------------------#
    # Options handed to logic_download.dump_things_change. Only the keys set
    # here are sent; other command-line style options ('--all', '--gzip',
    # '--output', '--max-records', ...) are intentionally left out.
    arguments = {
        '--apikey': self.Authorization,
        '--ckan-user': None,
        '--config': None,
        '--datapackages': path,
        '--datastore-fields': False,
        '--get-request': False,
        '--insecure': not self.verify,
        # NOTE(review): machine-specific absolute path; presumably only
        # valid on the original author's host -- confirm how the log is used.
        '--log': '/home/soporte/DUMP/download.txt',
        '--processes': str(processes),
        '--quiet': False,
        '--remote': self.url,
        '--worker': False,
        'ID_OR_NAME': id_or_name,
        'datasets': True,
        'dump': True,
    }
    return logic_download.dump_things_change(self.ckan, 'datasets', arguments, **kwargs)
General Comments 0
You need to be logged in to leave comments. Login now