v2.9.2 :: Add 'download_files_advance' API function
eynilupu
r12:76348ccca4d9
@@ -0,0 +1,228
1 #from ckanapi.datapackage import populate_schema_from_datastore
2 from ckanapi.cli import workers, dump
3 from ckanapi.cli.utils import pretty_json, completion_stats, compact_json, quiet_int_pipe
4 from datetime import datetime
5 import sys
6 import json
7 import os
8 import requests
9 import six
10
11 if sys.version_info.major == 3:
12 from urllib.parse import urlparse
13 else:
14 from urlparse import urlparse  # import the function itself so the urlparse(...) calls below also work on Python 2
15
16 DL_CHUNK_SIZE = 100 * 1024
17
18 print()
19
20 def dump_things_change(ckan, thing, arguments, worker_pool=None, stdout=None, stderr=None, **kwargs):
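    # Dump the metadata of the requested datasets through a pool of worker
    # subprocesses (reusing the ckanapi dump machinery imported above) and, when
    # --datapackages is given, build a datapackage directory for each record via
    # create_datapackage_change() instead of writing JSON lines to stdout.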
21 if worker_pool is None:
22 worker_pool = workers.worker_pool
23 if stdout is None:
24 stdout = getattr(sys.__stdout__, 'buffer', sys.__stdout__)
25 if stderr is None:
26 stderr = getattr(sys.stderr, 'buffer', sys.stderr)
27
28 if arguments['--worker']:
29 return dump.dump_things_worker(ckan, thing, arguments)
30 '''
31 log = None
32 if arguments['--log']:
33 log = open(arguments['--log'], 'a')
34 '''
35 jsonl_output = stdout
36 if arguments['--datapackages']:
37 jsonl_output = open(os.devnull, 'wb')
38
39 names = arguments['ID_OR_NAME']
40
41 if names and isinstance(names[0], dict):
42 names = [rec.get('name',rec.get('id')) for rec in names]
43 '''
44 if arguments['--datapackages']:
45 arguments['--datastore-fields'] = True
46 '''
47 #----------------------------#
48 filtered_urls = {}
49 for name in names:
50 try:
51 response = getattr(ckan.action, 'url_resources')(id=name, **kwargs)
52 except:
53 _, exc_value, _ = sys.exc_info()
54 return exc_value
55 filtered_urls[name] = response
56 #----------------------------#
57
58 cmd = dump._worker_command_line(thing, arguments)
59 processes = int(arguments['--processes'])
60 if hasattr(ckan, 'parallel_limit'):
61 processes = min(processes, ckan.parallel_limit)
62 stats = completion_stats(processes)
63 pool = worker_pool(cmd, processes, enumerate(compact_json(n) + b'\n' for n in names))
64
65 results = {}
66 expecting_number = 0
67 with quiet_int_pipe() as errors:
68 for job_ids, finished, result in pool:
69 if not result:
70 return 1
71 timestamp, error, record = json.loads(result.decode('utf-8'))
72 results[finished] = record
73
74 if not arguments['--quiet']:
75 stderr.write('** Finished: {0} | Job IDs: {1} | Next Report: {2} | Error: {3} | Dataset Name: {4}\n'.format(
76 finished,
77 job_ids,
78 next(stats),
79 error,
80 record.get('name', '') if record else '',
81 ).encode('utf-8'))
82 '''
83 if log:
84 log.write(compact_json([
85 timestamp,
86 finished,
87 error,
88 record.get('name', '') if record else None,
89 ]) + b'\n')
90 '''
91 datapackages_path = arguments['--datapackages']
92 if datapackages_path:
93 create_datapackage_change(record, filtered_urls[record.get('name', '')], datapackages_path, stderr, arguments['--apikey'], arguments['--remote'], arguments['--insecure'])
94 while expecting_number in results:
95 record = results.pop(expecting_number)
96 if record:
97 jsonl_output.write(compact_json(record, sort_keys=True) + b'\n')
98 expecting_number += 1
99 if 'pipe' in errors:
100 return 1
101 if 'interrupt' in errors:
102 return 2
103
104 def create_datapackage_change(record, filtered_url, base_path, stderr, apikey, host_url, insecure):
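    # Build a datapackage for a single dataset record: create <name>/data/, download
    # every resource whose {'name', 'url'} pair appears in filtered_url (skipping
    # 'API' formats and empty URLs) and write datapackage.json alongside the data.
    # Returns (datapackage_dir, datapackage, json_path).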
105 resource_formats_to_ignore = ['API', 'api']
106 #----------------------------------------#
107 datapackage_dir = name_no_repetition(record.get('name', ''), base_path)
108 #----------------------------------------#
109 os.makedirs(os.path.join(datapackage_dir, 'data'))
110 record['path'] = datapackage_dir
111
112 ckan_resources = []
113 for resource in record.get('resources', []):
114 if resource['format'] in resource_formats_to_ignore:
115 continue
116
117 if not {'name': resource['name'], 'url': resource['url']} in filtered_url:
118 continue
119
120 if len(resource['url']) == 0:
121 continue
122
123 filename = name_no_repetition(resource['name'], os.path.join(datapackage_dir, 'data'), 'resource')
124 resource['path'] = os.path.join(datapackage_dir, 'data', filename)
125
126 cres = create_resource_change(resource, stderr, apikey, host_url, insecure)
127 if not cres:
128 continue
129 '''
130 #----------------------------------------#
131 dres = {'path': os.path.join('data', filename),
132 'description': cres.get('description', ''),
133 'format': cres.get('format', ''),
134 'name': cres.get('name', ''),
135 'title': cres.get('name', '').title()}
136 #----------------------------------------#
137 populate_schema_from_datastore(cres, dres)
138 '''
139 ckan_resources.append(resource)
140
141 dataset = dict(record, resources=ckan_resources)
142 datapackage = dataset_to_datapackage_change(dataset)
143
144 json_path = os.path.join(datapackage_dir, 'datapackage.json')
145 with open(json_path, 'wb') as out:
146 out.write(pretty_json(datapackage))
147
148 return datapackage_dir, datapackage, json_path
149
150 def create_resource_change(resource, stderr, apikey, host_url, insecure):
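    # Download one resource to resource['path'] in DL_CHUNK_SIZE chunks, first
    # rewriting its URL onto host_url when the record still points at www.igp.gob.pe
    # but a different host is being used. Returns False on an HTTP error status;
    # connection/request problems are only reported on stderr and the resource dict
    # is still returned.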
151 # ---------- REPLACE URL --------- #
152 if urlparse(host_url).netloc != 'www.igp.gob.pe' and urlparse(resource['url']).netloc == 'www.igp.gob.pe':
153 resource['url'] = resource['url'].replace(urlparse(resource['url']).scheme + '://' + urlparse(resource['url']).netloc,
154 urlparse(host_url).scheme + '://' + urlparse(host_url).netloc)
155 #----------------------------------#
156 try:
157 r = requests.get(resource['url'], headers={'Authorization': apikey}, stream=True, verify=not insecure)
158 #---------------------------------------#
159 try:
160 r.raise_for_status()
161 except requests.exceptions.HTTPError as e:
162 return False
163 #---------------------------------------#
164 with open(resource['path'], 'wb') as f:
165 for chunk in r.iter_content(chunk_size=DL_CHUNK_SIZE):
166 if chunk:
167 f.write(chunk)
168
169 except requests.ConnectionError:
170 stderr.write('URL {0} refused connection. The resource will not be downloaded\n'.format(resource['url']).encode('utf-8'))
171 except requests.exceptions.RequestException as e:
172 stderr.write('{0}\n'.format(str(e.args[0]) if len(e.args) > 0 else '').encode('utf-8'))
173 except Exception as e:
174 stderr.write('{0}'.format(str(e.args[0]) if len(e.args) > 0 else '').encode('utf-8'))
175 return resource
176
177 def dataset_to_datapackage_change(dataset_dict):
178 dp = {'name': dataset_dict['name'],
179 'id': dataset_dict['id'],
180 'path': dataset_dict['path'],
181 'last_update': datetime.strptime(dataset_dict['metadata_modified'], "%Y-%m-%dT%H:%M:%S.%f").strftime("%d-%b-%Y %I.%M %p")}
182
183 resources = dataset_dict.get('resources')
184 if resources:
185 dp['resources'] = [convert_to_datapackage_resource_change(r)
186 for r in resources]
187 return dp
188
189 def convert_to_datapackage_resource_change(resource_dict):
190 resource = {}
191
192 if resource_dict.get('id'):
193 resource['id'] = resource_dict['id']
194
195 if resource_dict.get('name'):
196 resource['name'] = resource_dict['name']
197
198 if resource_dict.get('path'):
199 resource['path'] = resource_dict['path']
200
201 schema = resource_dict.get('schema')
202 if isinstance(schema, six.string_types):
203 try:
204 resource['schema'] = json.loads(schema)
205 except ValueError:
206 resource['schema'] = schema
207 elif isinstance(schema, dict):
208 resource['schema'] = schema
209
210 return resource
211
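# For reference, the datapackage.json produced by the two functions above has roughly
# this shape (illustrative values only, not taken from a real dataset):
#
#   {
#     "name": "example-dataset",
#     "id": "11111111-2222-3333-4444-555555555555",
#     "path": "/output/example-dataset",
#     "last_update": "01-Jan-2021 10.30 AM",
#     "resources": [
#       {"name": "file01.csv", "path": "/output/example-dataset/data/file01.csv"}
#     ]
#   }
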
212 def name_no_repetition(name, dir, option=''):
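    # Return a name that does not collide with an existing entry in `dir`, prefixing
    # '(1)', '(2)', ... until a free name is found. With option='resource' only the
    # bare file name is returned; otherwise the full path is returned.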
213 count = 0
214 while True:
215 count = count + 1
216 if not os.path.exists(os.path.join(dir, name)):
217 if option == 'resource':
218 return name
219 else:
220 return os.path.join(dir, name)
221
222 elif not os.path.exists(os.path.join(dir, '('+str(count)+')'+name)):
223 if option == 'resource':
224 return '('+str(count)+')'+name
225 else:
226 return os.path.join(dir, '('+str(count)+')'+name)
227 else:
228 pass
\ No newline at end of file
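dump_things_change() is written against a docopt-style arguments dict, so a direct call might look roughly like the sketch below. The RemoteCKAN instance, URL, API key and flag values are placeholders, and additional keys may be needed by dump._worker_command_line(); this is an illustrative sketch, not the module's documented entry point.

    from ckanapi import RemoteCKAN
    from CKAN_JRO import logic_download   # assuming this new file is CKAN_JRO/logic_download.py

    ckan = RemoteCKAN('http://demo.example.com', apikey='#########')   # placeholder credentials
    arguments = {                                 # docopt-style keys read by dump_things_change()
        'ID_OR_NAME': ['example-dataset'],        # dataset name(s) or id(s) to dump
        '--worker': False,
        '--processes': '1',
        '--quiet': False,
        '--log': None,
        '--datapackages': '/tmp/datapackages',    # output directory for the datapackage dirs
        '--datastore-fields': False,
        '--apikey': '#########',
        '--remote': 'http://demo.example.com',
        '--insecure': False,
    }
    logic_download.dump_things_change(ckan, 'datasets', arguments)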
NO CONTENT: modified file, binary diff hidden
@@ -1,935 +1,1022
1 from ckanapi import RemoteCKAN
1 from ckanapi import RemoteCKAN
2 from datetime import datetime
2 from datetime import datetime
3 from tqdm import tqdm
3 from tqdm import tqdm
4 from CKAN_JRO import logic_download
4 #from ckanapi.errors import NotAuthorized, NotFound, ValidationError, SearchQueryError, SearchError, CKANAPIError, ServerIncompatibleError
5 #from ckanapi.errors import NotAuthorized, NotFound, ValidationError, SearchQueryError, SearchError, CKANAPIError, ServerIncompatibleError
5 import sys
6 import sys
6 import platform
7 import platform
7 import os
8 import os
8 import tempfile
9 import tempfile
9 import shutil
10 import shutil
10 import zipfile
11 import zipfile
11 import concurrent.futures
12 import concurrent.futures
12 import requests
13 import requests
13 import json
14 import json
14 #import pathlib
15 #import pathlib
15 import uuid
16 import uuid
16
17
17 if sys.version_info.major == 3:
18 if sys.version_info.major == 3:
18 from urllib.parse import urlparse
19 from urllib.parse import urlparse
19 else:
20 else:
20 import urlparse
21 import urlparse
21
22
22 class JROAPI():
23 class JROAPI():
23 """
24 """
24 FINALIDAD:
25 FINALIDAD:
25 Script para administrar y obtener la data del repositorio por medio de APIs.
26 Script para administrar y obtener la data del repositorio por medio de APIs.
26
27
27 REQUISITIOS PREVIOS:
28 REQUISITIOS PREVIOS:
28 - Paso 1: Tener "pip [Python 2]" o "pip3 [Python 3]" instalado:
29 - Paso 1: Tener "pip [Python 2]" o "pip3 [Python 3]" instalado:
29 - Paso 2: Instalar lo siguiente como admininstrador:
30 - Paso 2: Instalar lo siguiente como admininstrador:
30 En Python 2
31 En Python 2
31 - pip install ckanapi==4.5
32 - pip install ckanapi==4.5
32 - pip install requests
33 - pip install requests
33 - pip install futures
34 - pip install futures
34 - pip install tqdm
35 - pip install tqdm
35 En Python > 3
36 En Python > 3
36 - pip3 install ckanapi==4.5
37 - pip3 install ckanapi==4.5
37 - pip3 install requests
38 - pip3 install requests
38 - pip3 install tqdm
39 - pip3 install tqdm
39
40
40 FUNCIONES DISPONIBLES:
41 FUNCIONES DISPONIBLES:
41 - action
42 - action
42 - upload_file
43 - upload_file
43 - upload_multiple_files
44 - upload_multiple_files
44 - upload_multiple_files_advance
45 - upload_multiple_files_advance
45 - show
46 - show
46 - search
47 - search
47 - create
48 - create
48 - patch
49 - patch
49 - delete
50 - delete
50 - download_files
51 - download_files
51
52
52 EJEMPLOS:
53 EJEMPLOS:
53 #1:
54 #1:
54 with JROAPI('http://demo.example.com', Authorization='#########') as <access_name>:
55 with JROAPI('http://demo.example.com', Authorization='#########') as <access_name>:
55 ... some operation(s) ...
56 ... some operation(s) ...
56 #2:
57 #2:
57 <access_name> = JROAPI('http://example.com', Authorization='#########')
58 <access_name> = JROAPI('http://example.com', Authorization='#########')
58 ... some operation(s) ...
59 ... some operation(s) ...
59 <access_name>.ckan.close()
60 <access_name>.ckan.close()
60
61
61 REPORTAR ALGUN PROBLEMA:
62 REPORTAR ALGUN PROBLEMA:
62 Debe enviar un correo a eynilupu@igp.gob.pe detallando los siguientes pasos:
63 Debe enviar un correo a eynilupu@igp.gob.pe detallando los siguientes pasos:
63 1) Correo para contactarlo
64 1) Correo para contactarlo
64 2) Descripcion del problema
65 2) Descripcion del problema
65 3) ¿En que paso o seccion encontro el problema?
66 3) ¿En que paso o seccion encontro el problema?
66 4) ¿Cual era el resultado que usted esperaba?
67 4) ¿Cual era el resultado que usted esperaba?
67 """
68 """
68 def __init__(self, url, Authorization=None, secure=True):
69 def __init__(self, url, Authorization=None, secure=True):
69 #-------- Check Secure -------#
70 #-------- Check Secure -------#
70 self.verify = secure
71 self.verify = secure
71 if not secure and isinstance(secure, bool):
72 if not secure and isinstance(secure, bool):
72 session = requests.Session()
73 session = requests.Session()
73 session.verify = False
74 session.verify = False
74 else:
75 else:
75 session = None
76 session = None
76 #------------------------------#
77 #------------------------------#
77 self.url = url
78 self.url = url
78 ua = 'CKAN_JRO/2.9.2 (+'+str(self.url)+')'
79 ua = 'CKAN_JRO/2.9.2 (+'+str(self.url)+')'
79 #ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
80 #ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
80 self.ckan = RemoteCKAN(self.url, apikey=Authorization, user_agent=ua, session=session)
81 self.ckan = RemoteCKAN(self.url, apikey=Authorization, user_agent=ua, session=session)
81 #self.ckan = RemoteCKAN(self.url, apikey=Authorization)
82 #self.ckan = RemoteCKAN(self.url, apikey=Authorization)
82 self.Authorization = Authorization
83 self.Authorization = Authorization
83 # Change for --> self.separator = os.sep
84 # Change for --> self.separator = os.sep
84 if platform.system() == 'Windows':
85 if platform.system() == 'Windows':
85 self.separator = '\\'
86 self.separator = '\\'
86 else:
87 else:
87 self.separator = '/'
88 self.separator = '/'
88
89
89 self.chunk_size = 1024
90 self.chunk_size = 1024
90 self.list = []
91 self.list = []
91 self.dict = {}
92 self.dict = {}
92 self.str = ''
93 self.str = ''
93 self.check = 1
94 self.check = 1
94 self.cont = 0
95 self.cont = 0
95
96
96 def __enter__(self):
97 def __enter__(self):
97 return self
98 return self
98
99
99 def __exit__(self, *args):
100 def __exit__(self, *args):
100 self.ckan.close()
101 self.ckan.close()
101
102
102 def action(self, action, **kwargs):
103 def action(self, action, **kwargs):
103 """
104 """
104 FINALIDAD:
105 FINALIDAD:
105 Funcion para llamar a las APIs disponibles
106 Funcion para llamar a las APIs disponibles
106
107
107 APIs DISPONIBLES:
108 APIs DISPONIBLES:
108 CONSULTAR: "GUIA DE SCRIPT.pdf"
109 CONSULTAR: "GUIA DE SCRIPT.pdf"
109
110
110 EJEMPLO:
111 EJEMPLO:
111 <access_name>.action(<consuming API>, param_1 = <class 'param_1'>, ...)
112 <access_name>.action(<consuming API>, param_1 = <class 'param_1'>, ...)
112 """
113 """
113 #--------------- CASE: PACKAGE SEARCH ---------------#
114 #--------------- CASE: PACKAGE SEARCH ---------------#
114 if kwargs is not None:
115 if kwargs is not None:
115 if action == 'package_search':
116 if action == 'package_search':
116 self.list = ['facet_mincount', 'facet_limit', 'facet_field']
117 self.list = ['facet_mincount', 'facet_limit', 'facet_field']
117 for facet in self.list:
118 for facet in self.list:
118 if facet in kwargs:
119 if facet in kwargs:
119 kwargs[facet.replace('_', '.')] = kwargs[facet]
120 kwargs[facet.replace('_', '.')] = kwargs[facet]
120 kwargs.pop(facet)
121 kwargs.pop(facet)
121 #----------------------------------------------------#
122 #----------------------------------------------------#
122 try:
123 try:
123 return getattr(self.ckan.action, action)(**kwargs)
124 return getattr(self.ckan.action, action)(**kwargs)
124 except:
125 except:
125 _, exc_value, _ = sys.exc_info()
126 _, exc_value, _ = sys.exc_info()
126 return exc_value
127 return exc_value
127
128
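    # e.g. (hypothetical instance and values):
    #   api = JROAPI('http://demo.example.com', Authorization='#########')
    #   api.action('package_search', q='example', facet_limit=5)
    #   api.action('package_show', id='example-dataset')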
128 def upload_file(self, dataset_id, file_path, file_date, file_type, **kwargs):
129 def upload_file(self, dataset_id, file_path, file_date, file_type, **kwargs):
129 # Agregar si es interruptido por teclado
130 # Agregar si es interruptido por teclado
130 '''
131 '''
131 FINALIDAD:
132 FINALIDAD:
132 Funcion para subir un unico archivo al repositorio del ROJ.
133 Funcion para subir un unico archivo al repositorio del ROJ.
133
134
134 PARAMETROS DISPONIBLES:
135 PARAMETROS DISPONIBLES:
135 CONSULTAR: "GUIA DE SCRIPT.pdf"
136 CONSULTAR: "GUIA DE SCRIPT.pdf"
136
137
137 ESTRUCTURA:
138 ESTRUCTURA:
138 <access_name>.upload_file(dataset_id = <class 'str'>, file_date = <class 'str'>, file_path = <class 'str'>, file_type = <class 'str'>, param_1 = <class 'param_1'>, ...)
139 <access_name>.upload_file(dataset_id = <class 'str'>, file_date = <class 'str'>, file_path = <class 'str'>, file_type = <class 'str'>, param_1 = <class 'param_1'>, ...)
139 '''
140 '''
140 self.list = ['package_id', 'upload', 'voc_file_type', 'name'] #file_date
141 self.list = ['package_id', 'upload', 'voc_file_type', 'name'] #file_date
141 for key1, value1 in kwargs.items():
142 for key1, value1 in kwargs.items():
142 if not key1 in self.list:
143 if not key1 in self.list:
143 self.dict[key1] = value1
144 self.dict[key1] = value1
144
145
145 #---------------------------#
146 #---------------------------#
146 if not 'others' in kwargs:
147 if not 'others' in kwargs:
147 self.dict['others'] = ''
148 self.dict['others'] = ''
148 else:
149 else:
149 if isinstance(kwargs['others'], list):
150 if isinstance(kwargs['others'], list):
150 self.dict['others'] = json.dumps(kwargs['others'])
151 self.dict['others'] = json.dumps(kwargs['others'])
151 #---------------------------#
152 #---------------------------#
152
153
153 if not os.path.isfile(file_path):
154 if not os.path.isfile(file_path):
154 return 'File "%s" does not exist' % (file_path)
155 return 'File "%s" does not exist' % (file_path)
155
156
156 #if not 'format' in self.dict:
157 #if not 'format' in self.dict:
157 # self.str = ''.join(pathlib.Path(file_path).suffixes)
158 # self.str = ''.join(pathlib.Path(file_path).suffixes)
158 # if len(self.str) > 0:
159 # if len(self.str) > 0:
159 # self.dict['format'] = self.str.upper()[1:]
160 # self.dict['format'] = self.str.upper()[1:]
160
161
161 #-------------------------PACKAGE SHOW-----------------------#
162 #-------------------------PACKAGE SHOW-----------------------#
162 try:
163 try:
163 dataset_show = getattr(self.ckan.action, 'package_show')(id=dataset_id)['resources']
164 dataset_show = getattr(self.ckan.action, 'package_show')(id=dataset_id)['resources']
164 except:
165 except:
165 _, exc_value, _ = sys.exc_info()
166 _, exc_value, _ = sys.exc_info()
166 print('ERROR obtaining metadata dataset:: Use the "print" for more information')
167 print('ERROR obtaining metadata dataset:: Use the "print" for more information')
167 return exc_value
168 return exc_value
168
169
169 resources_name = []
170 resources_name = []
170 for u in dataset_show:
171 for u in dataset_show:
171 resources_name.append(u['name'].lower())
172 resources_name.append(u['name'].lower())
172
173
173 if os.path.basename(file_path).lower() in resources_name:
174 if os.path.basename(file_path).lower() in resources_name:
174 return 'ERROR:: "%s" file already exist in this dataset' % (os.path.basename(file_path))
175 return 'ERROR:: "%s" file already exist in this dataset' % (os.path.basename(file_path))
175 #------------------------------------------------------------#
176 #------------------------------------------------------------#
176
177
177 try:
178 try:
178 return getattr(self.ckan.action, 'resource_create')(package_id=dataset_id, file_date=file_date, upload=open(file_path, 'rb'), voc_file_type=file_type, name=os.path.basename(file_path), **self.dict)
179 return getattr(self.ckan.action, 'resource_create')(package_id=dataset_id, file_date=file_date, upload=open(file_path, 'rb'), voc_file_type=file_type, name=os.path.basename(file_path), **self.dict)
179 except:
180 except:
180 _, exc_value, _ = sys.exc_info()
181 _, exc_value, _ = sys.exc_info()
181 return exc_value
182 return exc_value
182
183
183 def upload_multiple_files_advance(self, dataset_id, path_files, file_date, file_type, max_size=100, max_count=500, ignore_repetition=False, **kwargs):
184 def upload_multiple_files_advance(self, dataset_id, path_files, file_date, file_type, max_size=100, max_count=500, ignore_repetition=False, **kwargs):
184 # Agregar si es interruptido por teclado
185 # Agregar si es interruptido por teclado
185 '''
186 '''
186 FINALIDAD:
187 FINALIDAD:
187 Funcion para subir multiples archivos al repositorio del ROJ.
188 Funcion para subir multiples archivos al repositorio del ROJ.
188
189
189 PARAMETROS DISPONIBLES:
190 PARAMETROS DISPONIBLES:
190 CONSULTAR: "GUIA DE SCRIPT.pdf"
191 CONSULTAR: "GUIA DE SCRIPT.pdf"
191
192
192 ESTRUCTURA:
193 ESTRUCTURA:
193 <access_name>.upload_multiple_files_advance(dataset_id = <class 'str'>, path_files = <class 'list of strings'>, file_date = <class 'str'>, file_type = <class 'str'>, param_1 = <class 'param_1'>, ...)
194 <access_name>.upload_multiple_files_advance(dataset_id = <class 'str'>, path_files = <class 'list of strings'>, file_date = <class 'str'>, file_type = <class 'str'>, param_1 = <class 'param_1'>, ...)
194 '''
195 '''
195 #-------------------------PACKAGE SHOW-----------------------#
196 #-------------------------PACKAGE SHOW-----------------------#
196 try:
197 try:
197 dataset_show = getattr(self.ckan.action, 'package_show')(id=dataset_id)['resources']
198 dataset_show = getattr(self.ckan.action, 'package_show')(id=dataset_id)['resources']
198 except:
199 except:
199 _, exc_value, _ = sys.exc_info()
200 _, exc_value, _ = sys.exc_info()
200 print('ERROR obtaining metadata dataset:: Use the "print" for more information')
201 print('ERROR obtaining metadata dataset:: Use the "print" for more information')
201 return exc_value
202 return exc_value
202 #------------------------------------------------------------#
203 #------------------------------------------------------------#
203 resources_name = []
204 resources_name = []
204 for u in dataset_show:
205 for u in dataset_show:
205 resources_name.append(u['name'].lower())
206 resources_name.append(u['name'].lower())
206 #------------------------------------------------------------#
207 #------------------------------------------------------------#
207 self.list = ['package_id', 'upload', 'voc_file_type', 'name']
208 self.list = ['package_id', 'upload', 'voc_file_type', 'name']
208 for key1, value1 in kwargs.items():
209 for key1, value1 in kwargs.items():
209 if not key1 in self.list:
210 if not key1 in self.list:
210 self.dict[key1] = value1
211 self.dict[key1] = value1
211 #------------------------------------------------------------#
212 #------------------------------------------------------------#
212 if not 'others' in kwargs:
213 if not 'others' in kwargs:
213 self.dict['others'] = ''
214 self.dict['others'] = ''
214 else:
215 else:
215 if isinstance(kwargs['others'], list):
216 if isinstance(kwargs['others'], list):
216 self.dict['others'] = json.dumps(kwargs['others'])
217 self.dict['others'] = json.dumps(kwargs['others'])
217 #------------------------------------------------------------#
218 #------------------------------------------------------------#
218 total_list = []
219 total_list = []
219 #---------------CASO : "path" or "path_list"-----------------#
220 #---------------CASO : "path" or "path_list"-----------------#
220 if type(path_files) is list:
221 if type(path_files) is list:
221 if len(path_files) != 0:
222 if len(path_files) != 0:
222 path_files.sort()
223 path_files.sort()
223 for u in path_files:
224 for u in path_files:
224 if os.path.isfile(u):
225 if os.path.isfile(u):
225 if os.path.basename(u).lower() in resources_name:
226 if os.path.basename(u).lower() in resources_name:
226 if not ignore_repetition:
227 if not ignore_repetition:
227 return 'ERROR:: "%s" file already exist in this dataset' % (os.path.basename(u))
228 return 'ERROR:: "%s" file already exist in this dataset' % (os.path.basename(u))
228 print('WARRING:: "'+ str(os.path.basename(u)) +'" file was ignored because already exist in this dataset')
228 print('WARNING:: "'+ str(os.path.basename(u)) +'" file was ignored because it already exists in this dataset')
229 print('WARNING:: "'+ str(os.path.basename(u)) +'" file was ignored because it already exists in this dataset')
230 else:
230 total_list.append({'name':os.path.basename(u), 'size': os.stat(u).st_size, 'upload':open(u, 'rb')})
231 total_list.append({'name':os.path.basename(u), 'size': os.stat(u).st_size, 'upload':open(u, 'rb')})
231 else:
232 else:
232 return 'File "%s" does not exist' % (u)
233 return 'File "%s" does not exist' % (u)
233 else:
234 else:
234 return 'ERROR:: "path_list is empty"'
235 return 'ERROR:: "path_list is empty"'
235
236
236 elif type(path_files) is str:
237 elif type(path_files) is str:
237 if os.path.isdir(path_files):
238 if os.path.isdir(path_files):
238 path_order = [f for f in os.listdir(path_files) if os.path.isfile(os.path.join(path_files, f))]
239 path_order = [f for f in os.listdir(path_files) if os.path.isfile(os.path.join(path_files, f))]
239 path_order.sort()
240 path_order.sort()
240 if path_order:
241 if path_order:
241 for name in path_order:
242 for name in path_order:
242 if name.lower() in resources_name:
243 if name.lower() in resources_name:
243 if not ignore_repetition:
244 if not ignore_repetition:
244 return 'ERROR:: "%s" file already exist in this dataset' % (name)
245 return 'ERROR:: "%s" file already exist in this dataset' % (name)
245 print('WARNING:: "'+ name +'" file was ignored because it already exists in this dataset')
246 print('WARNING:: "'+ name +'" file was ignored because it already exists in this dataset')
246 else:
247 else:
247 total_list.append({'name':name, 'size': os.stat(os.path.join(path_files, name)).st_size, 'upload':open(os.path.join(path_files, name), 'rb')})
248 total_list.append({'name':name, 'size': os.stat(os.path.join(path_files, name)).st_size, 'upload':open(os.path.join(path_files, name), 'rb')})
248 else:
249 else:
249 return "ERROR:: There aren't files in this directory"
250 return "ERROR:: There aren't files in this directory"
250 else:
251 else:
251 return 'ERROR:: Directory "%s" does not exist' % (path_files)
252 return 'ERROR:: Directory "%s" does not exist' % (path_files)
252 else:
253 else:
253 return 'ERROR:: "path_files" must be a str or list'
254 return 'ERROR:: "path_files" must be a str or list'
254 #------------------------------------------------------------#
255 #------------------------------------------------------------#
255 try:
256 try:
256 uuid.UUID(str(dataset_id), version=4)
257 uuid.UUID(str(dataset_id), version=4)
257 package_id_or_name = '"id": "' + str(dataset_id) + '"'
258 package_id_or_name = '"id": "' + str(dataset_id) + '"'
258 except ValueError:
259 except ValueError:
259 package_id_or_name = '"name": "' + str(dataset_id) + '"'
260 package_id_or_name = '"name": "' + str(dataset_id) + '"'
260 #------------------------------------------------------------#
261 #------------------------------------------------------------#
261 blocks = [[]]
262 blocks = [[]]
262 size_file = 0
263 size_file = 0
263 count_file = 0
264 count_file = 0
264 inter_num = 0
265 inter_num = 0
265 for value in total_list:
266 for value in total_list:
266 if value['size'] > 1024 * 1024 * float(max_size):
267 if value['size'] > 1024 * 1024 * float(max_size):
267 return 'ERROR:: The size of the "%s" file is %sMB aprox, please change "max_size" value' % (value['name'], str(round(value['size']/(1024 * 1024), 2)))
268 return 'ERROR:: The size of the "%s" file is %sMB aprox, please change "max_size" value' % (value['name'], str(round(value['size']/(1024 * 1024), 2)))
268 if not 1 <= int(max_count) <= 999:
269 if not 1 <= int(max_count) <= 999:
269 return 'ERROR:: The count of the number of files must be between 1 and 999, please change "max_count" value'
270 return 'ERROR:: The count of the number of files must be between 1 and 999, please change "max_count" value'
270
271
271 size_file = size_file + value['size']
272 size_file = size_file + value['size']
272 count_file = count_file + 1
273 count_file = count_file + 1
273 if size_file <= 1024 * 1024 * float(max_size) and count_file <= int(max_count):
274 if size_file <= 1024 * 1024 * float(max_size) and count_file <= int(max_count):
274 del value['size']
275 del value['size']
275 blocks[inter_num].append(value)
276 blocks[inter_num].append(value)
276 else:
277 else:
277 inter_num = inter_num + 1
278 inter_num = inter_num + 1
278 size_file = value['size']
279 size_file = value['size']
279 count_file = 1
280 count_file = 1
280 blocks.append([])
281 blocks.append([])
281 del value['size']
282 del value['size']
282 blocks[inter_num].append(value)
283 blocks[inter_num].append(value)
283 #------------------------------------------------------------#
284 #------------------------------------------------------------#
284 if len(blocks[0]) > 0:
285 if len(blocks[0]) > 0:
285 print('BLOCK(S) IN TOTAL:: {}'.format(len(blocks)))
286 print('BLOCK(S) IN TOTAL:: {}'.format(len(blocks)))
286 for count1, block in enumerate(blocks):
287 for count1, block in enumerate(blocks):
287 print('---- BLOCK N°{} ----'.format(count1 + 1))
288 print('---- BLOCK N°{} ----'.format(count1 + 1))
288 resource_extend = []
289 resource_extend = []
289 files_dict = {}
290 files_dict = {}
290 for count2, value2 in enumerate(block):
291 for count2, value2 in enumerate(block):
291 value2['file_date'] = file_date
292 value2['file_date'] = file_date
292 value2['voc_file_type'] = file_type
293 value2['voc_file_type'] = file_type
293 value2.update(self.dict)
294 value2.update(self.dict)
294
295
295 #if not 'format' in value2:
296 #if not 'format' in value2:
296 # format = ''.join(pathlib.Path(value2['name']).suffixes)
297 # format = ''.join(pathlib.Path(value2['name']).suffixes)
297 # if len(format) > 0:
298 # if len(format) > 0:
298 # value2['format'] = format.upper()[1:]
299 # value2['format'] = format.upper()[1:]
299
300
300 files_dict['update__resources__-'+ str(len(block)-count2) +'__upload'] = (value2['name'], value2['upload'])
301 files_dict['update__resources__-'+ str(len(block)-count2) +'__upload'] = (value2['name'], value2['upload'])
301 del value2['upload']
302 del value2['upload']
302 resource_extend.append(value2)
303 resource_extend.append(value2)
303
304
304 print('BLOCK N°{} :: "{}" file(s) found >> uploading'.format(count1 + 1, len(block)))
305 print('BLOCK N°{} :: "{}" file(s) found >> uploading'.format(count1 + 1, len(block)))
305 try:
306 try:
306 result = self.ckan.call_action(
307 result = self.ckan.call_action(
307 'package_revise',
308 'package_revise',
308 {'match': '{'+ str(package_id_or_name) +'}', 'update__resources__extend': json.dumps(resource_extend)},
309 {'match': '{'+ str(package_id_or_name) +'}', 'update__resources__extend': json.dumps(resource_extend)},
309 files=files_dict
310 files=files_dict
310 )
311 )
311 print('BLOCK N°{} :: Uploaded file(s) successfully'.format(count1 + 1))
312 print('BLOCK N°{} :: Uploaded file(s) successfully'.format(count1 + 1))
312 if len(blocks) == count1 + 1:
313 if len(blocks) == count1 + 1:
313 return result
314 return result
314 except:
315 except:
315 print('ERROR :: Use the "print" for more information')
316 print('ERROR :: Use the "print" for more information')
316 _, exc_value, _ = sys.exc_info()
317 _, exc_value, _ = sys.exc_info()
317 return exc_value
318 return exc_value
318 else:
319 else:
319 return "ERROR:: No file(s) found to upload"
320 return "ERROR:: No file(s) found to upload"
320
321
321 def upload_multiple_files(self, dataset_id, path_files, date_files, type_files, ignore_repetition=False, **kwargs):
322 def upload_multiple_files(self, dataset_id, path_files, date_files, type_files, ignore_repetition=False, **kwargs):
322 # Agregar si es interruptido por teclado
323 # Agregar si es interruptido por teclado
323 '''
324 '''
324 FINALIDAD:
325 FINALIDAD:
325 Funcion para subir multiples archivos al repositorio del ROJ.
326 Funcion para subir multiples archivos al repositorio del ROJ.
326
327
327 PARAMETROS DISPONIBLES:
328 PARAMETROS DISPONIBLES:
328 CONSULTAR: "GUIA DE SCRIPT.pdf"
329 CONSULTAR: "GUIA DE SCRIPT.pdf"
329
330
330 ESTRUCTURA:
331 ESTRUCTURA:
331 <access_name>.upload_multiple_files(dataset_id = <class 'str'>, path_files = <class 'str'> or <class 'list of strings'>, date_files = <class 'str'> or <class 'list of strings'>, type_files = <class 'str'> or <class 'list of strings'>, param_1 = <class 'param_1'>, ...)
332 <access_name>.upload_multiple_files(dataset_id = <class 'str'>, path_files = <class 'str'> or <class 'list of strings'>, date_files = <class 'str'> or <class 'list of strings'>, type_files = <class 'str'> or <class 'list of strings'>, param_1 = <class 'param_1'>, ...)
332 '''
333 '''
333 #-------------------------PACKAGE SHOW-----------------------#
334 #-------------------------PACKAGE SHOW-----------------------#
334 try:
335 try:
335 dataset_show = getattr(self.ckan.action, 'package_show')(id=dataset_id)['resources']
336 dataset_show = getattr(self.ckan.action, 'package_show')(id=dataset_id)['resources']
336 except:
337 except:
337 _, exc_value, _ = sys.exc_info()
338 _, exc_value, _ = sys.exc_info()
338 print('ERROR obtaining metadata dataset:: Use the "print" for more information')
339 print('ERROR obtaining metadata dataset:: Use the "print" for more information')
339 return exc_value
340 return exc_value
340 #------------------------------------------------------------#
341 #------------------------------------------------------------#
341 resources_name = []
342 resources_name = []
342 for u in dataset_show:
343 for u in dataset_show:
343 resources_name.append(u['name'].lower())
344 resources_name.append(u['name'].lower())
344 #------------------------------------------------------------#
345 #------------------------------------------------------------#
345
346
346 params_dict = {'upload':[], 'name':[]}
347 params_dict = {'upload':[], 'name':[]}
347 #if not 'format' in kwargs:
348 #if not 'format' in kwargs:
348 # params_dict.update({'format':[]})
349 # params_dict.update({'format':[]})
349 #---------------CASO : "path" or "path_list"-----------------#
350 #---------------CASO : "path" or "path_list"-----------------#
350 if type(path_files) is list:
351 if type(path_files) is list:
351 if len(path_files) != 0:
352 if len(path_files) != 0:
352 path_files.sort()
353 path_files.sort()
353 for u in path_files:
354 for u in path_files:
354 if os.path.isfile(u):
355 if os.path.isfile(u):
355 if os.path.basename(u).lower() in resources_name:
356 if os.path.basename(u).lower() in resources_name:
356 if not ignore_repetition:
357 if not ignore_repetition:
357 return 'ERROR:: "%s" file already exist in this dataset' % (os.path.basename(u))
358 return 'ERROR:: "%s" file already exist in this dataset' % (os.path.basename(u))
358 print('WARNING:: "'+ str(os.path.basename(u)) +'" file was ignored because it already exists in this dataset')
359 print('WARNING:: "'+ str(os.path.basename(u)) +'" file was ignored because it already exists in this dataset')
359 else:
360 else:
360 params_dict['upload'].append(open(u, 'rb'))
361 params_dict['upload'].append(open(u, 'rb'))
361 params_dict['name'].append(os.path.basename(u))
362 params_dict['name'].append(os.path.basename(u))
362 #if not 'format' in kwargs:
363 #if not 'format' in kwargs:
363 # format = ''.join(pathlib.Path(u).suffixes)
364 # format = ''.join(pathlib.Path(u).suffixes)
364 # if len(format) > 0:
365 # if len(format) > 0:
365 # params_dict['format'].append(format.upper()[1:])
366 # params_dict['format'].append(format.upper()[1:])
366 # else:
367 # else:
367 # params_dict['format'].append('')
368 # params_dict['format'].append('')
368 else:
369 else:
369 return 'File "%s" does not exist' % (u)
370 return 'File "%s" does not exist' % (u)
370 else:
371 else:
371 return 'ERROR:: "path_list is empty"'
372 return 'ERROR:: "path_list is empty"'
372 elif type(path_files) is str:
373 elif type(path_files) is str:
373 if os.path.isdir(path_files):
374 if os.path.isdir(path_files):
374 path_order = [f for f in os.listdir(path_files) if os.path.isfile(os.path.join(path_files, f))]
375 path_order = [f for f in os.listdir(path_files) if os.path.isfile(os.path.join(path_files, f))]
375 path_order.sort()
376 path_order.sort()
376 if path_order:
377 if path_order:
377 for name in path_order:
378 for name in path_order:
378 if name.lower() in resources_name:
379 if name.lower() in resources_name:
379 if not ignore_repetition:
380 if not ignore_repetition:
380 return 'ERROR:: "%s" file already exist in this dataset' % (name)
381 return 'ERROR:: "%s" file already exist in this dataset' % (name)
381 print('WARNING:: "'+ str(name) +'" file was ignored because it already exists in this dataset')
382 print('WARNING:: "'+ str(name) +'" file was ignored because it already exists in this dataset')
382 else:
383 else:
383 params_dict['upload'].append(open(os.path.join(path_files, name), 'rb'))
384 params_dict['upload'].append(open(os.path.join(path_files, name), 'rb'))
384 params_dict['name'].append(name)
385 params_dict['name'].append(name)
385 #if not 'format' in kwargs:
386 #if not 'format' in kwargs:
386 # format = ''.join(pathlib.Path(name).suffixes)
387 # format = ''.join(pathlib.Path(name).suffixes)
387 # if len(format) > 0:
388 # if len(format) > 0:
388 # params_dict['format'].append(format.upper()[1:])
389 # params_dict['format'].append(format.upper()[1:])
389 # else:
390 # else:
390 # params_dict['format'].append('')
391 # params_dict['format'].append('')
391 else:
392 else:
392 return "ERROR:: There aren't files in this directory"
393 return "ERROR:: There aren't files in this directory"
393 else:
394 else:
394 return 'ERROR:: Directory "%s" does not exist' % (path_files)
395 return 'ERROR:: Directory "%s" does not exist' % (path_files)
395 else:
396 else:
396 return 'ERROR:: "path_files" must be a str or list'
397 return 'ERROR:: "path_files" must be a str or list'
397 #------------------------------------------------------------#
398 #------------------------------------------------------------#
398 params_no_dict = {'package_id': dataset_id}
399 params_no_dict = {'package_id': dataset_id}
399 if type(date_files) is list:
400 if type(date_files) is list:
400 params_dict['file_date'] = date_files
401 params_dict['file_date'] = date_files
401 else:
402 else:
402 params_no_dict['file_date'] = date_files
403 params_no_dict['file_date'] = date_files
403
404
404 if type(type_files) is list:
405 if type(type_files) is list:
405 params_dict['voc_file_type'] = type_files
406 params_dict['voc_file_type'] = type_files
406 else:
407 else:
407 params_no_dict['voc_file_type'] = type_files
408 params_no_dict['voc_file_type'] = type_files
408
409
409 for key1, value1 in kwargs.items():
410 for key1, value1 in kwargs.items():
410 if not key1 in params_dict and not key1 in params_no_dict and key1 != 'others':
411 if not key1 in params_dict and not key1 in params_no_dict and key1 != 'others':
411 if type(value1) is list:
412 if type(value1) is list:
412 params_dict[key1] = value1
413 params_dict[key1] = value1
413 else:
414 else:
414 params_no_dict[key1] = value1
415 params_no_dict[key1] = value1
415 #------------------------------------------#
416 #------------------------------------------#
416 if not 'others' in kwargs:
417 if not 'others' in kwargs:
417 params_no_dict['others'] = ''
418 params_no_dict['others'] = ''
418 else:
419 else:
419 if isinstance(kwargs['others'], tuple):
420 if isinstance(kwargs['others'], tuple):
420 params_dict['others'] = [json.dumps(w) for w in kwargs['others']]
421 params_dict['others'] = [json.dumps(w) for w in kwargs['others']]
421 elif isinstance(kwargs['others'], list):
422 elif isinstance(kwargs['others'], list):
422 params_no_dict['others'] = json.dumps(kwargs['others'])
423 params_no_dict['others'] = json.dumps(kwargs['others'])
423 elif isinstance(kwargs['others'], str):
424 elif isinstance(kwargs['others'], str):
424 params_no_dict['others'] = kwargs['others']
425 params_no_dict['others'] = kwargs['others']
425 else:
426 else:
426 return 'ERROR:: "others" must be a tuple, list or str'
427 return 'ERROR:: "others" must be a tuple, list or str'
427 #------------------------------------------#
428 #------------------------------------------#
428 len_params_dict = []
429 len_params_dict = []
429 for value2 in params_dict.values():
430 for value2 in params_dict.values():
430 len_params_dict.append(len(value2))
431 len_params_dict.append(len(value2))
431
432
432 if len(list(set(len_params_dict))) > 1:
433 if len(list(set(len_params_dict))) > 1:
433 return 'ERROR:: All lists must be the same length: %s' % (len(params_dict['name']))
434 return 'ERROR:: All lists must be the same length: %s' % (len(params_dict['name']))
434 #------------------------------------------------------------#
435 #------------------------------------------------------------#
435 print('"{}" file(s) found >> uploading'.format(len(params_dict['name'])))
436 print('"{}" file(s) found >> uploading'.format(len(params_dict['name'])))
436 for v in range(len(params_dict['name'])):
437 for v in range(len(params_dict['name'])):
437 try:
438 try:
438 send = {}
439 send = {}
439 for key_dict, value_dict in params_dict.items():
440 for key_dict, value_dict in params_dict.items():
440 send[key_dict] = value_dict[v]
441 send[key_dict] = value_dict[v]
441 for key_no_dict, value_no_dict in params_no_dict.items():
442 for key_no_dict, value_no_dict in params_no_dict.items():
442 send[key_no_dict] = value_no_dict
443 send[key_no_dict] = value_no_dict
443
444
444 self.list.append(getattr(self.ckan.action, 'resource_create')(**send))
445 self.list.append(getattr(self.ckan.action, 'resource_create')(**send))
445 print('File #{} :: "{}" was uploaded successfully'.format(v+1, params_dict['name'][v]))
446 print('File #{} :: "{}" was uploaded successfully'.format(v+1, params_dict['name'][v]))
446 except:
447 except:
447 _, exc_value, _ = sys.exc_info()
448 _, exc_value, _ = sys.exc_info()
448 self.list.append(exc_value)
449 self.list.append(exc_value)
449 print('File #{} :: Error uploading "{}" file'.format(v+1, params_dict['name'][v]))
450 print('File #{} :: Error uploading "{}" file'.format(v+1, params_dict['name'][v]))
450 return self.list
451 return self.list
451 #------------------------------------------------------------#
452 #------------------------------------------------------------#
452
453
453 def show(self, type_option, id, **kwargs):
454 def show(self, type_option, id, **kwargs):
454 '''
455 '''
455 FINALIDAD:
456 FINALIDAD:
456 Funcion personalizada para una busqueda en especifico.
457 Funcion personalizada para una busqueda en especifico.
457
458
458 PARAMETROS DISPONIBLES:
459 PARAMETROS DISPONIBLES:
459 CONSULTAR: "GUIA DE SCRIPT.pdf"
460 CONSULTAR: "GUIA DE SCRIPT.pdf"
460
461
461 ESTRUCTURA:
462 ESTRUCTURA:
462 <access_name>.show(type_option = <class 'str'>, id = <class 'str'>, param_1 = <class 'param_1'>, ...)
463 <access_name>.show(type_option = <class 'str'>, id = <class 'str'>, param_1 = <class 'param_1'>, ...)
463 '''
464 '''
464 if type(type_option) is str:
465 if type(type_option) is str:
465 try:
466 try:
466 if type_option == 'dataset':
467 if type_option == 'dataset':
467 return getattr(self.ckan.action, 'package_show')(id=id, **kwargs)
468 return getattr(self.ckan.action, 'package_show')(id=id, **kwargs)
468 elif type_option == 'resource':
469 elif type_option == 'resource':
469 return getattr(self.ckan.action, 'resource_show')(id=id, **kwargs)
470 return getattr(self.ckan.action, 'resource_show')(id=id, **kwargs)
470 elif type_option == 'project':
471 elif type_option == 'project':
471 return getattr(self.ckan.action, 'organization_show')(id=id, **kwargs)
472 return getattr(self.ckan.action, 'organization_show')(id=id, **kwargs)
472 elif type_option == 'collaborator':
473 elif type_option == 'collaborator':
473 return getattr(self.ckan.action, 'package_collaborator_list_for_user')(id=id, **kwargs)
474 return getattr(self.ckan.action, 'package_collaborator_list_for_user')(id=id, **kwargs)
474 elif type_option == 'member':
475 elif type_option == 'member':
475 return getattr(self.ckan.action, 'organization_list_for_user')(id=id, **kwargs)
476 return getattr(self.ckan.action, 'organization_list_for_user')(id=id, **kwargs)
476 elif type_option == 'vocabulary':
477 elif type_option == 'vocabulary':
477 return getattr(self.ckan.action, 'vocabulary_show')(id=id, **kwargs)
478 return getattr(self.ckan.action, 'vocabulary_show')(id=id, **kwargs)
478 elif type_option == 'tag':
479 elif type_option == 'tag':
479 if not 'vocabulary_id' in kwargs:
480 if not 'vocabulary_id' in kwargs:
480 print('Missing "vocabulary_id" value: assume it is a free tag')
481 print('Missing "vocabulary_id" value: assume it is a free tag')
481 return getattr(self.ckan.action, 'tag_show')(id=id, **kwargs)
482 return getattr(self.ckan.action, 'tag_show')(id=id, **kwargs)
482 elif type_option == 'user':
483 elif type_option == 'user':
483 return getattr(self.ckan.action, 'user_show')(id=id, **kwargs)
484 return getattr(self.ckan.action, 'user_show')(id=id, **kwargs)
484 elif type_option == 'job':
485 elif type_option == 'job':
485 return getattr(self.ckan.action, 'job_show')(id=id, **kwargs)
486 return getattr(self.ckan.action, 'job_show')(id=id, **kwargs)
486 else:
487 else:
487 return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
488 return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
488 except:
489 except:
489 _, exc_value, _ = sys.exc_info()
490 _, exc_value, _ = sys.exc_info()
490 return exc_value
491 return exc_value
491 else:
492 else:
492 return 'ERROR:: "type_option" must be a str'
493 return 'ERROR:: "type_option" must be a str'
493
494
494 def search(self, type_option, query=None, **kwargs):
495 def search(self, type_option, query=None, **kwargs):
495 '''
496 '''
496 FINALIDAD:
497 FINALIDAD:
497 Funcion personalizada para busquedas que satisfagan algun criterio.
498 Funcion personalizada para busquedas que satisfagan algun criterio.
498
499
499 PARAMETROS DISPONIBLES:
500 PARAMETROS DISPONIBLES:
500 CONSULTAR: "GUIA DE SCRIPT.pdf"
501 CONSULTAR: "GUIA DE SCRIPT.pdf"
501
502
502 ESTRUCTURA:
503 ESTRUCTURA:
503 <access_name>.search(type_option = <class 'str'>, query = <class 'dict'>, param_1 = <class 'param_1'>, ...)
504 <access_name>.search(type_option = <class 'str'>, query = <class 'dict'>, param_1 = <class 'param_1'>, ...)
504 '''
505 '''
505 if type(type_option) is str:
506 if type(type_option) is str:
506 try:
507 try:
507 if type_option == 'dataset':
508 if type_option == 'dataset':
508 key_replace = ['fq', 'fq_list', 'include_private']
509 key_replace = ['fq', 'fq_list', 'include_private']
509 key_point = ['facet_mincount', 'facet_limit', 'facet_field']
510 key_point = ['facet_mincount', 'facet_limit', 'facet_field']
510 for key1, value1 in kwargs.items():
511 for key1, value1 in kwargs.items():
511 if not key1 in key_replace:
512 if not key1 in key_replace:
512 if key1 in key_point:
513 if key1 in key_point:
513 self.dict[key1.replace('_', '.')] = value1
514 self.dict[key1.replace('_', '.')] = value1
514 else:
515 else:
515 self.dict[key1] = value1
516 self.dict[key1] = value1
516
517
517 if query is not None:
518 if query is not None:
518 if type(query) is dict:
519 if type(query) is dict:
519 self.dict['fq_list'] = []
520 self.dict['fq_list'] = []
520 #NUM_RESOURCES_MIN / NUM_RESOURCES_MAX
521 #NUM_RESOURCES_MIN / NUM_RESOURCES_MAX
521 #----------------------------------------------------#
522 #----------------------------------------------------#
522 if 'dataset_start_date' in query:
523 if 'dataset_start_date' in query:
523 if type(query['dataset_start_date']) is str:
524 if type(query['dataset_start_date']) is str:
524 try:
525 try:
525 datetime.strptime(query['dataset_start_date'], '%Y-%m-%d')
526 datetime.strptime(query['dataset_start_date'], '%Y-%m-%d')
526 if len(query['dataset_start_date']) != 10:
527 if len(query['dataset_start_date']) != 10:
527 return '"dataset_start_date", must be: <YYYY-MM-DD>'
528 return '"dataset_start_date", must be: <YYYY-MM-DD>'
528 self.dict['fq_list'].append('dataset_start_date:"'+query['dataset_start_date']+'"')
529 self.dict['fq_list'].append('dataset_start_date:"'+query['dataset_start_date']+'"')
529 self.list.append('dataset_start_date')
530 self.list.append('dataset_start_date')
530 except:
531 except:
531 return '"dataset_start_date" incorrect: "%s"' % (query['dataset_start_date'])
532 return '"dataset_start_date" incorrect: "%s"' % (query['dataset_start_date'])
532 else:
533 else:
533 return '"dataset_start_date" must be <str>'
534 return '"dataset_start_date" must be <str>'
534 #----------------------------------------------------#
535 #----------------------------------------------------#
535 if 'dataset_end_date' in query:
536 if 'dataset_end_date' in query:
536 if type(query['dataset_end_date']) is str:
537 if type(query['dataset_end_date']) is str:
537 try:
538 try:
538 datetime.strptime(query['dataset_end_date'], '%Y-%m-%d')
539 datetime.strptime(query['dataset_end_date'], '%Y-%m-%d')
539 if len(query['dataset_end_date']) != 10:
540 if len(query['dataset_end_date']) != 10:
540 return '"dataset_end_date", must be: <YYYY-MM-DD>'
541 return '"dataset_end_date", must be: <YYYY-MM-DD>'
541
542
542 if 'dataset_start_date' in query:
543 if 'dataset_start_date' in query:
543 if query['dataset_start_date'] > query['dataset_end_date']:
544 if query['dataset_start_date'] > query['dataset_end_date']:
544 return '"dataset_end_date" must be greater than "dataset_start_date"'
545 return '"dataset_end_date" must be greater than "dataset_start_date"'
545
546
546 self.dict['fq_list'].append('dataset_end_date:"'+query['dataset_end_date']+'"')
547 self.dict['fq_list'].append('dataset_end_date:"'+query['dataset_end_date']+'"')
547 self.list.append('dataset_end_date')
548 self.list.append('dataset_end_date')
548 except:
549 except:
549 return '"dataset_end_date" incorrect: "%s"' % (query['dataset_end_date'])
550 return '"dataset_end_date" incorrect: "%s"' % (query['dataset_end_date'])
550 else:
551 else:
551 return '"dataset_end_date" must be <str>'
552 return '"dataset_end_date" must be <str>'
552 #----------------------------------------------------#
553 #----------------------------------------------------#
553 for key, value in query.items():
554 for key, value in query.items():
554 if value is not None and not key in self.list:
555 if value is not None and not key in self.list:
555 self.dict['fq_list'].append(str(key)+':"'+str(value)+'"')
556 self.dict['fq_list'].append(str(key)+':"'+str(value)+'"')
556 else:
557 else:
557 return '"query" must be <dict>'
558 return '"query" must be <dict>'
558
559
559 return getattr(self.ckan.action, 'package_search')(include_private=True, **self.dict)
560 return getattr(self.ckan.action, 'package_search')(include_private=True, **self.dict)
560
561
561 elif type_option == 'resource':
562 elif type_option == 'resource':
562 for key1, value1 in kwargs.items():
563 for key1, value1 in kwargs.items():
563 if key1 != 'fields':
564 if key1 != 'fields':
564 self.dict[key1] = value1
565 self.dict[key1] = value1
565
566
566 if query is not None:
567 if query is not None:
567 if type(query) is dict:
568 if type(query) is dict:
568 #----------------------------------------------------#
569 #----------------------------------------------------#
569 if 'file_date_min' in query:
570 if 'file_date_min' in query:
570 if type(query['file_date_min']) is str:
571 if type(query['file_date_min']) is str:
571 try:
572 try:
572 datetime.strptime(query['file_date_min'], '%Y-%m-%d')
573 datetime.strptime(query['file_date_min'], '%Y-%m-%d')
573 if len(query['file_date_min']) != 10:
574 if len(query['file_date_min']) != 10:
574 return '"file_date_min", must be: <YYYY-MM-DD>'
575 return '"file_date_min", must be: <YYYY-MM-DD>'
575 except:
576 except:
576 return '"file_date_min" incorrect: "%s"' % (query['file_date_min'])
577 return '"file_date_min" incorrect: "%s"' % (query['file_date_min'])
577 else:
578 else:
578 return '"file_date_min" must be <str>'
579 return '"file_date_min" must be <str>'
579 #----------------------------------------------------#
580 #----------------------------------------------------#
580 if 'file_date_max' in query:
581 if 'file_date_max' in query:
581 if type(query['file_date_max']) is str:
582 if type(query['file_date_max']) is str:
582 try:
583 try:
583 datetime.strptime(query['file_date_max'], '%Y-%m-%d')
584 datetime.strptime(query['file_date_max'], '%Y-%m-%d')
584 if len(query['file_date_max']) != 10:
585 if len(query['file_date_max']) != 10:
585 return '"file_date_max", must be: <YYYY-MM-DD>'
586 return '"file_date_max", must be: <YYYY-MM-DD>'
586
587
587 if 'file_date_min' in query:
588 if 'file_date_min' in query:
588 if query['file_date_min'] > query['file_date_max']:
589 if query['file_date_min'] > query['file_date_max']:
589 return '"file_date_max" must be greater than "file_date_min"'
590 return '"file_date_max" must be greater than "file_date_min"'
590 except:
591 except:
591 return '"file_date_max" incorrect: "%s"' % (query['file_date_max'])
592 return '"file_date_max" incorrect: "%s"' % (query['file_date_max'])
592 else:
593 else:
593 return '"file_date_max" must be <str>'
594 return '"file_date_max" must be <str>'
594 #----------------------------------------------------#
595 #----------------------------------------------------#
595 self.dict['query'] = query
596 self.dict['query'] = query
596 else:
597 else:
597 return '"query" must be <dict>'
598 return '"query" must be <dict>'
598 return getattr(self.ckan.action, 'resources_search')(**self.dict)
599 return getattr(self.ckan.action, 'resources_search')(**self.dict)
599
600
600 elif type_option == 'tag':
601 elif type_option == 'tag':
601 for key1, value1 in kwargs.items():
602 for key1, value1 in kwargs.items():
602 if key1 != 'fields':
603 if key1 != 'fields':
603 self.dict[key1] = value1
604 self.dict[key1] = value1
604
605
605 if not 'vocabulary_id' in kwargs:
606 if not 'vocabulary_id' in kwargs:
606 print('Missing "vocabulary_id" value: tags that don’t belong to any vocabulary')
607 print('Missing "vocabulary_id" value: tags that don’t belong to any vocabulary')
607 else:
608 else:
608 print('Only tags that belong to "{}" vocabulary'.format(kwargs['vocabulary_id']))
609 print('Only tags that belong to "{}" vocabulary'.format(kwargs['vocabulary_id']))
609
610
610 if query is not None:
611 if query is not None:
611 if type(query) is dict:
612 if type(query) is dict:
612 if 'search' in query:
613 if 'search' in query:
613 if type(query['search']) is list or type(query['search']) is str:
614 if type(query['search']) is list or type(query['search']) is str:
614 self.dict['query'] = query['search']
615 self.dict['query'] = query['search']
615 else:
616 else:
616 return '"search" must be <list> or <str>'
617 return '"search" must be <list> or <str>'
617 else:
618 else:
618 return '"query" must be <dict>'
619 return '"query" must be <dict>'
619 return getattr(self.ckan.action, 'tag_search')(**self.dict)
620 return getattr(self.ckan.action, 'tag_search')(**self.dict)
620
621
621 else:
622 else:
622 return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
623 return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
623
624
624 except:
625 except:
625 _, exc_value, _ = sys.exc_info()
626 _, exc_value, _ = sys.exc_info()
626 return exc_value
627 return exc_value
627 else:
628 else:
628 return 'ERROR:: "type_option" must be <str>'
629 return 'ERROR:: "type_option" must be <str>'
629
630
    def create(self, type_option, select=None, **kwargs):
        '''
        PURPOSE:
            Custom function for creating.

        AVAILABLE PARAMETERS:
            SEE: "GUIA DE SCRIPT.pdf"

        STRUCTURE:
            <access_name>.create(type_option = <class 'str'>, param_1 = <class 'param_1'>, ...)
        '''
        if type(type_option) is str:
            try:
                if type_option == 'dataset':
                    return getattr(self.ckan.action, 'package_create')(**kwargs)
                elif type_option == 'project':
                    return getattr(self.ckan.action, 'organization_create')(**kwargs)
                elif type_option == 'member':
                    return getattr(self.ckan.action, 'organization_member_create')(**kwargs)
                elif type_option == 'collaborator':
                    return getattr(self.ckan.action, 'package_collaborator_create')(**kwargs)
                elif type_option == 'vocabulary':
                    return getattr(self.ckan.action, 'vocabulary_create')(**kwargs)
                elif type_option == 'tag':
                    return getattr(self.ckan.action, 'tag_create')(**kwargs)
                elif type_option == 'user':
                    return getattr(self.ckan.action, 'user_create')(**kwargs)
                elif type_option == 'views':
                    if 'resource' == select:
                        self.list = ['package']
                        for key1, value1 in kwargs.items():
                            if not key1 in self.list:
                                self.dict[key1] = value1
                        return getattr(self.ckan.action, 'resource_create_default_resource_views')(**self.dict)
                    elif 'dataset' == select:
                        return getattr(self.ckan.action, 'package_create_default_resource_views')(**kwargs)
                    else:
                        return 'ERROR:: "select = %s" is not accepted' % (select)
                else:
                    return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
            except:
                _, exc_value, _ = sys.exc_info()
                return exc_value
        else:
            return 'ERROR:: "type_option" must be <str>'

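    # Minimal usage sketch for create() above, assuming an instance of this
    # client class named "client"; the field names are standard CKAN
    # package_create / organization_create parameters and the values are
    # illustrative, not taken from this repository.
    #
    #   client.create('dataset', name='example-dataset', title='Example dataset',
    #                 owner_org='example-project', private=False)
    #   client.create('project', name='example-project', title='Example project')
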
    def patch(self, type_option, **kwargs):
        '''
        PURPOSE:
            Custom functions for updating.

        AVAILABLE PARAMETERS:
            SEE: "GUIA DE SCRIPT.pdf"

        STRUCTURE:
            <access_name>.patch(type_option = <class 'str'>, param_1 = <class 'param_1'>, ...)
        '''
        if type(type_option) is str:
            try:
                if type_option == 'dataset':
                    return getattr(self.ckan.action, 'package_patch')(**kwargs)
                elif type_option == 'project':
                    return getattr(self.ckan.action, 'organization_patch')(**kwargs)
                elif type_option == 'resource':
                    return getattr(self.ckan.action, 'resource_patch')(**kwargs)
                elif type_option == 'member':
                    # organization_member_create also updates the capacity of an
                    # existing member, which is presumably why it is reused here.
                    return getattr(self.ckan.action, 'organization_member_create')(**kwargs)
                elif type_option == 'collaborator':
                    # package_collaborator_create likewise updates an existing
                    # collaborator's capacity.
                    return getattr(self.ckan.action, 'package_collaborator_create')(**kwargs)
                else:
                    return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
            except:
                _, exc_value, _ = sys.exc_info()
                return exc_value
        else:
            return 'ERROR:: "type_option" must be <str>'

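    # Usage sketch for patch() above ("client" and the values are illustrative).
    # The CKAN *_patch actions only change the fields that are supplied and keep
    # the rest of the record as-is.
    #
    #   client.patch('dataset', id='example-dataset', notes='Updated description')
    #   client.patch('resource', id='<resource-id>', description='New resource description')
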
    def delete(self, type_option, select=None, **kwargs):
        '''
        PURPOSE:
            Custom function for deleting and/or purging.

        AVAILABLE PARAMETERS:
            SEE: "GUIA DE SCRIPT.pdf"

        STRUCTURE:
            <access_name>.delete(type_option = <class 'str'>, param_1 = <class 'param_1'>, ...)
        '''
        if type(type_option) is str:
            try:
                if type_option == 'dataset':
                    if select is None:
                        return 'ERROR:: "select" must not be "None"'
                    else:
                        if 'delete' == select:
                            return getattr(self.ckan.action, 'package_delete')(**kwargs)
                        elif 'purge' == select:
                            return getattr(self.ckan.action, 'dataset_purge')(**kwargs)
                        else:
                            return 'ERROR:: "select = %s" is not accepted' % (select)
                elif type_option == 'project':
                    if select is None:
                        return 'ERROR:: "select" must not be "None"'
                    else:
                        if 'delete' == select:
                            return getattr(self.ckan.action, 'organization_delete')(**kwargs)
                        elif 'purge' == select:
                            return getattr(self.ckan.action, 'organization_purge')(**kwargs)
                        else:
                            return 'ERROR:: "select = %s" is not accepted' % (select)
                elif type_option == 'resource':
                    return getattr(self.ckan.action, 'resource_delete')(**kwargs)
                elif type_option == 'vocabulary':
                    return getattr(self.ckan.action, 'vocabulary_delete')(**kwargs)
                elif type_option == 'tag':
                    return getattr(self.ckan.action, 'tag_delete')(**kwargs)
                elif type_option == 'user':
                    return getattr(self.ckan.action, 'user_delete')(**kwargs)
                else:
                    return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
            except:
                _, exc_value, _ = sys.exc_info()
                return exc_value
        else:
            return 'ERROR:: "type_option" must be <str>'

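    # Usage sketch for delete() above (illustrative names). 'delete' soft-deletes
    # the record in CKAN, while 'purge' removes it permanently.
    #
    #   client.delete('dataset', select='delete', id='example-dataset')
    #   client.delete('dataset', select='purge', id='example-dataset')
    #   client.delete('project', select='purge', id='example-project')
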
    def f_status_note(self, total, result, path):
        file_txt = open(path+'status_note.txt', 'w')

        file_txt.write('DOWNLOADED FILE(S): "%s"' % (len(result['name'])))
        file_txt.write(''+ os.linesep)
        for u in result['name']:
            file_txt.write(' - '+ u + os.linesep)
        file_txt.write(''+ os.linesep)

        file_txt.write('FAILED FILE(S): "%s"' % (len(total['name'])-len(result['name'])))
        file_txt.write(''+ os.linesep)
        if len(total['name'])-len(result['name']) != 0:
            for u in total['name']:
                if not u in result['name']:
                    file_txt.write(' - '+ u + os.linesep)
        else:
            file_txt.write(' "None"'+ os.linesep)
        file_txt.close()

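    # f_status_note() expects "total" and "result" to be dicts shaped like
    # {'name': [...]}: "total" lists every file scheduled for download and
    # "result" the files actually found on disk afterwards, e.g.
    #
    #   total  = {'name': ['a.nc', 'b.nc', 'c.nc']}
    #   result = {'name': ['a.nc', 'b.nc']}
    #   self.f_status_note(total, result, '/tmp/example-dataset/')   # writes status_note.txt there
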
    def f_name(self, name_dataset, ext, tempdir):
        while self.check:
            self.str = ''
            if self.cont == 0:
                if os.path.exists(tempdir + name_dataset + ext):
                    self.str = name_dataset+'('+str(self.cont+1)+')'+ext
                else:
                    self.check = self.check * 0
                    self.str = name_dataset + ext
            else:
                if not os.path.exists(tempdir + name_dataset+'('+str(self.cont)+')'+ext):
                    self.check = self.check * 0
                    self.str = name_dataset+'('+str(self.cont)+')'+ ext
            self.cont = self.cont+1
        return self.str

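    # f_name() above returns the first non-existing variant of a name inside
    # "tempdir": "example", then "example(1)", "example(2)", and so on. It relies
    # on self.check being truthy and self.cont being 0 before the call; those
    # attributes are presumably (re)initialized elsewhere in this class, outside
    # this hunk.
    #
    #   zip_name = self.f_name('example-dataset', '.zip', '/tmp/')
    #   # -> 'example-dataset.zip', or 'example-dataset(1).zip' if that file already exists
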
    def f_zipdir(self, path, ziph, zip_name):
        for root, _, files in os.walk(path):
            print('.....')
            print('Creating: "{}" >>'.format(zip_name))
            for __file in tqdm(iterable=files, total=len(files)):
                new_dir = os.path.relpath(os.path.join(root, __file), os.path.join(path, '..'))
                ziph.write(os.path.join(root, __file), new_dir)
            print('Created >>')

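    # f_zipdir() is meant to be used with an already-open zipfile.ZipFile handle,
    # exactly as download_files() does further below:
    #
    #   ziph = zipfile.ZipFile(dict_local['path'] + zip_name, 'w', zipfile.ZIP_DEFLATED, allowZip64=True)
    #   self.f_zipdir(dir_name, ziph, zip_name)
    #   ziph.close()
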
    def download_by_step(self, response, tempdir_name):
        try:
            # ---------- REPLACE URL --------- #
            if urlparse(self.url).netloc != 'www.igp.gob.pe' and urlparse(response['url']).netloc == 'www.igp.gob.pe':
                response['url'] = response['url'].replace(urlparse(response['url']).scheme + '://' + urlparse(response['url']).netloc,
                                                          urlparse(self.url).scheme + '://' + urlparse(self.url).netloc)
            #----------------------------------#
            with requests.get(response['url'], stream=True, headers={'Authorization': self.Authorization}, verify=self.verify) as resp:
                if resp.status_code == 200:
                    with open(tempdir_name+response['name'], 'wb') as file:
                        for chunk in resp.iter_content(chunk_size = self.chunk_size):
                            if chunk:
                                file.write(chunk)
        except requests.exceptions.RequestException:
            pass

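    # download_by_step() above expects "response" to be a single resource record
    # with at least 'url' and 'name' keys, and relies on self.url,
    # self.Authorization, self.verify and self.chunk_size being set on the
    # instance (outside this hunk). Request errors are deliberately swallowed;
    # download_files() detects missing files afterwards by listing the target
    # directory. Illustrative call:
    #
    #   self.download_by_step({'url': 'https://example.org/f.nc', 'name': 'f.nc'}, '/tmp/example-dataset/')
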
    def download_files(self, **kwargs):
        '''
        PURPOSE:
            Custom function for downloading the existing files of a dataset.

        AVAILABLE PARAMETERS:
            SEE: "GUIA DE SCRIPT.pdf"

        STRUCTURE:
            <access_name>.download_files(id = <class 'str'>, param_1 = <class 'param_1'>, ...)
        '''
        dict_local = {}
        #----------------------------------------------#
        if 'zip' in kwargs:
            if type(kwargs['zip']) is not bool:
                return 'ERROR:: "zip" must be: <class "bool">'
            else:
                dict_local['zip'] = kwargs['zip']
        else:
            dict_local['zip'] = False
        #----------------------------------------------#
        if 'status_note' in kwargs:
            if type(kwargs['status_note']) is not bool:
                return 'ERROR:: "status_note" must be: <class "bool">'
            else:
                dict_local['status_note'] = kwargs['status_note']
        else:
            dict_local['status_note'] = False
        #----------------------------------------------#
        if 'path' in kwargs:
            if type(kwargs['path']) is str:
                if os.path.isdir(kwargs['path']) == False:
                    return 'ERROR:: "path" does not exist'
                else:
                    if kwargs['path'][-1:] != self.separator:
                        dict_local['path'] = kwargs['path']+self.separator
                    else:
                        dict_local['path'] = kwargs['path']

                    txt = dict_local['path']+datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f")+'.txt'
                    if int(platform.python_version()[0]) == 3:
                        try:
                            file_txt = open(txt, 'w')
                            file_txt.close()
                            os.remove(txt)
                        except PermissionError:
                            return 'ERROR:: Access denied, you are not authorized to write files: "%s"' % (dict_local['path'])
                    else:
                        try:
                            file_txt = open(txt, 'w')
                            file_txt.close()
                            os.remove(txt)
                        except:
                            return 'ERROR:: Access denied, you are not authorized to write files: "%s"' % (dict_local['path'])
            else:
                return 'ERROR:: "path" must be: <class "str">'
        else:
            dict_local['path'] = ''
        #----------------------------------------------#
        for key, value in kwargs.items():
            if not key in dict_local:
                self.dict[key] = value
        try:
            response = getattr(self.ckan.action, 'url_resources')(**self.dict)
        except:
            _, exc_value, _ = sys.exc_info()
            return exc_value

        if len(response) != 0:
            #--------------TEMP PATH---------------#
            if dict_local['zip']:
                tempdir = tempfile.mkdtemp(prefix=kwargs['id']+'-')+self.separator
                os.mkdir(tempdir+kwargs['id'])
                dir_name = tempdir + kwargs['id'] + self.separator
            else:
                dir = self.f_name(kwargs['id'], '', dict_local['path'])
                os.mkdir(dict_local['path'] + dir)
                dir_name = dict_local['path'] + dir + self.separator
            #-----------DOWNLOAD FILES-------------#
            print('.....')
            print('Downloading "{}" file(s) >>'.format(len(response)))
            name_total = {'name': []}
            with concurrent.futures.ThreadPoolExecutor() as executor:
                for u in tqdm(iterable=response, total=len(response)):
                    name_total['name'].append(u['name'])
                    executor.submit(self.download_by_step, u, dir_name)
            name_check = {}
            name_check['name'] = [f for f in os.listdir(dir_name) if os.path.isfile(os.path.join(dir_name, f))]
            print('"{}" downloaded file(s) successfully >>'.format(len(name_check['name'])))
            #--------------------------------------#
            if len(name_check['name']) != 0:
                #----------Status Note---------#
                if dict_local['status_note']:
                    print('.....')
                    print('Creating: "status_note.txt" >>')
                    self.f_status_note(name_total, name_check, dir_name)
                    print('Created >>')
                #----------ZIP CREATE----------#
                if dict_local['zip']:
                    zip_name = self.f_name(kwargs['id'], '.zip', dict_local['path'])
                    ziph = zipfile.ZipFile(dict_local['path'] + zip_name, 'w', zipfile.ZIP_DEFLATED, allowZip64=True)
                    self.f_zipdir(dir_name, ziph, zip_name)
                    ziph.close()
                    # Delete temporary path
                    if os.path.exists(tempdir[:-1]):
                        shutil.rmtree(tempdir[:-1])
                #------------------------------#
                print('.....')
                return 'DOWNLOAD FINISHED'
            else:
                # Delete temporary path
                if dict_local['zip']:
                    if os.path.exists(tempdir[:-1]):
                        shutil.rmtree(tempdir[:-1])
                else:
                    if os.path.exists(dir_name[:-1]):
                        shutil.rmtree(dir_name[:-1])
                return 'NO FILES WERE DOWNLOADED'
        else:
            return 'FILES NOT FOUND'
937
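    # Usage sketch for download_files() above (illustrative values): "id" is
    # forwarded to the 'url_resources' action to resolve the dataset's resource
    # URLs, while "zip", "status_note" and "path" are consumed locally by this
    # method.
    #
    #   client.download_files(id='example-dataset', path='/tmp', zip=True, status_note=True)
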
    def download_files_advance(self, id_or_name, processes=1, path=os.path.expanduser("~"), **kwargs):
        '''
        PURPOSE:
            Advanced custom function for downloading the existing files of one or more datasets.

        AVAILABLE PARAMETERS:
            SEE: "GUIA DE SCRIPT.pdf"

        STRUCTURE:
            <access_name>.download_files_advance(id_or_name = <class 'str' or 'list'>, param_1 = <class 'param_1'>, ...)
        '''
        #------------------ PATH ----------------------#
        if isinstance(path, str):
            if os.path.isdir(path):
                if not path.endswith(os.sep):
                    path = path + os.sep
                test_txt = path + datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f")+'.txt'
                try:
                    file_txt = open(test_txt, 'w')
                    file_txt.close()
                    os.remove(test_txt)
                except:
                    return 'ERROR:: Access denied, you are not authorized to write files: "%s"' % (path)
            else:
                return 'ERROR:: "path" does not exist'
        else:
            return 'ERROR:: "path" must be: <class "str">'

        #------------------ PROCESSES -----------------#
        if not isinstance(processes, int):
            return 'ERROR:: "processes" must be: <class "int">'

        #------------------ ID OR NAME ----------------#
        if isinstance(id_or_name, str):
            id_or_name = [id_or_name]
        elif isinstance(id_or_name, list):
            id_or_name = list(map(str, id_or_name))
        else:
            return 'ERROR:: dataset "id_or_name" must be: <class "str" or "list">'
        #----------------------------------------------#
        arguments = {
            '--apikey': self.Authorization,
            '--ckan-user': None,
            '--config': None,
            '--datapackages': path,
            '--datastore-fields': False,
            '--get-request': False,
            '--insecure': not self.verify,
            '--log': '/home/soporte/DUMP/download.txt',  # hard-coded log path
            '--processes': str(processes),
            '--quiet': False,
            '--remote': self.url,
            '--worker': False,
            #'--all': False,
            #'--gzip': False,
            #'--output': None,
            #'--max-records': None,
            #'--output-json': False,
            #'--output-jsonl': False,
            #'--create-only': False,
            #'--help': False,
            #'--input': None,
            #'--input-json': False,
            #'--start-record': '1',
            #'--update-only': False,
            #'--upload-logo': False,
            #'--upload-resources': False,
            #'--version': False,
            'ID_OR_NAME': id_or_name,
            'datasets': True,
            'dump': True,
            #'ACTION_NAME': None,
            #'KEY:JSON': [],
            #'KEY=STRING': [],
            #'KEY@FILE': [],
            #'action': False,
            #'delete': False,
            #'groups': False,
            #'load': False,
            #'organizations': False,
            #'related': False,
            #'search': False,
            #'users': False
        }
        return logic_download.dump_things_change(self.ckan, 'datasets', arguments, **kwargs)
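    # Usage sketch for the new download_files_advance() above (illustrative
    # values). It accepts a single dataset id/name or a list of them, spreads the
    # dump over several worker processes, and writes the downloaded files into
    # per-dataset folders under "path":
    #
    #   client.download_files_advance('example-dataset', processes=2, path='/tmp')
    #   client.download_files_advance(['dataset-a', 'dataset-b'], processes=4, path='/data/downloads')
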
@@ -1,12 +1,12
# encoding: utf-8
from setuptools import setup

setup(
    name = "CKAN_JRO",
    version = "2.9.2.0",
    description = "Data Repository - JRO",
    author = "Edson Ynilupu Mattos",
    author_email = "eynilupu@igp.gob.pe",
    url = "http://intranet.igp.gob.pe:8082/DATABASES/ckanext-jro/api-cliente",  # previously left empty ("")
    packages = ["CKAN_JRO"]
)
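# With this setup.py in place, the package can be installed locally in the usual
# way (assumed workflow, not documented in this repository):
#
#   pip install .
#   # or, for an editable development install:
#   pip install -e .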