DATABASES/ckanext-jro/api-cliente Commit - r23:7969fa062c2f

v2.9.2 :: Update 'User-Agent' in URL parameters - download

eynilupu -

r23:7969fa062c2f master

parent child

Context file:

r23:7969fa062c2f

jrodb/api.py +4 -3

             from ckanapi import RemoteCKAN
             from datetime import datetime
             from jrodb import download
             from jrodb import resource
             #from ckanapi.errors import NotAuthorized, NotFound, ValidationError, SearchQueryError, SearchError, CKANAPIError, ServerIncompatibleError
             import sys
             import platform
             import os
             import requests
             class Api():
                 """
                 FINALIDAD:
                     Script para administrar y obtener la data del repositorio por medio de APIs.
                 REQUISITOS PREVIOS:
                     - Paso 1: Tener "pip [Python 2]" o "pip3 [Python 3]" instalado:
                     - Paso 2: Instalar los siguientes paquetes:
                         En Python 2
                             - pip install -e git+http://intranet.igp.gob.pe:8082/DATABASES/ckanext-jro/api-cliente#egg=jrodb
                         En Python 3
                             - pip3 install -e git+http://intranet.igp.gob.pe:8082/DATABASES/ckanext-jro/api-cliente#egg=jrodb
                 FUNCIONES DISPONIBLES:
                     - action
                     - show
                     - search
                     - create
                     - patch
                     - delete
                     - download
                 EJEMPLOS:
                     #1:
                         with Api('http://demo.example.com', Authorization='#########') as <access_name>:
                             ... some operation(s) ...
                     #2:
                         <access_name> = Api('http://example.com', Authorization='#########')
                         ... some operation(s) ...
                         <access_name>.ckan.close()
                 REPORTAR ALGUN PROBLEMA:
                     Debe enviar un correo a eynilupu@igp.gob.pe detallando los siguientes pasos:
                         1) Correo para contactarlo
                         2) Descripcion del problema
                         3) ¿En que paso o seccion encontro el problema?
                         4) ¿Cual era el resultado que usted esperaba?
                 """
                 def __init__(self, url, Authorization=None, secure=True):
                     """
                     Create a client bound to one CKAN site.
                     PARAMETERS:
                         url: site URL; handed to RemoteCKAN and reused by download() as "--remote".
                         Authorization: API key passed to RemoteCKAN as "apikey" (optional).
                         secure: when it is the boolean False, a requests session with
                             certificate verification switched off is handed to RemoteCKAN;
                             for any other value RemoteCKAN receives "session=None".
                     """
                     #-------- Check Secure  -------#
                     # Kept so download() can derive its "--insecure" flag.
                     self.verify = secure
                     if not secure and isinstance(secure, bool):
                         session = requests.Session()
                         session.verify = False
                     else:
                         session = None
                     #------------------------------#
                     self.url = url
-                    ua = 'CKAN_JRO/2.9.2 (+'+str(self.url)+')'
+                    #ua = 'CKAN_JRO/2.9.2 (+'+str(self.url)+')'
+                    self.ua = 'CKAN_JRO/2.9.2 (+http://intranet.igp.gob.pe:8082/DATABASES/ckanext-jro/api-cliente)'
                     #ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
-                    self.ckan = RemoteCKAN(self.url, apikey=Authorization, user_agent=ua, session=session)
+                    self.ckan = RemoteCKAN(self.url, apikey=Authorization, user_agent=self.ua, session=session)
                     #self.ckan = RemoteCKAN(self.url, apikey=Authorization)
                     # Kept so download() can pass the key on as "--apikey".
                     self.Authorization = Authorization
                     # Change for --> self.separator = os.sep
                     if platform.system() == 'Windows':
                         self.separator = '\\'
                     else:
                         self.separator = '/'
                     # NOTE(review): chunk_size, str, check and cont are not read anywhere in the
                     # visible part of this class; presumably the helper modules that receive
                     # this instance use them - confirm.
                     self.chunk_size = 1024
                     # Scratch containers written by action(), search() and create().
                     self.list = []
                     self.dict = {}
                     self.str = ''
                     self.check = 1
                     self.cont = 0
                 def __enter__(self):
                     # Context-manager entry: the instance itself is the "with ... as" target.
                     return self
                 def __exit__(self, *args):
                     # Context-manager exit: close the RemoteCKAN client. The exception details
                     # in *args are ignored and None is returned, so an exception raised inside
                     # the "with" body keeps propagating.
                     self.ckan.close()
                 def action(self, action, **kwargs):
                     """
                     FINALIDAD:
                         Funcion para llamar a las APIs disponibles
                     APIs DISPONIBLES:
                         CONSULTAR: "GUIA DE SCRIPT.pdf"
                     EJEMPLO:
                         <access_name>.action(<consuming API>, param_1 = <class 'param_1'>, ...)
                     """
                     #--------------- CASE: PACKAGE SEARCH ---------------#
                     if kwargs is not None:
                         if action == 'package_search':
                             self.list = ['facet_mincount', 'facet_limit', 'facet_field']
                             for facet in self.list:
                                 if facet in kwargs:
                                     kwargs[facet.replace('_', '.')] = kwargs[facet]
                                     kwargs.pop(facet)
                     #----------------------------------------------------#
                     try:
                         return getattr(self.ckan.action, action)(**kwargs)
                     except:
                         _, exc_value, _ = sys.exc_info()
                         return exc_value
                 def show(self, type_option, id, **kwargs):
                     '''
                     FINALIDAD:
                         Funcion personalizada para una busqueda en especifico.
                     PARAMETROS DISPONIBLES:
                         CONSULTAR: "GUIA DE SCRIPT.pdf"
                     ESTRUCTURA:
                         <access_name>.show(type_option = <class 'str'>, id = <class 'str'>, param_1 = <class 'param_1'>, ...)
                     '''
                     if type(type_option) is str:
                         try:
                             if type_option == 'dataset':
                                 return getattr(self.ckan.action, 'package_show')(id=id, **kwargs)
                             elif type_option == 'resource':
                                 return getattr(self.ckan.action, 'resource_show')(id=id, **kwargs)
                             elif type_option == 'project':
                                 return getattr(self.ckan.action, 'organization_show')(id=id, **kwargs)
                             elif type_option == 'collaborator':
                                 return getattr(self.ckan.action, 'package_collaborator_list_for_user')(id=id, **kwargs)
                             elif type_option == 'member':
                                 return getattr(self.ckan.action, 'organization_list_for_user')(id=id, **kwargs)
                             elif type_option == 'vocabulary':
                                 return getattr(self.ckan.action, 'vocabulary_show')(id=id, **kwargs)
                             elif type_option == 'tag':
                                 if not 'vocabulary_id' in kwargs:
                                     print('Missing "vocabulary_id" value: assume it is a free tag')
                                 return getattr(self.ckan.action, 'tag_show')(id=id, **kwargs)
                             elif type_option == 'user':
                                 return getattr(self.ckan.action, 'user_show')(id=id, **kwargs)
                             elif type_option == 'job':
                                 return getattr(self.ckan.action, 'job_show')(id=id, **kwargs)
                             else:
                                 return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
                         except:
                             _, exc_value, _ = sys.exc_info()
                             return exc_value
                     else:
                         return 'ERROR:: "type_option" must be a str'
                 def search(self, type_option, query=None, **kwargs):
                     '''
                     FINALIDAD:
                         Funcion personalizada para busquedas que satisfagan algun criterio.
                     PARAMETROS DISPONIBLES:
                         CONSULTAR: "GUIA DE SCRIPT.pdf"
                     ESTRUCTURA:
                         <access_name>.search(type_option = <class 'str'>, query = <class 'dict'>, param_1 = <class 'param_1'>, ...)
                     '''
                     if type(type_option) is str:
                         try:
                             if type_option == 'dataset':
                                 key_replace = ['fq', 'fq_list', 'include_private']
                                 key_point = ['facet_mincount', 'facet_limit', 'facet_field']
                                 for key1, value1 in kwargs.items():
                                     if not key1 in key_replace:
                                         if key1 in key_point:
                                             self.dict[key1.replace('_', '.')] = value1
                                         else:
                                             self.dict[key1] = value1
                                 if query is not None:
                                     if type(query) is dict:
                                         self.dict['fq_list'] = []
                                         #NUM_RESOURCES_MIN / NUM_RESOURCES_MAX
                                         #----------------------------------------------------#
                                         if 'dataset_start_date' in query:
                                             if type(query['dataset_start_date']) is str:
                                                 try:
                                                     datetime.strptime(query['dataset_start_date'], '%Y-%m-%d')
                                                     if len(query['dataset_start_date']) != 10:
                                                         return '"dataset_start_date", must be:  <YYYY-MM-DD>'
                                                     self.dict['fq_list'].append('dataset_start_date:"'+query['dataset_start_date']+'"')
                                                     self.list.append('dataset_start_date')
                                                 except:
                                                     return '"dataset_start_date" incorrect: "%s"' % (query['dataset_start_date'])
                                             else:
                                                 return '"dataset_start_date" must be <str>'
                                         #----------------------------------------------------#
                                         if 'dataset_end_date' in query:
                                             if type(query['dataset_end_date']) is str:
                                                 try:
                                                     datetime.strptime(query['dataset_end_date'], '%Y-%m-%d')
                                                     if len(query['dataset_end_date']) != 10:
                                                         return '"dataset_end_date", must be:  <YYYY-MM-DD>'
                                                     if 'dataset_start_date' in query:
                                                         if query['dataset_start_date'] > query['dataset_end_date']:
                                                             return '"dataset_end_date" must be greater than "dataset_start_date"'
                                                     self.dict['fq_list'].append('dataset_end_date:"'+query['dataset_end_date']+'"')
                                                     self.list.append('dataset_end_date')
                                                 except:
                                                     return '"dataset_end_date" incorrect: "%s"' % (query['dataset_end_date'])
                                             else:
                                                 return '"dataset_end_date" must be <str>'
                                         #----------------------------------------------------#
                                         if 'tags' in query:
                                             if isinstance(query['tags'], (int, float, str, list)):
                                                 if type(query['tags']) is list:
                                                     for u in query['tags']:
                                                         self.dict['fq_list'].append('tags:"'+str(u)+'"')
                                                 else:
                                                     self.dict['fq_list'].append('tags:"'+str(query['tags'])+'"')
                                                 self.list.append('tags')
                                             else:
                                                 return '"tags" must be <list> or <float> or <int> or <str>'
                                         #----------------------------------------------------#
                                         for key, value in query.items():
                                             if value is not None and not key in self.list:
                                                 self.dict['fq_list'].append(str(key)+':"'+str(value)+'"')
                                     else:
                                         return '"query" must be <dict>'
                                 return getattr(self.ckan.action, 'package_search')(include_private=True, **self.dict)
                             elif type_option == 'resource':
                                 for key1, value1 in kwargs.items():
                                     if key1 != 'fields':
                                         self.dict[key1] = value1
                                 if query is not None:
                                     if type(query) is dict:
                                         #----------------------------------------------------#
                                         if 'file_date_min' in query:
                                             if type(query['file_date_min']) is str:
                                                 try:
                                                     datetime.strptime(query['file_date_min'], '%Y-%m-%d')
                                                     if len(query['file_date_min']) != 10:
                                                         return '"file_date_min", must be:  <YYYY-MM-DD>'
                                                 except:
                                                     return '"file_date_min" incorrect: "%s"' % (query['file_date_min'])
                                             else:
                                                 return '"file_date_min" must be <str>'
                                         #----------------------------------------------------#
                                         if 'file_date_max' in query:
                                             if type(query['file_date_max']) is str:
                                                 try:
                                                     datetime.strptime(query['file_date_max'], '%Y-%m-%d')
                                                     if len(query['file_date_max']) != 10:
                                                         return '"file_date_max", must be:  <YYYY-MM-DD>'
                                                     if 'file_date_min' in query:
                                                         if query['file_date_min'] > query['file_date_max']:
                                                             return '"file_date_max" must be greater than "file_date_min"'
                                                 except:
                                                     return '"file_date_max" incorrect: "%s"' % (query['file_date_max'])
                                             else:
                                                 return '"file_date_max" must be <str>'
                                         #----------------------------------------------------#
                                         self.dict['query'] = query
                                     else:
                                         return '"query" must be <dict>'
                                 return getattr(self.ckan.action, 'resources_search')(**self.dict)
                             elif type_option == 'tag':
                                 for key1, value1 in kwargs.items():
                                     if key1 != 'fields':
                                         self.dict[key1] = value1
                                 if not 'vocabulary_id' in kwargs:
                                     print('Missing "vocabulary_id" value: tags that don’t belong to any vocabulary')
                                 else:
                                     print('Only tags that belong to "{}" vocabulary'.format(kwargs['vocabulary_id']))
                                 if query is not None:
                                     if type(query) is dict:
                                         if 'search' in query:
                                             if type(query['search']) is list or type(query['search']) is str:
                                                 self.dict['query'] = query['search']
                                             else:
                                                 return '"search" must be <list> or <str>'
                                     else:
                                         return '"query" must be <dict>'
                                 return getattr(self.ckan.action, 'tag_search')(**self.dict)
                             else:
                                 return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
                         except:
                             _, exc_value, _ = sys.exc_info()
                             return exc_value
                     else:
                         return 'ERROR:: "type_option" must be <str>'
                 def create(self, type_option, select=None, **kwargs):
                     '''
                     FINALIDAD:
                         Funcion personalizada para crear.
                     PARAMETROS DISPONIBLES:
                         CONSULTAR: "GUIA DE SCRIPT.pdf"
                     ESTRUCTURA:
                         <access_name>.create(type_option = <class 'str'>, param_1 = <class 'param_1'>, ...)
                     '''
                     if type(type_option) is str:
                         try:
                             if type_option == 'dataset':
                                 return getattr(self.ckan.action, 'package_create')(**kwargs)
                             if type_option == 'resource':
                                 return resource.resource_create(self, **kwargs)
                             elif type_option == 'project':
                                 return getattr(self.ckan.action, 'organization_create')(**kwargs)
                             elif type_option == 'member':
                                 return getattr(self.ckan.action, 'organization_member_create')(**kwargs)
                             elif type_option == 'collaborator':
                                 return getattr(self.ckan.action, 'package_collaborator_create')(**kwargs)
                             elif type_option == 'vocabulary':
                                 return getattr(self.ckan.action, 'vocabulary_create')(**kwargs)
                             elif type_option == 'tag':
                                 return getattr(self.ckan.action, 'tag_create')(**kwargs)
                             elif type_option == 'user':
                                 return getattr(self.ckan.action, 'user_create')(**kwargs)
                             elif type_option == 'views':
                                 if 'resource' == select:
                                     self.list = ['package']
                                     for key1, value1 in kwargs.items():
                                         if not key1 in self.list:
                                             self.dict[key1] = value1
                                     return getattr(self.ckan.action, 'resource_create_default_resource_views')(**self.dict)
                                 elif 'dataset' == select:
                                     return getattr(self.ckan.action, 'package_create_default_resource_views')(**kwargs)
                                 else:
                                     return 'ERROR:: "select = %s" is not accepted' % (select)
                             else:
                                 return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
                         except:
                             _, exc_value, _ = sys.exc_info()
                             return exc_value
                     else:
                         return 'ERROR:: "type_option" must be <str>'
                 def patch(self, type_option, **kwargs):
                     '''
                     FINALIDAD:
                         Funciones personalizadas para actualizar
                     PARAMETROS DISPONIBLES:
                         CONSULTAR: "GUIA DE SCRIPT.pdf"
                     ESTRUCTURA:
                         <access_name>.patch(type_option = <class 'str'>, param_1 = <class 'param_1'>, ...)
                     '''
                     if type(type_option) is str:
                         try:
                             if type_option == 'dataset':
                                 #Agregar que solo se debe modificar parámetros del Dataset y que no incluya Resources
                                 return getattr(self.ckan.action, 'package_patch')(**kwargs)
                             elif type_option == 'project':
                                 return getattr(self.ckan.action, 'organization_patch')(**kwargs)
                             elif type_option == 'resource':
                                 return resource.resource_patch(self, **kwargs)
                             elif type_option == 'member':
                                 return getattr(self.ckan.action, 'organization_member_create')(**kwargs)
                             elif type_option == 'collaborator':
                                 return getattr(self.ckan.action, 'package_collaborator_create')(**kwargs)
                             else:
                                 return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
                         except:
                             _, exc_value, _ = sys.exc_info()
                             return exc_value
                     else:
                         return 'ERROR:: "type_option" must be <str>'
                 def delete(self, type_option, select=None, **kwargs):
                     '''
                     FINALIDAD:
                         Función personalizada para eliminar y/o purgar.
                     PARAMETROS DISPONIBLES:
                         CONSULTAR: "GUIA DE SCRIPT.pdf"
                     ESTRUCTURA:
                         <access_name>.delete(type_option = <class 'str'>, param_1 = <class 'param_1'>, ...)
                     '''
                     if type(type_option) is str:
                         try:
                             if type_option == 'dataset':
                                 if select is None:
                                     return 'ERROR:: "select" must not be "None"'
                                 else:
                                     if 'delete' == select:
                                         return getattr(self.ckan.action, 'package_delete')(**kwargs)
                                     elif 'purge' == select:
                                         return getattr(self.ckan.action, 'dataset_purge')(**kwargs)
                                     else:
                                         return 'ERROR:: "select = %s" is not accepted' % (select)
                             elif type_option == 'project':
                                 if select is None:
                                     return 'ERROR:: "select" must not be "None"'
                                 else:
                                     if 'delete' == select:
                                         return getattr(self.ckan.action, 'organization_delete')(**kwargs)
                                     elif 'purge' == select:
                                         return getattr(self.ckan.action, 'organization_purge')(**kwargs)
                                     else:
                                         return 'ERROR:: "select = %s" is not accepted' % (select)
                             elif type_option == 'resource':
                                 if select is None:
                                     return 'ERROR:: "select" must not be "None"'
                                 else:
                                     return resource.resource_delete(self, select, **kwargs)
                             elif type_option == 'vocabulary':
                                 return getattr(self.ckan.action, 'vocabulary_delete')(**kwargs)
                             elif type_option == 'tag':
                                 return getattr(self.ckan.action, 'tag_delete')(**kwargs)
                             elif type_option == 'user':
                                 return getattr(self.ckan.action, 'user_delete')(**kwargs)
                             else:
                                 return 'ERROR:: "type_option = %s" is not accepted' % (type_option)
                         except:
                             _, exc_value, _ = sys.exc_info()
                             return exc_value
                     else:
                         return 'ERROR:: "type_option" must be <str>'
                 def download(self, id, processes=1, path=os.path.expanduser("~"), **kwargs):
                     '''
                     PURPOSE:
                         Advanced custom function to download the existing files of one or more datasets.
                     PARAMETERS:
                         id: dataset id or name as a str, or a list of them (list items are
                             converted with str()).
                         processes: int, forwarded as "--processes".
                         path: existing, writable directory, forwarded as "--datapackages".
                             The default is computed once, when the method is defined.
                         **kwargs: forwarded to download.dump_things_change; see "GUIA DE SCRIPT.pdf".
                     RETURNS:
                         An 'ERROR:: ...' message when an argument is invalid; otherwise whatever
                         download.dump_things_change returns.
                     STRUCTURE:
                         <access_name>.download(id = <class 'str' or 'list'>, param_1 = <class 'param_1'>, ...)
                     '''
                     #------------------ PATH ----------------------#
                     if isinstance(path, str):
                         if os.path.isdir(path):
                             if not path.endswith(os.sep):
                                 path = path + os.sep
                             # Probe write permission by creating and removing a
                             # timestamp-named file inside the target directory.
                             test_txt = path + datetime.now().strftime("%Y_%m_%d_%H_%M_%S_%f")+'.txt'
                             try:
                                 file_txt = open(test_txt, 'w')
                                 file_txt.close()
                                 os.remove(test_txt)
                             except:
                                 return 'ERROR:: Access denied, you are not authorized to write files: "%s"' % (path)
                         else:
                             return 'ERROR:: "path" does not exist'
                     else:
                         return 'ERROR:: "path" must be: <class "str">'
                     #------------------ PROCESSES -----------------#
                     if not isinstance(processes, int):
                         return 'ERROR:: "processes" must be: <class "int">'
                     #------------------ ID OR NAME ----------------#
                     if isinstance(id, str):
                         id = [id]
                     elif isinstance(id, list):
                         id = list(map(str, id))
                     else:
                         return 'ERROR:: dataset "id" must be: <class "str" or "list">'
                     #----------------------------------------------#
                     # NOTE(review): the keys look like the option names of the ckanapi command
                     # line (the commented-out entries are options left unset) - confirm
                     # against download.dump_things_change.
                     arguments = {
                             '--apikey': self.Authorization,
                             '--ckan-user': None,
                             '--config': None,
                             '--datapackages': path,
                             '--datastore-fields': False,
                             '--get-request': False,
                             '--insecure': not self.verify,
                             '--processes': str(processes),
                             '--quiet': False,
                             '--remote': self.url,
                             '--worker': False,
                             #'--log': 'log.txt',
                             #'--all': False,
                             #'--gzip': False,
                             #'--output': None,
                             #'--max-records': None,
                             #'--output-json': False,
                             #'--output-jsonl': False,
                             #'--create-only': False,
                             #'--help': False,
                             #'--input': None,
                             #'--input-json': False,
                             #'--start-record': '1',
                             #'--update-only': False,
                             #'--upload-logo': False,
                             #'--upload-resources': False,
                             #'--version': False,
                             'ID_OR_NAME': id,
                             'datasets': True,
                             'dump': True,
                             #'ACTION_NAME': None,
                             #'KEY:JSON': [],
                             #'KEY=STRING': [],
                             #'KEY@FILE': [],
                             #'action': False,
                             #'delete': False,
                             #'groups': False,
                             #'load': False,
                             #'organizations': False,
                             #'related': False,
                             #'search': False,
                             #'users': False
                             }
-                    return download.dump_things_change(self.ckan, 'datasets', arguments, **kwargs)
  No newline at end of file
+                    return download.dump_things_change(self.ckan, 'datasets', arguments, self.ua, **kwargs)
  No newline at end of file

jrodb/download.py +6 -6

             #from ckanapi.datapackage import populate_schema_from_datastore
             from ckanapi.cli import workers, dump
             from ckanapi.cli.utils import pretty_json, completion_stats, compact_json, quiet_int_pipe
             from datetime import datetime
             from tqdm import tqdm
             import sys
             import json
             import os
             import requests
             import six
             if sys.version_info.major == 3:
                 from urllib.parse import urlparse
             else:
                 import urlparse
             DL_CHUNK_SIZE = 100 * 1024
-            def dump_things_change(ckan, thing, arguments, worker_pool=None, stdout=None, stderr=None, **kwargs):
+            def dump_things_change(ckan, thing, arguments, ua, worker_pool=None, stdout=None, stderr=None, **kwargs):
                 if worker_pool is None:
                     worker_pool = workers.worker_pool
                 if stdout is None:
                     stdout = getattr(sys.__stdout__, 'buffer', sys.__stdout__)
                 if stderr is None:
                     stderr = getattr(sys.stderr, 'buffer', sys.stderr)
                 if arguments['--worker']:
                     return dump.dump_things_worker(ckan, thing, arguments)
                 '''
                 log = None
                 if arguments['--log']:
                     log = open(arguments['--log'], 'a')
                 '''
                 jsonl_output = stdout
                 if arguments['--datapackages']:
                     jsonl_output = open(os.devnull, 'wb')
                 names = arguments['ID_OR_NAME']
                 if names and isinstance(names[0], dict):
                     names = [rec.get('name',rec.get('id')) for rec in names]
                 '''
                 if arguments['--datapackages']:
                     arguments['--datastore-fields'] = True
                 '''
                 #----------------------------#
                 filtered_urls = {}
                 for val in names:
                     try:
                         filtered_urls[val] = getattr(ckan.action, 'url_resources')(id=val, **kwargs)
                     except:
                         _, exc_value, _ = sys.exc_info()
                         return exc_value
                 #----------------------------#
                 cmd = dump._worker_command_line(thing, arguments)
                 processes = int(arguments['--processes'])
                 if hasattr(ckan, 'parallel_limit'):
                     processes = min(processes, ckan.parallel_limit)
                 stats = completion_stats(processes)
                 pool = worker_pool(cmd, processes, enumerate(compact_json(n) + b'\n' for n in names))
                 results = {}
                 expecting_number = 0
                 with quiet_int_pipe() as errors:
                     for job_ids, finished, result in pool:
                         if not result:
                             return 1
                         timestamp, error, record = json.loads(result.decode('utf-8'))
                         results[finished] = record
                         #----------------------------------------#
                         datapackages_path = arguments['--datapackages']
                         datapackage_dir = name_no_repetition(record.get('name', ''), datapackages_path)
                         #----------------------------------------#
                         if not arguments['--quiet']:
                             stderr.write('** Finished: {0} | Job IDs: {1} | Next Report: {2} | Error: {3} | Path: {4} | Dataset Name: {5}\n'.format(
                                 finished,
                                 job_ids,
                                 next(stats),
                                 error,
                                 datapackage_dir,
                                 record.get('name', '') if record else '',
                                 ).encode('utf-8'))
                         '''
                         if log:
                             log.write(compact_json([
                                 timestamp,
                                 finished,
                                 error,
                                 record.get('name', '') if record else None,
                                 ]) + b'\n')
                         '''
                         if datapackages_path:
                             try:
                                 filter_url = filtered_urls[record.get('name', '')]
                             except:
                                 filter_url = filtered_urls[record.get('id', '')]
-                            create_datapackage_change(record, filter_url, datapackage_dir, stderr, arguments['--apikey'], arguments['--remote'], arguments['--insecure'])
+                            create_datapackage_change(record, filter_url, datapackage_dir, stderr, arguments['--apikey'], arguments['--remote'], arguments['--insecure'], ua)
                         while expecting_number in results:
                             record = results.pop(expecting_number)
                             if record:
                                 jsonl_output.write(compact_json(record, sort_keys=True) + b'\n')
                             expecting_number += 1
                 if 'pipe' in errors:
                     return 1
                 if 'interrupt' in errors:
                     return 2
-            def create_datapackage_change(record, filtered_url, datapackage_dir, stderr, apikey, host_url, insecure):
+            def create_datapackage_change(record, filtered_url, datapackage_dir, stderr, apikey, host_url, insecure, ua):
                 resource_formats_to_ignore = ['API', 'api']
                 os.makedirs(os.path.join(datapackage_dir, 'data'))
                 record['path'] = datapackage_dir
                 ckan_resources = []
                 for resource in tqdm(record.get('resources', []), unit_scale=True):
                 #for resource in record.get('resources', []):
                     if resource['format'] in resource_formats_to_ignore:
                         continue
                     if not {'name': resource['name'], 'url': resource['url']} in filtered_url:
                         continue
                     if len(resource['url']) == 0:
                         continue
                     filename = name_no_repetition(resource['name'], os.path.join(datapackage_dir, 'data'), 'resource')
                     resource['path'] = os.path.join(datapackage_dir, 'data', filename)
-                    cres = create_resource_change(resource, stderr, apikey, host_url, insecure)
+                    cres = create_resource_change(resource, stderr, apikey, host_url, insecure, ua)
                     if not cres:
                         continue
                     '''
                     #----------------------------------------#
                     dres = {'path': os.path.join('data', filename),
                             'description': cres.get('description', ''),
                             'format': cres.get('format', ''),
                             'name': cres.get('name', ''),
                             'title': cres.get('name', '').title()}
                     #----------------------------------------#
                     populate_schema_from_datastore(cres, dres)
                     '''
                     ckan_resources.append(resource)
                 dataset = dict(record, resources=ckan_resources)
                 datapackage = dataset_to_datapackage_change(dataset)
                 json_path = os.path.join(datapackage_dir, 'datapackage.json')
                 with open(json_path, 'wb') as out:
                     out.write(pretty_json(datapackage))
                 return datapackage_dir, datapackage, json_path
-            def create_resource_change(resource, stderr, apikey, host_url, insecure):
+            def create_resource_change(resource, stderr, apikey, host_url, insecure, ua):
                 # ---------- REPLACE URL --------- #
                 if urlparse(host_url).netloc != 'www.igp.gob.pe' and urlparse(resource['url']).netloc == 'www.igp.gob.pe':
                     resource['url'] = resource['url'].replace(urlparse(resource['url']).scheme + '://' + urlparse(resource['url']).netloc,
                                                             urlparse(host_url).scheme + '://' + urlparse(host_url).netloc)
                 #----------------------------------#
                 try:
-                    r = requests.get(resource['url'], headers={'Authorization': apikey}, stream=True, verify=not insecure)
+                    r = requests.get(resource['url'], headers={'Authorization': apikey, 'User-Agent': ua}, stream=True, verify=not insecure)
                     #---------------------------------------#
                     try:
                         r.raise_for_status()
                     except requests.exceptions.HTTPError as e:
                         return False
                     #---------------------------------------#
                     with open(resource['path'], 'wb') as f:
                         for chunk in r.iter_content(chunk_size=DL_CHUNK_SIZE):
                             if chunk:
                                 f.write(chunk)
                 except requests.ConnectionError:
                     stderr.write('URL {0} refused connection. The resource will not be downloaded\n'.format(resource['url']).encode('utf-8'))
                 except requests.exceptions.RequestException as e:
                     stderr.write('{0}\n'.format(str(e.args[0]) if len(e.args) > 0 else '').encode('utf-8'))
                 except Exception as e:
                     stderr.write('{0}'.format(str(e.args[0]) if len(e.args) > 0 else '').encode('utf-8'))
                 return resource
             def dataset_to_datapackage_change(dataset_dict):
                 """Build the datapackage descriptor for a dataset record.

                 Parameters:
                     dataset_dict: dict with at least the keys 'name', 'id', 'path'
                         and 'metadata_modified' (ISO-8601 timestamp string, with or
                         without fractional seconds). An optional 'resources' list is
                         converted with convert_to_datapackage_resource_change().

                 Returns:
                     dict with 'name', 'id', 'path', 'last_update' (formatted as
                     "%d-%b-%Y %I.%M %p") and, only when the dataset has resources,
                     a 'resources' list.

                 Raises:
                     KeyError: if a required key is missing.
                     ValueError: if 'metadata_modified' matches neither timestamp format.
                 """
                 modified_raw = dataset_dict['metadata_modified']
                 try:
                     modified_at = datetime.strptime(modified_raw, "%Y-%m-%dT%H:%M:%S.%f")
                 except ValueError:
                     # ISO timestamps may omit the fractional part (datetime.isoformat()
                     # does so when microsecond == 0); accept that form too.
                     modified_at = datetime.strptime(modified_raw, "%Y-%m-%dT%H:%M:%S")
                 dp = {'name': dataset_dict['name'],
                       'id': dataset_dict['id'],
                       'path': dataset_dict['path'],
                       'last_update': modified_at.strftime("%d-%b-%Y %I.%M %p")}
                 resources = dataset_dict.get('resources')
                 if resources:
                     dp['resources'] = [convert_to_datapackage_resource_change(r)
                                        for r in resources]
                 return dp
             def convert_to_datapackage_resource_change(resource_dict):
                 """Translate one resource record into its datapackage entry.

                 Copies 'id' and 'name' when they are non-empty. When the record has a
                 'path', the entry gets that 'path' if it points to an existing file,
                 otherwise the record's 'url' is stored instead. A 'schema' given as a
                 dict is kept as is; a 'schema' given as a string is parsed as JSON and
                 kept verbatim if parsing fails.

                 Parameters:
                     resource_dict: dict describing the resource.

                 Returns:
                     dict containing only the keys that could be filled in.
                 """
                 converted = {}
                 for key in ('id', 'name'):
                     if resource_dict.get(key):
                         converted[key] = resource_dict[key]
                 local_path = resource_dict.get('path')
                 if local_path:
                     # Only advertise the local path when the file is really on disk;
                     # otherwise point back at the remote URL.
                     if os.path.isfile(local_path):
                         converted['path'] = local_path
                     else:
                         converted['url'] = resource_dict['url']
                 schema = resource_dict.get('schema')
                 if isinstance(schema, dict):
                     converted['schema'] = schema
                 elif isinstance(schema, six.string_types):
                     try:
                         converted['schema'] = json.loads(schema)
                     except ValueError:
                         # Not valid JSON: keep the raw string.
                         converted['schema'] = schema
                 return converted
             def name_no_repetition(name, dir, option=''):
                 """Pick a name inside `dir` that does not collide with an existing entry.

                 The plain `name` is tried first; if it already exists, the prefixes
                 '(1)', '(2)', ... are tried in order until a free one is found.

                 Parameters:
                     name: desired file or directory name.
                     dir: directory in which the name must be unique.
                     option: when equal to 'resource' only the chosen name is returned;
                         for any other value the name joined to `dir` is returned.

                 Returns:
                     str with the free name, or with its path inside `dir`.
                 """
                 candidate = name
                 attempt = 0
                 while os.path.exists(os.path.join(dir, candidate)):
                     attempt += 1
                     candidate = '(' + str(attempt) + ')' + name
                 if option == 'resource':
                     return candidate
                 return os.path.join(dir, candidate)

setup.py +1 -1

             # encoding: utf-8
             from setuptools import setup
             setup(
                 name = "jrodb",
-                version = "2.9.2.0",
+                version = "2.9.2.1",
                 description = "Data Repository - JRO",
                 author = "Edson Ynilupu Mattos",
                 author_email = "eynilupu@igp.gob.pe",
                 url = "http://intranet.igp.gob.pe:8082/DATABASES/ckanext-jro/api-cliente",
                 packages = ["jrodb"],
                 install_requires = [
                     "ckanapi==4.7",
                     "requests",
                     "tqdm"
                     ],
             )

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages