diff --git a/schainpy/model/io/jroIO_param.py b/schainpy/model/io/jroIO_param.py
index b6a24eb..be7595a 100644
--- a/schainpy/model/io/jroIO_param.py
+++ b/schainpy/model/io/jroIO_param.py
@@ -17,7 +17,7 @@ class HDFReader(Reader, ProcessingUnit):
     This unit reads HDF5 files created with `HDFWriter` operation contains
     by default two groups Data and Metadata all variables would be saved as
     `dataOut`
-    attributes.
+    attributes.
     It is possible to read any HDF5 file by given the structure in the
     `description` parameter, also you can add extra values to metadata with
     the parameter `extras`.
@@ -46,7 +46,7 @@ class HDFReader(Reader, ProcessingUnit):
 
     Examples
     --------
-    
+
     desc = {
         'Data': {
             'data_output': ['u', 'v', 'w'],
@@ -70,7 +70,6 @@ class HDFReader(Reader, ProcessingUnit):
     extras = {
         'timeZone': 300
     }
-    
     reader = project.addReadUnit(
         name='HDFReader',
@@ -104,44 +103,45 @@ class HDFReader(Reader, ProcessingUnit):
         self.utcoffset = 0
 
     def setup(self, **kwargs):
+        self.set_kwargs(**kwargs)
         if not self.ext.startswith('.'):
-            self.ext = '.{}'.format(self.ext)
+            self.ext = '.{}'.format(self.ext)
 
         if self.online:
             log.log("Searching files in online mode...", self.name)
 
             for nTries in range(self.nTries):
                 fullpath = self.searchFilesOnLine(self.path, self.startDate,
-                    self.endDate, self.expLabel, self.ext, self.walk,
+                    self.endDate, self.expLabel, self.ext, self.walk,
                     self.filefmt, self.folderfmt)
                 try:
                     fullpath = next(fullpath)
                 except:
                     fullpath = None
-                
+
                 if fullpath:
                     break
 
                 log.warning(
                     'Waiting {} sec for a valid file in {}: try {} ...'.format(
-                        self.delay, self.path, nTries + 1),
+                        self.delay, self.path, nTries + 1),
                     self.name)
                 time.sleep(self.delay)
 
             if not(fullpath):
                 raise schainpy.admin.SchainError(
-                    'There isn\'t any valid file in {}'.format(self.path))
+                    'There isn\'t any valid file in {}'.format(self.path))
 
             pathname, filename = os.path.split(fullpath)
             self.year = int(filename[1:5])
             self.doy = int(filename[5:8])
-            self.set = int(filename[8:11]) - 1
+            self.set = int(filename[8:11]) - 1
         else:
             log.log("Searching files in {}".format(self.path), self.name)
-            self.filenameList = self.searchFilesOffLine(self.path, self.startDate,
+            self.filenameList = self.searchFilesOffLine(self.path, self.startDate,
                 self.endDate, self.expLabel, self.ext, self.walk, self.filefmt, self.folderfmt)
-        
+
         self.setNextFile()
 
         return
@@ -149,18 +149,18 @@ class HDFReader(Reader, ProcessingUnit):
     def readFirstHeader(self):
         '''Read metadata and data'''
 
-        self.__readMetadata()
+        self.__readMetadata()
         self.__readData()
         self.__setBlockList()
-        
+
         if 'type' in self.meta:
             self.dataOut = eval(self.meta['type'])()
-        
+
         for attr in self.meta:
             setattr(self.dataOut, attr, self.meta[attr])
-        
+
         self.blockIndex = 0
-        
+
         return
 
     def __setBlockList(self):
@@ -178,7 +178,6 @@ class HDFReader(Reader, ProcessingUnit):
         startTime = self.startTime
         endTime = self.endTime
         thisUtcTime = self.data['utctime'] + self.utcoffset
-        self.interval = numpy.min(thisUtcTime[1:] - thisUtcTime[:-1])
 
         thisDatetime = datetime.datetime.utcfromtimestamp(thisUtcTime[0])
@@ -224,7 +223,7 @@ class HDFReader(Reader, ProcessingUnit):
     def __readData(self):
 
         data = {}
-        
+
         if self.description:
             for key, value in self.description['Data'].items():
                 if isinstance(value, str):
@@ -252,7 +251,7 @@ class HDFReader(Reader, ProcessingUnit):
                         array = numpy.array(array)
                     else:
                         log.warning('Unknown type: {}'.format(name))
-                
+
                 if name in self.description:
                     key = self.description[name]
                 else:
@@ -261,7 +260,7 @@ class HDFReader(Reader, ProcessingUnit):
         self.data = data
         return
-    
+
     def getData(self):
 
         for attr in self.data:
@@ -300,8 +299,8 @@ class HDFWriter(Operation):
     The HDF5 file contains by default two groups Data and Metadata where
     you can save any `dataOut` attribute specified by `dataList` and `metadataList`
     parameters, data attributes are normaly time dependent where the metadata
-    are not.
-    It is possible to customize the structure of the HDF5 file with the
+    are not.
+    It is possible to customize the structure of the HDF5 file with the
     optional description parameter see the examples.
 
     Parameters:
@@ -318,10 +317,10 @@ class HDFWriter(Operation):
         If True the name of the files corresponds to the timestamp of the data
     description : dict, optional
         Dictionary with the desired description of the HDF5 file
-    
+
     Examples
     --------
-    
+
     desc = {
         'data_output': {'winds': ['z', 'w', 'v']},
         'utctime': 'timestamps',
@@ -341,7 +340,7 @@ class HDFWriter(Operation):
             'heightList': 'heights'
         }
     }
-    
+
     writer = proc_unit.addOperation(name='HDFWriter')
     writer.addParameter(name='path', value='/path/to/file')
     writer.addParameter(name='blocksPerFile', value='32')
@@ -369,17 +368,28 @@ class HDFWriter(Operation):
     lastTime = None
 
     def __init__(self):
-        
+
         Operation.__init__(self)
         return
 
-    def setup(self, path=None, blocksPerFile=10, metadataList=None, dataList=None, setType=None, description=None):
+    def set_kwargs(self, **kwargs):
+
+        for key, value in kwargs.items():
+            setattr(self, key, value)
+
+    def set_kwargs_obj(self, obj, **kwargs):
+
+        for key, value in kwargs.items():
+            setattr(obj, key, value)
+
+    def setup(self, path=None, blocksPerFile=10, metadataList=None, dataList=None, setType=None, description=None, **kwargs):
         self.path = path
         self.blocksPerFile = blocksPerFile
         self.metadataList = metadataList
         self.dataList = [s.strip() for s in dataList]
         self.setType = setType
         self.description = description
+        self.set_kwargs(**kwargs)
 
         if self.metadataList is None:
             self.metadataList = self.dataOut.metadata_list
@@ -405,7 +415,7 @@ class HDFWriter(Operation):
                 dsDict['shape'] = dataAux.shape
                 dsDict['dsNumber'] = dataAux.shape[0]
                 dsDict['dtype'] = dataAux.dtype
-            
+
             dsList.append(dsDict)
 
         self.dsList = dsList
@@ -420,7 +430,7 @@ class HDFWriter(Operation):
             self.lastTime = currentTime
             self.currentDay = dataDay
             return False
-        
+
         timeDiff = currentTime - self.lastTime
 
         #Si el dia es diferente o si la diferencia entre un dato y otro supera la hora
@@ -435,22 +445,23 @@ class HDFWriter(Operation):
         return False
 
     def run(self, dataOut, path, blocksPerFile=10, metadataList=None,
-            dataList=[], setType=None, description={}):
+            dataList=[], setType=None, description={}, **kwargs):
 
         self.dataOut = dataOut
+        self.set_kwargs_obj(self.dataOut, **kwargs)
         if not(self.isConfig):
-            self.setup(path=path, blocksPerFile=blocksPerFile,
+            self.setup(path=path, blocksPerFile=blocksPerFile,
                        metadataList=metadataList, dataList=dataList,
-                       setType=setType, description=description)
+                       setType=setType, description=description, **kwargs)
 
             self.isConfig = True
             self.setNextFile()
 
         self.putData()
         return
-    
+
     def setNextFile(self):
-        
+
         ext = self.ext
         path = self.path
         setFile = self.setFile
@@ -462,7 +473,7 @@ class HDFWriter(Operation):
         if os.path.exists(fullpath):
             filesList = os.listdir(fullpath)
             filesList = [k for k in filesList if k.startswith(self.optchar)]
-            if len( filesList ) > 0:
+            if len(filesList) > 0:
                 filesList = sorted(filesList, key=str.lower)
                 filen = filesList[-1]
                 # el filename debera tener el siguiente formato
@@ -484,16 +495,16 @@ class HDFWriter(Operation):
                                          timeTuple.tm_year,
                                          timeTuple.tm_yday,
                                          setFile,
-                                         ext )
+                                         ext)
         else:
             setFile = timeTuple.tm_hour*60+timeTuple.tm_min
             file = '%s%4.4d%3.3d%04d%s' % (self.optchar,
                                            timeTuple.tm_year,
                                            timeTuple.tm_yday,
                                            setFile,
-                                           ext )
+                                           ext)
 
-        self.filename = os.path.join( path, subfolder, file )
+        self.filename = os.path.join(path, subfolder, file)
 
         #Setting HDF5 File
         self.fp = h5py.File(self.filename, 'w')
@@ -535,7 +546,7 @@ class HDFWriter(Operation):
                 return 'pair{:02d}'.format(x)
             else:
                 return 'channel{:02d}'.format(x)
-    
+
     def writeMetadata(self, fp):
 
         if self.description:
@@ -560,7 +571,7 @@ class HDFWriter(Operation):
         return
 
     def writeData(self, fp):
-        
+
         if self.description:
             if 'Data' in self.description:
                 grp = fp.create_group('Data')
@@ -571,13 +582,13 @@ class HDFWriter(Operation):
 
         dtsets = []
         data = []
-        
+
         for dsInfo in self.dsList:
             if dsInfo['nDim'] == 0:
                 ds = grp.create_dataset(
-                    self.getLabel(dsInfo['variable']),
-                    (self.blocksPerFile, ),
-                    chunks=True,
+                    self.getLabel(dsInfo['variable']),
+                    (self.blocksPerFile,),
+                    chunks=True,
                     dtype=numpy.float64)
                 dtsets.append(ds)
                 data.append((dsInfo['variable'], -1))
@@ -589,8 +600,8 @@ class HDFWriter(Operation):
                     sgrp = grp
                 for i in range(dsInfo['dsNumber']):
                     ds = sgrp.create_dataset(
-                        self.getLabel(dsInfo['variable'], i),
-                        (self.blocksPerFile, ) + dsInfo['shape'][1:],
+                        self.getLabel(dsInfo['variable'], i),
+                        (self.blocksPerFile,) + dsInfo['shape'][1:],
                         chunks=True,
                         dtype=dsInfo['dtype'])
                     dtsets.append(ds)
@@ -598,7 +609,7 @@ class HDFWriter(Operation):
         fp.flush()
 
         log.log('Creating file: {}'.format(fp.filename), self.name)
-        
+
         self.ds = dtsets
         self.data = data
         self.firsttime = True
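
Note on the new **kwargs pass-through: with this patch, any extra keyword reaching HDFReader.setup() or HDFWriter.run() is stored on the unit via set_kwargs(), and HDFWriter additionally mirrors it onto dataOut via set_kwargs_obj() before writing. The snippet below is a minimal sketch of that mechanism only; DummyDataOut and DummyWriter are hypothetical stand-ins, not schainpy classes, and timeZone is just an illustrative keyword borrowed from the reader's `extras` example.

# Minimal sketch of the kwargs pass-through added by this patch.
# DummyDataOut/DummyWriter are hypothetical stand-ins, not schainpy classes.
class DummyDataOut:
    pass

class DummyWriter:
    def set_kwargs(self, **kwargs):
        # Same logic as the new HDFWriter.set_kwargs: store every keyword
        # as an attribute of the operation itself.
        for key, value in kwargs.items():
            setattr(self, key, value)

    def set_kwargs_obj(self, obj, **kwargs):
        # Same logic as the new HDFWriter.set_kwargs_obj: mirror the keywords
        # onto another object (the dataOut holder in the real code).
        for key, value in kwargs.items():
            setattr(obj, key, value)

dataOut = DummyDataOut()
writer = DummyWriter()
writer.set_kwargs(timeZone=300)               # attribute lands on the writer
writer.set_kwargs_obj(dataOut, timeZone=300)  # and is copied onto dataOut
assert writer.timeZone == dataOut.timeZone == 300

Because setattr() silently overwrites an attribute of the same name, extra keywords passed this way should be chosen so they do not collide with attributes dataOut already carries.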