update jroIO_param class HDFWriter
Alexander Valdez
r1701:dc9cba07d600
@@ -17,7 +17,7 @@ class HDFReader(Reader, ProcessingUnit):
 
     This unit reads HDF5 files created with `HDFWriter` operation contains
     by default two groups Data and Metadata all variables would be saved as `dataOut`
     attributes.
     It is possible to read any HDF5 file by given the structure in the `description`
     parameter, also you can add extra values to metadata with the parameter `extras`.
 
@@ -46,7 +46,7 @@ class HDFReader(Reader, ProcessingUnit):
 
     Examples
     --------
 
     desc = {
         'Data': {
             'data_output': ['u', 'v', 'w'],
@@ -70,7 +70,6 @@ class HDFReader(Reader, ProcessingUnit):
     extras = {
         'timeZone': 300
     }
 
-
     reader = project.addReadUnit(
         name='HDFReader',
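For context, the default layout such a file has (a Data group with time-dependent variables and a Metadata group with static ones, as the docstring above describes) can be sketched with h5py. The dataset names below follow the docstring examples (`utctime`, `data_output`, `heightList`, `timeZone`); real files depend on how `HDFWriter` was configured.

# Minimal sketch of the layout HDFReader expects by default.
# Names follow the docstring examples; actual contents depend on the writer setup.
import numpy
import h5py

with h5py.File('example.hdf5', 'w') as fp:
    data = fp.create_group('Data')
    data.create_dataset('utctime', data=numpy.array([1625000000.0, 1625000060.0]))
    winds = data.create_group('data_output')          # one dataset per component
    for name in ('channel00', 'channel01', 'channel02'):
        winds.create_dataset(name, data=numpy.zeros((2, 100)))
    meta = fp.create_group('Metadata')
    meta.create_dataset('heightList', data=numpy.arange(100.0))
    meta.create_dataset('timeZone', data=300)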
@@ -104,44 +103,45 @@ class HDFReader(Reader, ProcessingUnit):
         self.utcoffset = 0
 
     def setup(self, **kwargs):
+
         self.set_kwargs(**kwargs)
         if not self.ext.startswith('.'):
             self.ext = '.{}'.format(self.ext)
 
         if self.online:
             log.log("Searching files in online mode...", self.name)
 
             for nTries in range(self.nTries):
                 fullpath = self.searchFilesOnLine(self.path, self.startDate,
                     self.endDate, self.expLabel, self.ext, self.walk,
                     self.filefmt, self.folderfmt)
                 try:
                     fullpath = next(fullpath)
                 except:
                     fullpath = None
 
                 if fullpath:
                     break
 
                 log.warning(
                     'Waiting {} sec for a valid file in {}: try {} ...'.format(
                         self.delay, self.path, nTries + 1),
                     self.name)
                 time.sleep(self.delay)
 
             if not(fullpath):
                 raise schainpy.admin.SchainError(
                     'There isn\'t any valid file in {}'.format(self.path))
 
             pathname, filename = os.path.split(fullpath)
             self.year = int(filename[1:5])
             self.doy = int(filename[5:8])
             self.set = int(filename[8:11]) - 1
         else:
             log.log("Searching files in {}".format(self.path), self.name)
             self.filenameList = self.searchFilesOffLine(self.path, self.startDate,
                 self.endDate, self.expLabel, self.ext, self.walk, self.filefmt, self.folderfmt)
 
         self.setNextFile()
 
         return
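The online branch above is a bounded retry loop around a generator-based search. A stripped-down sketch of the same pattern, with a generic `search` callable and hypothetical defaults standing in for the Reader attributes (`nTries`, `delay`):

import time

def wait_for_file(search, n_tries=3, delay=10):
    """Retry a generator-based search until it yields a path or tries run out."""
    for attempt in range(n_tries):
        try:
            return next(search())                    # first match wins
        except StopIteration:
            pass
        print('Waiting {} sec for a valid file: try {} ...'.format(delay, attempt + 1))
        time.sleep(delay)
    raise IOError('No valid file found')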
@@ -149,18 +149,18 @@ class HDFReader(Reader, ProcessingUnit):
     def readFirstHeader(self):
         '''Read metadata and data'''
 
         self.__readMetadata()
         self.__readData()
         self.__setBlockList()
 
         if 'type' in self.meta:
             self.dataOut = eval(self.meta['type'])()
 
         for attr in self.meta:
             setattr(self.dataOut, attr, self.meta[attr])
 
         self.blockIndex = 0
 
         return
 
     def __setBlockList(self):
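`readFirstHeader` rebuilds the output object from the class name stored in the file's metadata (`eval(self.meta['type'])()`) and then copies every metadata entry onto it. A self-contained illustration of that idea, using a stand-in class rather than the real schainpy data objects:

class Parameters:
    """Stand-in for the data class whose name is stored in Metadata['type']."""
    pass

meta = {'type': 'Parameters', 'timeZone': 300}
dataOut = eval(meta['type'])()              # instantiate the class from its name
for attr in meta:
    setattr(dataOut, attr, meta[attr])      # every metadata entry becomes an attribute
print(dataOut.timeZone)                     # -> 300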
@@ -178,7 +178,6 @@ class HDFReader(Reader, ProcessingUnit):
         startTime = self.startTime
         endTime = self.endTime
         thisUtcTime = self.data['utctime'] + self.utcoffset
-
         self.interval = numpy.min(thisUtcTime[1:] - thisUtcTime[:-1])
         thisDatetime = datetime.datetime.utcfromtimestamp(thisUtcTime[0])
 
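`self.interval` above is simply the smallest gap between consecutive timestamps in the block; for example:

import numpy

utctime = numpy.array([1625000000, 1625000060, 1625000121, 1625000180])
interval = numpy.min(utctime[1:] - utctime[:-1])    # smallest gap between samples
print(interval)                                      # -> 59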
@@ -224,7 +223,7 @@ class HDFReader(Reader, ProcessingUnit):
     def __readData(self):
 
         data = {}
 
         if self.description:
             for key, value in self.description['Data'].items():
                 if isinstance(value, str):
@@ -252,7 +251,7 @@ class HDFReader(Reader, ProcessingUnit):
                     array = numpy.array(array)
                 else:
                     log.warning('Unknown type: {}'.format(name))
 
                 if name in self.description:
                     key = self.description[name]
                 else:
@@ -261,7 +260,7 @@ class HDFReader(Reader, ProcessingUnit):
 
         self.data = data
         return
 
     def getData(self):
 
         for attr in self.data:
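The `description` lookup at the end of `__readData` only renames HDF5 variables before they are stored in `self.data`. A reduced sketch of that mapping, with hypothetical dataset names and a flat description dictionary:

# Reduced sketch: map HDF5 dataset names to dataOut attribute names,
# falling back to the original name when no mapping is given.
description = {'winds': 'data_output', 'timestamps': 'utctime'}   # hypothetical

def attribute_key(name, description):
    return description[name] if name in description else name

print(attribute_key('winds', description))      # -> data_output
print(attribute_key('heights', description))    # -> heights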
@@ -300,8 +299,8 @@ class HDFWriter(Operation):
     The HDF5 file contains by default two groups Data and Metadata where
     you can save any `dataOut` attribute specified by `dataList` and `metadataList`
     parameters, data attributes are normaly time dependent where the metadata
     are not.
     It is possible to customize the structure of the HDF5 file with the
     optional description parameter see the examples.
 
     Parameters:
@@ -318,10 +317,10 @@ class HDFWriter(Operation):
         If True the name of the files corresponds to the timestamp of the data
     description : dict, optional
         Dictionary with the desired description of the HDF5 file
 
     Examples
     --------
 
     desc = {
         'data_output': {'winds': ['z', 'w', 'v']},
         'utctime': 'timestamps',
@@ -341,7 +340,7 @@ class HDFWriter(Operation):
             'heightList': 'heights'
         }
     }
 
     writer = proc_unit.addOperation(name='HDFWriter')
     writer.addParameter(name='path', value='/path/to/file')
     writer.addParameter(name='blocksPerFile', value='32')
@@ -369,17 +368,28 @@ class HDFWriter(Operation):
     lastTime = None
 
     def __init__(self):
 
         Operation.__init__(self)
         return
 
-    def setup(self, path=None, blocksPerFile=10, metadataList=None, dataList=None, setType=None, description=None):
+    def set_kwargs(self, **kwargs):
+
+        for key, value in kwargs.items():
+            setattr(self, key, value)
+
+    def set_kwargs_obj(self, obj, **kwargs):
+
+        for key, value in kwargs.items():
+            setattr(obj, key, value)
+
+    def setup(self, path=None, blocksPerFile=10, metadataList=None, dataList=None, setType=None, description=None, **kwargs):
         self.path = path
         self.blocksPerFile = blocksPerFile
         self.metadataList = metadataList
         self.dataList = [s.strip() for s in dataList]
         self.setType = setType
         self.description = description
+        self.set_kwargs(**kwargs)
 
         if self.metadataList is None:
             self.metadataList = self.dataOut.metadata_list
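The two helpers added here do nothing more than copy keyword arguments onto an object as attributes (`set_kwargs` onto the operation itself, `set_kwargs_obj` onto an arbitrary object such as `dataOut`). A self-contained sketch of that behaviour:

class Bag:
    """Stand-in object used only to illustrate the new helpers."""

    def set_kwargs(self, **kwargs):
        for key, value in kwargs.items():
            setattr(self, key, value)

    def set_kwargs_obj(self, obj, **kwargs):
        for key, value in kwargs.items():
            setattr(obj, key, value)

writer, data_out = Bag(), Bag()
writer.set_kwargs(blocksPerFile=32, setType='time')   # attributes on the operation
writer.set_kwargs_obj(data_out, timeZone=300)         # attributes on another object
print(writer.blocksPerFile, data_out.timeZone)        # -> 32 300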
@@ -405,7 +415,7 @@ class HDFWriter(Operation):
             dsDict['shape'] = dataAux.shape
             dsDict['dsNumber'] = dataAux.shape[0]
             dsDict['dtype'] = dataAux.dtype
 
             dsList.append(dsDict)
 
         self.dsList = dsList
@@ -420,7 +430,7 @@ class HDFWriter(Operation):
             self.lastTime = currentTime
             self.currentDay = dataDay
             return False
 
         timeDiff = currentTime - self.lastTime
 
         # If the day changed or the gap between samples exceeds one hour
@@ -435,22 +445,23 @@ class HDFWriter(Operation):
         return False
 
     def run(self, dataOut, path, blocksPerFile=10, metadataList=None,
-            dataList=[], setType=None, description={}):
+            dataList=[], setType=None, description={}, **kwargs):
 
         self.dataOut = dataOut
+        self.set_kwargs_obj(self.dataOut, **kwargs)
         if not(self.isConfig):
             self.setup(path=path, blocksPerFile=blocksPerFile,
                        metadataList=metadataList, dataList=dataList,
-                       setType=setType, description=description)
+                       setType=setType, description=description, **kwargs)
 
             self.isConfig = True
             self.setNextFile()
 
         self.putData()
         return
 
     def setNextFile(self):
 
         ext = self.ext
         path = self.path
         setFile = self.setFile
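Since `run()` now accepts `**kwargs` and forwards them both to `setup()` and onto `dataOut` through `set_kwargs_obj`, extra parameters can be attached straight from the project configuration. A hedged usage sketch in the same style as the docstring example above (the `altitude` parameter is hypothetical, not part of the documented interface):

writer = proc_unit.addOperation(name='HDFWriter')
writer.addParameter(name='path', value='/path/to/file')
writer.addParameter(name='blocksPerFile', value='32')
writer.addParameter(name='metadataList', value='heightList,timeZone')
writer.addParameter(name='dataList', value='data_output,utctime')
writer.addParameter(name='altitude', value='500')   # hypothetical extra; reaches run() via **kwargs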
@@ -462,7 +473,7 @@ class HDFWriter(Operation):
         if os.path.exists(fullpath):
             filesList = os.listdir(fullpath)
             filesList = [k for k in filesList if k.startswith(self.optchar)]
-            if len( filesList ) > 0:
+            if len(filesList) > 0:
                 filesList = sorted(filesList, key=str.lower)
                 filen = filesList[-1]
                 # the filename must have the following format
@@ -484,16 +495,16 @@ class HDFWriter(Operation):
                                            timeTuple.tm_year,
                                            timeTuple.tm_yday,
                                            setFile,
-                                           ext )
+                                           ext)
         else:
             setFile = timeTuple.tm_hour*60+timeTuple.tm_min
             file = '%s%4.4d%3.3d%04d%s' % (self.optchar,
                                            timeTuple.tm_year,
                                            timeTuple.tm_yday,
                                            setFile,
-                                           ext )
+                                           ext)
 
-        self.filename = os.path.join( path, subfolder, file )
+        self.filename = os.path.join(path, subfolder, file)
 
         # Setting HDF5 File
         self.fp = h5py.File(self.filename, 'w')
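The `'%s%4.4d%3.3d%04d%s'` pattern in the second branch builds names of the form optchar + year + day-of-year + minute-of-day + extension. A quick check of what it produces, assuming `optchar='d'` and `ext='.hdf5'` for illustration:

import time

optchar, ext = 'd', '.hdf5'                            # assumed values for illustration
timeTuple = time.gmtime(1625000000)                    # 2021-06-29 20:53:20 UTC
setFile = timeTuple.tm_hour * 60 + timeTuple.tm_min    # minute of the day
name = '%s%4.4d%3.3d%04d%s' % (optchar, timeTuple.tm_year,
                               timeTuple.tm_yday, setFile, ext)
print(name)                                            # -> d20211801253.hdf5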
@@ -535,7 +546,7 @@ class HDFWriter(Operation):
             return 'pair{:02d}'.format(x)
         else:
             return 'channel{:02d}'.format(x)
 
     def writeMetadata(self, fp):
 
         if self.description:
@@ -560,7 +571,7 @@ class HDFWriter(Operation):
         return
 
     def writeData(self, fp):
 
         if self.description:
             if 'Data' in self.description:
                 grp = fp.create_group('Data')
@@ -571,13 +582,13 @@ class HDFWriter(Operation):
 
         dtsets = []
         data = []
 
         for dsInfo in self.dsList:
             if dsInfo['nDim'] == 0:
                 ds = grp.create_dataset(
                     self.getLabel(dsInfo['variable']),
-                    (self.blocksPerFile, ),
+                    (self.blocksPerFile,),
                     chunks=True,
                     dtype=numpy.float64)
                 dtsets.append(ds)
                 data.append((dsInfo['variable'], -1))
@@ -589,8 +600,8 @@ class HDFWriter(Operation):
                 sgrp = grp
                 for i in range(dsInfo['dsNumber']):
                     ds = sgrp.create_dataset(
                         self.getLabel(dsInfo['variable'], i),
-                        (self.blocksPerFile, ) + dsInfo['shape'][1:],
+                        (self.blocksPerFile,) + dsInfo['shape'][1:],
                         chunks=True,
                         dtype=dsInfo['dtype'])
                     dtsets.append(ds)
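`writeData` pre-allocates every dataset with `blocksPerFile` rows, presumably filled one row per block later by `putData` (not shown in this hunk). A standalone h5py sketch of that allocation pattern (the variable and channel names are illustrative):

import numpy
import h5py

blocksPerFile, nHeights = 32, 100

with h5py.File('sketch.hdf5', 'w') as fp:
    grp = fp.create_group('Data')
    # 0-D variable: one scalar per block
    utctime = grp.create_dataset('utctime', (blocksPerFile,),
                                 chunks=True, dtype=numpy.float64)
    # N-D variable: one profile per block for each channel
    channel = grp.create_dataset('channel00', (blocksPerFile, nHeights),
                                 chunks=True, dtype=numpy.float32)
    utctime[0] = 1625000000.0                # first block written...
    channel[0] = numpy.zeros(nHeights)       # ...row by row as data arrives
    fp.flush()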
@@ -598,7 +609,7 @@ class HDFWriter(Operation):
         fp.flush()
 
         log.log('Creating file: {}'.format(fp.filename), self.name)
 
         self.ds = dtsets
         self.data = data
         self.firsttime = True