##// END OF EJS Templates
update jroIO_param class HDFWriter
Alexander Valdez -
r1701:dc9cba07d600
parent child
Show More
@@ -1,639 +1,650
1 import os
1 import os
2 import time
2 import time
3 import datetime
3 import datetime
4
4
5 import numpy
5 import numpy
6 import h5py
6 import h5py
7
7
8 import schainpy.admin
8 import schainpy.admin
9 from schainpy.model.data.jrodata import *
9 from schainpy.model.data.jrodata import *
10 from schainpy.model.proc.jroproc_base import ProcessingUnit, Operation, MPDecorator
10 from schainpy.model.proc.jroproc_base import ProcessingUnit, Operation, MPDecorator
11 from schainpy.model.io.jroIO_base import *
11 from schainpy.model.io.jroIO_base import *
12 from schainpy.utils import log
12 from schainpy.utils import log
13
13
14
14
class HDFReader(Reader, ProcessingUnit):
    """Processing unit to read HDF5 format files

    This unit reads HDF5 files created with the `HDFWriter` operation. By
    default the file contains two groups, Data and Metadata, and all their
    variables are saved as `dataOut` attributes.
    It is possible to read any HDF5 file by giving its structure in the
    `description` parameter; extra values can be added to the metadata with
    the `extras` parameter.

    Parameters:
    -----------
    path : str
        Path where files are located.
    startDate : date
        Start date of the files
    endDate : date
        End date of the files
    startTime : time
        Start time of the files
    endTime : time
        End time of the files
    description : dict, optional
        Dictionary with the description of the HDF5 file
    extras : dict, optional
        Dictionary with extra metadata to be added to `dataOut`

    Attention: be careful to add the `utcoffset` attribute as the last reader
    parameter in order to work in local time without time problems. The value
    is added to the `utctime` values read from the file, e.g.

        utcoffset='-18000'

    Examples
    --------

    desc = {
        'Data': {
            'data_output': ['u', 'v', 'w'],
            'utctime': 'timestamps',
        },
        'Metadata': {
            'heightList': 'heights'
        }
    }

    desc = {
        'Data': {
            'data_output': 'winds',
            'utctime': 'timestamps'
        },
        'Metadata': {
            'heightList': 'heights'
        }
    }

    extras = {
        'timeZone': 300
    }

    reader = project.addReadUnit(
        name='HDFReader',
        path='/path/to/files',
        startDate='2019/01/01',
        endDate='2019/01/31',
        startTime='00:00:00',
        endTime='23:59:59',
        utcoffset='-18000'
        # description=json.dumps(desc),
        # extras=json.dumps(extras),
        )

    """

    __attrs__ = ['path', 'startDate', 'endDate', 'startTime', 'endTime', 'description', 'extras']

    def __init__(self):
        """Set the reader defaults (file extension, name patterns, empty caches)."""
        ProcessingUnit.__init__(self)
        self.dataOut = Parameters()
        self.ext = ".hdf5"
        self.optchar = "D"
        self.meta = {}
        self.data = {}
        self.open_file = h5py.File
        self.open_mode = 'r'
        self.description = {}
        self.extras = {}
        self.filefmt = "*%Y%j***"
        self.folderfmt = "*%Y%j"
        self.utcoffset = 0

    def setup(self, **kwargs):
        """Store the configuration, locate the input files and open the first one.

        In online mode the search is retried up to `self.nTries` times,
        sleeping `self.delay` seconds between attempts.

        Raises:
            schainpy.admin.SchainError: online mode found no valid file.
        """

        self.set_kwargs(**kwargs)
        if not self.ext.startswith('.'):
            self.ext = '.{}'.format(self.ext)

        if self.online:
            log.log("Searching files in online mode...", self.name)

            # Initialised up front so the check after the loop is well defined
            # even when nTries is 0.
            fullpath = None

            for nTries in range(self.nTries):
                candidates = self.searchFilesOnLine(self.path, self.startDate,
                    self.endDate, self.expLabel, self.ext, self.walk,
                    self.filefmt, self.folderfmt)
                try:
                    fullpath = next(candidates)
                except Exception:
                    # Best effort: any failure while searching counts as
                    # "nothing found yet". KeyboardInterrupt/SystemExit are
                    # deliberately not swallowed so the wait can be aborted.
                    fullpath = None

                if fullpath:
                    break

                log.warning(
                    'Waiting {} sec for a valid file in {}: try {} ...'.format(
                        self.delay, self.path, nTries + 1),
                    self.name)
                time.sleep(self.delay)

            if not(fullpath):
                raise schainpy.admin.SchainError(
                    'There isn\'t any valid file in {}'.format(self.path))

            # File names follow x YYYY DDD SSS .ext (see HDFWriter.setNextFile)
            pathname, filename = os.path.split(fullpath)
            self.year = int(filename[1:5])
            self.doy = int(filename[5:8])
            self.set = int(filename[8:11]) - 1
        else:
            log.log("Searching files in {}".format(self.path), self.name)
            self.filenameList = self.searchFilesOffLine(self.path, self.startDate,
                self.endDate, self.expLabel, self.ext, self.walk, self.filefmt, self.folderfmt)

        self.setNextFile()

        return

    def readFirstHeader(self):
        '''Read metadata and data of the current file and reset the block index'''

        self.__readMetadata()
        self.__readData()
        self.__setBlockList()

        if 'type' in self.meta:
            # SECURITY NOTE: eval() runs text stored in the file's metadata to
            # pick the dataOut class; only read files from trusted sources.
            self.dataOut = eval(self.meta['type'])()

        for attr in self.meta:
            setattr(self.dataOut, attr, self.meta[attr])

        self.blockIndex = 0

        return

    def __setBlockList(self):
        '''
        Selects the data within the times defined

        Reads:
            self.data['utctime'], self.utcoffset,
            self.startTime, self.endTime
        Sets:
            self.interval
            self.blockList
            self.blocksPerFile

        If no block falls inside [startTime, endTime) the next file is opened.
        '''

        startTime = self.startTime
        endTime = self.endTime
        thisUtcTime = self.data['utctime'] + self.utcoffset

        # NOTE(review): numpy.min raises ValueError when the file holds a
        # single timestamp (empty difference array) - confirm whether
        # one-block files can occur.
        self.interval = numpy.min(thisUtcTime[1:] - thisUtcTime[:-1])
        thisDatetime = datetime.datetime.utcfromtimestamp(thisUtcTime[0])

        # The time window is evaluated on the day of the first block.
        thisDate = thisDatetime.date()
        epoch = datetime.datetime(1970, 1, 1)
        startUtcTime = (datetime.datetime.combine(thisDate, startTime) - epoch).total_seconds()
        endUtcTime = (datetime.datetime.combine(thisDate, endTime) - epoch).total_seconds()
        ind = numpy.where(numpy.logical_and(thisUtcTime >= startUtcTime, thisUtcTime < endUtcTime))[0]

        self.blockList = ind
        self.blocksPerFile = len(ind)

        if len(ind)==0:
            print("[Reading] Block No. %d/%d -> %s [Skipping]" % (self.blockIndex,
                                                                  self.blocksPerFile,
                                                                  thisDatetime))
            self.setNextFile()

        return

    def __readMetadata(self):
        '''
        Reads Metadata

        With a `description`, each entry of description['Metadata'] maps a
        dataOut attribute name to a path inside the file; otherwise every
        member of the 'Metadata' group is read under its own name. Entries of
        `extras` are added last and override values read from the file.
        '''

        meta = {}

        if self.description:
            for key, value in self.description['Metadata'].items():
                meta[key] = self.fp[value][()]
        else:
            grp = self.fp['Metadata']
            for name in grp:
                meta[name] = grp[name][()]

        if self.extras:
            for key, value in self.extras.items():
                meta[key] = value
        self.meta = meta

        return

    def __readData(self):
        '''
        Reads the data arrays of the current file into self.data

        Datasets are read whole; a group (or a list of paths given in the
        description) is read member by member and stacked into one array with
        the members along the first axis.
        '''

        data = {}

        if self.description:
            for key, value in self.description['Data'].items():
                if isinstance(value, str):
                    if isinstance(self.fp[value], h5py.Dataset):
                        data[key] = self.fp[value][()]
                    elif isinstance(self.fp[value], h5py.Group):
                        array = []
                        for ch in self.fp[value]:
                            array.append(self.fp[value][ch][()])
                        data[key] = numpy.array(array)
                elif isinstance(value, list):
                    array = []
                    for ch in value:
                        array.append(self.fp[ch][()])
                    data[key] = numpy.array(array)
        else:
            grp = self.fp['Data']
            for name in grp:
                if isinstance(grp[name], h5py.Dataset):
                    array = grp[name][()]
                elif isinstance(grp[name], h5py.Group):
                    array = []
                    for ch in grp[name]:
                        array.append(grp[name][ch][()])
                    array = numpy.array(array)
                else:
                    log.warning('Unknown type: {}'.format(name))
                    # Skip the entry: there is nothing valid to store, and
                    # falling through would reuse the previous member's array
                    # (or fail on an unbound name for the first member).
                    continue

                # No description is in effect in this branch, so members keep
                # the names they have in the file.
                data[name] = array

        self.data = data
        return

    def getData(self):
        '''
        Copies the current block of every data array into dataOut

        1-D arrays are indexed along axis 0, any other array along axis 1
        (axis 0 holds the stacked members, see __readData).
        '''

        # NOTE(review): self.blockList (the indices inside the time window)
        # is not used here; blocks are taken by self.blockIndex from the raw
        # arrays - confirm this is intended.
        for attr in self.data:
            if self.data[attr].ndim == 1:
                setattr(self.dataOut, attr, self.data[attr][self.blockIndex])
            else:
                setattr(self.dataOut, attr, self.data[attr][:, self.blockIndex])

        self.dataOut.flagNoData = False
        self.blockIndex += 1

        log.log("Block No. {}/{} -> {}".format(
            self.blockIndex,
            self.blocksPerFile,
            self.dataOut.datatime.ctime()), self.name)

        return

    def run(self, **kwargs):
        '''Configure the unit on the first call, then deliver one block per call'''

        if not(self.isConfig):
            self.setup(**kwargs)
            self.isConfig = True

        # All blocks of the current file were delivered: move to the next one
        if self.blockIndex == self.blocksPerFile:
            self.setNextFile()

        self.getData()

        return
295
294
@MPDecorator
class HDFWriter(Operation):
    """Operation to write HDF5 files.

    The HDF5 file contains by default two groups, Data and Metadata, where
    you can save any `dataOut` attribute specified by the `dataList` and
    `metadataList` parameters. Data attributes are normally time dependent
    whereas the metadata are not.
    It is possible to customize the structure of the HDF5 file with the
    optional description parameter, see the examples.

    Parameters:
    -----------
    path : str
        Path where files will be saved.
    blocksPerFile : int
        Number of blocks per file
    metadataList : list
        List of the dataOut attributes that will be saved as metadata
    dataList : list
        List of the dataOut attributes that will be saved as data
    setType : optional
        When None (default) files get a sequential set number inside the day
        folder; any other value makes the set number the minute of the day
        of the data being written.
    description : dict, optional
        Dictionary with the desired description of the HDF5 file

    Any additional keyword argument is set as an attribute on both the
    incoming `dataOut` (every call) and on the writer itself (at setup).

    Examples
    --------

    desc = {
        'data_output': {'winds': ['z', 'w', 'v']},
        'utctime': 'timestamps',
        'heightList': 'heights'
    }
    desc = {
        'data_output': ['z', 'w', 'v'],
        'utctime': 'timestamps',
        'heightList': 'heights'
    }
    desc = {
        'Data': {
            'data_output': 'winds',
            'utctime': 'timestamps'
        },
        'Metadata': {
            'heightList': 'heights'
        }
    }

    writer = proc_unit.addOperation(name='HDFWriter')
    writer.addParameter(name='path', value='/path/to/file')
    writer.addParameter(name='blocksPerFile', value='32')
    writer.addParameter(name='metadataList', value='heightList,timeZone')
    writer.addParameter(name='dataList',value='data_output,utctime')
    # writer.addParameter(name='description',value=json.dumps(desc))

    """

    ext = ".hdf5"
    optchar = "D"
    filename = None
    path = None
    setFile = None
    fp = None
    firsttime = True
    # Configurations
    blocksPerFile = None
    blockIndex = None
    dataOut = None
    # Data Arrays
    dataList = None
    metadataList = None
    currentDay = None
    lastTime = None

    def __init__(self):

        Operation.__init__(self)
        return

    def set_kwargs(self, **kwargs):
        """Set every keyword argument as an attribute of the writer."""

        for key, value in kwargs.items():
            setattr(self, key, value)

    def set_kwargs_obj(self, obj, **kwargs):
        """Set every keyword argument as an attribute of `obj`."""

        for key, value in kwargs.items():
            setattr(obj, key, value)

    def setup(self, path=None, blocksPerFile=10, metadataList=None, dataList=None, setType=None, description=None, **kwargs):
        """Store the configuration and describe each dataset to be written.

        For every name in `dataList` found on `self.dataOut` (and not None)
        an entry is appended to `self.dsList`: scalars get nDim 0, arrays get
        their shape, dtype and the length of their first axis ('dsNumber',
        one dataset is later created per index of that axis).
        """
        self.path = path
        self.blocksPerFile = blocksPerFile
        self.metadataList = metadataList
        # dataList defaults to None in this signature; treat it as empty
        self.dataList = [s.strip() for s in (dataList or [])]
        self.setType = setType
        self.description = description
        self.set_kwargs(**kwargs)

        if self.metadataList is None:
            self.metadataList = self.dataOut.metadata_list

        dsList = []

        for name in self.dataList:
            if not hasattr(self.dataOut, name):
                log.warning('Attribute {} not found in dataOut'.format(name), self.name)
                continue

            dataAux = getattr(self.dataOut, name)
            if dataAux is None:
                continue

            dsDict = {'variable': name}
            # numpy.floating replaces the numpy.float alias, which was
            # removed in NumPy 1.24, and also covers NumPy float scalars.
            if isinstance(dataAux, (int, float, numpy.integer, numpy.floating)):
                dsDict['nDim'] = 0
            else:
                dsDict['nDim'] = len(dataAux.shape)
                dsDict['shape'] = dataAux.shape
                dsDict['dsNumber'] = dataAux.shape[0]
                dsDict['dtype'] = dataAux.dtype

            dsList.append(dsDict)

        self.dsList = dsList
        self.currentDay = self.dataOut.datatime.date()

    def timeFlag(self):
        """Return True when the current block must start a new file.

        A new file is required when the local day of year of
        `dataOut.utctime` differs from the previous block's, or when more
        than three hours passed since the previous block. The first call only
        records the reference time and day and returns False.
        """
        currentTime = self.dataOut.utctime
        timeTuple = time.localtime(currentTime)
        dataDay = timeTuple.tm_yday

        if self.lastTime is None:
            self.lastTime = currentTime
            self.currentDay = dataDay
            return False

        timeDiff = currentTime - self.lastTime
        # Always refresh the reference time, also on a day change, so the
        # next gap is measured from this block and not from an older one.
        self.lastTime = currentTime

        # New file if the day changed or the gap between two consecutive
        # blocks exceeds three hours
        if dataDay != self.currentDay:
            self.currentDay = dataDay
            return True

        return bool(timeDiff > 3*60*60)

    def run(self, dataOut, path, blocksPerFile=10, metadataList=None,
            dataList=[], setType=None, description={}, **kwargs):
        """Write one block of `dataOut`; configure and open the first file on the first call.

        The list/dict defaults are kept for interface compatibility; they are
        only read, never mutated, by this class.
        """

        self.dataOut = dataOut
        self.set_kwargs_obj(self.dataOut, **kwargs)
        if not(self.isConfig):
            self.setup(path=path, blocksPerFile=blocksPerFile,
                       metadataList=metadataList, dataList=dataList,
                       setType=setType, description=description, **kwargs)

            self.isConfig = True
            self.setNextFile()

        self.putData()
        return

    def setNextFile(self):
        """Create the next output file and its (empty) metadata and data sets.

        Files go to <path>/dYYYYDDD/ and are named
        <optchar>YYYYDDD<set><ext>, with year and day of year taken from the
        local time of `dataOut.utctime`.
        """

        ext = self.ext
        path = self.path
        setFile = self.setFile

        timeTuple = time.localtime(self.dataOut.utctime)
        subfolder = 'd%4.4d%3.3d' % (timeTuple.tm_year,timeTuple.tm_yday)
        fullpath = os.path.join(path, subfolder)

        if os.path.exists(fullpath):
            filesList = os.listdir(fullpath)
            filesList = [k for k in filesList if k.startswith(self.optchar)]
            if len(filesList) > 0:
                filesList = sorted(filesList, key=str.lower)
                filen = filesList[-1]
                # the filename must have the following format
                # 0 1234 567 89A BCDE (hex)
                # x YYYY DDD SSS .ext
                if isNumber(filen[8:11]):
                    # start the set counter from the set of the last file
                    setFile = int(filen[8:11])
                else:
                    setFile = -1
            else:
                setFile = -1  # initialise the set counter
        else:
            os.makedirs(fullpath)
            setFile = -1  # initialise the set counter

        if self.setType is None:
            setFile += 1
            file = '%s%4.4d%3.3d%03d%s' % (self.optchar,
                                           timeTuple.tm_year,
                                           timeTuple.tm_yday,
                                           setFile,
                                           ext)
        else:
            # set number is the minute of the day
            setFile = timeTuple.tm_hour*60+timeTuple.tm_min
            file = '%s%4.4d%3.3d%04d%s' % (self.optchar,
                                           timeTuple.tm_year,
                                           timeTuple.tm_yday,
                                           setFile,
                                           ext)

        self.filename = os.path.join(path, subfolder, file)

        # Setting HDF5 File
        self.fp = h5py.File(self.filename, 'w')
        # write metadata
        self.writeMetadata(self.fp)
        # Write data
        self.writeData(self.fp)

    def _labelMap(self):
        """Return the attribute-name -> label mapping taken from the description.

        For a structured description ('Data'/'Metadata' keys) both sections
        are merged into a new dict, Metadata entries taking precedence; the
        user's description is never modified. A flat description is returned
        as is.
        """
        desc = self.description or {}
        if 'Data' in desc or 'Metadata' in desc:
            merged = dict(desc.get('Data', {}))
            merged.update(desc.get('Metadata', {}))
            return merged
        return desc

    def getLabel(self, name, x=None):
        """Return the name to use inside the file for attribute `name`.

        With `x` None the label of the variable itself is returned: the
        string given in the description, the single key of a dict entry,
        None for a list entry (its datasets are written directly in the
        parent group), or `name` when the description has no entry.

        With `x` given the label of dataset number `x` of the variable is
        returned: taken from the list (or the dict's list) in the
        description, otherwise 'pairXX' for names containing 'cspc' and
        'channelXX' for anything else.
        """
        labels = self._labelMap()

        if x is None:
            if name in labels:
                if isinstance(labels[name], str):
                    return labels[name]
                elif isinstance(labels[name], list):
                    return None
                elif isinstance(labels[name], dict):
                    for key, value in labels[name].items():
                        return key
            return name
        else:
            if name in labels:
                if isinstance(labels[name], list):
                    return labels[name][x]
                elif isinstance(labels[name], dict):
                    for key, value in labels[name].items():
                        return value[x]
            if 'cspc' in name:
                return 'pair{:02d}'.format(x)
            else:
                return 'channel{:02d}'.format(x)

    def writeMetadata(self, fp):
        """Write every attribute of `metadataList` found on dataOut as a dataset.

        Datasets go into a 'Metadata' group, or into the file root when a
        description without a 'Metadata' key is in use. Booleans are stored
        as 1/0.
        """

        if self.description:
            if 'Metadata' in self.description:
                grp = fp.create_group('Metadata')
            else:
                grp = fp
        else:
            grp = fp.create_group('Metadata')

        for name in self.metadataList:
            if not hasattr(self.dataOut, name):
                log.warning('Metadata: `{}` not found'.format(name), self.name)
                continue
            value = getattr(self.dataOut, name)
            if isinstance(value, bool):
                value = 1 if value is True else 0
            grp.create_dataset(self.getLabel(name), data=value)
        return

    def writeData(self, fp):
        """Create the empty datasets that putData fills block by block.

        Scalars get one float64 dataset of length `blocksPerFile`; arrays get
        one dataset per index of their first axis, each shaped
        (blocksPerFile,) + shape[1:]. `self.ds` and `self.data` are parallel
        lists: the dataset and its (attribute, index) source, index -1
        marking a scalar.
        """

        if self.description:
            if 'Data' in self.description:
                grp = fp.create_group('Data')
            else:
                grp = fp
        else:
            grp = fp.create_group('Data')

        dtsets = []
        data = []

        for dsInfo in self.dsList:
            if dsInfo['nDim'] == 0:
                ds = grp.create_dataset(
                    self.getLabel(dsInfo['variable']),
                    (self.blocksPerFile,),
                    chunks=True,
                    dtype=numpy.float64)
                dtsets.append(ds)
                data.append((dsInfo['variable'], -1))
            else:
                label = self.getLabel(dsInfo['variable'])
                if label is not None:
                    sgrp = grp.create_group(label)
                else:
                    sgrp = grp
                for i in range(dsInfo['dsNumber']):
                    ds = sgrp.create_dataset(
                        self.getLabel(dsInfo['variable'], i),
                        (self.blocksPerFile,) + dsInfo['shape'][1:],
                        chunks=True,
                        dtype=dsInfo['dtype'])
                    dtsets.append(ds)
                    data.append((dsInfo['variable'], i))
        fp.flush()

        log.log('Creating file: {}'.format(fp.filename), self.name)

        self.ds = dtsets
        self.data = data
        self.firsttime = True
        self.blockIndex = 0
        return

    def putData(self):
        """Write the current block, first rolling over to a new file when the
        current one is full or `timeFlag` asks for it."""

        if (self.blockIndex == self.blocksPerFile) or self.timeFlag():
            self.closeFile()
            self.setNextFile()

        for i, ds in enumerate(self.ds):
            attr, ch = self.data[i]
            if ch == -1:
                ds[self.blockIndex] = getattr(self.dataOut, attr)
            else:
                ds[self.blockIndex] = getattr(self.dataOut, attr)[ch]

        self.fp.flush()
        self.blockIndex += 1
        log.log('Block No. {}/{}'.format(self.blockIndex, self.blocksPerFile), self.name)

        return

    def closeFile(self):
        """Trim the datasets to the blocks actually written and close the file.

        Does nothing when no file is open, so it is safe to call repeatedly.
        """

        if not self.fp:
            return

        # A file closed early holds fewer than blocksPerFile blocks
        if self.blockIndex != self.blocksPerFile:
            for ds in self.ds:
                ds.resize(self.blockIndex, axis=0)

        self.fp.flush()
        self.fp.close()
        # Drop the handle so a later close() cannot touch closed datasets
        self.fp = None

    def close(self):
        """Close the current output file, if any."""

        self.closeFile()
General Comments 0
You need to be logged in to leave comments. Login now