##// END OF EJS Templates
Fix blocklist value in ParameterReader
Juan C. Espinoza -
r1233:6b3665c1b242
parent child
Show More
@@ -1,1544 +1,1543
1 1 import numpy
2 2 import time
3 3 import os
4 4 import h5py
5 5 import re
6 6 import datetime
7 7
8 8 from schainpy.model.data.jrodata import *
9 9 from schainpy.model.proc.jroproc_base import ProcessingUnit, Operation, MPDecorator
10 10 from schainpy.model.io.jroIO_base import *
11 11 from schainpy.utils import log
12 12
13 13 @MPDecorator
14 14 class ParamReader(JRODataReader,ProcessingUnit):
15 15 '''
16 16 Reads HDF5 format files
17 17 path
18 18 startDate
19 19 endDate
20 20 startTime
21 21 endTime
22 22 '''
23 23
24 24 ext = ".hdf5"
25 25 optchar = "D"
26 26 timezone = None
27 27 startTime = None
28 28 endTime = None
29 29 fileIndex = None
30 30 utcList = None #To select data in the utctime list
31 31 blockList = None #List to blocks to be read from the file
32 32 blocksPerFile = None #Number of blocks to be read
33 33 blockIndex = None
34 34 path = None
35 35 #List of Files
36 36 filenameList = None
37 37 datetimeList = None
38 38 #Hdf5 File
39 39 listMetaname = None
40 40 listMeta = None
41 41 listDataname = None
42 42 listData = None
43 43 listShapes = None
44 44 fp = None
45 45 #dataOut reconstruction
46 46 dataOut = None
47 47
def __init__(self):
    '''
    Initialise the ProcessingUnit base and attach a fresh Parameters
    object as the output container.
    '''
    ProcessingUnit.__init__(self)
    self.dataOut = Parameters()
52 52
53 53 def setup(self, **kwargs):
54 54
55 55 path = kwargs['path']
56 56 startDate = kwargs['startDate']
57 57 endDate = kwargs['endDate']
58 58 startTime = kwargs['startTime']
59 59 endTime = kwargs['endTime']
60 60 walk = kwargs['walk']
61 61 if 'ext' in kwargs:
62 62 ext = kwargs['ext']
63 63 else:
64 64 ext = '.hdf5'
65 65 if 'timezone' in kwargs:
66 66 self.timezone = kwargs['timezone']
67 67 else:
68 68 self.timezone = 'lt'
69 69
70 70 print("[Reading] Searching files in offline mode ...")
71 71 pathList, filenameList = self.searchFilesOffLine(path, startDate=startDate, endDate=endDate,
72 72 startTime=startTime, endTime=endTime,
73 73 ext=ext, walk=walk)
74 74
75 75 if not(filenameList):
76 76 print("There is no files into the folder: %s"%(path))
77 77 sys.exit(-1)
78 78
79 79 self.fileIndex = -1
80 80 self.startTime = startTime
81 81 self.endTime = endTime
82 82
83 83 self.__readMetadata()
84 84
85 85 self.__setNextFileOffline()
86 86
87 87 return
88 88
def searchFilesOffLine(self,
                       path,
                       startDate=None,
                       endDate=None,
                       startTime=datetime.time(0,0,0),
                       endTime=datetime.time(23,59,59),
                       ext='.hdf5',
                       walk=True):
    '''
    Build the list of data files to be read.

    Folders are obtained from JRODataReader.findDatafiles; every file
    ending in ext is then kept only if it passes isFileInDateRange and
    self.__isFileInTimeRange.

    Return:
        (pathList, filenameList) on success, (None, None) when no date
        folder or no file qualifies. On success self.filenameList and
        self.datetimeList are updated as well.
    '''
    self.filenameList = []
    self.datetimeList = []

    finder = JRODataReader()
    dateList, pathList = finder.findDatafiles(path, startDate, endDate, '', ext, walk, include_path=True)

    if dateList == []:
        print("[Reading] No *%s files in %s from %s to %s)"%(ext, path,
                                                    datetime.datetime.combine(startDate,startTime).ctime(),
                                                    datetime.datetime.combine(endDate,endTime).ctime()))
        return None, None

    if len(dateList) > 1:
        print("[Reading] %d days were found in date range: %s - %s" %(len(dateList), startDate, endDate))
    else:
        print("[Reading] data was found for the date %s" %(dateList[0]))

    acceptedFiles = []
    acceptedTimes = []

    for folder in pathList:
        candidates = glob.glob1(folder, "*%s" %ext)
        candidates.sort()

        for baseName in candidates:
            fullName = os.path.join(folder, baseName)

            if not isFileInDateRange(fullName, startDate, endDate):
                continue

            firstStamp = self.__isFileInTimeRange(fullName, startDate, endDate, startTime, endTime)
            if firstStamp:
                acceptedFiles.append(fullName)
                acceptedTimes.append(firstStamp)

    if not acceptedFiles:
        print("[Reading] Any file was found int time range %s - %s" %(datetime.datetime.combine(startDate,startTime).ctime(), datetime.datetime.combine(endDate,endTime).ctime()))
        return None, None

    print("[Reading] %d file(s) was(were) found in time range: %s - %s" %(len(acceptedFiles), startTime, endTime))
    print()

    self.filenameList = acceptedFiles
    self.datetimeList = acceptedTimes

    return pathList, acceptedFiles
155 155
def __isFileInTimeRange(self, filename, startDate, endDate, startTime, endTime):
    '''
    Decide whether a data file holds blocks inside the requested hours.

    Inputs:
        filename  : full path of the HDF5 data file
        startDate : first date of the selected range (datetime.date)
        endDate   : last date of the selected range (datetime.date)
        startTime : first time of the selected range (datetime.time)
        endTime   : last time of the selected range (datetime.time)

    Return:
        A datetime.datetime built from the first entry of 'Data/utctime'
        when the file is accepted, otherwise None.

    Raises:
        IOError if the file cannot be opened or read.
    '''
    fp = None
    try:
        fp = h5py.File(filename,'r')
        # Dataset.value no longer exists in h5py 3; indexing with ()
        # reads the whole dataset on old and new versions alike.
        thisUtcTime = fp['Data']['utctime'][()][0]
    except IOError:
        traceback.print_exc()
        raise IOError("The file %s can't be opened" %(filename))
    finally:
        # Release the handle even when the group or dataset is missing.
        if fp is not None:
            fp.close()

    # NOTE(review): the 5-hour offset is hard-coded; presumably the
    # local time zone of the instrument - confirm before reuse elsewhere.
    if self.timezone == 'lt':
        thisUtcTime -= 5*3600

    thisDatetime = datetime.datetime.fromtimestamp(thisUtcTime[0] + 5*3600)
    thisDate = thisDatetime.date()

    epoch = datetime.datetime(1970, 1, 1)
    startUtcTime = (datetime.datetime.combine(thisDate,startTime) - epoch).total_seconds()
    endUtcTime = (datetime.datetime.combine(thisDate,endTime) - epoch).total_seconds()

    # General case
    #           o>>>>>>>>>>>>>><<<<<<<<<<<<<<o
    #-----------o----------------------------o-----------
    #       startTime                     endTime
    if endTime >= startTime:
        thisUtcLog = numpy.logical_and(thisUtcTime > startUtcTime, thisUtcTime < endUtcTime)
        if numpy.any(thisUtcLog): # at least one block lies between the two hours
            return thisDatetime
        return None

    # endTime < startTime means endTime belongs to the next day
    #<<<<<<<<<<<o                            o>>>>>>>>>>>
    #-----------o----------------------------o-----------
    #        endTime                     startTime
    if (thisDate == startDate) and numpy.all(thisUtcTime < startUtcTime):
        return None

    if (thisDate == endDate) and numpy.all(thisUtcTime > endUtcTime):
        return None

    if numpy.all(thisUtcTime < startUtcTime) and numpy.all(thisUtcTime > endUtcTime):
        return None

    return thisDatetime
229 229
def __setNextFileOffline(self):
    '''
    Advance to the next file of self.filenameList and load its blocks.

    Return:
        1 when a file was opened and read, 0 when the list is exhausted
        (self.dataOut.error is then set to "No more Files").
    '''
    self.fileIndex += 1
    idFile = self.fileIndex

    # The data of the previous file was already copied by __readData,
    # so its handle can be released instead of leaking one per file.
    previous = self.fp
    if previous is not None:
        previous.close()
        self.fp = None

    if not(idFile < len(self.filenameList)):
        self.dataOut.error = "No more Files"
        return 0

    filename = self.filenameList[idFile]
    filePointer = h5py.File(filename,'r')
    self.filename = filename
    self.fp = filePointer

    print("Setting the file: %s"%self.filename)

    self.__setBlockList()
    self.__readData()
    self.blockIndex = 0
    return 1
250 250
def __setBlockList(self):
    '''
    Selects the blocks of the current file that lie within the
    configured hours.

    Reads:
        self.fp
        self.startTime
        self.endTime
        self.timezone

    Sets:
        self.blockList     : indices of the selected blocks
        self.blocksPerFile : number of selected blocks
    '''
    startTime = self.startTime
    endTime = self.endTime

    # Dataset.value (h5py) and numpy.float (NumPy) were both removed in
    # current releases; [()] and the builtin float are the equivalents.
    thisUtcTime = self.fp['Data']['utctime'][()].astype(float)[0]

    # NOTE(review): the original author marked this shift "ERROOOOR";
    # the 5-hour offset is hard-coded - confirm the intended time zone.
    if self.timezone == 'lt':
        thisUtcTime -= 5*3600

    thisDatetime = datetime.datetime.fromtimestamp(thisUtcTime[0] + 5*3600)
    thisDate = thisDatetime.date()

    epoch = datetime.datetime(1970, 1, 1)
    startUtcTime = (datetime.datetime.combine(thisDate,startTime) - epoch).total_seconds()
    endUtcTime = (datetime.datetime.combine(thisDate,endTime) - epoch).total_seconds()

    ind = numpy.where(numpy.logical_and(thisUtcTime >= startUtcTime, thisUtcTime < endUtcTime))[0]

    self.blockList = ind
    self.blocksPerFile = len(ind)
288 288
def __readMetadata(self):
    '''
    Reads the 'Metadata' group of the first file in self.filenameList.

    Sets:
        self.listShapes   : dict built from the 'array dimensions' table,
                            name -> array of the five remaining columns
        self.listMetaname : names of every other dataset in the group
        self.listMeta     : values of those datasets
    '''
    filename = self.filenameList[0]
    fp = h5py.File(filename,'r')

    # Start from an empty table so a file without 'array dimensions'
    # no longer fails with an unbound local variable.
    listShapes = {}
    listMetaname = []
    listMetadata = []

    try:
        gp = fp['Metadata']

        for name in list(gp.keys()):
            if name == 'array dimensions':
                for shapes in gp[name][:]:
                    key = shapes[0]
                    # The name column is stored as a byte string while
                    # __readData looks entries up by str dataset name.
                    if isinstance(key, bytes):
                        key = key.decode()
                    listShapes[key] = numpy.array([shapes[1],shapes[2],shapes[3],shapes[4],shapes[5]])
            else:
                # [()] replaces Dataset.value, which h5py 3 removed
                listMetaname.append(name)
                listMetadata.append(gp[name][()])
    finally:
        # Close the file even when a group or dataset is missing.
        fp.close()

    self.listShapes = listShapes
    self.listMetaname = listMetaname
    self.listMeta = listMetadata
325 325
def __readData(self):
    '''
    Loads every entry of the 'Data' group of self.fp through
    __setDataArray, using the shape row stored in self.listShapes.

    Sets:
        self.listDataname : entry names, in group order
        self.listData     : the matching arrays
    '''
    dataGroup = self.fp['Data']
    names = []
    arrays = []

    for name, _ in list(dataGroup.items()):
        names.append(name)
        arrays.append(self.__setDataArray(dataGroup[name], self.listShapes[name]))

    self.listDataname = names
    self.listData = arrays
341 341
def __setDataArray(self, dataset, shapes):
    '''
    Rebuilds the array of one 'Data' entry for the selected blocks.

    Inputs:
        dataset : HDF5 dataset (mode 0) or group of tables (modes 1, 2)
        shapes  : row of the 'array dimensions' table:
                  (nDims, dim2, dim1, dim0, mode)

    Return:
        numpy array restricted to self.blockList
    '''
    nDims = shapes[0]
    nDim2 = shapes[1]      #Dimension 0
    nDim1 = shapes[2]      #Dimension 1, number of Points or Parameters
    nDim0 = shapes[3]      #Dimension 2, number of samples or ranges
    mode = shapes[4]       #Mode of storing
    blockList = self.blockList
    blocksPerFile = self.blocksPerFile

    # Dataset.value (h5py 3) and numpy.float (NumPy 1.24) were removed;
    # dataset[()] and the builtin float are the direct replacements.

    #Depending on what mode the data was stored
    if mode == 0:       #Divided in channels
        arrayData = dataset[()].astype(float)[0][blockList]
    if mode == 1:       #Divided in parameter
        strds = 'table'
        nDatas = nDim1
    elif mode == 2:     #Concatenated in a table
        strds = 'table0'
        arrayData = dataset[strds][()]
        #Selecting part of the dataset
        utctime = arrayData[:,0]
        _, indices = numpy.unique(utctime, return_index=True)

        if blockList.size != indices.size:
            indMin = indices[blockList[0]]
            # NOTE(review): blockList[1] is the second selected block;
            # blockList[-1] looks like what was meant - confirm. Left
            # untouched so existing results do not change.
            if blockList[1] + 1 >= indices.size:
                arrayData = arrayData[indMin:,:]
            else:
                indMax = indices[blockList[1] + 1]
                arrayData = arrayData[indMin:indMax,:]
        return arrayData

    # One dimension
    if nDims == 0:
        arrayData = dataset[()].astype(float)[0][blockList]

    # Two dimensions
    elif nDims == 2:
        arrayData = numpy.zeros((blocksPerFile,nDim1,nDim0))
        nDatas = nDim1

        for i in range(nDatas):
            data = dataset[strds + str(i)][()]
            arrayData[:,i,:] = data[blockList,:]

    # Three dimensions
    # NOTE(review): nDims == 1 also falls in here - confirm it is intended.
    else:
        arrayData = numpy.zeros((blocksPerFile,nDim2,nDim1,nDim0))
        for i in range(nDatas):
            data = dataset[strds + str(i)][()]

            for b in range(blockList.size):
                arrayData[b,:,i,:] = data[:,:,blockList[b]]

    return arrayData
400 400
def __setDataOut(self):
    '''
    Copies the metadata and the block at self.blockIndex of every data
    array into self.dataOut, one attribute per name.
    '''
    out = self.dataOut
    names = self.listDataname
    shapesByName = self.listShapes
    current = self.blockIndex

    for i, value in enumerate(self.listMeta):
        setattr(out, self.listMetaname[i], value)

    for j, array in enumerate(self.listData):
        name = names[j]
        nShapes = shapesByName[name][0]
        mode = shapesByName[name][4]

        if nShapes == 1:
            setattr(out, name, array[current])
        elif nShapes > 1:
            setattr(out, name, array[current,:])
        elif mode == 0:
            setattr(out, name, array[current])
        elif mode == 2:
            # Mode Meteors: rows are grouped by their time stamp
            setattr(out, name, self.__selectDataMode2(array, current))
428 428
def __selectDataMode2(self, data, blockIndex):
    '''
    Returns the rows of data whose first column equals the
    blockIndex-th distinct value of that column (in sorted order).
    '''
    _, groupOfRow = numpy.unique(data[:,0], return_inverse=True)
    rows = numpy.where(groupOfRow == blockIndex)[0]
    return data[rows,:]
436 436
def getData(self):
    '''
    Publishes the next block in self.dataOut.

    When the current file is used up the next one is opened first; if
    there is none, dataOut.flagNoData is set and 0 is returned.
    Otherwise the block is copied, flagNoData is cleared, the block
    index advances and None is returned.
    '''
    fileDone = self.blockIndex == self.blocksPerFile
    if fileDone and not self.__setNextFileOffline():
        self.dataOut.flagNoData = True
        return 0

    self.__setDataOut()
    self.dataOut.flagNoData = False
    self.blockIndex += 1
450 450
def run(self, **kwargs):
    '''
    Entry point of the unit: configures the reader on the first call
    only, then fetches one block per call.
    '''
    alreadyConfigured = self.isConfig
    if not alreadyConfigured:
        self.setup(**kwargs)
        self.isConfig = True

    self.getData()
460 460
461 461 @MPDecorator
462 462 class ParamWriter(Operation):
463 463 '''
464 464 HDF5 Writer, stores parameters data in HDF5 format files
465 465
466 466 path: path where the files will be stored
467 467 blocksPerFile: number of blocks that will be saved in per HDF5 format file
468 468 mode: selects the data stacking mode: '0' channels, '1' parameters, '2' table (for meteors)
469 469 metadataList: list of attributes that will be stored as metadata
470 470 dataList: list of attributes that will be stored as data
471 471 '''
472 472
473 473 ext = ".hdf5"
474 474 optchar = "D"
475 475 metaoptchar = "M"
476 476 metaFile = None
477 477 filename = None
478 478 path = None
479 479 setFile = None
480 480 fp = None
481 481 grp = None
482 482 ds = None
483 483 firsttime = True
484 484 #Configurations
485 485 blocksPerFile = None
486 486 blockIndex = None
487 487 dataOut = None
488 488 #Data Arrays
489 489 dataList = None
490 490 metadataList = None
491 491 dsList = None #List of dictionaries with dataset properties
492 492 tableDim = None
493 493 dtype = [('arrayName', 'S20'),('nDimensions', 'i'), ('dim2', 'i'), ('dim1', 'i'),('dim0', 'i'),('mode', 'b')]
494 494 currentDay = None
495 495 lastTime = None
496 496 setType = None
497 497
def __init__(self):
    '''
    Initialise the Operation base; all writer state is filled in later
    by setup().
    '''
    Operation.__init__(self)
502 502
503 503 def setup(self, dataOut, path=None, blocksPerFile=10, metadataList=None, dataList=None, mode=None, setType=None):
504 504 self.path = path
505 505 self.blocksPerFile = blocksPerFile
506 506 self.metadataList = metadataList
507 507 self.dataList = dataList
508 508 self.dataOut = dataOut
509 509 self.mode = mode
510 510 if self.mode is not None:
511 511 self.mode = numpy.zeros(len(self.dataList)) + mode
512 512 else:
513 513 self.mode = numpy.ones(len(self.dataList))
514 514
515 515 self.setType = setType
516 516
517 517 arrayDim = numpy.zeros((len(self.dataList),5))
518 518
519 519 #Table dimensions
520 520 dtype0 = self.dtype
521 521 tableList = []
522 522
523 523 #Dictionary and list of tables
524 524 dsList = []
525 525
526 526 for i in range(len(self.dataList)):
527 527 dsDict = {}
528 528 dataAux = getattr(self.dataOut, self.dataList[i])
529 529 dsDict['variable'] = self.dataList[i]
530 530 #--------------------- Conditionals ------------------------
531 531 #There is no data
532 532
533 533 if dataAux is None:
534 534
535 535 return 0
536 536
537 537 if isinstance(dataAux, (int, float, numpy.integer, numpy.float)):
538 538 dsDict['mode'] = 0
539 539 dsDict['nDim'] = 0
540 540 arrayDim[i,0] = 0
541 541 dsList.append(dsDict)
542 542
543 543 #Mode 2: meteors
544 544 elif self.mode[i] == 2:
545 545 dsDict['dsName'] = 'table0'
546 546 dsDict['mode'] = 2 # Mode meteors
547 547 dsDict['shape'] = dataAux.shape[-1]
548 548 dsDict['nDim'] = 0
549 549 dsDict['dsNumber'] = 1
550 550 arrayDim[i,3] = dataAux.shape[-1]
551 551 arrayDim[i,4] = self.mode[i] #Mode the data was stored
552 552 dsList.append(dsDict)
553 553
554 554 #Mode 1
555 555 else:
556 556 arrayDim0 = dataAux.shape #Data dimensions
557 557 arrayDim[i,0] = len(arrayDim0) #Number of array dimensions
558 558 arrayDim[i,4] = self.mode[i] #Mode the data was stored
559 559 strtable = 'table'
560 560 dsDict['mode'] = 1 # Mode parameters
561 561
562 562 # Three-dimension arrays
563 563 if len(arrayDim0) == 3:
564 564 arrayDim[i,1:-1] = numpy.array(arrayDim0)
565 565 nTables = int(arrayDim[i,2])
566 566 dsDict['dsNumber'] = nTables
567 567 dsDict['shape'] = arrayDim[i,2:4]
568 568 dsDict['nDim'] = 3
569 569
570 570 for j in range(nTables):
571 571 dsDict = dsDict.copy()
572 572 dsDict['dsName'] = strtable + str(j)
573 573 dsList.append(dsDict)
574 574
575 575 # Two-dimension arrays
576 576 elif len(arrayDim0) == 2:
577 577 arrayDim[i,2:-1] = numpy.array(arrayDim0)
578 578 nTables = int(arrayDim[i,2])
579 579 dsDict['dsNumber'] = nTables
580 580 dsDict['shape'] = arrayDim[i,3]
581 581 dsDict['nDim'] = 2
582 582
583 583 for j in range(nTables):
584 584 dsDict = dsDict.copy()
585 585 dsDict['dsName'] = strtable + str(j)
586 586 dsList.append(dsDict)
587 587
588 588 # One-dimension arrays
589 589 elif len(arrayDim0) == 1:
590 590 arrayDim[i,3] = arrayDim0[0]
591 591 dsDict['shape'] = arrayDim0[0]
592 592 dsDict['dsNumber'] = 1
593 593 dsDict['dsName'] = strtable + str(0)
594 594 dsDict['nDim'] = 1
595 595 dsList.append(dsDict)
596 596
597 597 table = numpy.array((self.dataList[i],) + tuple(arrayDim[i,:]),dtype = dtype0)
598 598 tableList.append(table)
599 599
600 600 self.dsList = dsList
601 601 self.tableDim = numpy.array(tableList, dtype = dtype0)
602 602 self.blockIndex = 0
603 603 timeTuple = time.localtime(dataOut.utctime)
604 604 self.currentDay = timeTuple.tm_yday
605 605
def putMetadata(self):
    '''
    Creates a metadata file, writes the metadata group in it and
    closes it.
    '''
    metaHandle = self.createMetadataFile()
    self.writeMetadata(metaHandle)
    metaHandle.close()
612 612
def createMetadataFile(self):
    '''
    Creates the next metadata file inside the day folder
    <path>/dYYYYDDD and returns it opened for writing.

    With setType None the set number follows the last existing
    metadata file of that folder (starting at 0); otherwise it is the
    minute of the day of dataOut.utctime.

    Sets:
        self.metaFile : base name of the file created

    Return:
        h5py.File opened in 'w' mode
    '''
    ext = self.ext
    path = self.path

    timeTuple = time.localtime(self.dataOut.utctime)

    # makedirs (as in setNextFile) so missing parent folders are created
    rootpath = os.path.join( path, '' )
    if not( os.path.exists(rootpath) ):
        os.makedirs(rootpath)

    subfolder = 'd%4.4d%3.3d' % (timeTuple.tm_year,timeTuple.tm_yday)
    fullpath = os.path.join( path, subfolder )

    setFile = -1 # set counter when nothing usable is found
    if not( os.path.exists(fullpath) ):
        os.makedirs(fullpath)
    else:
        # Keep only metadata files BEFORE testing for emptiness: a folder
        # holding just data files used to raise IndexError here.
        metaFiles = [k for k in os.listdir( fullpath ) if k.startswith(self.metaoptchar)]
        metaFiles = sorted( metaFiles, key=str.lower )
        if metaFiles:
            filen = metaFiles[-1]
            # the file name must have the following format
            # 0 1234 567 89A BCDE (hex)
            # x YYYY DDD SSS .ext
            if isNumber( filen[8:11] ):
                setFile = int( filen[8:11] ) # continue after the last file's set number

    if self.setType is None:
        setFile += 1
        file = '%s%4.4d%3.3d%03d%s' % (self.metaoptchar,
                                       timeTuple.tm_year,
                                       timeTuple.tm_yday,
                                       setFile,
                                       ext )
    else:
        setFile = timeTuple.tm_hour*60+timeTuple.tm_min
        file = '%s%4.4d%3.3d%04d%s' % (self.metaoptchar,
                                       timeTuple.tm_year,
                                       timeTuple.tm_yday,
                                       setFile,
                                       ext )

    filename = os.path.join( path, subfolder, file )
    self.metaFile = file
    #Setting HDF5 File
    fp = h5py.File(filename,'w')

    return fp
671 671
def writeMetadata(self, fp):
    '''
    Creates the "Metadata" group in fp holding the 'array dimensions'
    table plus one dataset per name in self.metadataList, each taken
    from the attribute of the same name in self.dataOut.
    '''
    metaGroup = fp.create_group("Metadata")
    metaGroup.create_dataset('array dimensions', data = self.tableDim, dtype = self.dtype)

    for attrName in self.metadataList:
        metaGroup.create_dataset(attrName, data=getattr(self.dataOut, attrName))
680 680
def timeFlag(self):
    '''
    Tells whether the current block must start a new file.

    Return:
        True when the local day of dataOut.utctime differs from
        self.currentDay, or when more than three hours separate this
        block from the previous one; False otherwise.

    self.lastTime is refreshed on every call and self.currentDay on a
    day change.
    '''
    currentTime = self.dataOut.utctime

    if self.lastTime is None:
        self.lastTime = currentTime

    #Day
    dataDay = time.localtime(currentTime).tm_yday

    #Time
    timeDiff = currentTime - self.lastTime

    # Always remember this block. Previously a day change left lastTime
    # stale, so the following block was measured against the old
    # reference and could force a second, spurious file split.
    self.lastTime = currentTime

    # New file if the day changed or the gap between blocks exceeds 3 hours
    if dataDay != self.currentDay:
        self.currentDay = dataDay
        return True
    if timeDiff > 3*60*60:
        return True
    return False
704 704
def setNextFile(self):
    '''
    Creates the next data file in <path>/dYYYYDDD, writes the metadata
    group and an initial dataset for every entry of self.dsList, then
    closes the file.

    Sets:
        self.filename   : full name of the file created
        self.ds         : list of the datasets created
        self.data       : list with one empty placeholder per dataset
        self.firsttime  : True
        self.blockIndex : 0
    '''
    ext = self.ext
    path = self.path

    timeTuple = time.localtime(self.dataOut.utctime)
    subfolder = 'd%4.4d%3.3d' % (timeTuple.tm_year,timeTuple.tm_yday)
    fullpath = os.path.join( path, subfolder )

    setFile = -1 # set counter when the folder gives no usable number
    if not os.path.exists(fullpath):
        os.makedirs(fullpath)
    else:
        # NOTE(review): the filter keeps names containing 'M' although
        # the file written below starts with self.optchar - confirm.
        candidates = [k for k in os.listdir( fullpath ) if 'M' in k]
        if len( candidates ) > 0:
            filen = sorted( candidates, key=str.lower )[-1]
            # the file name must have the following format
            # 0 1234 567 89A BCDE (hex)
            # x YYYY DDD SSS .ext
            if isNumber( filen[8:11] ):
                setFile = int( filen[8:11] ) # continue after the last file's set number

    if self.setType is None:
        setFile += 1
        nameFormat = '%s%4.4d%3.3d%03d%s'
    else:
        setFile = timeTuple.tm_hour*60+timeTuple.tm_min
        nameFormat = '%s%4.4d%3.3d%04d%s'

    file = nameFormat % (self.optchar,
                         timeTuple.tm_year,
                         timeTuple.tm_yday,
                         setFile,
                         ext )

    filename = os.path.join( path, subfolder, file )

    #Setting HDF5 File
    fp = h5py.File(filename,'w')
    #write metadata
    self.writeMetadata(fp)
    #Write data
    grp = fp.create_group("Data")
    ds = []
    data = []
    dsList = self.dsList

    i = 0
    while i < len(dsList):
        dsInfo = dsList[i]
        storeMode = dsInfo['mode']

        if storeMode == 0:
            #One-dimension data
            created = grp.create_dataset(dsInfo['variable'], (1,1), maxshape=(1,self.blocksPerFile) , chunks = True, dtype=numpy.float64)
            ds.append(created)
            data.append([])
            i += 1

        elif storeMode == 2:
            subGroup = grp.create_group(dsInfo['variable'])
            width = dsInfo['shape']
            created = subGroup.create_dataset(dsInfo['dsName'], (1,width), data = numpy.zeros((1,width)) , maxshape=(None,width), chunks=True)
            ds.append(created)
            data.append([])
            i += 1

        elif storeMode == 1:
            subGroup = grp.create_group(dsInfo['variable'])

            # the tables of one variable occupy consecutive dsList entries
            for _ in range(dsInfo['dsNumber']):
                dsInfo = dsList[i]
                tableName = dsInfo['dsName']

                if dsInfo['nDim'] == 3:
                    shape = dsInfo['shape'].astype(int)
                    created = subGroup.create_dataset(tableName, (shape[0],shape[1],1) , data = numpy.zeros((shape[0],shape[1],1)), maxshape = (None,shape[1],None), chunks=True)
                else:
                    shape = int(dsInfo['shape'])
                    created = subGroup.create_dataset(tableName, (1,shape), data = numpy.zeros((1,shape)) , maxshape=(None,shape), chunks=True)

                ds.append(created)
                data.append([])
                i += 1

    fp.flush()
    fp.close()

    log.log('creating file: {}'.format(filename), 'Writing')
    self.filename = filename
    self.ds = ds
    self.data = data
    self.firsttime = True
    self.blockIndex = 0
810 810
def putData(self):
    '''
    Writes the current block, starting a new file first when the
    present one is full or timeFlag() asks for it.
    '''
    # "or" keeps the short circuit: timeFlag() is not consulted when
    # the file is already full.
    needNewFile = self.blockIndex == self.blocksPerFile or self.timeFlag()
    if needNewFile:
        self.setNextFile()

    self.readBlock()
    self.setBlock()     # prepare the data to be written
    self.writeBlock()   # write the data
821 821
def readBlock(self):
    '''
    Reopens self.filename in 'r+' mode and refreshes every entry of
    self.ds with the matching dataset of the "Data" group.

    Sets:
        self.fp  : the open file
        self.grp : its "Data" group
        self.ds  : the refreshed dataset list
    '''
    layout = self.dsList
    handles = self.ds
    #Setting HDF5 File
    fp = h5py.File(self.filename,'r+')
    dataGroup = fp["Data"]

    pos = 0
    while pos < len(layout):
        entry = layout[pos]

        if entry['mode'] == 0:
            handles[pos] = dataGroup[entry['variable']]
            pos += 1
            continue

        # the tables of one variable occupy consecutive layout entries
        varGroup = dataGroup[entry['variable']]
        for _ in range(entry['dsNumber']):
            handles[pos] = varGroup[layout[pos]['dsName']]
            pos += 1

    self.fp = fp
    self.grp = dataGroup
    self.ds = handles
859 859
def setBlock(self):
    '''
    Fills self.data with the pieces of the current dataOut attributes,
    one piece per entry of self.dsList.
    '''
    layout = self.dsList
    pieces = self.data

    pos = 0
    while pos < len(layout):
        entry = layout[pos]
        source = getattr(self.dataOut, entry['variable'])
        mode = entry['mode']
        nDim = entry['nDim']

        if mode == 0 or mode == 2 or nDim == 1:
            # stored whole
            pieces[pos] = source
            pos += 1
        elif nDim == 2:
            # one table per row of the first axis
            for table in range(entry['dsNumber']):
                pieces[pos] = source[table,:]
                pos += 1
        elif nDim == 3:
            # one table per index of the second axis
            for table in range(entry['dsNumber']):
                pieces[pos] = source[:,table,:]
                pos += 1

    self.data = pieces
896 896
def writeBlock(self):
    '''
    Saves the block held in self.data into the datasets of self.ds,
    then flushes and closes self.fp.

    The first block of a file overwrites the initial dataset content;
    later blocks enlarge each dataset by one block before writing.
    '''
    layout = self.dsList

    for i, target in enumerate(self.ds):
        nDim = layout[i]['nDim']
        mode = layout[i]['mode']

        if self.firsttime:
            # First block of the file
            if type(self.data[i]) == numpy.ndarray:
                if nDim == 3:
                    rows = self.data[i].shape[0]
                    cols = self.data[i].shape[1]
                    self.data[i] = self.data[i].reshape((rows,cols,1))
                    target.resize(self.data[i].shape)
                if mode == 2:
                    target.resize(self.data[i].shape)
            target[:] = self.data[i]
            continue

        # From the second block on
        if mode == 2:
            # Meteors: append the new rows after the existing ones
            incoming = self.data[i].shape
            before = target.shape
            target.resize((target.shape[0] + incoming[0],target.shape[1]))
            target[before[0]:,:] = self.data[i]
        elif mode == 0:
            # No dimension: one more column
            target.resize((target.shape[0], target.shape[1] + 1))
            target[0,-1] = self.data[i]
        elif nDim == 1:
            # One dimension: one more row
            target.resize((target.shape[0] + 1, target.shape[1]))
            target[-1,:] = self.data[i]
        elif nDim == 2:
            # Two dimensions: one more row, addressed by block index
            target.resize((target.shape[0] + 1,target.shape[1]))
            target[self.blockIndex,:] = self.data[i]
        elif nDim == 3:
            # Three dimensions: one more slice along the last axis
            target.resize((target.shape[0],target.shape[1],target.shape[2]+1))
            target[:,:,-1] = self.data[i]

    self.firsttime = False
    self.blockIndex += 1

    #Close to save changes
    self.fp.flush()
    self.fp.close()
951 951
def run(self, dataOut, path, blocksPerFile=10, metadataList=None, dataList=None, mode=None, setType=None):
    '''
    Entry point of the operation: on the first call configures the
    writer and creates the first file, then writes dataOut as one block.
    '''
    self.dataOut = dataOut

    firstCall = not self.isConfig
    if firstCall:
        self.setup(dataOut,
                   path=path,
                   blocksPerFile=blocksPerFile,
                   metadataList=metadataList,
                   dataList=dataList,
                   mode=mode,
                   setType=setType)
        self.isConfig = True
        self.setNextFile()

    self.putData()
965 965
966 966
967 967 @MPDecorator
968 968 class ParameterReader(JRODataReader,ProcessingUnit):
969 969 '''
970 970 Reads HDF5 format files
971 971 '''
972 972
973 973 ext = ".hdf5"
974 974 optchar = "D"
975 975 timezone = None
976 976 startTime = None
977 977 endTime = None
978 978 fileIndex = None
979 979 blockList = None #List to blocks to be read from the file
980 980 blocksPerFile = None #Number of blocks to be read
981 981 blockIndex = None
982 982 path = None
983 983 #List of Files
984 984 filenameList = None
985 985 datetimeList = None
986 986 #Hdf5 File
987 987 listMetaname = None
988 988 listMeta = None
989 989 listDataname = None
990 990 listData = None
991 991 listShapes = None
992 992 fp = None
993 993 #dataOut reconstruction
994 994 dataOut = None
995 995
def __init__(self):
    '''
    Initialise the ProcessingUnit base and attach a fresh Parameters
    object as the output container.
    '''
    ProcessingUnit.__init__(self)
    self.dataOut = Parameters()
1000 1000
1001 1001 def setup(self, **kwargs):
1002 1002
1003 1003 path = kwargs['path']
1004 1004 startDate = kwargs['startDate']
1005 1005 endDate = kwargs['endDate']
1006 1006 startTime = kwargs['startTime']
1007 1007 endTime = kwargs['endTime']
1008 1008 walk = kwargs['walk']
1009 1009 if 'ext' in kwargs:
1010 1010 ext = kwargs['ext']
1011 1011 else:
1012 1012 ext = '.hdf5'
1013 1013 if 'timezone' in kwargs:
1014 1014 self.timezone = kwargs['timezone']
1015 1015 else:
1016 1016 self.timezone = 'lt'
1017 1017
1018 1018 print("[Reading] Searching files in offline mode ...")
1019 1019 pathList, filenameList = self.searchFilesOffLine(path, startDate=startDate, endDate=endDate,
1020 1020 startTime=startTime, endTime=endTime,
1021 1021 ext=ext, walk=walk)
1022 1022
1023 1023 if not(filenameList):
1024 1024 print("There is no files into the folder: %s"%(path))
1025 1025 sys.exit(-1)
1026 1026
1027 1027 self.fileIndex = -1
1028 1028 self.startTime = startTime
1029 1029 self.endTime = endTime
1030 1030 self.__readMetadata()
1031 1031 self.__setNextFileOffline()
1032 1032
1033 1033 return
1034 1034
def searchFilesOffLine(self, path, startDate=None, endDate=None, startTime=datetime.time(0,0,0), endTime=datetime.time(23,59,59), ext='.hdf5', walk=True):
    '''
    Build the list of data files to be read.

    Folders are obtained from self.findDatafiles; every file ending in
    ext is then kept only if it passes isFileInDateRange and
    self.__isFileInTimeRange.

    Return:
        (pathList, filenameList) on success, (None, None) when no date
        folder or no file qualifies. On success self.filenameList and
        self.datetimeList are updated as well.
    '''
    self.filenameList = []
    self.datetimeList = []

    dateList, pathList = self.findDatafiles(path, startDate, endDate, '', ext, walk, include_path=True)

    if dateList == []:
        print("[Reading] No *%s files in %s from %s to %s)"%(ext, path,
                                                    datetime.datetime.combine(startDate,startTime).ctime(),
                                                    datetime.datetime.combine(endDate,endTime).ctime()))
        return None, None

    if len(dateList) > 1:
        print("[Reading] %d days were found in date range: %s - %s" %(len(dateList), startDate, endDate))
    else:
        print("[Reading] data was found for the date %s" %(dateList[0]))

    keptFiles = []
    keptTimes = []

    for folder in pathList:
        names = glob.glob1(folder, "*%s" %ext)
        names.sort()

        for baseName in names:
            fullName = os.path.join(folder, baseName)

            if not isFileInDateRange(fullName, startDate, endDate):
                continue

            stamp = self.__isFileInTimeRange(fullName, startDate, endDate, startTime, endTime)
            if stamp:
                keptFiles.append(fullName)
                keptTimes.append(stamp)

    if not keptFiles:
        print("[Reading] Any file was found int time range %s - %s" %(datetime.datetime.combine(startDate,startTime).ctime(), datetime.datetime.combine(endDate,endTime).ctime()))
        return None, None

    print("[Reading] %d file(s) was(were) found in time range: %s - %s" %(len(keptFiles), startTime, endTime))
    print()

    self.filenameList = keptFiles
    self.datetimeList = keptTimes

    return pathList, keptFiles
1089 1089
def __isFileInTimeRange(self,filename, startDate, endDate, startTime, endTime):
    """
    Tell whether a data file belongs to the requested time range.

    Only the first value of the file's Data/utctime dataset is inspected.

    Inputs:
        filename  : full name of the HDF5 data file
        startDate : first date of the selected range (datetime.date)
        endDate   : last date of the selected range (datetime.date)
        startTime : first time of the selected range (datetime.time)
        endTime   : last time of the selected range (datetime.time)

    Return:
        The datetime.datetime of the first block when the file is inside
        the range, None otherwise.

    Exceptions:
        IOError  : the file cannot be opened (the original error is chained)
        KeyError : the file has no 'Data' group or no 'utctime' dataset
    """

    try:
        fp = h5py.File(filename, 'r')
    except IOError as err:
        raise IOError("The file %s can't be opened" %(filename)) from err

    # The context manager releases the handle even when the group or the
    # dataset is missing. Plain indexing is used instead of the `.value`
    # attribute, which no longer exists in h5py 3.
    with fp:
        thisUtcTime = float(fp['Data']['utctime'][0])

    # NOTE(review): the 5 h offset looks like a hard-coded UTC-5 local
    # time correction - confirm against the writer of these files.
    if self.timezone == 'lt':
        thisUtcTime -= 5*3600

    thisDatetime = datetime.datetime.fromtimestamp(thisUtcTime + 5*3600)
    thisDate = thisDatetime.date()

    # Range limits expressed as seconds since 1970-01-01 on the file's date
    epoch = datetime.datetime(1970, 1, 1)
    startUtcTime = (datetime.datetime.combine(thisDate,startTime) - epoch).total_seconds()
    endUtcTime = (datetime.datetime.combine(thisDate,endTime) - epoch).total_seconds()

    #General case
    #           o>>>>>>>>>>>>>><<<<<<<<<<<<<<o
    #-----------o----------------------------o-----------
    #       startTime                     endTime

    if endTime >= startTime:
        if startUtcTime < thisUtcTime < endUtcTime:
            return thisDatetime
        return None

    #If endTime < startTime then endTime belongs to the next day
    #<<<<<<<<<<<o                            o>>>>>>>>>>>
    #-----------o----------------------------o-----------
    #        endTime                      startTime

    if (thisDate == startDate) and (thisUtcTime < startUtcTime):
        return None

    if (thisDate == endDate) and (thisUtcTime > endUtcTime):
        return None

    if (thisUtcTime < startUtcTime) and (thisUtcTime > endUtcTime):
        return None

    return thisDatetime
1161 1161
def __setNextFileOffline(self):
    '''
    Advance to the next file of self.filenameList and load its data.

    The previously opened file, if any, is closed first: its data was
    already copied into memory by __readData, so keeping the handle open
    would only leak one descriptor per file.

    Returns 1 when a new file was opened. Returns 0 when the list is
    exhausted; in that case self.dataOut.error is set to 'No more files'.
    '''
    self.fileIndex += 1
    idFile = self.fileIndex

    if self.fp is not None:
        self.fp.close()
        self.fp = None

    if not(idFile < len(self.filenameList)):
        self.dataOut.error = 'No more files'
        return 0

    filename = self.filenameList[idFile]
    self.fp = h5py.File(filename, 'r')
    self.filename = filename

    print("Setting the file: %s"%self.filename)

    self.__setBlockList()
    self.__readData()
    self.blockIndex = 0
    return 1
1181 1181
def __setBlockList(self):
    '''
    Selects the blocks of the current file that lie within the times defined

    Reads:
        self.fp        (its Data/utctime dataset)
        self.startTime
        self.endTime
        self.timezone

    Writes:
        self.blockList     : indices of the blocks with
                             startTime <= time < endTime
        self.blocksPerFile : number of selected blocks
    '''
    # Read the whole dataset into a private float array. `[()]` replaces
    # the `.value` attribute that no longer exists in h5py 3, and the copy
    # makes the in-place shift below safe for any stored dtype.
    thisUtcTime = numpy.atleast_1d(
        numpy.array(self.fp['Data']['utctime'][()], dtype=float))

    if thisUtcTime.size == 0:
        # Nothing stored: there is no first block to take the date from
        self.blockList = numpy.array([], dtype=int)
        self.blocksPerFile = 0
        return

    # NOTE(review): the 5 h offset looks like a hard-coded UTC-5 local
    # time correction - confirm.
    if self.timezone == 'lt':
        thisUtcTime -= 5*3600

    # The date of the first block fixes the day used for both limits
    thisDate = datetime.datetime.fromtimestamp(thisUtcTime[0] + 5*3600).date()

    epoch = datetime.datetime(1970, 1, 1)
    startUtcTime = (datetime.datetime.combine(thisDate, self.startTime) - epoch).total_seconds()
    endUtcTime = (datetime.datetime.combine(thisDate, self.endTime) - epoch).total_seconds()

    inRange = numpy.logical_and(thisUtcTime >= startUtcTime, thisUtcTime < endUtcTime)
    ind = numpy.where(inRange)[0]

    self.blockList = ind
    self.blocksPerFile = len(ind)
    return
1217 1216
def __readMetadata(self):
    '''
    Reads Metadata from the first file of self.filenameList

    The 'variables' table of the Metadata group is turned into
    self.listShapes, a dict mapping each variable name to a one-element
    array holding the second column of its row. Every other member is
    stored by name in self.listMetaname and by value in self.listMeta.
    '''
    filename = self.filenameList[0]

    # Start empty so a file without a 'variables' table yields an empty
    # dict instead of an unbound local.
    listShapes = {}
    listMetaname = []
    listMetadata = []

    # The context manager closes the file even if a member cannot be read
    with h5py.File(filename, 'r') as fp:
        gp = fp['Metadata']

        for name in gp:
            if name=='variables':
                table = gp[name][:]
                listShapes = {}
                for shapes in table:
                    listShapes[shapes[0].decode()] = numpy.array([shapes[1]])
            else:
                # `[()]` reads the full dataset (`.value` is gone in h5py 3)
                data = gp[name][()]
                listMetaname.append(name)
                listMetadata.append(data)

    self.listShapes = listShapes
    self.listMetaname = listMetaname
    self.listMeta = listMetadata
    return
1248 1247
def __readData(self):
    '''
    Loads every member of the Data group of self.fp into memory

    For each member, self.listShapes[name][0] tells how it is stored:
    0 means the member is read directly; any other value n means the
    member holds n members named 'table00', 'table01', ... which are
    read and stacked along a new first axis.

    Writes self.listDataname (names) and self.listData (arrays), in the
    iteration order of the group.
    '''
    grp = self.fp['Data']
    listdataname = []
    listdata = []

    for name in grp:
        listdataname.append(name)
        dim = self.listShapes[name][0]
        if dim == 0:
            # `[()]` reads the full dataset (`.value` is gone in h5py 3)
            array = grp[name][()]
        else:
            array = numpy.array(
                [grp[name]['table{:02d}'.format(i)][()] for i in range(dim)])

        listdata.append(array)

    self.listDataname = listdataname
    self.listData = listdata
    return
1272 1271
def getData(self):
    '''
    Copy the metadata and the current block of every data variable into
    self.dataOut, clear its flagNoData and move on to the next block.

    Variables whose listShapes entry is 0 are indexed by block on their
    first axis, the others on their second axis.
    '''
    # NOTE(review): blocks are addressed with self.blockIndex directly;
    # self.blockList is not consulted here - confirm this is intended.
    out = self.dataOut
    current = self.blockIndex

    for metaName, metaValue in zip(self.listMetaname, self.listMeta):
        setattr(out, metaName, metaValue)

    for dataName, dataValues in zip(self.listDataname, self.listData):
        if self.listShapes[dataName][0] == 0:
            block = dataValues[current]
        else:
            block = dataValues[:,current]
        setattr(out, dataName, block)

    out.flagNoData = False
    self.blockIndex = current + 1
1289 1288
def run(self, **kwargs):
    '''
    Deliver one block per call into self.dataOut.

    The first call configures the reader through setup(**kwargs). When
    all blocks of the current file were delivered the next file is
    opened; if there is none, dataOut.flagNoData is raised and 0 is
    returned. Otherwise the method returns None.
    '''
    if not self.isConfig:
        self.setup(**kwargs)
        self.isConfig = True

    fileExhausted = self.blockIndex == self.blocksPerFile
    if fileExhausted and not self.__setNextFileOffline():
        self.dataOut.flagNoData = True
        return 0

    self.getData()
1304 1303
@MPDecorator
class ParameterWriter(Operation):
    '''
    HDF5 Writer, stores parameters data in HDF5 format files

    path: path where the files will be stored
    blocksPerFile: number of blocks that will be saved in per HDF5 format file
    metadataList: list of attributes that will be stored as metadata
    dataList: list of attributes that will be stores as data
    setType: file numbering scheme. None numbers the files of a day
             consecutively (3 digits); any other value uses the minute of
             the day of the first block (4 digits)
    '''

    ext = ".hdf5"
    optchar = "D"
    metaoptchar = "M"
    metaFile = None
    filename = None
    path = None
    setFile = None
    fp = None
    grp = None
    ds = None
    firsttime = True
    #Configurations
    blocksPerFile = None
    blockIndex = None
    dataOut = None
    #Data Arrays
    dataList = None
    metadataList = None
    dsList = None   #List of dictionaries with dataset properties
    tableDim = None
    dtype = [('name', 'S20'),('nDim', 'i')]
    currentDay = None
    lastTime = None

    def __init__(self):

        Operation.__init__(self)
        return

    def setup(self, path=None, blocksPerFile=10, metadataList=None, dataList=None, setType=None):
        '''
        Store the configuration and describe every data variable.

        For each name of dataList found on self.dataOut with a value other
        than None, a dictionary with its name and number of dimensions
        (plus shape and number of tables for arrays) is appended to
        self.dsList; self.tableDim collects the (name, nDim) pairs that
        are written as the 'variables' metadata table.
        '''
        self.path = path
        self.blocksPerFile = blocksPerFile
        # A missing list is treated as empty instead of failing later
        self.metadataList = metadataList if metadataList is not None else []
        self.dataList = dataList if dataList is not None else []
        self.setType = setType

        tableList = []
        dsList = []

        for variable in self.dataList:
            dataAux = getattr(self.dataOut, variable)

            if dataAux is None:
                continue

            dsDict = {'variable': variable}

            # numpy.floating replaces the numpy.float alias, which was
            # removed from NumPy, and also covers float32 scalars.
            if isinstance(dataAux, (int, float, numpy.integer, numpy.floating)):
                dsDict['nDim'] = 0
            else:
                dsDict['nDim'] = len(dataAux.shape)
                dsDict['shape'] = dataAux.shape
                dsDict['dsNumber'] = dataAux.shape[0]

            dsList.append(dsDict)
            tableList.append((variable, dsDict['nDim']))

        self.dsList = dsList
        self.tableDim = numpy.array(tableList, dtype=self.dtype)
        self.currentDay = self.dataOut.datatime.date()

    def timeFlag(self):
        '''
        Tell whether the current block must start a new file.

        Returns True when the day of the year of the block differs from
        the previous one or when more than three hours passed since the
        previous block. The very first call only records the reference
        values and returns False.
        '''
        currentTime = self.dataOut.utctime
        dataDay = time.localtime(currentTime).tm_yday

        if self.lastTime is None:
            self.lastTime = currentTime
            self.currentDay = dataDay
            return False

        timeDiff = currentTime - self.lastTime
        dayChanged = dataDay != self.currentDay

        # Both references are refreshed on every call. Leaving lastTime
        # untouched on a day change made the following block trigger a
        # second rollover whenever the gap across midnight exceeded 3 h.
        self.lastTime = currentTime
        self.currentDay = dataDay

        return dayChanged or timeDiff > 3*60*60

    def run(self, dataOut, path, blocksPerFile=10, metadataList=None, dataList=None, setType=None):
        '''
        Write the current block of dataOut; the first call also configures
        the writer and creates the first file.
        '''
        self.dataOut = dataOut
        if not(self.isConfig):
            self.setup(path=path, blocksPerFile=blocksPerFile,
                       metadataList=metadataList, dataList=dataList,
                       setType=setType)

            self.isConfig = True
            self.setNextFile()

        self.putData()
        return

    def setNextFile(self):
        '''
        Create the next output file inside the day folder
        <path>/dYYYYDDD and write its metadata and empty datasets.
        '''
        ext = self.ext
        path = self.path

        timeTuple = time.localtime(self.dataOut.utctime)
        subfolder = 'd%4.4d%3.3d' % (timeTuple.tm_year,timeTuple.tm_yday)
        fullpath = os.path.join(path, subfolder)

        if os.path.exists(fullpath):
            filesList = sorted(
                (k for k in os.listdir(fullpath) if k.startswith(self.optchar)),
                key=str.lower)
            # The filename is expected to have the following layout
            # 0 1234 567 89A BCDE (hex)
            # x YYYY DDD SSS .ext
            if filesList and isNumber(filesList[-1][8:11]):
                # continue counting from the set number of the last file
                setFile = int(filesList[-1][8:11])
            else:
                setFile = -1
        else:
            os.makedirs(fullpath)
            setFile = -1

        if self.setType is None:
            setFile += 1
            file = '%s%4.4d%3.3d%03d%s' % (self.optchar,
                                           timeTuple.tm_year,
                                           timeTuple.tm_yday,
                                           setFile,
                                           ext )
        else:
            setFile = timeTuple.tm_hour*60+timeTuple.tm_min
            file = '%s%4.4d%3.3d%04d%s' % (self.optchar,
                                           timeTuple.tm_year,
                                           timeTuple.tm_yday,
                                           setFile,
                                           ext )

        self.filename = os.path.join( path, subfolder, file )

        #Setting HDF5 File
        self.fp = h5py.File(self.filename, 'w')
        #write metadata
        self.writeMetadata(self.fp)
        #Write data
        self.writeData(self.fp)

    def writeMetadata(self, fp):
        '''
        Create the Metadata group: the 'variables' table plus one dataset
        per attribute of metadataList present on self.dataOut.
        '''
        grp = fp.create_group("Metadata")
        grp.create_dataset('variables', data=self.tableDim, dtype=self.dtype)

        for metaName in self.metadataList:
            if not hasattr(self.dataOut, metaName):
                log.warning('Metadata: `{}` not found'.format(metaName), self.name)
                continue
            grp.create_dataset(metaName, data=getattr(self.dataOut, metaName))
        return

    def writeData(self, fp):
        '''
        Create the Data group with one dataset per scalar variable and one
        sub-group of 'tableNN' datasets per array variable, each with room
        for blocksPerFile blocks, and reset the block counter.
        '''
        grp = fp.create_group("Data")
        dtsets = []
        data = []

        for dsInfo in self.dsList:
            if dsInfo['nDim'] == 0:
                ds = grp.create_dataset(
                    dsInfo['variable'],
                    (self.blocksPerFile, ),
                    chunks=True,
                    dtype=numpy.float64)
                dtsets.append(ds)
                # -1 marks a variable written without channel indexing
                data.append((dsInfo['variable'], -1))
            else:
                sgrp = grp.create_group(dsInfo['variable'])
                for i in range(dsInfo['dsNumber']):
                    ds = sgrp.create_dataset(
                        'table{:02d}'.format(i),
                        (self.blocksPerFile, ) + dsInfo['shape'][1:],
                        chunks=True)
                    dtsets.append(ds)
                    data.append((dsInfo['variable'], i))
        fp.flush()

        log.log('creating file: {}'.format(fp.filename), 'Writing')

        self.ds = dtsets
        self.data = data
        self.firsttime = True
        self.blockIndex = 0
        return

    def putData(self):
        '''
        Write the current block into every dataset, starting a new file
        first when the current one is full or timeFlag() asks for it.
        '''
        if (self.blockIndex == self.blocksPerFile) or self.timeFlag():
            self.closeFile()
            self.setNextFile()

        for ds, (attr, ch) in zip(self.ds, self.data):
            value = getattr(self.dataOut, attr)
            ds[self.blockIndex] = value if ch == -1 else value[ch]

        self.fp.flush()
        self.blockIndex += 1

        return

    def closeFile(self):
        '''
        Trim the datasets to the number of blocks actually written and
        close the current file. Does nothing when no file is open.
        '''
        if self.fp is None:
            return

        if self.blockIndex != self.blocksPerFile:
            for ds in self.ds:
                ds.resize(self.blockIndex, axis=0)

        self.fp.flush()
        self.fp.close()
        # Forget the handle so a repeated close is harmless
        self.fp = None

    def close(self):

        self.closeFile()
General Comments 0
You need to be logged in to leave comments. Login now