##// END OF EJS Templates
-bug fix in HDF5 Writer Module
Julio Valdez -
r810:7e14525bff70
parent child
Show More
@@ -1,1054 +1,1201
1 1 import numpy
2 2 import time
3 3 import os
4 4 import h5py
5 5 import re
6 6 import datetime
7 7
8 8 from schainpy.model.data.jrodata import *
9 9 from schainpy.model.proc.jroproc_base import ProcessingUnit, Operation
10 10 # from jroIO_base import *
11 11 from schainpy.model.io.jroIO_base import *
12 12 import schainpy
13 13
14 14
class HDF5Reader(ProcessingUnit):
    '''
    Reads parameter data stored in HDF5 format files.

    Keyword arguments consumed by setup() (and therefore by run()):

        path      : folder where the files are searched
        startDate : first date of the selection (datetime.date)
        endDate   : last date of the selection (datetime.date)
        startTime : first time of day of the selection (datetime.time)
        endTime   : last time of day of the selection (datetime.time)
        walk      : forwarded untouched to JRODataReader.findDatafiles
        ext       : file extension, optional, defaults to '.hdf5'
    '''

    ext = ".hdf5"

    optchar = "D"

    timezone = None

    startTime = None

    endTime = None

    fileIndex = None

    utcList = None          # To select data in the utctime list

    blockList = None        # Indexes of the blocks to be read from the file

    blocksPerFile = None    # Number of blocks to be read

    blockIndex = None

    path = None

    # List of files

    filenameList = None

    datetimeList = None

    # HDF5 file

    listMetaname = None

    listMeta = None

    listDataname = None

    listData = None

    listShapes = None

    fp = None

    # dataOut reconstruction

    dataOut = None

    def __init__(self):
        '''Creates the Parameters object that getData() fills in.'''
        # NOTE(review): ProcessingUnit.__init__ is not called here, while
        # run() reads self.isConfig -- confirm the base class provides it.
        self.dataOut = Parameters()
        return

    def setup(self, **kwargs):
        '''
        Searches the files of the requested date/time range, reads the
        metadata of the first one and opens it for reading.

        Exits the interpreter with status -1 when no file is found.
        '''
        # sys is not imported explicitly at the top of this module
        import sys

        path = kwargs['path']
        startDate = kwargs['startDate']
        endDate = kwargs['endDate']
        startTime = kwargs['startTime']
        endTime = kwargs['endTime']
        walk = kwargs['walk']
        ext = kwargs.get('ext', '.hdf5')

        print("[Reading] Searching files in offline mode ...")
        pathList, filenameList = self.__searchFilesOffLine(path, startDate=startDate, endDate=endDate,
                                                           startTime=startTime, endTime=endTime,
                                                           ext=ext, walk=walk)

        if not filenameList:
            print("There is no files into the folder: %s" % (path))
            sys.exit(-1)

        self.fileIndex = -1
        self.startTime = startTime
        self.endTime = endTime

        self.__readMetadata()

        self.__setNextFileOffline()

        return

    def __searchFilesOffLine(self,
                             path,
                             startDate=None,
                             endDate=None,
                             startTime=datetime.time(0, 0, 0),
                             endTime=datetime.time(23, 59, 59),
                             ext='.hdf5',
                             walk=True):
        '''
        Builds the list of files (and the datetime of their first block)
        that fall inside the requested date and time range.

        Affected:
            self.filenameList
            self.datetimeList

        Return:
            (pathList, filenameList), or (None, None) when nothing matches.
        '''
        # fnmatch is not imported at the top of this module
        import fnmatch

        expLabel = ''
        self.filenameList = []
        self.datetimeList = []

        JRODataObj = JRODataReader()
        dateList, pathList = JRODataObj.findDatafiles(path, startDate, endDate, expLabel, ext, walk, include_path=True)

        if not dateList:
            print("[Reading] No *%s files in %s from %s to %s)" % (ext, path,
                  datetime.datetime.combine(startDate, startTime).ctime(),
                  datetime.datetime.combine(endDate, endTime).ctime()))

            return None, None

        if len(dateList) > 1:
            print("[Reading] %d days were found in date range: %s - %s" % (len(dateList), startDate, endDate))
        else:
            print("[Reading] data was found for the date %s" % (dateList[0]))

        filenameList = []
        datetimeList = []

        pattern = "*%s" % ext

        for thisPath in pathList:

            # Same selection glob.glob1() made: names matching the pattern,
            # hidden files excluded, unreadable folders treated as empty.
            try:
                names = os.listdir(thisPath)
            except OSError:
                names = []

            fileList = [name for name in names
                        if fnmatch.fnmatch(name, pattern) and not name.startswith('.')]
            fileList.sort()

            for thisFile in fileList:

                filename = os.path.join(thisPath, thisFile)

                if not isFileInDateRange(filename, startDate, endDate):
                    continue

                thisDatetime = self.__isFileInTimeRange(filename, startDate, endDate, startTime, endTime)

                if not thisDatetime:
                    continue

                filenameList.append(filename)
                datetimeList.append(thisDatetime)

        if not filenameList:
            print("[Reading] Any file was found int time range %s - %s" % (datetime.datetime.combine(startDate, startTime).ctime(), datetime.datetime.combine(endDate, endTime).ctime()))
            return None, None

        print("[Reading] %d file(s) was(were) found in time range: %s - %s" % (len(filenameList), startTime, endTime))
        print("")

        for thisFilename, thisDatetime in zip(filenameList, datetimeList):
            print("[Reading] %s -> [%s]" % (thisFilename, thisDatetime.ctime()))

        self.filenameList = filenameList
        self.datetimeList = datetimeList

        return pathList, filenameList

    def __isFileInTimeRange(self, filename, startDate, endDate, startTime, endTime):
        """
        Tells whether a data file has blocks inside the requested time range.

        Inputs:
            filename  : full name of the data file

            startDate : first date of the selected range (datetime.date)

            endDate   : last date of the selected range (datetime.date)

            startTime : first time of the selected range (datetime.time)

            endTime   : last time of the selected range (datetime.time)

        Return:
            The datetime of the first block of the file when the file has
            data inside the range, otherwise None.

        Exceptions:
            IOError when the file cannot be opened.
        """

        try:
            fp = h5py.File(filename, 'r')
        except IOError:
            # traceback is not imported explicitly at the top of this module
            import traceback
            traceback.print_exc()
            raise IOError("The file %s can't be opened" % (filename))

        # The handle is released even when 'Data' or 'utctime' is missing
        try:
            # First row of the 'utctime' table
            thisUtcTime = fp['Data']['utctime'][()][0]
        finally:
            fp.close()

        thisDatetime = datetime.datetime.fromtimestamp(thisUtcTime[0])
        thisDate = thisDatetime.date()

        # NOTE(review): these limits are naive datetimes measured from the
        # epoch while thisDatetime comes from fromtimestamp() (local time);
        # confirm the intended time zone handling.
        epoch = datetime.datetime(1970, 1, 1)
        startUtcTime = (datetime.datetime.combine(thisDate, startTime) - epoch).total_seconds()
        endUtcTime = (datetime.datetime.combine(thisDate, endTime) - epoch).total_seconds()

        # General case
        #            o>>>>>>>>>>>>>><<<<<<<<<<<<<<o
        # -----------o----------------------------o-----------
        #        startTime                     endTime

        if endTime >= startTime:
            thisUtcLog = numpy.logical_and(thisUtcTime > startUtcTime, thisUtcTime < endUtcTime)
            if numpy.any(thisUtcLog):   # At least one block between the given hours
                return thisDatetime
            return None

        # If endTime < startTime then endTime belongs to the next day
        # <<<<<<<<<<<o                            o>>>>>>>>>>>
        # -----------o----------------------------o-----------
        #         endTime                     startTime

        if (thisDate == startDate) and numpy.all(thisUtcTime < startUtcTime):
            return None

        if (thisDate == endDate) and numpy.all(thisUtcTime > endUtcTime):
            return None

        if numpy.all(thisUtcTime < startUtcTime) and numpy.all(thisUtcTime > endUtcTime):
            return None

        return thisDatetime

    def __setNextFileOffline(self):
        '''
        Opens the next file of self.filenameList and loads its blocks.

        Return:
            1 when a new file was loaded, 0 when there are no more files.
        '''
        # Everything needed from the previous file was copied into memory by
        # __readData(), so its handle can be released before moving on.
        if self.fp is not None:
            self.fp.close()
            self.fp = None

        self.fileIndex += 1
        idFile = self.fileIndex

        if not (idFile < len(self.filenameList)):
            print("No more Files")
            return 0

        filename = self.filenameList[idFile]

        self.filename = filename
        self.fp = h5py.File(filename, 'r')

        print("Setting the file: %s" % self.filename)

        self.__setBlockList()
        self.__readData()
        self.blockIndex = 0
        return 1

    def __setBlockList(self):
        '''
        Selects the blocks of the current file within the configured times.

        Inputs:
            self.fp
            self.startTime
            self.endTime

        Affected:
            self.blockList
            self.blocksPerFile
        '''
        startTime = self.startTime
        endTime = self.endTime

        # astype() returns a copy, so the in-place shift below never touches
        # the file contents.
        thisUtcTime = self.fp['Data']['utctime'][()].astype(float)[0]

        if self.timezone == 'lt':
            thisUtcTime -= 5*3600

        thisDatetime = datetime.datetime.fromtimestamp(thisUtcTime[0])
        thisDate = thisDatetime.date()

        epoch = datetime.datetime(1970, 1, 1)
        startUtcTime = (datetime.datetime.combine(thisDate, startTime) - epoch).total_seconds()
        endUtcTime = (datetime.datetime.combine(thisDate, endTime) - epoch).total_seconds()

        ind = numpy.where(numpy.logical_and(thisUtcTime >= startUtcTime, thisUtcTime < endUtcTime))[0]

        self.blockList = ind
        self.blocksPerFile = len(ind)

        return

    def __readMetadata(self):
        '''
        Reads the 'Metadata' group of the first file of self.filenameList.

        Affected:
            self.listShapes
            self.listMetaname
            self.listMeta
        '''
        filename = self.filenameList[0]

        listMetaname = []
        listMetadata = []
        # Stays empty when the file has no 'array dimensions' table
        listShapes = {}

        fp = h5py.File(filename, 'r')

        try:
            gp = fp['Metadata']

            for name in gp.keys():

                if name == 'array dimensions':
                    for shapes in gp[name][:]:
                        key = shapes[0]
                        # On Python 3 the 'S20' field comes back as bytes while
                        # the dataset names used for the lookup are str.
                        if isinstance(key, bytes) and not isinstance(key, str):
                            key = key.decode()
                        listShapes[key] = numpy.array([shapes[1], shapes[2], shapes[3], shapes[4], shapes[5]])
                else:
                    listMetaname.append(name)
                    listMetadata.append(gp[name][()])
        finally:
            fp.close()

        self.listShapes = listShapes
        self.listMetaname = listMetaname
        self.listMeta = listMetadata

        return

    def __readData(self):
        '''
        Loads every entry of the 'Data' group of the current file in memory.

        Affected:
            self.listDataname
            self.listData
        '''
        grp = self.fp['Data']
        listdataname = []
        listdata = []

        for name in grp.keys():
            listdataname.append(name)
            listdata.append(self.__setDataArray(grp[name], self.listShapes[name]))

        self.listDataname = listdataname
        self.listData = listdata
        return

    def __setDataArray(self, dataset, shapes):
        '''
        Reads one variable of the file keeping only the selected blocks.

        Inputs:
            dataset : entry of the 'Data' group for the variable
            shapes  : its row of the 'array dimensions' table, that is
                      [nDimensions, dim2, dim1, dim0, mode]

        Return:
            numpy array with the selected data.

        Exceptions:
            ValueError when a two or three dimension variable was stored
            with a mode other than 1.
        '''
        nDims = shapes[0]

        nDim2 = shapes[1]   # Dimension 0

        nDim1 = shapes[2]   # Dimension 1, number of points or parameters

        nDim0 = shapes[3]   # Dimension 2, number of samples or ranges

        mode = shapes[4]    # Mode of storing

        blockList = self.blockList

        blocksPerFile = self.blocksPerFile

        # ------- Mode 2: concatenated in a table -------
        if mode == 2:
            arrayData = dataset['table0'][()]
            # Selecting part of the dataset: the first column holds the time
            # of every row and 'indices' the first row of every block.
            utctime = arrayData[:, 0]
            u, indices = numpy.unique(utctime, return_index=True)

            if blockList.size != indices.size:
                if blockList.size == 0:
                    return arrayData[0:0, :]

                indMin = indices[blockList[0]]
                nextBlock = blockList[-1] + 1
                # When the selection reaches the last block there is no
                # following block to take the upper limit from.
                if nextBlock < indices.size:
                    indMax = indices[nextBlock]
                else:
                    indMax = arrayData.shape[0]
                arrayData = arrayData[indMin:indMax, :]
            return arrayData

        # ------- One dimension ---------------
        if nDims == 1:
            return dataset[()].astype(float)[0][blockList]

        if mode != 1:
            raise ValueError("Unsupported storing mode %s for a %s-dimension array" % (mode, nDims))

        # Mode 1: divided in parameters, one dataset per parameter. Files may
        # name them 'param<N>' or 'table<N>' (the latter is what
        # HDF5Writer.setNextFile creates).
        strds = 'param'
        if (strds + '0') not in dataset:
            strds = 'table'
        nDatas = nDim1

        # ------- Two dimensions -----------
        if nDims == 2:
            arrayData = numpy.zeros((blocksPerFile, nDim1, nDim0))

            for i in range(nDatas):
                data = dataset[strds + str(i)][()]
                arrayData[:, i, :] = data[blockList, :]

        # ------- Three dimensions ---------
        else:
            newShapes = (blocksPerFile, nDim2, nDim0)
            arrayData = numpy.zeros((blocksPerFile, nDim2, nDim1, nDim0))

            for i in range(nDatas):
                data = dataset[strds + str(i)][()]
                data = data[blockList, :, :]
                arrayData[:, :, i, :] = data.reshape(newShapes)

        return arrayData

    def __setDataOut(self):
        '''
        Copies the metadata and the current block of every variable into
        self.dataOut as attributes.
        '''
        listShapes = self.listShapes
        blockIndex = self.blockIndex

        for metaName, metaValue in zip(self.listMetaname, self.listMeta):
            setattr(self.dataOut, metaName, metaValue)

        for dataName, dataValue in zip(self.listDataname, self.listData):
            nShapes = listShapes[dataName][0]
            mode = listShapes[dataName][4]

            if nShapes == 1:
                setattr(self.dataOut, dataName, dataValue[blockIndex])
            elif nShapes > 1:
                setattr(self.dataOut, dataName, dataValue[blockIndex, :])
            # Mode meteors
            elif mode == 2:
                selectedData = self.__selectDataMode2(dataValue, blockIndex)
                setattr(self.dataOut, dataName, selectedData)
        return

    def __selectDataMode2(self, data, blockIndex):
        '''
        Returns the rows of a mode 2 table whose time (first column) is the
        blockIndex-th distinct time of the table.
        '''
        utctime = data[:, 0]
        aux, indices = numpy.unique(utctime, return_inverse=True)
        selInd = numpy.where(indices == blockIndex)[0]

        return data[selInd, :]

    def getData(self):
        '''
        Puts the next block in self.dataOut, moving on to the next file when
        the current one is exhausted.

        Return:
            0 when there are no more files (dataOut.flagNoData is set),
            None otherwise.
        '''
        # A loop and not a single test: a file may contribute no block at
        # all, in which case it has to be skipped as well.
        while self.blockIndex == self.blocksPerFile:
            if not self.__setNextFileOffline():
                self.dataOut.flagNoData = True
                return 0

        self.__setDataOut()
        self.dataOut.flagNoData = False

        self.blockIndex += 1

        return

    def run(self, **kwargs):
        '''Configures the reader on the first call and reads one block.'''
        if not self.isConfig:
            self.setup(**kwargs)
            self.isConfig = True

        self.getData()

        return
526 526
527 527 class HDF5Writer(Operation):
528 528 '''
529 529 HDF5 Writer, stores parameters data in HDF5 format files
530 530
531 531 path: path where the files will be stored
532 532
533 533 blocksPerFile: number of blocks that will be saved in per HDF5 format file
534 534
535 535 mode: selects the data stacking mode: '0' channels, '1' parameters, '2' table (for meteors)
536 536
537 537 metadataList: list of attributes that will be stored as metadata
538 538
539 539 dataList: list of attributes that will be stores as data
540 540
541 541 '''
542 542
543 543
544 544 ext = ".hdf5"
545 545
546 546 optchar = "D"
547 547
548 548 metaoptchar = "M"
549 549
550 550 metaFile = None
551 551
552 552 filename = None
553 553
554 554 path = None
555 555
556 556 setFile = None
557 557
558 558 fp = None
559 559
560 560 grp = None
561 561
562 562 ds = None
563 563
564 564 firsttime = True
565 565
566 566 #Configurations
567 567
568 568 blocksPerFile = None
569 569
570 570 blockIndex = None
571 571
572 572 dataOut = None
573 573
574 574 #Data Arrays
575 575
576 576 dataList = None
577 577
578 578 metadataList = None
579 579
580 arrayDim = None
580 # arrayDim = None
581
582 dsList = None #List of dictionaries
581 583
582 584 tableDim = None
583 585
584 586 # dtype = [('arrayName', 'S20'),('nChannels', 'i'), ('nPoints', 'i'), ('nSamples', 'i'),('mode', 'b')]
585 587
586 588 dtype = [('arrayName', 'S20'),('nDimensions', 'i'), ('dim2', 'i'), ('dim1', 'i'),('dim0', 'i'),('mode', 'b')]
587 589
588 590 mode = None
589 591
590 592 nDatas = None #Number of datasets to be stored per array
591 593
592 594 nDims = None #Number Dimensions in each dataset
593 595
594 596 nDimsForDs = None
595 597
596 598 currentDay = None
597 599
def __init__(self):
    '''Initializes the base Operation and marks the writer as not configured.'''
    Operation.__init__(self)
    self.isConfig = False
603 605
604 606 def setup(self, dataOut, **kwargs):
605 607
606 608 self.path = kwargs['path']
607 609
608 610 if kwargs.has_key('blocksPerFile'):
609 611 self.blocksPerFile = kwargs['blocksPerFile']
610 612 else:
611 613 self.blocksPerFile = 10
612 614
613 615 self.metadataList = kwargs['metadataList']
614
615 616 self.dataList = kwargs['dataList']
616
617 617 self.dataOut = dataOut
618 618
619 619 if kwargs.has_key('mode'):
620 620 mode = kwargs['mode']
621 621
622 622 if type(mode) == int:
623 623 mode = numpy.zeros(len(self.dataList)) + mode
624 624 else:
625 625 mode = numpy.ones(len(self.dataList))
626 626
627 627 self.mode = mode
628 628
629 629 arrayDim = numpy.zeros((len(self.dataList),5))
630 630
631 631 #Table dimensions
632
633 632 dtype0 = self.dtype
634
635 633 tableList = []
636 634
635 #Dictionary and list of tables
636 dsList = []
637
637 638 for i in range(len(self.dataList)):
638
639 dsDict = {}
639 640 dataAux = getattr(self.dataOut, self.dataList[i])
640
641 dsDict['variable'] = self.dataList[i]
641 642 #--------------------- Conditionals ------------------------
642 643 #There is no data
643 644 if dataAux == None:
644 645 return 0
645 646
646 647 #Not array, just a number
647 if type(dataAux)==float or type(dataAux)==int:
648 #Mode 0
649 if type(dataAux)==float or type(dataAux)==int:
650 dsDict['mode'] = 0
651 dsDict['nDim'] = 0
648 652 arrayDim[i,0] = 1
649 mode[i] = 0
650
651 #Mode meteors
652 elif mode[i] == 2:
653 dsList.append(dsDict)
654
655 #Mode 2: meteors
656 elif mode[i] == 2:
657 # dsDict['nDim'] = 0
658 dsDict['dsName'] = 'table0'
659 dsDict['mode'] = 2 # Mode meteors
660 dsDict['shape'] = dataAux.shape[-1]
661 dsDict['nDim'] = 0
662
653 663 arrayDim[i,3] = dataAux.shape[-1]
654 664 arrayDim[i,4] = mode[i] #Mode the data was stored
665
666 dsList.append(dsDict)
655 667
656 #All the rest
668 #Mode 1
657 669 else:
658 670 arrayDim0 = dataAux.shape #Data dimensions
659 671 arrayDim[i,0] = len(arrayDim0) #Number of array dimensions
660 672 arrayDim[i,4] = mode[i] #Mode the data was stored
661 673
674 strtable = 'table'
675 dsDict['mode'] = 1 # Mode parameters
676
662 677 # Three-dimension arrays
663 678 if len(arrayDim0) == 3:
664 679 arrayDim[i,1:-1] = numpy.array(arrayDim0)
665
680 nTables = int(arrayDim[i,2])
681 dsDict['dsNumber'] = nTables
682 dsDict['shape'] = arrayDim[i,2:4]
683 dsDict['nDim'] = 3
684
685 for j in range(nTables):
686 dsDict = dsDict.copy()
687 dsDict['dsName'] = strtable + str(j)
688 dsList.append(dsDict)
689
666 690 # Two-dimension arrays
667 691 elif len(arrayDim0) == 2:
668 692 arrayDim[i,2:-1] = numpy.array(arrayDim0)
669
693 nTables = int(arrayDim[i,2])
694 dsDict['dsNumber'] = nTables
695 dsDict['shape'] = arrayDim[i,3]
696 dsDict['nDim'] = 2
697
698 for j in range(nTables):
699 dsDict = dsDict.copy()
700 dsDict['dsName'] = strtable + str(j)
701 dsList.append(dsDict)
702
670 703 # One-dimension arrays
671 704 elif len(arrayDim0) == 1:
672 arrayDim[i,3] = arrayDim0
673
674 # No array, just a number
675 elif len(arrayDim0) == 0:
676 arrayDim[i,0] = 1
677 arrayDim[i,3] = 1
705 arrayDim[i,3] = arrayDim0[0]
706 dsDict['shape'] = arrayDim0[0]
707 dsDict['dsNumber'] = 1
708 dsDict['dsName'] = strtable + str(0)
709 dsDict['nDim'] = 1
710 dsList.append(dsDict)
678 711
679 712 table = numpy.array((self.dataList[i],) + tuple(arrayDim[i,:]),dtype = dtype0)
680 713 tableList.append(table)
681 714
682 self.arrayDim = arrayDim
715 # self.arrayDim = arrayDim
716 self.dsList = dsList
683 717 self.tableDim = numpy.array(tableList, dtype = dtype0)
684 718 self.blockIndex = 0
685 719
686 720 timeTuple = time.localtime(dataOut.utctime)
687 721 self.currentDay = timeTuple.tm_yday
688 722 return 1
689 723
def putMetadata(self):
    '''
    Creates the metadata file and writes the metadata into it.

    The file is closed even when writing the metadata fails.
    '''
    fp = self.createMetadataFile()
    try:
        self.writeMetadata(fp)
    finally:
        fp.close()
    return
696 730
def createMetadataFile(self):
    '''
    Creates a new metadata file inside the folder of the current day.

    The file is named <metaoptchar>YYYYDDDSSS<ext>, where SSS is one more
    than the set number of the last metadata file found in the folder.

    Affected:
        self.metaFile

    Return:
        h5py.File object opened for writing.
    '''
    ext = self.ext
    path = self.path

    timeTuple = time.localtime(self.dataOut.utctime)

    # makedirs: the root folder may have missing parents
    if not os.path.exists(path):
        os.makedirs(path)

    subfolder = 'd%4.4d%3.3d' % (timeTuple.tm_year, timeTuple.tm_yday)
    fullpath = os.path.join(path, subfolder)

    # Set counter, stays at -1 unless a previous file says otherwise
    setFile = -1

    if not os.path.exists(fullpath):
        os.mkdir(fullpath)
    else:
        filesList = sorted([k for k in os.listdir(fullpath) if 'M' in k], key=str.lower)
        # Checked after filtering: the folder may hold only data files
        if filesList:
            filen = filesList[-1]
            # the filename should have the following format
            # 0 1234 567 89A BCDE (hex)
            # x YYYY DDD SSS .ext
            if isNumber(filen[8:11]):
                # start from the set number of the last file
                setFile = int(filen[8:11])

    setFile += 1

    metaName = '%s%4.4d%3.3d%3.3d%s' % (self.metaoptchar,
                                        timeTuple.tm_year,
                                        timeTuple.tm_yday,
                                        setFile,
                                        ext)

    filename = os.path.join(path, subfolder, metaName)
    self.metaFile = metaName

    # Setting HDF5 File
    fp = h5py.File(filename, 'w')

    return fp
748 782
def writeMetadata(self, fp):
    '''
    Writes the 'Metadata' group in the given file: the table of array
    dimensions plus one dataset per attribute named in self.metadataList.
    '''
    metaGroup = fp.create_group("Metadata")
    metaGroup.create_dataset('array dimensions', data=self.tableDim, dtype=self.dtype)

    for attrName in self.metadataList:
        metaGroup.create_dataset(attrName, data=getattr(self.dataOut, attrName))
    return
757 791
def dateFlag(self):
    '''
    Tells whether the day of the year of the current data differs from
    self.currentDay, updating self.currentDay when it does.
    '''
    dataDay = time.localtime(self.dataOut.utctime).tm_yday

    dayChanged = dataDay != self.currentDay
    if dayChanged:
        self.currentDay = dataDay
    return dayChanged
768 802
def setNextFile(self):
    '''
    Creates the next data file and the datasets described in self.dsList.

    The file is named <optchar>YYYYDDDSSS<ext> inside the folder of the
    current day, where SSS is one more than the set number of the last data
    file found there. The metadata is written in the same file, which is
    closed before returning.

    Affected:
        self.filename, self.ds, self.data, self.firsttime, self.blockIndex

    Exceptions:
        ValueError when an entry of self.dsList has an unknown mode.
    '''
    ext = self.ext
    path = self.path

    timeTuple = time.localtime(self.dataOut.utctime)
    subfolder = 'd%4.4d%3.3d' % (timeTuple.tm_year, timeTuple.tm_yday)

    fullpath = os.path.join(path, subfolder)

    # Set counter, stays at -1 unless a previous file says otherwise
    setFile = -1

    if os.path.exists(fullpath):
        filesList = sorted([k for k in os.listdir(fullpath) if 'D' in k], key=str.lower)
        if filesList:
            filen = filesList[-1]
            # the filename should have the following format
            # 0 1234 567 89A BCDE (hex)
            # x YYYY DDD SSS .ext
            if isNumber(filen[8:11]):
                # start from the set number of the last file
                setFile = int(filen[8:11])
    else:
        # makedirs: the root folder itself may not exist yet
        os.makedirs(fullpath)

    setFile += 1

    thisFile = '%s%4.4d%3.3d%3.3d%s' % (self.optchar,
                                        timeTuple.tm_year,
                                        timeTuple.tm_yday,
                                        setFile,
                                        ext)

    filename = os.path.join(path, subfolder, thisFile)

    ds = []
    data = []
    dsList = self.dsList

    # Setting HDF5 File
    fp = h5py.File(filename, 'w')

    try:
        # Write metadata
        self.writeMetadata(fp)
        # Write data
        grp = fp.create_group("Data")

        i = 0
        while i < len(dsList):
            dsInfo = dsList[i]

            # Mode 0: one number per block
            if dsInfo['mode'] == 0:
                ds0 = grp.create_dataset(dsInfo['variable'], (1, 1), maxshape=(1, self.blocksPerFile), chunks=True, dtype=numpy.float64)
                ds.append(ds0)
                data.append([])
                i += 1

            # Mode 2: a single table
            elif dsInfo['mode'] == 2:
                # Dataset shapes have to be integers
                nCols = int(dsInfo['shape'])
                grp0 = grp.create_group(dsInfo['variable'])
                ds0 = grp0.create_dataset(dsInfo['dsName'], (1, nCols), data=numpy.zeros((1, nCols)), maxshape=(None, nCols), chunks=True)
                ds.append(ds0)
                data.append([])
                i += 1

            # Mode 1: one group holding dsNumber consecutive descriptors
            elif dsInfo['mode'] == 1:
                grp0 = grp.create_group(dsInfo['variable'])

                for j in range(dsInfo['dsNumber']):
                    dsInfo = dsList[i]
                    tableName = dsInfo['dsName']

                    if dsInfo['nDim'] == 3:
                        nRows = int(dsInfo['shape'][0])
                        nCols = int(dsInfo['shape'][1])
                        ds0 = grp0.create_dataset(tableName, (nRows, nCols, 1), data=numpy.zeros((nRows, nCols, 1)), maxshape=(None, nCols, None), chunks=True)
                    else:
                        nCols = int(dsInfo['shape'])
                        ds0 = grp0.create_dataset(tableName, (1, nCols), data=numpy.zeros((1, nCols)), maxshape=(None, nCols), chunks=True)

                    ds.append(ds0)
                    data.append([])
                    i += 1

            else:
                # Without this the loop would never advance
                raise ValueError("Unknown mode %r for variable %r" % (dsInfo['mode'], dsInfo['variable']))

        fp.flush()
    finally:
        fp.close()

    # Saving variables
    print('Writing the file: %s' % filename)
    self.filename = filename
    self.ds = ds
    self.data = data
    self.firsttime = True
    self.blockIndex = 0
    return
913
def setNextFile1(self):
    '''
    Previous implementation of setNextFile, driven by self.arrayDim and
    self.mode instead of self.dsList.

    Creates the next data file, named <optchar>YYYYDDDSSS<ext> inside the
    folder of the current day, writes the metadata and creates the datasets
    ('param<N>' for mode 1, 'table0' for mode 2). The file is closed before
    returning.

    NOTE(review): this method reads self.arrayDim, whose assignment in
    setup() is commented out in this revision -- confirm it is still
    reachable before relying on it.

    Affected:
        self.nDatas, self.nDims, self.nDimsForDs, self.filename, self.ds,
        self.data, self.firsttime, self.blockIndex
    '''
    ext = self.ext
    path = self.path
    mode = self.mode

    timeTuple = time.localtime(self.dataOut.utctime)
    subfolder = 'd%4.4d%3.3d' % (timeTuple.tm_year, timeTuple.tm_yday)

    fullpath = os.path.join(path, subfolder)

    # Set counter, stays at -1 unless a previous file says otherwise
    setFile = -1

    if os.path.exists(fullpath):
        filesList = sorted([k for k in os.listdir(fullpath) if 'D' in k], key=str.lower)
        if filesList:
            filen = filesList[-1]
            # the filename should have the following format
            # 0 1234 567 89A BCDE (hex)
            # x YYYY DDD SSS .ext
            if isNumber(filen[8:11]):
                # start from the set number of the last file
                setFile = int(filen[8:11])
    else:
        # makedirs: the root folder itself may not exist yet
        os.makedirs(fullpath)

    setFile += 1

    thisFile = '%s%4.4d%3.3d%3.3d%s' % (self.optchar,
                                        timeTuple.tm_year,
                                        timeTuple.tm_yday,
                                        setFile,
                                        ext)

    filename = os.path.join(path, subfolder, thisFile)

    ds = []
    data = []
    nDimsForDs = []

    nDatas = numpy.zeros(len(self.dataList))
    nDims = self.arrayDim[:, 0]

    nDim1 = self.arrayDim[:, 2]
    nDim0 = self.arrayDim[:, 3]

    # Setting HDF5 File
    fp = h5py.File(filename, 'w')

    try:
        # Write metadata
        self.writeMetadata(fp)

        grp = fp.create_group("Data")

        for i in range(len(self.dataList)):

            # One-dimension data
            if nDims[i] == 1:
                ds0 = grp.create_dataset(self.dataList[i], (1, 1), maxshape=(1, self.blocksPerFile), chunks=True, dtype=numpy.float64)
                ds.append(ds0)
                data.append([])
                nDimsForDs.append(nDims[i])
                continue

            # Parameters mode
            if mode[i] == 1:
                strMode = "param"
                nDatas[i] = self.arrayDim[i, 2]

            # Meteors mode
            elif mode[i] == 2:
                strMode = "table"
                nDatas[i] = 1

            grp0 = grp.create_group(self.dataList[i])

            # arrayDim holds floats while dataset shapes have to be integers
            nRows = int(nDim1[i])
            nCols = int(nDim0[i])

            # For any other mode nDatas[i] stays 0 and the group is left empty
            for j in range(int(nDatas[i])):
                tableName = strMode + str(j)

                if nDims[i] == 3:
                    ds0 = grp0.create_dataset(tableName, (nRows, nCols, 1), data=numpy.zeros((nRows, nCols, 1)), maxshape=(None, nCols, None), chunks=True)
                else:
                    ds0 = grp0.create_dataset(tableName, (1, nCols), data=numpy.zeros((1, nCols)), maxshape=(None, nCols), chunks=True)

                ds.append(ds0)
                data.append([])
                nDimsForDs.append(nDims[i])

        fp.flush()
    finally:
        fp.close()

    self.nDatas = nDatas
    self.nDims = nDims
    self.nDimsForDs = nDimsForDs
    # Saving variables
    print('Writing the file: %s' % filename)
    self.filename = filename
    self.ds = ds
    self.data = data
    self.firsttime = True
    self.blockIndex = 0
    return
896 1041
def putData(self):
    '''
    Stores the current block, starting a new file first when the current
    one is full or the day of the data has changed.
    '''
    fileIsFull = self.blockIndex == self.blocksPerFile

    # dateFlag() is only consulted when the file is not full, as it updates
    # self.currentDay as a side effect
    if fileIsFull or self.dateFlag():
        self.setNextFile()

    self.readBlock()
    self.setBlock()     # Prepare data to be written
    self.writeBlock()   # Write data

    return
908 1053
def readBlock(self):
    '''
    Reopens the current file for updating and refreshes the dataset
    handles, in the order given by self.dsList.

    Affected:
        self.fp  : file opened in 'r+' mode (left open)
        self.grp : its 'Data' group
        self.ds  : list of datasets, parallel to self.dsList
    '''
    dsList = self.dsList
    ds = self.ds
    # Setting HDF5 File
    fp = h5py.File(self.filename, 'r+')
    grp = fp["Data"]
    ind = 0

    while ind < len(dsList):
        dsInfo = dsList[ind]

        # Mode 0: the dataset hangs directly from the 'Data' group
        if dsInfo['mode'] == 0:
            ds[ind] = grp[dsInfo['variable']]
            ind += 1
        else:
            grp0 = grp[dsInfo['variable']]

            # Mode 2 descriptors carry no 'dsNumber': they describe exactly
            # one table
            for j in range(dsInfo.get('dsNumber', 1)):
                dsInfo = dsList[ind]
                ds[ind] = grp0[dsInfo['dsName']]
                ind += 1

    self.fp = fp
    self.grp = grp
    self.ds = ds

    return
951 1092
def setBlock(self):
    '''
    Fill self.data with the values to be written in the current block.

    self.dsList is walked in order. For each entry the attribute named by
    'variable' is read from self.dataOut and copied into self.data at the
    same index used for the dataset:

        mode 0, mode 2 or nDim 1  -> the value is stored as it is
        nDim 2                    -> one row per dataset (dataAux[j,:])
        nDim 3                    -> one slice per dataset (dataAux[:,j,:])

    In the last two cases the entry accounts for 'dsNumber' consecutive
    positions of self.data.

    Affected:
        self.data

    Raises:
        ValueError: the entry has an unsupported nDim or declares fewer
                    than one dataset (the walk could not advance).
    '''
    dsList = self.dsList
    data = self.data
    ind = 0

    while ind < len(dsList):
        dsInfo = dsList[ind]
        dataAux = getattr(self.dataOut, dsInfo['variable'])

        mode = dsInfo['mode']
        nDim = dsInfo['nDim']

        if mode == 0 or mode == 2 or nDim == 1:
            data[ind] = dataAux
            ind += 1
            continue

        if nDim not in (2, 3):
            raise ValueError("Unsupported nDim (%s) for variable '%s'" % (nDim, dsInfo['variable']))

        dsNumber = dsInfo['dsNumber']

        # Without this check the loop would never advance
        if dsNumber < 1:
            raise ValueError("Invalid dsNumber (%s) for variable '%s'" % (dsNumber, dsInfo['variable']))

        for j in range(dsNumber):
            if nDim == 2:
                data[ind] = dataAux[j,:]
            else:
                data[ind] = dataAux[:,j,:]
            ind += 1

    self.data = data
    return
989 1129
def writeBlock(self):
    '''
    Saves the block in the HDF5 file

    Every dataset handle in self.ds receives the matching entry of
    self.data. How the dataset grows depends on the 'mode' and 'nDim'
    fields of the matching self.dsList entry:

        first block  -> the dataset is filled as a whole; 3-D arrays get a
                        trailing axis of length 1 and the dataset is
                        resized to that shape
        mode 2       -> rows of the block are appended (axis 0)
        mode 0       -> one column is appended (axis 1)
        nDim 1 or 2  -> one row is appended (axis 0)
        nDim 3       -> one slice is appended along the last axis

    Entries matching none of the cases above are not written after the
    first block.

    On success self.firsttime is cleared, self.blockIndex is incremented
    and the file is flushed. The file (self.fp) is always closed, also
    when a write fails, so that it can be reopened for the next block.

    Affected:
        self.data, self.firsttime, self.blockIndex
    '''
    dsList = self.dsList

    try:
        for i, ds in enumerate(self.ds):
            dsInfo = dsList[i]
            nDim = dsInfo['nDim']
            mode = dsInfo['mode']

            # First time
            if self.firsttime:
                if isinstance(self.data[i], numpy.ndarray) and nDim == 3:
                    shape = self.data[i].shape
                    self.data[i] = self.data[i].reshape((shape[0], shape[1], 1))
                    ds.resize(self.data[i].shape)

                ds[:] = self.data[i]
                continue

            # From second time
            block = self.data[i]

            # Meteors!
            if mode == 2:
                nRows = ds.shape[0]
                ds.resize((nRows + block.shape[0], ds.shape[1]))
                ds[nRows:,:] = block
            # No dimension
            elif mode == 0:
                ds.resize((ds.shape[0], ds.shape[1] + 1))
                ds[0,-1] = block
            # One dimension
            elif nDim == 1:
                ds.resize((ds.shape[0] + 1, ds.shape[1]))
                ds[-1,:] = block
            # Two dimension
            elif nDim == 2:
                ds.resize((ds.shape[0] + 1, ds.shape[1]))
                ds[self.blockIndex,:] = block
            # Three dimensions
            elif nDim == 3:
                ds.resize((ds.shape[0], ds.shape[1], ds.shape[2] + 1))
                ds[:,:,-1] = block

        self.firsttime = False
        self.blockIndex += 1

        self.fp.flush()
    finally:
        #Close to save changes
        self.fp.close()

    return
1038 1185
def run(self, dataOut, **kwargs):
    '''
    Entry point of the operation: writes one block per call.

    On the first call the writer is configured with
    self.setup(dataOut, **kwargs). If setup returns a false value nothing
    is written and the writer stays unconfigured, so setup is tried again
    on the next call. Otherwise the first output file is created with
    self.setNextFile().

    Inputs:
        dataOut : object forwarded to self.setup
        kwargs  : keyword arguments forwarded to self.setup
    '''
    if not self.isConfig:
        # Nothing can be written until setup succeeds
        if not self.setup(dataOut, **kwargs):
            return

        self.isConfig = True
        self.setNextFile()

    self.putData()
    return
1053 1200
1054 1201
General Comments 0
You need to be logged in to leave comments. Login now