##// END OF EJS Templates
Bug fixes to HDF5 Writer Unit
Julio Valdez -
r820:e3a794954261
parent child
Show More
@@ -1,1201 +1,1067
1 1 import numpy
2 2 import time
3 3 import os
4 4 import h5py
5 5 import re
6 6 import datetime
7 7
8 8 from schainpy.model.data.jrodata import *
9 9 from schainpy.model.proc.jroproc_base import ProcessingUnit, Operation
10 10 # from jroIO_base import *
11 11 from schainpy.model.io.jroIO_base import *
12 12 import schainpy
13 13
14 14
15 15 class HDF5Reader(ProcessingUnit):
16 16 '''
17 17 Reads HDF5 format files
18 18
19 19 path
20 20
21 21 startDate
22 22
23 23 endDate
24 24
25 25 startTime
26 26
27 27 endTime
28 28 '''
29 29
30 30 ext = ".hdf5"
31 31
32 32 optchar = "D"
33 33
34 34 timezone = None
35 35
36 36 startTime = None
37 37
38 38 endTime = None
39 39
40 40 fileIndex = None
41 41
42 42 utcList = None #To select data in the utctime list
43 43
44 44 blockList = None #List to blocks to be read from the file
45 45
46 46 blocksPerFile = None #Number of blocks to be read
47 47
48 48 blockIndex = None
49 49
50 50 path = None
51 51
52 52 #List of Files
53 53
54 54 filenameList = None
55 55
56 56 datetimeList = None
57 57
58 58 #Hdf5 File
59 59
60 60 listMetaname = None
61 61
62 62 listMeta = None
63 63
64 64 listDataname = None
65 65
66 66 listData = None
67 67
68 68 listShapes = None
69 69
70 70 fp = None
71 71
72 72 #dataOut reconstruction
73 73
74 74 dataOut = None
75 75
76 76
def __init__(self):
    '''
    Creates the reader with an empty Parameters object as output container.

    Affected:
        self.dataOut
    '''
    self.dataOut = Parameters()
80 80
def setup(self, **kwargs):
    '''
    Configures the reader: searches the files to be read, loads the
    metadata of the first one and opens the first file.

    Inputs (all taken from kwargs):
        path        : folder where the files are searched
        startDate   : first date of the selected range
        endDate     : last date of the selected range
        startTime   : initial time of the selected range
        endTime     : final time of the selected range
        walk        : forwarded to the file search
        ext         : file extension, optional ('.hdf5' by default)

    Affected:
        self.fileIndex
        self.startTime
        self.endTime

    The process is terminated with sys.exit(-1) when no file is found.
    '''
    # Local import: this module never imports sys explicitly, it only
    # reaches it (if at all) through the star imports.
    import sys

    path = kwargs['path']
    startDate = kwargs['startDate']
    endDate = kwargs['endDate']
    startTime = kwargs['startTime']
    endTime = kwargs['endTime']
    walk = kwargs['walk']
    # dict.get replaces has_key, which only exists in Python 2
    ext = kwargs.get('ext', '.hdf5')

    print("[Reading] Searching files in offline mode ...")
    pathList, filenameList = self.__searchFilesOffLine(path, startDate=startDate, endDate=endDate,
                                                       startTime=startTime, endTime=endTime,
                                                       ext=ext, walk=walk)

    # The search returns (None, None) when nothing matches
    if not(filenameList):
        print("There is no files into the folder: %s"%(path))
        sys.exit(-1)

    # -1 so that the first call to __setNextFileOffline selects file 0
    self.fileIndex = -1
    self.startTime = startTime
    self.endTime = endTime

    self.__readMetadata()

    self.__setNextFileOffline()

    return
112 112
def __searchFilesOffLine(self,
                         path,
                         startDate=None,
                         endDate=None,
                         startTime=datetime.time(0,0,0),
                         endTime=datetime.time(23,59,59),
                         ext='.hdf5',
                         walk=True):
    '''
    Searches the files with extension "ext" that hold data inside the
    selected date and time range.

    Inputs:
        path        : folder where the files are searched
        startDate   : first date of the selected range
        endDate     : last date of the selected range
        startTime   : initial time of the selected range
        endTime     : final time of the selected range
        ext         : extension of the files
        walk        : forwarded to JRODataReader.findDatafiles

    Affected:
        self.filenameList
        self.datetimeList

    Return:
        (pathList, filenameList) or (None, None) when no file was found
    '''
    # Local import: glob is not imported explicitly by this module
    import glob

    expLabel = ''
    self.filenameList = []
    self.datetimeList = []

    JRODataObj = JRODataReader()
    dateList, pathList = JRODataObj.findDatafiles(path, startDate, endDate, expLabel, ext, walk, include_path=True)

    if dateList == []:
        print("[Reading] No *%s files in %s from %s to %s)"%(ext, path,
                                                            datetime.datetime.combine(startDate,startTime).ctime(),
                                                            datetime.datetime.combine(endDate,endTime).ctime()))

        return None, None

    if len(dateList) > 1:
        print("[Reading] %d days were found in date range: %s - %s" %(len(dateList), startDate, endDate))
    else:
        print("[Reading] data was found for the date %s" %(dateList[0]))

    filenameList = []
    datetimeList = []

    for thisPath in pathList:

        fileList = glob.glob1(thisPath, "*%s" %ext)
        fileList.sort()

        # "thisFile" instead of "file" to avoid shadowing the builtin
        for thisFile in fileList:

            filename = os.path.join(thisPath, thisFile)

            if not isFileInDateRange(filename, startDate, endDate):
                continue

            # Datetime of the first block, or None when out of the time range
            thisDatetime = self.__isFileInTimeRange(filename, startDate, endDate, startTime, endTime)

            if not(thisDatetime):
                continue

            filenameList.append(filename)
            datetimeList.append(thisDatetime)

    if not(filenameList):
        print("[Reading] Any file was found int time range %s - %s" %(datetime.datetime.combine(startDate,startTime).ctime(), datetime.datetime.combine(endDate,endTime).ctime()))
        return None, None

    print("[Reading] %d file(s) was(were) found in time range: %s - %s" %(len(filenameList), startTime, endTime))
    print("")

    for i in range(len(filenameList)):
        print("[Reading] %s -> [%s]" %(filenameList[i], datetimeList[i].ctime()))

    self.filenameList = filenameList
    self.datetimeList = datetimeList

    return pathList, filenameList
183 183
def __isFileInTimeRange(self,filename, startDate, endDate, startTime, endTime):

    """
    Checks whether the data file has blocks inside the selected time range.

    Inputs:
        filename    :    full name of the HDF5 data file

        startDate   :    first date of the selected range (datetime.date)

        endDate     :    last date of the selected range (datetime.date)

        startTime   :    initial time of the selected range (datetime.time)

        endTime     :    final time of the selected range (datetime.time)

    Return:
        datetime.datetime of the first block of the file when the file has
        data inside the range, otherwise None.

    Exceptions:
        IOError when the file can not be opened.
        KeyError (raised by h5py) when 'Data' or 'utctime' is missing.

    NOTE(review): the block times are converted with fromtimestamp (local
    time) while the limits are computed as if they were UTC - confirm the
    intended time zone.
    """

    try:
        fp = h5py.File(filename,'r')
    except IOError:
        # Local import: traceback is not imported explicitly by this module
        import traceback
        traceback.print_exc()
        raise IOError("The file %s can't be opened" %(filename))

    # The file is always released, even when one of the keys is missing
    try:
        # dataset[()] replaces the deprecated Dataset.value
        thisUtcTime = fp['Data']['utctime'][()][0]
    finally:
        fp.close()

    thisDatetime = datetime.datetime.fromtimestamp(thisUtcTime[0])
    thisDate = thisDatetime.date()

    startUtcTime = (datetime.datetime.combine(thisDate,startTime)- datetime.datetime(1970, 1, 1)).total_seconds()
    endUtcTime = (datetime.datetime.combine(thisDate,endTime)- datetime.datetime(1970, 1, 1)).total_seconds()

    #General case
    #           o>>>>>>>>>>>>>><<<<<<<<<<<<<<o
    #-----------o----------------------------o-----------
    #       startTime                     endTime

    if endTime >= startTime:
        thisUtcLog = numpy.logical_and(thisUtcTime > startUtcTime, thisUtcTime < endUtcTime)
        if numpy.any(thisUtcLog):   #If there is one block between the hours mentioned
            return thisDatetime
        return None

    #If endTime < startTime then endTime belongs to the next day
    #<<<<<<<<<<<o                            o>>>>>>>>>>>
    #-----------o----------------------------o-----------
    #        endTime                    startTime

    if (thisDate == startDate) and numpy.all(thisUtcTime < startUtcTime):
        return None

    if (thisDate == endDate) and numpy.all(thisUtcTime > endUtcTime):
        return None

    # Every block falls in the gap between endTime and startTime
    if numpy.all(thisUtcTime < startUtcTime) and numpy.all(thisUtcTime > endUtcTime):
        return None

    return thisDatetime
258 258
def __setNextFileOffline(self):
    '''
    Opens the next file of self.filenameList and loads its selected blocks.

    Affected:
        self.fileIndex
        self.filename
        self.fp
        self.blockIndex
        (and whatever __setBlockList and __readData set)

    Return:
        1 when a new file was set, 0 when there are no more files
    '''
    self.fileIndex += 1
    idFile = self.fileIndex

    # Release the previous file: __readData already copied its blocks to
    # numpy arrays, so the handle is no longer needed.
    if self.fp is not None:
        self.fp.close()
        self.fp = None

    if not(idFile < len(self.filenameList)):
        print("No more Files")
        return 0

    filename = self.filenameList[idFile]

    filePointer = h5py.File(filename,'r')

    self.filename = filename

    self.fp = filePointer

    print("Setting the file: %s"%self.filename)

    self.__setBlockList()
    self.__readData()
    self.blockIndex = 0
    return 1
285 285
def __setBlockList(self):
    '''
    Selects the blocks of the current file within the times defined

    Inputs:
        self.fp
        self.startTime
        self.endTime
        self.timezone

    Affected:
        self.blockList      : indexes of the selected blocks
        self.blocksPerFile  : number of selected blocks

    '''
    fp = self.fp
    startTime = self.startTime
    endTime = self.endTime

    grp = fp['Data']
    # builtin float replaces the numpy.float alias (removed from NumPy);
    # dataset[()] replaces the deprecated Dataset.value
    thisUtcTime = grp['utctime'][()].astype(float)[0]

    if self.timezone == 'lt':
        # NOTE(review): fixed 5 hour offset - presumably UTC-5, confirm
        thisUtcTime -= 5*3600

    thisDatetime = datetime.datetime.fromtimestamp(thisUtcTime[0])
    thisDate = thisDatetime.date()

    startUtcTime = (datetime.datetime.combine(thisDate,startTime) - datetime.datetime(1970, 1, 1)).total_seconds()
    endUtcTime = (datetime.datetime.combine(thisDate,endTime) - datetime.datetime(1970, 1, 1)).total_seconds()

    if endTime >= startTime:
        selected = numpy.logical_and(thisUtcTime >= startUtcTime, thisUtcTime < endUtcTime)
    else:
        # endTime belongs to the next day (same convention used by
        # __isFileInTimeRange): keep what is after startTime or before endTime
        selected = numpy.logical_or(thisUtcTime >= startUtcTime, thisUtcTime < endUtcTime)

    ind = numpy.where(selected)[0]

    self.blockList = ind
    self.blocksPerFile = len(ind)

    return
321 321
def __readMetadata(self):
    '''
    Reads the 'Metadata' group of the first file of self.filenameList

    Affected:
        self.listShapes     : {array name: [nDimensions, dim2, dim1, dim0, mode]}
        self.listMetaname   : names of the metadata datasets
        self.listMeta       : values of the metadata datasets

    '''
    filename = self.filenameList[0]

    # Defined up front so a file without 'array dimensions' gives an
    # empty dictionary instead of an unbound local
    listShapes = {}
    listMetaname = []
    listMetadata = []

    fp = h5py.File(filename,'r')

    # The file is closed even if one of the datasets can not be read
    try:
        gp = fp['Metadata']

        for name in gp.keys():

            if name=='array dimensions':
                table = gp[name][:]
                listShapes = {}
                for shapes in table:
                    key = shapes[0]
                    # Under Python 3 the 'S20' field comes back as bytes,
                    # while the group names used as keys later are str
                    if not isinstance(key, str):
                        key = key.decode()
                    listShapes[key] = numpy.array([shapes[1],shapes[2],shapes[3],shapes[4],shapes[5]])
            else:
                # dataset[()] replaces the deprecated Dataset.value
                data = gp[name][()]
                listMetaname.append(name)
                listMetadata.append(data)
    finally:
        fp.close()

    self.listShapes = listShapes
    self.listMetaname = listMetaname
    self.listMeta = listMetadata

    return
375 375
def __readData(self):
    '''
    Loads every entry of the 'Data' group of the current file

    Inputs:
        self.fp
        self.listShapes

    Affected:
        self.listDataname   : names of the entries
        self.listData       : arrays built by __setDataArray, same order
    '''
    dataGroup = self.fp['Data']
    names = []
    arrays = []

    for entry in dataGroup.items():
        key = entry[0]
        names.append(key)
        arrays.append(self.__setDataArray(dataGroup[key], self.listShapes[key]))

    self.listDataname = names
    self.listData = arrays
    return
391 391
def __setDataArray(self, dataset, shapes):
    '''
    Builds the array of one variable keeping only the selected blocks

    Inputs:
        dataset : entry of the 'Data' group (a dataset for scalars, a
                  group of tables otherwise)
        shapes  : [nDimensions, dim2, dim1, dim0, mode] of the variable
        self.blockList
        self.blocksPerFile

    Return:
        numpy array; the first axis runs over the selected blocks, except
        for mode 2 (meteors) where the selected rows of the table are returned
    '''
    nDims = shapes[0]

    nDim2 = shapes[1]      #Dimension 0

    nDim1 = shapes[2]      #Dimension 1, number of Points or Parameters

    nDim0 = shapes[3]      #Dimension 2, number of samples or ranges

    mode = shapes[4]        #Mode of storing

    blockList = self.blockList

    blocksPerFile = self.blocksPerFile

    #------- Meteors: everything concatenated in one table ---------
    if mode == 2:
        arrayData = dataset['table0'][()]
        #Selecting part of the dataset
        utctime = arrayData[:,0]
        u, indices = numpy.unique(utctime, return_index=True)

        if blockList.size != indices.size:
            if blockList.size == 0:
                return arrayData[0:0,:]
            indMin = indices[blockList[0]]
            nextBlock = blockList[-1] + 1
            # When the last block is selected there is no following block
            # to take the limit from: go up to the end of the table
            if nextBlock < indices.size:
                indMax = indices[nextBlock]
            else:
                indMax = arrayData.shape[0]
            arrayData = arrayData[indMin:indMax,:]
        return arrayData

    isGroup = isinstance(dataset, h5py.Group)

    #------- Scalars: one value per block, dataset of shape (1, nBlocks) ---
    # nDims is 0 in files written by HDF5Writer.setup as it is now, the
    # value 1 is kept for files written when scalars were declared as 1
    if not isGroup:
        return dataset[()].astype(float)[0][blockList]

    if mode != 1:
        raise ValueError("Unsupported storing mode: %s" %(mode))

    # HDF5Writer.setNextFile names the tables 'tableN'; 'paramN' is kept
    # for files that use the former name
    if 'table0' in dataset:
        strds = 'table'
    else:
        strds = 'param'

    #------- One dimension: table0 has one row per block ---------------
    if nDims == 1:
        arrayData = dataset[strds + '0'][()][blockList,:]

    #------- Two dimensions: one table per row, one table row per block ---
    elif nDims == 2:
        arrayData = numpy.zeros((blocksPerFile,nDim1,nDim0))

        for i in range(nDim1):
            data = dataset[strds + str(i)][()]
            arrayData[:,i,:] = data[blockList,:]

    #------- Three dimensions ---------
    else:
        arrayData = numpy.zeros((blocksPerFile,nDim2,nDim1,nDim0))

        for i in range(nDim1):
            data = dataset[strds + str(i)][()]
            # HDF5Writer.writeBlock appends the blocks along the last axis
            # of each table, so the blocks are selected there and moved to
            # the front
            arrayData[:,:,i,:] = data[:,:,blockList].transpose(2,0,1)

    return arrayData
458 458
def __setDataOut(self):
    '''
    Copies the metadata and the current block of every variable to dataOut

    Inputs:
        self.listMeta, self.listMetaname
        self.listData, self.listDataname
        self.listShapes
        self.blockIndex

    Affected:
        self.dataOut
    '''
    listShapes = self.listShapes
    blockIndex = self.blockIndex

    for metaName, metaValue in zip(self.listMetaname, self.listMeta):
        setattr(self.dataOut, metaName, metaValue)

    for dataName, dataArray in zip(self.listDataname, self.listData):
        nShapes = listShapes[dataName][0]
        mode = listShapes[dataName][4]
        if nShapes == 1:
            setattr(self.dataOut, dataName, dataArray[blockIndex])
        elif nShapes > 1:
            setattr(self.dataOut, dataName, dataArray[blockIndex,:])
        #Mode 0: scalars, declared with 0 dimensions by HDF5Writer.setup.
        #Without this branch they were never copied to dataOut
        elif mode == 0:
            setattr(self.dataOut, dataName, dataArray[blockIndex])
        #Mode Meteors
        elif mode == 2:
            selectedData = self.__selectDataMode2(dataArray, blockIndex)
            setattr(self.dataOut, dataName, selectedData)
    return
484 484
def __selectDataMode2(self, data, blockIndex):
    '''
    Selects the rows of a mode 2 table that belong to one block

    Inputs:
        data        : 2-D table, column 0 is used to group the rows
        blockIndex  : position of the wanted value among the sorted
                      unique values of column 0

    Return:
        the rows of data whose column 0 has that value
    '''
    timeColumn = data[:,0]
    # For every row, position of its value among the sorted unique values
    groupOfRow = numpy.unique(timeColumn, return_inverse=True)[1]
    rows = numpy.where(groupOfRow == blockIndex)[0]

    return data[rows,:]
492 492
def getData(self):
    '''
    Puts the next block in self.dataOut, moving to the next file when the
    current one has no more blocks

    Affected:
        self.dataOut
        self.blockIndex

    Return:
        0 when there are no more files (dataOut.flagNoData is set to True),
        None otherwise
    '''
    # "while" instead of "if": a file without blocks inside the selected
    # time range has blocksPerFile == 0 and has to be skipped as well
    while self.blockIndex==self.blocksPerFile:
        if not( self.__setNextFileOffline() ):
            self.dataOut.flagNoData = True
            return 0

    self.__setDataOut()
    self.dataOut.flagNoData = False

    self.blockIndex += 1

    return
515 515
def run(self, **kwargs):
    '''
    Entry point of the unit: configures the reader on the first call
    (kwargs are forwarded to setup) and then reads one block
    '''
    alreadyConfigured = self.isConfig

    if not alreadyConfigured:
        self.setup(**kwargs)
        self.isConfig = True

    self.getData()
526 526
527 527 class HDF5Writer(Operation):
528 528 '''
529 529 HDF5 Writer, stores parameters data in HDF5 format files
530 530
531 531 path: path where the files will be stored
532 532
533 533 blocksPerFile: number of blocks that will be saved in per HDF5 format file
534 534
535 535 mode: selects the data stacking mode: '0' channels, '1' parameters, '3' table (for meteors)
536 536
537 537 metadataList: list of attributes that will be stored as metadata
538 538
539 539 dataList: list of attributes that will be stores as data
540 540
541 541 '''
542 542
543 543
544 544 ext = ".hdf5"
545 545
546 546 optchar = "D"
547 547
548 548 metaoptchar = "M"
549 549
550 550 metaFile = None
551 551
552 552 filename = None
553 553
554 554 path = None
555 555
556 556 setFile = None
557 557
558 558 fp = None
559 559
560 560 grp = None
561 561
562 562 ds = None
563 563
564 564 firsttime = True
565 565
566 566 #Configurations
567 567
568 568 blocksPerFile = None
569 569
570 570 blockIndex = None
571 571
572 572 dataOut = None
573 573
574 574 #Data Arrays
575 575
576 576 dataList = None
577 577
578 578 metadataList = None
579 579
580 580 # arrayDim = None
581 581
582 dsList = None #List of dictionaries
582 dsList = None #List of dictionaries with dataset properties
583 583
584 584 tableDim = None
585 585
586 586 # dtype = [('arrayName', 'S20'),('nChannels', 'i'), ('nPoints', 'i'), ('nSamples', 'i'),('mode', 'b')]
587 587
588 588 dtype = [('arrayName', 'S20'),('nDimensions', 'i'), ('dim2', 'i'), ('dim1', 'i'),('dim0', 'i'),('mode', 'b')]
589
590 mode = None
591
592 nDatas = None #Number of datasets to be stored per array
593
594 nDims = None #Number Dimensions in each dataset
595
596 nDimsForDs = None
597
589
598 590 currentDay = None
599 591
def __init__(self):
    '''
    Initializes the base Operation and marks the writer as not configured
    '''
    Operation.__init__(self)
    self.isConfig = False
605 597
606 598 def setup(self, dataOut, **kwargs):
607 599
608 600 self.path = kwargs['path']
609 601
610 602 if kwargs.has_key('blocksPerFile'):
611 603 self.blocksPerFile = kwargs['blocksPerFile']
612 604 else:
613 605 self.blocksPerFile = 10
614 606
615 607 self.metadataList = kwargs['metadataList']
616 608 self.dataList = kwargs['dataList']
617 609 self.dataOut = dataOut
618 610
619 611 if kwargs.has_key('mode'):
620 612 mode = kwargs['mode']
621 613
622 614 if type(mode) == int:
623 615 mode = numpy.zeros(len(self.dataList)) + mode
624 616 else:
625 617 mode = numpy.ones(len(self.dataList))
626 618
627 619 self.mode = mode
628 620
629 621 arrayDim = numpy.zeros((len(self.dataList),5))
630 622
631 623 #Table dimensions
632 624 dtype0 = self.dtype
633 625 tableList = []
634 626
635 627 #Dictionary and list of tables
636 628 dsList = []
637 629
638 630 for i in range(len(self.dataList)):
639 631 dsDict = {}
640 632 dataAux = getattr(self.dataOut, self.dataList[i])
641 633 dsDict['variable'] = self.dataList[i]
642 634 #--------------------- Conditionals ------------------------
643 635 #There is no data
644 636 if dataAux == None:
645 637 return 0
646 638
647 639 #Not array, just a number
648 640 #Mode 0
649 641 if type(dataAux)==float or type(dataAux)==int:
650 642 dsDict['mode'] = 0
651 643 dsDict['nDim'] = 0
652 arrayDim[i,0] = 1
644 arrayDim[i,0] = 0
653 645 dsList.append(dsDict)
654 646
655 647 #Mode 2: meteors
656 648 elif mode[i] == 2:
657 649 # dsDict['nDim'] = 0
658 650 dsDict['dsName'] = 'table0'
659 651 dsDict['mode'] = 2 # Mode meteors
660 652 dsDict['shape'] = dataAux.shape[-1]
661 653 dsDict['nDim'] = 0
654 dsDict['dsNumber'] = 1
662 655
663 656 arrayDim[i,3] = dataAux.shape[-1]
664 657 arrayDim[i,4] = mode[i] #Mode the data was stored
665 658
666 659 dsList.append(dsDict)
667 660
668 661 #Mode 1
669 662 else:
670 663 arrayDim0 = dataAux.shape #Data dimensions
671 664 arrayDim[i,0] = len(arrayDim0) #Number of array dimensions
672 665 arrayDim[i,4] = mode[i] #Mode the data was stored
673 666
674 667 strtable = 'table'
675 668 dsDict['mode'] = 1 # Mode parameters
676 669
677 670 # Three-dimension arrays
678 671 if len(arrayDim0) == 3:
679 672 arrayDim[i,1:-1] = numpy.array(arrayDim0)
680 673 nTables = int(arrayDim[i,2])
681 674 dsDict['dsNumber'] = nTables
682 675 dsDict['shape'] = arrayDim[i,2:4]
683 676 dsDict['nDim'] = 3
684 677
685 678 for j in range(nTables):
686 679 dsDict = dsDict.copy()
687 680 dsDict['dsName'] = strtable + str(j)
688 681 dsList.append(dsDict)
689 682
690 683 # Two-dimension arrays
691 684 elif len(arrayDim0) == 2:
692 685 arrayDim[i,2:-1] = numpy.array(arrayDim0)
693 686 nTables = int(arrayDim[i,2])
694 687 dsDict['dsNumber'] = nTables
695 688 dsDict['shape'] = arrayDim[i,3]
696 689 dsDict['nDim'] = 2
697 690
698 691 for j in range(nTables):
699 692 dsDict = dsDict.copy()
700 693 dsDict['dsName'] = strtable + str(j)
701 694 dsList.append(dsDict)
702 695
703 696 # One-dimension arrays
704 697 elif len(arrayDim0) == 1:
705 698 arrayDim[i,3] = arrayDim0[0]
706 699 dsDict['shape'] = arrayDim0[0]
707 700 dsDict['dsNumber'] = 1
708 701 dsDict['dsName'] = strtable + str(0)
709 702 dsDict['nDim'] = 1
710 703 dsList.append(dsDict)
711 704
712 705 table = numpy.array((self.dataList[i],) + tuple(arrayDim[i,:]),dtype = dtype0)
713 706 tableList.append(table)
714 707
715 708 # self.arrayDim = arrayDim
716 709 self.dsList = dsList
717 710 self.tableDim = numpy.array(tableList, dtype = dtype0)
718 711 self.blockIndex = 0
719 712
720 713 timeTuple = time.localtime(dataOut.utctime)
721 714 self.currentDay = timeTuple.tm_yday
722 715 return 1
723 716
def putMetadata(self):
    '''
    Creates the metadata file, writes the metadata in it and closes it
    '''
    metaFp = self.createMetadataFile()
    self.writeMetadata(metaFp)
    metaFp.close()
730 723
def createMetadataFile(self):
    '''
    Creates the next metadata file of the day of the current data

    The file is <path>/dYYYYDDD/MYYYYDDDSSS<ext>, where SSS is the set
    number that follows the one of the last metadata file of the folder.

    Inputs:
        self.ext, self.path, self.metaoptchar
        self.dataOut.utctime

    Affected:
        self.metaFile   : name of the file (without folder)

    Return:
        h5py.File opened for writing; the caller has to close it
    '''
    ext = self.ext
    path = self.path

    timeTuple = time.localtime(self.dataOut.utctime)

    # Root folder
    fullpath = os.path.join( path, '' )

    if not( os.path.exists(fullpath) ):
        os.mkdir(fullpath)

    # Folder of the day
    subfolder = 'd%4.4d%3.3d' % (timeTuple.tm_year,timeTuple.tm_yday)
    fullpath = os.path.join( path, subfolder )

    setFile = -1 #set counter, -1 so that the first file gets the set 0

    if not( os.path.exists(fullpath) ):
        os.mkdir(fullpath)
    else:
        # Filtered before checking for emptiness: a folder that only has
        # data files made the former filesList[-1] fail with IndexError
        filesList = [k for k in os.listdir( fullpath ) if 'M' in k]
        filesList = sorted( filesList, key=str.lower )
        if len( filesList ) > 0:
            filen = filesList[-1]
            # the filename should have the following format
            # 0 1234 567 89A BCDE (hex)
            # x YYYY DDD SSS .ext
            if isNumber( filen[8:11] ):
                setFile = int( filen[8:11] ) #continue from the set of the last file

    setFile += 1

    metaName = '%s%4.4d%3.3d%3.3d%s' % (self.metaoptchar,
                                        timeTuple.tm_year,
                                        timeTuple.tm_yday,
                                        setFile,
                                        ext )

    filename = os.path.join( path, subfolder, metaName )
    self.metaFile = metaName
    #Setting HDF5 File
    fp = h5py.File(filename,'w')

    return fp
782 775
def writeMetadata(self, fp):
    '''
    Writes the 'Metadata' group in the file

    Inputs:
        fp  : h5py file opened for writing

    The group holds the 'array dimensions' table (self.tableDim) and one
    dataset per attribute of self.metadataList, read from self.dataOut
    '''
    metaGroup = fp.create_group("Metadata")
    metaGroup.create_dataset('array dimensions', data = self.tableDim, dtype = self.dtype)

    for attrName in self.metadataList:
        attrValue = getattr(self.dataOut, attrName)
        metaGroup.create_dataset(attrName, data=attrValue)
    return
791 784
def dateFlag(self):
    '''
    Tells whether the day of the current data (local time) differs from
    self.currentDay

    Affected:
        self.currentDay : updated when the day changed

    Return:
        True when the day changed, False otherwise
    '''
    dataDay = time.localtime(self.dataOut.utctime).tm_yday

    if dataDay != self.currentDay:
        self.currentDay = dataDay
        return True

    return False
802 795
def setNextFile(self):
    '''
    Creates the next data file of the day of the current data, with its
    metadata and one empty dataset per entry of self.dsList

    The file is <path>/dYYYYDDD/DYYYYDDDSSS<ext>, where SSS is the set
    number that follows the one of the last data file of the folder.

    Inputs:
        self.ext, self.path, self.optchar
        self.dsList
        self.dataOut.utctime

    Affected:
        self.filename
        self.ds         : datasets created (the file is closed on return,
                          readBlock refreshes them before every write)
        self.data       : one empty placeholder per dataset
        self.firsttime
        self.blockIndex
    '''
    ext = self.ext
    path = self.path

    timeTuple = time.localtime(self.dataOut.utctime)
    subfolder = 'd%4.4d%3.3d' % (timeTuple.tm_year,timeTuple.tm_yday)

    fullpath = os.path.join( path, subfolder )

    setFile = -1 #set counter, -1 so that the first file gets the set 0

    if os.path.exists(fullpath):
        filesList = os.listdir( fullpath )
        filesList = [k for k in filesList if 'D' in k]
        if len( filesList ) > 0:
            filesList = sorted( filesList, key=str.lower )
            filen = filesList[-1]
            # the filename should have the following format
            # 0 1234 567 89A BCDE (hex)
            # x YYYY DDD SSS .ext
            if isNumber( filen[8:11] ):
                setFile = int( filen[8:11] ) #continue from the set of the last file
    else:
        os.mkdir(fullpath)

    setFile += 1

    dataName = '%s%4.4d%3.3d%3.3d%s' % (self.optchar,
                                        timeTuple.tm_year,
                                        timeTuple.tm_yday,
                                        setFile,
                                        ext )

    filename = os.path.join( path, subfolder, dataName )

    ds = []
    data = []
    dsList = self.dsList

    #Setting HDF5 File
    fp = h5py.File(filename,'w')

    # The file is closed even when one of the datasets can not be created
    try:
        #write metadata
        self.writeMetadata(fp)
        #Write data
        grp = fp.create_group("Data")

        i = 0
        while i < len(dsList):
            dsInfo = dsList[i]
            #No dimension: one value per block
            if dsInfo['mode'] == 0:
                ds0 = grp.create_dataset(dsInfo['variable'], (1,1), maxshape=(1,self.blocksPerFile) , chunks = True, dtype=numpy.float64)
                ds.append(ds0)
                data.append([])
                i += 1

            #Meteors: one table that grows by rows
            elif dsInfo['mode'] == 2:
                nCols = int(dsInfo['shape'])
                grp0 = grp.create_group(dsInfo['variable'])
                ds0 = grp0.create_dataset(dsInfo['dsName'], (1,nCols), data = numpy.zeros((1,nCols)) , maxshape=(None,nCols), chunks=True)
                ds.append(ds0)
                data.append([])
                i += 1

            #Parameters: the following dsNumber entries are the tables of
            #this variable
            elif dsInfo['mode'] == 1:
                grp0 = grp.create_group(dsInfo['variable'])

                for j in range(dsInfo['dsNumber']):
                    dsInfo = dsList[i]
                    tableName = dsInfo['dsName']
                    shape = dsInfo['shape']

                    # The shapes are cast to int because setup takes them
                    # from a float array
                    if dsInfo['nDim'] == 3:
                        dim0 = int(shape[0])
                        dim1 = int(shape[1])
                        ds0 = grp0.create_dataset(tableName, (dim0,dim1,1) , data = numpy.zeros((dim0,dim1,1)), maxshape = (None,dim1,None), chunks=True)
                    else:
                        dim0 = int(shape)
                        ds0 = grp0.create_dataset(tableName, (1,dim0), data = numpy.zeros((1,dim0)) , maxshape=(None,dim0), chunks=True)

                    ds.append(ds0)
                    data.append([])
                    i += 1

            else:
                # An unknown mode never advanced the index before
                raise ValueError("Unsupported dataset mode: %s" %(dsInfo['mode']))

        fp.flush()
    finally:
        fp.close()

    #Saving variables
    print('Writing the file: %s'%filename)
    self.filename = filename
    self.ds = ds
    self.data = data
    self.firsttime = True
    self.blockIndex = 0
    return
913 906
914 def setNextFile1(self):
915
916 ext = self.ext
917 path = self.path
918 setFile = self.setFile
919 mode = self.mode
920
921 timeTuple = time.localtime(self.dataOut.utctime)
922 subfolder = 'd%4.4d%3.3d' % (timeTuple.tm_year,timeTuple.tm_yday)
923
924 fullpath = os.path.join( path, subfolder )
925
926 if os.path.exists(fullpath):
927 filesList = os.listdir( fullpath )
928 filesList = [k for k in filesList if 'D' in k]
929 if len( filesList ) > 0:
930 filesList = sorted( filesList, key=str.lower )
931 filen = filesList[-1]
932 # el filename debera tener el siguiente formato
933 # 0 1234 567 89A BCDE (hex)
934 # x YYYY DDD SSS .ext
935 if isNumber( filen[8:11] ):
936 setFile = int( filen[8:11] ) #inicializo mi contador de seteo al seteo del ultimo file
937 else:
938 setFile = -1
939 else:
940 setFile = -1 #inicializo mi contador de seteo
941 else:
942 os.mkdir(fullpath)
943 setFile = -1 #inicializo mi contador de seteo
944
945 setFile += 1
946
947 file = '%s%4.4d%3.3d%3.3d%s' % (self.optchar,
948 timeTuple.tm_year,
949 timeTuple.tm_yday,
950 setFile,
951 ext )
952
953 filename = os.path.join( path, subfolder, file )
954
955 #Setting HDF5 File
956 fp = h5py.File(filename,'w')
957
958 #writemetadata
959 self.writeMetadata(fp)
960
961 grp = fp.create_group("Data")
962 # grp.attrs['metadata'] = self.metaFile
963
964 # grp.attrs['blocksPerFile'] = 0
965
966 ds = []
967 data = []
968 nDimsForDs = []
969
970 nDatas = numpy.zeros(len(self.dataList))
971 nDims = self.arrayDim[:,0]
972
973 nDim1 = self.arrayDim[:,2]
974 nDim0 = self.arrayDim[:,3]
975
976 for i in range(len(self.dataList)):
977
978 #One-dimension data
979 if nDims[i]==1:
980 # ds0 = grp.create_dataset(self.dataList[i], (1,1), maxshape=(1,self.blocksPerFile) , chunks = True, dtype='S20')
981 ds0 = grp.create_dataset(self.dataList[i], (1,1), maxshape=(1,self.blocksPerFile) , chunks = True, dtype=numpy.float64)
982 ds.append(ds0)
983 data.append([])
984 nDimsForDs.append(nDims[i])
985 else:
986
987 #Channel mode
988 # if mode[i] == 0:
989 # strMode = "channel"
990 #
991 # #nDatas is the number of arrays per variable
992 # if nDims[i] == 1:
993 # nDatas[i] = self.arrayDim[i,1]
994 # elif nDims[i] == 2:
995 # nDatas[i] = self.arrayDim[i,2]
996
997 #Parameters mode
998 if mode[i] == 1:
999 strMode = "param"
1000 nDatas[i] = self.arrayDim[i,2]
1001
1002 #Meteors mode
1003 elif mode[i] == 2:
1004 strMode = "table"
1005 nDatas[i] = 1
1006
1007 grp0 = grp.create_group(self.dataList[i])
1008
1009 for j in range(int(nDatas[i])):
1010 tableName = strMode + str(j)
1011
1012 if nDims[i] == 3:
1013 ds0 = grp0.create_dataset(tableName, (nDim1[i],nDim0[i],1) , data = numpy.zeros((nDim1[i],nDim0[i],1)) ,maxshape=(None,nDim0[i],None), chunks=True)
1014
1015 else:
1016 ds0 = grp0.create_dataset(tableName, (1,nDim0[i]), data = numpy.zeros((1,nDim0[i])) , maxshape=(None,nDim0[i]), chunks=True)
1017
1018 ds.append(ds0)
1019 data.append([])
1020 nDimsForDs.append(nDims[i])
1021
1022 fp.flush()
1023 fp.close()
1024
1025 self.nDatas = nDatas
1026 self.nDims = nDims
1027 self.nDimsForDs = nDimsForDs
1028 #Saving variables
1029 print 'Writing the file: %s'%filename
1030 self.filename = filename
1031 # self.fp = fp
1032 # self.grp = grp
1033 # self.grp.attrs.modify('nRecords', 1)
1034 self.ds = ds
1035 self.data = data
1036 #
1037 # self.setFile = setFile
1038 self.firsttime = True
1039 self.blockIndex = 0
1040 return
1041
def putData(self):
    '''
    Writes the current block, starting a new file first when the current
    one is full or the day changed
    '''
    # dateFlag is only evaluated when the file is not full yet
    needNewFile = self.blockIndex == self.blocksPerFile or self.dateFlag()

    if needNewFile:
        self.setNextFile()

    self.readBlock()    #Open the file and get its datasets
    self.setBlock()     #Prepare data to be written
    self.writeBlock()   #Write data

    return
1053 918
def readBlock(self):
    '''
    Opens the current file in 'r+' mode and gets its datasets again

    Inputs:
        self.filename
        self.dsList

    Affected:
        self.fp     : file left open, writeBlock closes it
        self.grp    : 'Data' group
        self.ds     : datasets, in the same order as self.dsList
    '''
    dsList = self.dsList
    ds = self.ds
    #Setting HDF5 File
    fp = h5py.File(self.filename,'r+')
    grp = fp["Data"]

    ind = 0
    nEntries = len(dsList)

    while ind < nEntries:
        info = dsList[ind]

        # Mode 0: the dataset hangs directly from 'Data'
        if info['mode'] == 0:
            ds[ind] = grp[info['variable']]
            ind += 1
            continue

        # Other modes: a group per variable with dsNumber tables, described
        # by consecutive entries of dsList
        varGroup = grp[info['variable']]

        for j in range(info['dsNumber']):
            ds[ind] = varGroup[dsList[ind]['dsName']]
            ind += 1

    self.fp = fp
    self.grp = grp
    self.ds = ds

    return
1092 957
def setBlock(self):
    '''
    Takes the current values from self.dataOut and splits them in the
    pieces that go to each dataset

    Inputs:
        self.dsList
        self.dataOut

    Affected:
        self.data   : one piece per entry of self.dsList
    '''
    dsList = self.dsList
    data = self.data
    pos = 0

    while pos < len(dsList):
        info = dsList[pos]
        values = getattr(self.dataOut, info['variable'])

        mode = info['mode']
        nDim = info['nDim']

        # Scalars, meteor tables and 1-D arrays are stored as they come
        if mode in (0, 2) or nDim == 1:
            data[pos] = values
            pos += 1
        # 2-D arrays: one row per dataset
        elif nDim == 2:
            for j in range(info['dsNumber']):
                data[pos] = values[j,:]
                pos += 1
        # 3-D arrays: one slice of the middle axis per dataset
        elif nDim == 3:
            for j in range(info['dsNumber']):
                data[pos] = values[:,j,:]
                pos += 1

    self.data = data
    return
1129 994
def writeBlock(self):
    '''
    Saves the block in the HDF5 file

    Inputs:
        self.dsList
        self.ds         : datasets of the file opened by readBlock
        self.data       : pieces prepared by setBlock
        self.firsttime
        self.blockIndex

    Affected:
        self.firsttime
        self.blockIndex
        self.fp         : flushed and closed
    '''
    dsList = self.dsList

    for i in range(len(self.ds)):
        info = dsList[i]
        nDim = info['nDim']
        mode = info['mode']
        dataset = self.ds[i]

        # First time
        if self.firsttime:
            if type(self.data[i]) == numpy.ndarray:

                if nDim == 3:
                    # A last axis of length 1 is added: the blocks are
                    # stacked along it
                    piece = self.data[i]
                    self.data[i] = piece.reshape((piece.shape[0],piece.shape[1],1))
                    dataset.resize(self.data[i].shape)
                if mode == 2:
                    dataset.resize(self.data[i].shape)
            dataset[:] = self.data[i]
            continue

        # From second time
        piece = self.data[i]
        # Meteors!
        if mode == 2:
            firstNewRow = dataset.shape[0]
            dataset.resize((firstNewRow + piece.shape[0],dataset.shape[1]))
            dataset[firstNewRow:,:] = piece
        # No dimension
        elif mode == 0:
            dataset.resize((dataset.shape[0], dataset.shape[1] + 1))
            dataset[0,-1] = piece
        # One dimension
        elif nDim == 1:
            dataset.resize((dataset.shape[0] + 1, dataset.shape[1]))
            dataset[-1,:] = piece
        # Two dimension
        elif nDim == 2:
            dataset.resize((dataset.shape[0] + 1,dataset.shape[1]))
            dataset[self.blockIndex,:] = piece
        # Three dimensions
        elif nDim == 3:
            dataset.resize((dataset.shape[0],dataset.shape[1],dataset.shape[2]+1))
            dataset[:,:,-1] = piece

    self.firsttime = False
    self.blockIndex += 1

    #Close to save changes
    self.fp.flush()
    self.fp.close()
    return
1185 1051
def run(self, dataOut, **kwargs):
    '''
    Entry point of the operation: on the first call with data it
    configures the writer (kwargs are forwarded to setup) and creates the
    first file, then it writes the current block

    Nothing is written while setup reports that there is no data yet
    '''
    if not self.isConfig:
        configured = self.setup(dataOut, **kwargs)

        if not configured:
            return

        self.isConfig = True
        self.setNextFile()

    self.putData()
    return
1200 1066
1201 1067
General Comments 0
You need to be logged in to leave comments. Login now