##// END OF EJS Templates
Bug fix in Reading Unit
Julio Valdez -
r821:375dfaaf810f
parent child
Show More
@@ -1,1067 +1,1067
1 1 import numpy
2 2 import time
3 3 import os
4 4 import h5py
5 5 import re
6 6 import datetime
7 7
8 8 from schainpy.model.data.jrodata import *
9 9 from schainpy.model.proc.jroproc_base import ProcessingUnit, Operation
10 10 # from jroIO_base import *
11 11 from schainpy.model.io.jroIO_base import *
12 12 import schainpy
13 13
14 14
class HDF5Reader(ProcessingUnit):
    '''
    Reads HDF5 format files

    Keyword arguments read by setup() (forwarded from run()):

        path        : folder where the files are searched

        startDate   : first date of the selected range (datetime.date)

        endDate     : last date of the selected range (datetime.date)

        startTime   : first time of day of the selected range (datetime.time)

        endTime     : last time of day of the selected range (datetime.time)

        walk        : forwarded to JRODataReader.findDatafiles

        ext         : optional file extension, '.hdf5' by default
    '''

    ext = ".hdf5"

    optchar = "D"

    timezone = None

    startTime = None

    endTime = None

    fileIndex = None

    utcList = None          # To select data in the utctime list

    blockList = None        # Indexes of the blocks to be read from the file

    blocksPerFile = None    # Number of blocks to be read

    blockIndex = None

    path = None

    # List of Files

    filenameList = None

    datetimeList = None

    # Hdf5 File

    listMetaname = None

    listMeta = None

    listDataname = None

    listData = None

    listShapes = None

    fp = None

    # dataOut reconstruction

    dataOut = None

    def __init__(self):
        # NOTE(review): ProcessingUnit.__init__ is not called here; run()
        # reads self.isConfig, presumably provided by the base class - confirm.
        self.dataOut = Parameters()
        return

    def setup(self, **kwargs):
        '''
        Searches the files inside the requested date/time range, loads the
        metadata of the first one and opens it.

        Exits the process (sys.exit(-1)) when no file is found.
        '''
        path = kwargs['path']
        startDate = kwargs['startDate']
        endDate = kwargs['endDate']
        startTime = kwargs['startTime']
        endTime = kwargs['endTime']
        walk = kwargs['walk']
        ext = kwargs.get('ext', '.hdf5')

        print("[Reading] Searching files in offline mode ...")
        pathList, filenameList = self.__searchFilesOffLine(path, startDate=startDate, endDate=endDate,
                                                           startTime=startTime, endTime=endTime,
                                                           ext=ext, walk=walk)

        if not filenameList:
            print("There is no files into the folder: %s"%(path))
            # NOTE(review): sys is not imported explicitly by this module;
            # presumably it comes from one of the star-imports - confirm.
            sys.exit(-1)

        self.fileIndex = -1
        self.startTime = startTime
        self.endTime = endTime

        self.__readMetadata()

        self.__setNextFileOffline()

        return

    def __searchFilesOffLine(self,
                             path,
                             startDate=None,
                             endDate=None,
                             startTime=datetime.time(0,0,0),
                             endTime=datetime.time(23,59,59),
                             ext='.hdf5',
                             walk=True):
        '''
        Builds the list of files whose data is inside the selected range.

        The result is also stored in self.filenameList / self.datetimeList.

        Return:
            (pathList, filenameList), or (None, None) when nothing was found.
        '''
        expLabel = ''
        self.filenameList = []
        self.datetimeList = []

        JRODataObj = JRODataReader()
        dateList, pathList = JRODataObj.findDatafiles(path, startDate, endDate, expLabel, ext, walk, include_path=True)

        if not dateList:
            print("[Reading] No *%s files in %s from %s to %s)"%(ext, path,
                                                        datetime.datetime.combine(startDate,startTime).ctime(),
                                                        datetime.datetime.combine(endDate,endTime).ctime()))

            return None, None

        if len(dateList) > 1:
            print("[Reading] %d days were found in date range: %s - %s" %(len(dateList), startDate, endDate))
        else:
            print("[Reading] data was found for the date %s" %(dateList[0]))

        filenameList = []
        datetimeList = []

        for thisPath in pathList:

            fileList = glob.glob1(thisPath, "*%s" %ext)
            fileList.sort()

            for thisFile in fileList:

                filename = os.path.join(thisPath, thisFile)

                if not isFileInDateRange(filename, startDate, endDate):
                    continue

                thisDatetime = self.__isFileInTimeRange(filename, startDate, endDate, startTime, endTime)

                if not thisDatetime:
                    continue

                filenameList.append(filename)
                datetimeList.append(thisDatetime)

        if not filenameList:
            print("[Reading] No file was found in time range %s - %s" %(datetime.datetime.combine(startDate,startTime).ctime(), datetime.datetime.combine(endDate,endTime).ctime()))
            return None, None

        print("[Reading] %d file(s) was(were) found in time range: %s - %s" %(len(filenameList), startTime, endTime))
        print("")

        for thisFilename, thisDatetime in zip(filenameList, datetimeList):
            print("[Reading] %s -> [%s]" %(thisFilename, thisDatetime.ctime()))

        self.filenameList = filenameList
        self.datetimeList = datetimeList

        return pathList, filenameList

    def __isFileInTimeRange(self, filename, startDate, endDate, startTime, endTime):
        """
        Tells whether the data file holds blocks inside the selected time range.

        Inputs:
            filename    : full name of the HDF5 data file

            startDate   : first date of the selected range (datetime.date)

            endDate     : last date of the selected range (datetime.date)

            startTime   : first time of the selected range (datetime.time)

            endTime     : last time of the selected range (datetime.time)

        Return:
            datetime.datetime built from the first 'utctime' value of the file
            when the file has data inside the range, otherwise None.

        Exceptions:
            IOError if the file cannot be opened.
        """
        fp = None
        try:
            try:
                fp = h5py.File(filename,'r')
                grp1 = fp['Data']
            except IOError:
                traceback.print_exc()
                raise IOError("The file %s can't be opened" %(filename))

            # First row of the 'utctime' dataset: one time stamp per block
            thisUtcTime = grp1['utctime'][()][0]
        finally:
            # Always release the handle, even when the file layout is unexpected
            if fp is not None:
                fp.close()

        thisDatetime = datetime.datetime.fromtimestamp(thisUtcTime[0])
        thisDate = thisDatetime.date()

        startUtcTime = (datetime.datetime.combine(thisDate,startTime)- datetime.datetime(1970, 1, 1)).total_seconds()
        endUtcTime = (datetime.datetime.combine(thisDate,endTime)- datetime.datetime(1970, 1, 1)).total_seconds()

        #General case
        #           o>>>>>>>>>>>>>><<<<<<<<<<<<<<o
        #-----------o----------------------------o-----------
        #       startTime                     endTime

        if endTime >= startTime:
            thisUtcLog = numpy.logical_and(thisUtcTime > startUtcTime, thisUtcTime < endUtcTime)
            if numpy.any(thisUtcLog):   # There is at least one block between the selected hours
                return thisDatetime
            return None

        #If endTime < startTime then endTime belongs to the next day
        #<<<<<<<<<<<o                            o>>>>>>>>>>>
        #-----------o----------------------------o-----------
        #        endTime                     startTime

        if (thisDate == startDate) and numpy.all(thisUtcTime < startUtcTime):
            return None

        if (thisDate == endDate) and numpy.all(thisUtcTime > endUtcTime):
            return None

        if numpy.all(thisUtcTime < startUtcTime) and numpy.all(thisUtcTime > endUtcTime):
            return None

        return thisDatetime

    def __setNextFileOffline(self):
        '''
        Opens the next file of self.filenameList and loads its data.

        Return:
            1 when a new file was set, 0 when there are no more files.
        '''
        # The data of the previous file is already in memory (self.listData),
        # so its handle can be released before moving on.
        if self.fp is not None:
            self.fp.close()
            self.fp = None

        self.fileIndex += 1
        idFile = self.fileIndex

        if not(idFile < len(self.filenameList)):
            print("No more Files")
            return 0

        filename = self.filenameList[idFile]

        self.filename = filename
        self.fp = h5py.File(filename,'r')

        print("Setting the file: %s"%self.filename)

        self.__setBlockList()
        self.__readData()
        self.blockIndex = 0
        return 1

    def __setBlockList(self):
        '''
        Selects the data within the times defined

        Reads:
            self.fp
            self.startTime
            self.endTime

        Sets:
            self.blockList
            self.blocksPerFile
        '''
        startTime = self.startTime
        endTime = self.endTime

        grp = self.fp['Data']
        # astype() returns a copy, so the in-place shift below never touches the file
        thisUtcTime = grp['utctime'][()].astype(float)[0]

        if self.timezone == 'lt':
            thisUtcTime -= 5*3600

        thisDatetime = datetime.datetime.fromtimestamp(thisUtcTime[0])
        thisDate = thisDatetime.date()

        startUtcTime = (datetime.datetime.combine(thisDate,startTime) - datetime.datetime(1970, 1, 1)).total_seconds()
        endUtcTime = (datetime.datetime.combine(thisDate,endTime) - datetime.datetime(1970, 1, 1)).total_seconds()

        ind = numpy.where(numpy.logical_and(thisUtcTime >= startUtcTime, thisUtcTime < endUtcTime))[0]

        self.blockList = ind
        self.blocksPerFile = len(ind)

        return

    def __readMetadata(self):
        '''
        Reads the 'Metadata' group of the first file of self.filenameList

        Sets:
            self.listShapes     : {array name: [nDimensions, dim2, dim1, dim0, mode]}
            self.listMetaname   : names of the metadata datasets
            self.listMeta       : values of the metadata datasets
        '''
        filename = self.filenameList[0]

        # Stays empty when the file has no 'array dimensions' table
        listShapes = {}
        listMetaname = []
        listMetadata = []

        fp = h5py.File(filename,'r')
        try:
            gp = fp['Metadata']

            for name in gp:
                if name=='array dimensions':
                    listShapes = {}
                    for shapes in gp[name][:]:
                        listShapes[shapes[0]] = numpy.array([shapes[1],shapes[2],shapes[3],shapes[4],shapes[5]])
                else:
                    listMetaname.append(name)
                    listMetadata.append(gp[name][()])
        finally:
            fp.close()

        self.listShapes = listShapes
        self.listMetaname = listMetaname
        self.listMeta = listMetadata

        return

    def __readData(self):
        '''
        Loads every entry of the 'Data' group of the current file

        Sets:
            self.listDataname
            self.listData
        '''
        grp = self.fp['Data']
        listdataname = []
        listdata = []

        for name in grp:
            listdataname.append(name)
            listdata.append(self.__setDataArray(grp[name], self.listShapes[name]))

        self.listDataname = listdataname
        self.listData = listdata
        return

    def __setDataArray(self, dataset, shapes):
        '''
        Builds the array of one variable, restricted to self.blockList

        Inputs:
            dataset : entry of the 'Data' group (dataset for mode 0, group of
                      'table<n>' datasets otherwise)

            shapes  : [nDimensions, dim2, dim1, dim0, mode] of the variable

        Return:
            numpy array with the selected blocks
        '''
        nDims = shapes[0]

        nDim2 = shapes[1]       # Dimension 0

        nDim1 = shapes[2]       # Dimension 1, number of Points or Parameters

        nDim0 = shapes[3]       # Dimension 2, number of samples or ranges

        mode = shapes[4]        # Mode of storing

        blockList = self.blockList

        blocksPerFile = self.blocksPerFile

        # Depending on what mode the data was stored
        if mode == 0:           # Single value per block, stored as one row
            return dataset[()].astype(float)[0][blockList]

        if mode == 2:           # Concatenated in a table
            arrayData = dataset['table0'][()]

            if blockList.size == 0:
                return arrayData[0:0,:]

            # Rows of the same block share the time stamp of the first column
            utctime = arrayData[:,0]
            u, indices = numpy.unique(utctime, return_index=True)

            if blockList.size != indices.size:
                indMin = indices[blockList[0]]
                nextBlock = blockList[-1] + 1
                # The last block of the file has no following block to mark its end
                if nextBlock < indices.size:
                    indMax = indices[nextBlock]
                else:
                    indMax = arrayData.shape[0]
                arrayData = arrayData[indMin:indMax,:]
            return arrayData

        if mode == 1:           # Divided in parameter
            strds = 'table'
            nDatas = nDim1
            newShapes = (blocksPerFile,nDim2,nDim0)

        #-------    No dimension    ---------------
        if nDims == 0:
            arrayData = dataset[()].astype(float)[0][blockList]

        #-------    One dimension    --------------
        elif nDims == 1:
            # HDF5Writer stores these as a single 'table0' with one row per block
            arrayData = dataset[strds + str(0)][()][blockList,:]

        #-------    Two dimensions    -------------
        elif nDims == 2:
            arrayData = numpy.zeros((blocksPerFile,nDim1,nDim0))

            for i in range(nDim1):
                data = dataset[strds + str(i)][()]
                arrayData[:,i,:] = data[blockList,:]

        #-------    Three dimensions    -----------
        else:
            arrayData = numpy.zeros((blocksPerFile,nDim2,nDim1,nDim0))
            for i in range(nDatas):

                data = dataset[strds + str(i)][()]
                data = data[blockList,:,:]
                data = data.reshape(newShapes)
                arrayData[:,:,i,:] = data

        return arrayData

    def __setDataOut(self):
        '''
        Copies the metadata and the current block of every variable into self.dataOut
        '''
        listShapes = self.listShapes
        blockIndex = self.blockIndex

        for name, value in zip(self.listMetaname, self.listMeta):
            setattr(self.dataOut, name, value)

        for name, data in zip(self.listDataname, self.listData):
            nShapes = listShapes[name][0]
            mode = listShapes[name][4]
            if nShapes == 1:
                setattr(self.dataOut, name, data[blockIndex])
            elif nShapes > 1:
                setattr(self.dataOut, name, data[blockIndex,:])
            elif mode==0:
                setattr(self.dataOut, name, data[blockIndex])
            #Mode Meteors
            elif mode ==2:
                selectedData = self.__selectDataMode2(data, blockIndex)
                setattr(self.dataOut, name, selectedData)
        return

    def __selectDataMode2(self, data, blockIndex):
        '''
        Returns the rows of a table (mode 2) that belong to block blockIndex;
        blocks are numbered by the sorted unique values of the first column.
        '''
        utctime = data[:,0]
        aux, indices = numpy.unique(utctime, return_inverse=True)
        selInd = numpy.where(indices == blockIndex)[0]
        selData = data[selInd,:]

        return selData

    def getData(self):
        '''
        Puts the next block into self.dataOut, moving to the next file when
        the current one is exhausted.

        Return:
            0 when there are no more files (dataOut.flagNoData is set),
            None otherwise.
        '''
        # A file may contribute no block at all, so keep advancing until one does
        while self.blockIndex==self.blocksPerFile:
            if not( self.__setNextFileOffline() ):
                self.dataOut.flagNoData = True
                return 0

        self.__setDataOut()
        self.dataOut.flagNoData = False

        self.blockIndex += 1

        return

    def run(self, **kwargs):
        '''
        Configures the unit on the first call and reads one block per call
        '''
        if not(self.isConfig):
            self.setup(**kwargs)
            self.isConfig = True

        self.getData()

        return
526 526
class HDF5Writer(Operation):
    '''
    HDF5 Writer, stores parameters data in HDF5 format files

    path:           path where the files will be stored

    blocksPerFile:  number of blocks that will be saved in per HDF5 format file

    mode:           selects the data stacking mode: '0' channels, '1' parameters, '2' table (for meteors)

    metadataList:   list of attributes that will be stored as metadata

    dataList:       list of attributes that will be stores as data

    '''

    ext = ".hdf5"

    optchar = "D"

    metaoptchar = "M"

    metaFile = None

    filename = None

    path = None

    setFile = None

    fp = None

    grp = None

    ds = None

    firsttime = True

    # Configurations

    blocksPerFile = None

    blockIndex = None

    dataOut = None

    # Data Arrays

    dataList = None

    metadataList = None

    dsList = None       # List of dictionaries with dataset properties

    tableDim = None

    dtype = [('arrayName', 'S20'),('nDimensions', 'i'), ('dim2', 'i'), ('dim1', 'i'),('dim0', 'i'),('mode', 'b')]

    currentDay = None

    def __init__(self):

        Operation.__init__(self)
        self.isConfig = False
        return

    def setup(self, dataOut, **kwargs):
        '''
        Reads the configuration and describes the datasets to be created

        Keyword arguments:
            path, metadataList, dataList    : required
            blocksPerFile                   : optional, 10 by default
            mode                            : optional, 1 by default; an int
                                              is applied to every variable

        Sets:
            self.dsList     : one dictionary per dataset to be written
            self.tableDim   : 'array dimensions' table stored as metadata

        Return:
            1 on success, 0 when any attribute of dataList is None
        '''
        self.path = kwargs['path']
        self.blocksPerFile = kwargs.get('blocksPerFile', 10)

        self.metadataList = kwargs['metadataList']
        self.dataList = kwargs['dataList']
        self.dataOut = dataOut

        if 'mode' in kwargs:
            mode = kwargs['mode']

            if isinstance(mode, int):
                mode = numpy.zeros(len(self.dataList)) + mode
        else:
            mode = numpy.ones(len(self.dataList))

        self.mode = mode

        # Columns: nDimensions, dim2, dim1, dim0, mode
        arrayDim = numpy.zeros((len(self.dataList),5))

        # Table dimensions
        dtype0 = self.dtype
        tableList = []

        # Dictionary and list of tables
        dsList = []

        for i, variable in enumerate(self.dataList):
            dsDict = {}
            dataAux = getattr(self.dataOut, variable)
            dsDict['variable'] = variable
            #---------------------    Conditionals    ------------------------
            # There is no data
            if dataAux is None:
                return 0

            # Not array, just a number
            # Mode 0
            if isinstance(dataAux, (int, float, numpy.integer, numpy.floating)):
                dsDict['mode'] = 0
                dsDict['nDim'] = 0
                arrayDim[i,0] = 0
                dsList.append(dsDict)

            # Mode 2: meteors
            elif mode[i] == 2:
                dsDict['dsName'] = 'table0'
                dsDict['mode'] = 2      # Mode meteors
                dsDict['shape'] = dataAux.shape[-1]
                dsDict['nDim'] = 0
                dsDict['dsNumber'] = 1

                arrayDim[i,3] = dataAux.shape[-1]
                arrayDim[i,4] = mode[i]     # Mode the data was stored

                dsList.append(dsDict)

            # Mode 1
            else:
                arrayDim0 = dataAux.shape           # Data dimensions
                arrayDim[i,0] = len(arrayDim0)      # Number of array dimensions
                arrayDim[i,4] = mode[i]             # Mode the data was stored

                strtable = 'table'
                dsDict['mode'] = 1      # Mode parameters

                # Three-dimension arrays
                if len(arrayDim0) == 3:
                    arrayDim[i,1:-1] = numpy.array(arrayDim0)
                    nTables = int(arrayDim[i,2])
                    dsDict['dsNumber'] = nTables
                    dsDict['shape'] = arrayDim[i,2:4]
                    dsDict['nDim'] = 3

                    # One entry (and later one dataset) per table
                    for j in range(nTables):
                        dsDict = dsDict.copy()
                        dsDict['dsName'] = strtable + str(j)
                        dsList.append(dsDict)

                # Two-dimension arrays
                elif len(arrayDim0) == 2:
                    arrayDim[i,2:-1] = numpy.array(arrayDim0)
                    nTables = int(arrayDim[i,2])
                    dsDict['dsNumber'] = nTables
                    dsDict['shape'] = arrayDim[i,3]
                    dsDict['nDim'] = 2

                    for j in range(nTables):
                        dsDict = dsDict.copy()
                        dsDict['dsName'] = strtable + str(j)
                        dsList.append(dsDict)

                # One-dimension arrays
                elif len(arrayDim0) == 1:
                    arrayDim[i,3] = arrayDim0[0]
                    dsDict['shape'] = arrayDim0[0]
                    dsDict['dsNumber'] = 1
                    dsDict['dsName'] = strtable + str(0)
                    dsDict['nDim'] = 1
                    dsList.append(dsDict)

            table = numpy.array((variable,) + tuple(arrayDim[i,:]),dtype = dtype0)
            tableList.append(table)

        self.dsList = dsList
        self.tableDim = numpy.array(tableList, dtype = dtype0)
        self.blockIndex = 0

        timeTuple = time.localtime(dataOut.utctime)
        self.currentDay = timeTuple.tm_yday
        return 1

    def putMetadata(self):
        '''
        Creates a metadata file and writes the metadata into it
        '''
        fp = self.createMetadataFile()
        self.writeMetadata(fp)
        fp.close()
        return

    def createMetadataFile(self):
        '''
        Creates the next metadata file of the day folder of dataOut.utctime

        Return:
            h5py.File opened for writing; the caller has to close it
        '''
        ext = self.ext
        path = self.path

        timeTuple = time.localtime(self.dataOut.utctime)

        subfolder = ''
        fullpath = os.path.join( path, subfolder )

        if not( os.path.exists(fullpath) ):
            os.mkdir(fullpath)

        subfolder = 'd%4.4d%3.3d' % (timeTuple.tm_year,timeTuple.tm_yday)
        fullpath = os.path.join( path, subfolder )

        setFile = -1    # initialize the set counter

        if not( os.path.exists(fullpath) ):
            os.mkdir(fullpath)
        else:
            # Filter before checking for emptiness: the folder may hold only data files
            filesList = [k for k in os.listdir( fullpath ) if 'M' in k]
            filesList = sorted( filesList, key=str.lower )
            if filesList:
                filen = filesList[-1]
                # the filename must have the following format
                # 0 1234 567 89A BCDE (hex)
                # x YYYY DDD SSS .ext
                if isNumber( filen[8:11] ):
                    setFile = int( filen[8:11] )    # continue from the set of the last file

        setFile += 1

        metaName = '%s%4.4d%3.3d%3.3d%s' % (self.metaoptchar,
                                            timeTuple.tm_year,
                                            timeTuple.tm_yday,
                                            setFile,
                                            ext )

        filename = os.path.join( path, subfolder, metaName )
        self.metaFile = metaName
        # Setting HDF5 File
        fp = h5py.File(filename,'w')

        return fp

    def writeMetadata(self, fp):
        '''
        Writes the 'Metadata' group: the 'array dimensions' table plus one
        dataset per attribute of self.metadataList

        Inputs:
            fp : h5py.File opened for writing
        '''
        grp = fp.create_group("Metadata")
        grp.create_dataset('array dimensions', data = self.tableDim, dtype = self.dtype)

        for name in self.metadataList:
            grp.create_dataset(name, data=getattr(self.dataOut, name))
        return

    def dateFlag(self):
        '''
        Return:
            True when the day of year of dataOut.utctime differs from
            self.currentDay (which is then updated), False otherwise
        '''
        timeTuple = time.localtime(self.dataOut.utctime)
        dataDay = timeTuple.tm_yday

        if dataDay == self.currentDay:
            return False

        self.currentDay = dataDay
        return True

    def setNextFile(self):
        '''
        Creates the next data file of the day folder, with its metadata and
        one empty dataset per entry of self.dsList; the file is closed again
        and reopened by readBlock() for every block.

        Sets:
            self.filename, self.ds, self.data, self.firsttime, self.blockIndex
        '''
        ext = self.ext
        path = self.path

        timeTuple = time.localtime(self.dataOut.utctime)
        subfolder = 'd%4.4d%3.3d' % (timeTuple.tm_year,timeTuple.tm_yday)

        fullpath = os.path.join( path, subfolder )

        setFile = -1    # initialize the set counter

        if os.path.exists(fullpath):
            filesList = [k for k in os.listdir( fullpath ) if 'D' in k]
            if filesList:
                filesList = sorted( filesList, key=str.lower )
                filen = filesList[-1]
                # the filename must have the following format
                # 0 1234 567 89A BCDE (hex)
                # x YYYY DDD SSS .ext
                if isNumber( filen[8:11] ):
                    setFile = int( filen[8:11] )    # continue from the set of the last file
        else:
            os.mkdir(fullpath)

        setFile += 1

        dataName = '%s%4.4d%3.3d%3.3d%s' % (self.optchar,
                                            timeTuple.tm_year,
                                            timeTuple.tm_yday,
                                            setFile,
                                            ext )

        filename = os.path.join( path, subfolder, dataName )

        ds = []
        data = []
        dsList = self.dsList

        # Setting HDF5 File
        fp = h5py.File(filename,'w')
        try:
            # Write metadata
            self.writeMetadata(fp)
            # Write data
            grp = fp.create_group("Data")

            i = 0
            while i < len(dsList):
                dsInfo = dsList[i]
                # One value per block
                if dsInfo['mode'] == 0:
                    ds0 = grp.create_dataset(dsInfo['variable'], (1,1), maxshape=(1,self.blocksPerFile) , chunks = True, dtype=numpy.float64)
                    ds.append(ds0)
                    data.append([])
                    i += 1
                    continue

                elif dsInfo['mode'] == 2:
                    grp0 = grp.create_group(dsInfo['variable'])
                    nCols = int(dsInfo['shape'])
                    ds0 = grp0.create_dataset(dsInfo['dsName'], (1,nCols), data = numpy.zeros((1,nCols)) , maxshape=(None,nCols), chunks=True)
                    ds.append(ds0)
                    data.append([])
                    i += 1
                    continue

                elif dsInfo['mode'] == 1:
                    grp0 = grp.create_group(dsInfo['variable'])

                    # The entries of the same variable are consecutive in dsList
                    for j in range(dsInfo['dsNumber']):
                        dsInfo = dsList[i]
                        tableName = dsInfo['dsName']
                        shape = dsInfo['shape']

                        # Shapes come from a float table, sizes must be integers
                        if dsInfo['nDim'] == 3:
                            nRows = int(shape[0])
                            nCols = int(shape[1])
                            ds0 = grp0.create_dataset(tableName, (nRows,nCols,1) , data = numpy.zeros((nRows,nCols,1)), maxshape = (None,nCols,None), chunks=True)
                        else:
                            nCols = int(shape)
                            ds0 = grp0.create_dataset(tableName, (1,nCols), data = numpy.zeros((1,nCols)) , maxshape=(None,nCols), chunks=True)

                        ds.append(ds0)
                        data.append([])
                        i += 1

            fp.flush()
        finally:
            fp.close()

        # Saving variables
        print('Writing the file: %s'%filename)
        self.filename = filename
        self.ds = ds
        self.data = data
        self.firsttime = True
        self.blockIndex = 0
        return

    def putData(self):
        '''
        Writes the current block, starting a new file when the current one is
        full or the day has changed
        '''
        if self.blockIndex == self.blocksPerFile or self.dateFlag():
            self.setNextFile()

        self.readBlock()
        self.setBlock()     # Prepare data to be written
        self.writeBlock()   # Write data

        return

    def readBlock(self):
        '''
        Reopens the current file and refreshes the dataset handles

        Sets:
            self.fp, self.grp, self.ds
        '''
        dsList = self.dsList
        ds = self.ds
        # Setting HDF5 File
        fp = h5py.File(self.filename,'r+')
        grp = fp["Data"]
        ind = 0

        while ind < len(dsList):
            dsInfo = dsList[ind]

            if dsInfo['mode'] == 0:
                ds[ind] = grp[dsInfo['variable']]
                ind += 1
            else:

                grp0 = grp[dsInfo['variable']]

                for j in range(dsInfo['dsNumber']):
                    dsInfo = dsList[ind]
                    ds[ind] = grp0[dsInfo['dsName']]
                    ind += 1

        self.fp = fp
        self.grp = grp
        self.ds = ds

        return

    def setBlock(self):
        '''
        Splits the current values of dataOut into one array per dataset

        Sets:
            self.data
        '''
        # Creating Arrays
        dsList = self.dsList
        data = self.data
        ind = 0

        while ind < len(dsList):
            dsInfo = dsList[ind]
            dataAux = getattr(self.dataOut, dsInfo['variable'])

            mode = dsInfo['mode']
            nDim = dsInfo['nDim']

            if mode == 0 or mode == 2 or nDim == 1:
                data[ind] = dataAux
                ind += 1
            elif nDim == 2:
                for j in range(dsInfo['dsNumber']):
                    data[ind] = dataAux[j,:]
                    ind += 1
            elif nDim == 3:
                for j in range(dsInfo['dsNumber']):
                    data[ind] = dataAux[:,j,:]
                    ind += 1

        self.data = data
        return

    def writeBlock(self):
        '''
        Saves the block in the HDF5 file and closes the file
        '''
        dsList = self.dsList

        try:
            for i in range(len(self.ds)):
                dsInfo = dsList[i]
                nDim = dsInfo['nDim']
                mode = dsInfo['mode']

                # First time
                if self.firsttime:
                    if isinstance(self.data[i], numpy.ndarray):

                        if nDim == 3:
                            self.data[i] = self.data[i].reshape((self.data[i].shape[0],self.data[i].shape[1],1))
                            self.ds[i].resize(self.data[i].shape)
                        if mode == 2:
                            self.ds[i].resize(self.data[i].shape)
                    self.ds[i][:] = self.data[i]
                else:

                    # From second time
                    # Meteors!
                    if mode == 2:
                        dataShape = self.data[i].shape
                        dsShape = self.ds[i].shape
                        self.ds[i].resize((self.ds[i].shape[0] + dataShape[0],self.ds[i].shape[1]))
                        self.ds[i][dsShape[0]:,:] = self.data[i]
                    # No dimension
                    elif mode == 0:
                        self.ds[i].resize((self.ds[i].shape[0], self.ds[i].shape[1] + 1))
                        self.ds[i][0,-1] = self.data[i]
                    # One dimension
                    elif nDim == 1:
                        self.ds[i].resize((self.ds[i].shape[0] + 1, self.ds[i].shape[1]))
                        self.ds[i][-1,:] = self.data[i]
                    # Two dimension
                    elif nDim == 2:
                        self.ds[i].resize((self.ds[i].shape[0] + 1,self.ds[i].shape[1]))
                        self.ds[i][self.blockIndex,:] = self.data[i]
                    # Three dimensions
                    elif nDim == 3:
                        self.ds[i].resize((self.ds[i].shape[0],self.ds[i].shape[1],self.ds[i].shape[2]+1))
                        self.ds[i][:,:,-1] = self.data[i]

            self.firsttime = False
            self.blockIndex += 1

            self.fp.flush()
        finally:
            # Close to save changes, also when a dataset could not be written
            self.fp.close()
        return

    def run(self, dataOut, **kwargs):
        '''
        Configures the writer on the first call that brings data and writes
        one block per call
        '''
        if not(self.isConfig):
            flagdata = self.setup(dataOut, **kwargs)

            if not(flagdata):
                return

            self.isConfig = True
            self.setNextFile()

        self.putData()
        return
1066 1066
1067 1067
General Comments 0
You need to be logged in to leave comments. Login now