##// END OF EJS Templates
Modifications in Param Writer, files now split in two when there is a big difference in time between contiguous data
Julio Valdez -
r853:3ad9b63d79aa
parent child
Show More
@@ -1,1077 +1,1092
1 1 import numpy
2 2 import time
3 3 import os
4 4 import h5py
5 5 import re
6 6 import datetime
7 7
8 8 from schainpy.model.data.jrodata import *
9 9 from schainpy.model.proc.jroproc_base import ProcessingUnit, Operation
10 10 # from jroIO_base import *
11 11 from schainpy.model.io.jroIO_base import *
12 12 import schainpy
13 13
14 14
class ParamReader(ProcessingUnit):
    '''
    Reads parameter data stored in HDF5 format files.

    Keyword arguments consumed by setup():
        path, startDate, endDate, startTime, endTime
    '''

    # File naming
    ext = ".hdf5"
    optchar = "D"

    # Time selection
    timezone = None
    startTime = None
    endTime = None

    # Position inside the list of files / inside the current file
    fileIndex = None
    utcList = None          # To select data in the utctime list
    blockList = None        # List of blocks to be read from the file
    blocksPerFile = None    # Number of blocks to be read
    blockIndex = None

    path = None

    # List of files
    filenameList = None
    datetimeList = None

    # HDF5 file contents
    listMetaname = None
    listMeta = None
    listDataname = None
    listData = None
    listShapes = None
    fp = None

    # dataOut reconstruction
    dataOut = None
75 75
76 76
def __init__(self):
    '''
    Creates the reader and the Parameters object that getData() fills.
    '''
    # Initialise the base unit first, the same way ParamWriter initialises
    # Operation, so instance state owned by the base class exists.
    # NOTE(review): assumes ProcessingUnit.__init__ takes no extra arguments - confirm
    ProcessingUnit.__init__(self)
    self.dataOut = Parameters()
    return
80 80
81 81 def setup(self, **kwargs):
82 82
83 83 path = kwargs['path']
84 84 startDate = kwargs['startDate']
85 85 endDate = kwargs['endDate']
86 86 startTime = kwargs['startTime']
87 87 endTime = kwargs['endTime']
88 88 walk = kwargs['walk']
89 89 if kwargs.has_key('ext'):
90 90 ext = kwargs['ext']
91 91 else:
92 92 ext = '.hdf5'
93 93 if kwargs.has_key('timezone'):
94 94 self.timezone = kwargs['timezone']
95 95 else:
96 96 self.timezone = 'lt'
97 97
98 98 print "[Reading] Searching files in offline mode ..."
99 99 pathList, filenameList = self.__searchFilesOffLine(path, startDate=startDate, endDate=endDate,
100 100 startTime=startTime, endTime=endTime,
101 101 ext=ext, walk=walk)
102 102
103 103 if not(filenameList):
104 104 print "There is no files into the folder: %s"%(path)
105 105 sys.exit(-1)
106 106
107 107 self.fileIndex = -1
108 108 self.startTime = startTime
109 109 self.endTime = endTime
110 110
111 111 self.__readMetadata()
112 112
113 113 self.__setNextFileOffline()
114 114
115 115 return
116 116
def __searchFilesOffLine(self,
                         path,
                         startDate=None,
                         endDate=None,
                         startTime=datetime.time(0,0,0),
                         endTime=datetime.time(23,59,59),
                         ext='.hdf5',
                         walk=True):
    '''
    Looks for the data files under path that fall inside the date and time range.

    Sets self.filenameList and self.datetimeList (left empty when nothing matches).

    Return:
        (pathList, filenameList) when at least one file matches,
        (None, None) otherwise.
    '''
    self.filenameList = []
    self.datetimeList = []

    expLabel = ''
    finder = JRODataReader()
    dateList, pathList = finder.findDatafiles(path, startDate, endDate, expLabel, ext, walk, include_path=True)

    if dateList == []:
        print("[Reading] No *%s files in %s from %s to %s)"%(ext, path,
                                                    datetime.datetime.combine(startDate,startTime).ctime(),
                                                    datetime.datetime.combine(endDate,endTime).ctime()))
        return None, None

    if len(dateList) > 1:
        print("[Reading] %d days were found in date range: %s - %s" %(len(dateList), startDate, endDate))
    else:
        print("[Reading] data was found for the date %s" %(dateList[0]))

    filenameList = []
    datetimeList = []
    pattern = "*%s" %ext

    for thisPath in pathList:
        for basename in sorted(glob.glob1(thisPath, pattern)):
            filename = os.path.join(thisPath, basename)

            if not isFileInDateRange(filename, startDate, endDate):
                continue

            thisDatetime = self.__isFileInTimeRange(filename, startDate, endDate, startTime, endTime)

            # Keep only the files that hold data inside the time range
            if thisDatetime:
                filenameList.append(filename)
                datetimeList.append(thisDatetime)

    if not filenameList:
        print("[Reading] Any file was found int time range %s - %s" %(datetime.datetime.combine(startDate,startTime).ctime(), datetime.datetime.combine(endDate,endTime).ctime()))
        return None, None

    print("[Reading] %d file(s) was(were) found in time range: %s - %s" %(len(filenameList), startTime, endTime))
    print("")

    for foundName, foundDatetime in zip(filenameList, datetimeList):
        print("[Reading] %s -> [%s]" %(foundName, foundDatetime.ctime()))

    self.filenameList = filenameList
    self.datetimeList = datetimeList

    return pathList, filenameList
187 187
def __isFileInTimeRange(self, filename, startDate, endDate, startTime, endTime):
    """
    Tells whether an HDF5 data file holds blocks inside the selected time range.

    Inputs:
        filename    :    full name of the data file; its 'Data/utctime' dataset is read

        startDate   :    first date of the selected range (datetime.date)

        endDate     :    last date of the selected range (datetime.date)

        startTime   :    initial time of the selected range (datetime.time)

        endTime     :    final time of the selected range (datetime.time). When
                         endTime < startTime the range is taken to end on the next day.

    Return:
        datetime.datetime built from the first time stamp of the file when the file
        has data inside the range, None otherwise.

    Exceptions:
        IOError if the file can't be opened.
    """
    fp = None
    try:
        fp = h5py.File(filename,'r')
        # Row 0 of 'utctime' is used as the array of block time stamps
        thisUtcTime = fp['Data']['utctime'].value[0]
    except IOError:
        traceback.print_exc()
        raise IOError("The file %s can't be opened" %(filename))
    finally:
        # Release the handle even when the expected group/dataset is missing
        if fp is not None:
            fp.close()

    # NOTE(review): the 5-hour shift is hard-coded; presumably the UTC-5 local
    # time of the site - confirm before using it elsewhere
    if self.timezone == 'lt':
        thisUtcTime -= 5*3600

    thisDatetime = datetime.datetime.fromtimestamp(thisUtcTime[0] + 5*3600)
    thisDate = thisDatetime.date()

    epoch = datetime.datetime(1970, 1, 1)
    startUtcTime = (datetime.datetime.combine(thisDate,startTime) - epoch).total_seconds()
    endUtcTime = (datetime.datetime.combine(thisDate,endTime) - epoch).total_seconds()

    #General case
    #           o>>>>>>>>>>>>>><<<<<<<<<<<<<<o
    #-----------o----------------------------o-----------
    #       startTime                     endTime

    if endTime >= startTime:
        thisUtcLog = numpy.logical_and(thisUtcTime > startUtcTime, thisUtcTime < endUtcTime)
        if numpy.any(thisUtcLog):   #If there is one block between the hours mentioned
            return thisDatetime
        return None

    #If endTime < startTime then endTime belongs to the next day
    #<<<<<<<<<<<o                            o>>>>>>>>>>>
    #-----------o----------------------------o-----------
    #        endTime                    startTime

    if (thisDate == startDate) and numpy.all(thisUtcTime < startUtcTime):
        return None

    if (thisDate == endDate) and numpy.all(thisUtcTime > endUtcTime):
        return None

    if numpy.all(thisUtcTime < startUtcTime) and numpy.all(thisUtcTime > endUtcTime):
        return None

    return thisDatetime
266 266
def __setNextFileOffline(self):
    '''
    Opens the next file of self.filenameList and loads its selected blocks.

    Return:
        1 when a new file was opened and read, 0 when there are no more files.
    '''
    # The data of the previous file was already copied into self.listData by
    # __readData(), so its handle can be released before moving on.
    if self.fp is not None:
        self.fp.close()
        self.fp = None

    self.fileIndex += 1
    idFile = self.fileIndex

    if not(idFile < len(self.filenameList)):
        print("No more Files")
        return 0

    filename = self.filenameList[idFile]

    filePointer = h5py.File(filename,'r')

    self.filename = filename
    self.fp = filePointer

    print("Setting the file: %s"%self.filename)

    self.__setBlockList()
    self.__readData()
    self.blockIndex = 0
    return 1
293 293
def __setBlockList(self):
    '''
    Selects the blocks of the current file that fall inside the configured times.

    Reads:
        self.fp, self.startTime, self.endTime, self.timezone

    Sets:
        self.blockList      :    indices of the selected blocks
        self.blocksPerFile  :    number of selected blocks
    '''
    startTime = self.startTime
    endTime = self.endTime

    grp = self.fp['Data']
    # Row 0 of 'utctime' is used as the array of block time stamps
    thisUtcTime = grp['utctime'].value.astype(float)[0]

    # NOTE(review): hard-coded 5-hour shift, same as in __isFileInTimeRange;
    # presumably the UTC-5 local time of the site - confirm
    if self.timezone == 'lt':
        thisUtcTime -= 5*3600

    thisDatetime = datetime.datetime.fromtimestamp(thisUtcTime[0] + 5*3600)
    thisDate = thisDatetime.date()

    epoch = datetime.datetime(1970, 1, 1)
    startUtcTime = (datetime.datetime.combine(thisDate,startTime) - epoch).total_seconds()
    endUtcTime = (datetime.datetime.combine(thisDate,endTime) - epoch).total_seconds()

    if endTime >= startTime:
        inRange = numpy.logical_and(thisUtcTime >= startUtcTime, thisUtcTime < endUtcTime)
    else:
        # The range wraps past midnight (endTime belongs to the next day), as
        # __isFileInTimeRange accepts: keep what lies after startTime or before
        # endTime instead of selecting nothing.
        inRange = numpy.logical_or(thisUtcTime >= startUtcTime, thisUtcTime < endUtcTime)

    ind = numpy.where(inRange)[0]

    self.blockList = ind
    self.blocksPerFile = len(ind)

    return
331 331
def __readMetadata(self):
    '''
    Reads the 'Metadata' group of the first file of self.filenameList.

    Sets:
        self.listShapes     :    {array name: [nDimensions, dim2, dim1, dim0, mode]},
                                 taken from the 'array dimensions' table
        self.listMetaname   :    names of the remaining metadata datasets
        self.listMeta       :    values of the remaining metadata datasets
    '''
    filename = self.filenameList[0]

    # Start empty so a file without the 'array dimensions' table does not
    # leave listShapes undefined
    listShapes = {}
    listMetaname = []
    listMetadata = []

    fp = h5py.File(filename,'r')
    try:
        gp = fp['Metadata']

        for item in gp.items():
            name = item[0]

            if name=='array dimensions':
                table = gp[name][:]
                listShapes = {}
                for shapes in table:
                    listShapes[shapes[0]] = numpy.array([shapes[1],shapes[2],shapes[3],shapes[4],shapes[5]])
            else:
                data = gp[name].value
                listMetaname.append(name)
                listMetadata.append(data)
    finally:
        # Close the file even when a dataset can't be read
        fp.close()

    self.listShapes = listShapes
    self.listMetaname = listMetaname
    self.listMeta = listMetadata

    return
385 385
def __readData(self):
    '''
    Loads every dataset of the 'Data' group of the current file.

    Sets:
        self.listDataname   :    names found in the 'Data' group
        self.listData       :    one array per name, built by __setDataArray()
    '''
    grp = self.fp['Data']

    names = []
    arrays = []
    for name, _ in grp.items():
        names.append(name)
        arrays.append(self.__setDataArray(grp[name], self.listShapes[name]))

    self.listDataname = names
    self.listData = arrays
    return
401 401
def __setDataArray(self, dataset, shapes):
    '''
    Builds the array of one variable, restricted to the blocks in self.blockList.

    Inputs:
        dataset :    dataset (mode 0) or group of 'tableN' datasets (modes 1 and 2)
        shapes  :    [nDimensions, dim2, dim1, dim0, mode] of the variable

    Return:
        mode 2                  :    rows of 'table0' belonging to the selected blocks
        mode 0 / no dimensions  :    one value per selected block
        one dimension           :    array (blocks, dim0)
        two dimensions          :    array (blocks, dim1, dim0)
        three dimensions        :    array (blocks, dim2, dim1, dim0)
    '''
    nDims = shapes[0]
    nDim2 = shapes[1]   #Dimension 0
    nDim1 = shapes[2]   #Dimension 1, number of Points or Parameters
    nDim0 = shapes[3]   #Dimension 2, number of samples or ranges
    mode = shapes[4]    #Mode of storing

    blockList = self.blockList
    blocksPerFile = self.blocksPerFile

    #Mode 2: concatenated in a table whose first column is the time stamp
    if mode == 2:
        arrayData = dataset['table0'].value

        # Nothing selected: return an empty table instead of failing on blockList[0]
        if blockList.size == 0:
            return arrayData[:0,:]

        #Selecting part of the dataset
        utctime = arrayData[:,0]
        u, indices = numpy.unique(utctime, return_index=True)

        if blockList.size != indices.size:
            indMin = indices[blockList[0]]
            if blockList[-1] + 1 >= indices.size:
                arrayData = arrayData[indMin:,:]
            else:
                indMax = indices[blockList[-1] + 1]
                arrayData = arrayData[indMin:indMax,:]
        return arrayData

    #Mode 0 / no dimensions: one number per block, stored in a single row
    if mode == 0 or nDims == 0:
        return dataset.value.astype(float)[0][blockList]

    #Mode 1: divided in parameters, one dataset 'tableN' per parameter
    strds = 'table'
    nDatas = nDim1

    #-------    One dimension    ---------------
    if nDims == 1:
        # A single dataset 'table0' with one row per block
        arrayData = dataset[strds + str(0)].value[blockList,:]

    #-------    Two dimensions    -----------
    elif nDims == 2:
        arrayData = numpy.zeros((blocksPerFile,nDim1,nDim0))

        for i in range(nDatas):
            data = dataset[strds + str(i)].value
            arrayData[:,i,:] = data[blockList,:]

    #-------    Three dimensions    ---------
    else:
        arrayData = numpy.zeros((blocksPerFile,nDim2,nDim1,nDim0))
        for i in range(nDatas):

            data = dataset[strds + str(i)].value

            # Blocks are stacked along the last axis of each table
            for b in range(blockList.size):
                arrayData[b,:,i,:] = data[:,:,blockList[b]]

    return arrayData
466 466
def __setDataOut(self):
    '''
    Copies the metadata and the block number self.blockIndex of every data
    array into self.dataOut, one attribute per name.
    '''
    blockIndex = self.blockIndex

    for i, meta in enumerate(self.listMeta):
        setattr(self.dataOut, self.listMetaname[i], meta)

    for j, block in enumerate(self.listData):
        name = self.listDataname[j]
        nShapes = self.listShapes[name][0]
        mode = self.listShapes[name][4]

        if nShapes == 1:
            selected = block[blockIndex]
        elif nShapes > 1:
            selected = block[blockIndex,:]
        elif mode == 0:
            selected = block[blockIndex]
        elif mode == 2:
            #Mode Meteors
            selected = self.__selectDataMode2(block, blockIndex)
        else:
            # No rule for this combination: the attribute is left untouched
            continue

        setattr(self.dataOut, name, selected)
    return
494 494
def __selectDataMode2(self, data, blockIndex):
    '''
    Returns the rows of a mode 2 table whose time stamp (column 0) is the
    blockIndex-th distinct time stamp of the table.
    '''
    # inverse[k] is the position of row k's time stamp among the sorted unique ones
    inverse = numpy.unique(data[:,0], return_inverse=True)[1]
    rows = numpy.where(inverse == blockIndex)[0]

    return data[rows,:]
502 502
def getData(self):
    '''
    Puts the next block into self.dataOut, moving to the next file when the
    current one has no blocks left.

    Return:
        0 (with dataOut.flagNoData set) when there are no more files, None otherwise.
    '''
    fileExhausted = (self.blockIndex == self.blocksPerFile)

    if fileExhausted and not self.__setNextFileOffline():
        self.dataOut.flagNoData = True
        return 0

    self.__setDataOut()
    self.dataOut.flagNoData = False

    self.blockIndex += 1

    return
525 525
def run(self, **kwargs):
    '''
    Entry point of the unit: configures the reader on the first call (kwargs
    are passed to setup()) and delivers one block per call.
    '''
    if not self.isConfig:
        self.setup(**kwargs)
        self.isConfig = True

    self.getData()

    return
536 536
class ParamWriter(Operation):
    '''
    HDF5 Writer, stores parameters data in HDF5 format files

    path:           path where the files will be stored

    blocksPerFile:  number of blocks that will be saved per HDF5 format file

    mode:           selects the data stacking mode: 0 channels, 1 parameters,
                    2 table (for meteors)

    metadataList:   list of attributes that will be stored as metadata

    dataList:       list of attributes that will be stored as data
    '''

    # File naming
    ext = ".hdf5"
    optchar = "D"
    metaoptchar = "M"

    metaFile = None
    filename = None
    path = None
    setFile = None

    # HDF5 handles
    fp = None
    grp = None
    ds = None

    firsttime = True

    # Configurations
    blocksPerFile = None
    blockIndex = None
    dataOut = None

    # Data arrays
    dataList = None
    metadataList = None

    dsList = None       # List of dictionaries with dataset properties
    tableDim = None

    # Row layout of the 'array dimensions' metadata table
    dtype = [('arrayName', 'S20'),('nDimensions', 'i'), ('dim2', 'i'), ('dim1', 'i'),('dim0', 'i'),('mode', 'b')]

    # Day of the year and time stamp of the last block, used by timeFlag()
    currentDay = None
    lastTime = None
603
def __init__(self):
    '''
    Creates the writer in the not-configured state; setup() runs on the first
    call to run().
    '''
    Operation.__init__(self)
    self.isConfig = False
607 609
608 610 def setup(self, dataOut, **kwargs):
609 611
610 612 self.path = kwargs['path']
611 613
612 614 if kwargs.has_key('blocksPerFile'):
613 615 self.blocksPerFile = kwargs['blocksPerFile']
614 616 else:
615 617 self.blocksPerFile = 10
616 618
617 619 self.metadataList = kwargs['metadataList']
618 620 self.dataList = kwargs['dataList']
619 621 self.dataOut = dataOut
620 622
621 623 if kwargs.has_key('mode'):
622 624 mode = kwargs['mode']
623 625
624 626 if type(mode) == int:
625 627 mode = numpy.zeros(len(self.dataList)) + mode
626 628 else:
627 629 mode = numpy.ones(len(self.dataList))
628 630
629 631 self.mode = mode
630 632
631 633 arrayDim = numpy.zeros((len(self.dataList),5))
632 634
633 635 #Table dimensions
634 636 dtype0 = self.dtype
635 637 tableList = []
636 638
637 639 #Dictionary and list of tables
638 640 dsList = []
639 641
640 642 for i in range(len(self.dataList)):
641 643 dsDict = {}
642 644 dataAux = getattr(self.dataOut, self.dataList[i])
643 645 dsDict['variable'] = self.dataList[i]
644 646 #--------------------- Conditionals ------------------------
645 647 #There is no data
646 648 if dataAux == None:
647 649 return 0
648 650
649 651 #Not array, just a number
650 652 #Mode 0
651 653 if type(dataAux)==float or type(dataAux)==int:
652 654 dsDict['mode'] = 0
653 655 dsDict['nDim'] = 0
654 656 arrayDim[i,0] = 0
655 657 dsList.append(dsDict)
656 658
657 659 #Mode 2: meteors
658 660 elif mode[i] == 2:
659 661 # dsDict['nDim'] = 0
660 662 dsDict['dsName'] = 'table0'
661 663 dsDict['mode'] = 2 # Mode meteors
662 664 dsDict['shape'] = dataAux.shape[-1]
663 665 dsDict['nDim'] = 0
664 666 dsDict['dsNumber'] = 1
665 667
666 668 arrayDim[i,3] = dataAux.shape[-1]
667 669 arrayDim[i,4] = mode[i] #Mode the data was stored
668 670
669 671 dsList.append(dsDict)
670 672
671 673 #Mode 1
672 674 else:
673 675 arrayDim0 = dataAux.shape #Data dimensions
674 676 arrayDim[i,0] = len(arrayDim0) #Number of array dimensions
675 677 arrayDim[i,4] = mode[i] #Mode the data was stored
676 678
677 679 strtable = 'table'
678 680 dsDict['mode'] = 1 # Mode parameters
679 681
680 682 # Three-dimension arrays
681 683 if len(arrayDim0) == 3:
682 684 arrayDim[i,1:-1] = numpy.array(arrayDim0)
683 685 nTables = int(arrayDim[i,2])
684 686 dsDict['dsNumber'] = nTables
685 687 dsDict['shape'] = arrayDim[i,2:4]
686 688 dsDict['nDim'] = 3
687 689
688 690 for j in range(nTables):
689 691 dsDict = dsDict.copy()
690 692 dsDict['dsName'] = strtable + str(j)
691 693 dsList.append(dsDict)
692 694
693 695 # Two-dimension arrays
694 696 elif len(arrayDim0) == 2:
695 697 arrayDim[i,2:-1] = numpy.array(arrayDim0)
696 698 nTables = int(arrayDim[i,2])
697 699 dsDict['dsNumber'] = nTables
698 700 dsDict['shape'] = arrayDim[i,3]
699 701 dsDict['nDim'] = 2
700 702
701 703 for j in range(nTables):
702 704 dsDict = dsDict.copy()
703 705 dsDict['dsName'] = strtable + str(j)
704 706 dsList.append(dsDict)
705 707
706 708 # One-dimension arrays
707 709 elif len(arrayDim0) == 1:
708 710 arrayDim[i,3] = arrayDim0[0]
709 711 dsDict['shape'] = arrayDim0[0]
710 712 dsDict['dsNumber'] = 1
711 713 dsDict['dsName'] = strtable + str(0)
712 714 dsDict['nDim'] = 1
713 715 dsList.append(dsDict)
714 716
715 717 table = numpy.array((self.dataList[i],) + tuple(arrayDim[i,:]),dtype = dtype0)
716 718 tableList.append(table)
717 719
718 720 # self.arrayDim = arrayDim
719 721 self.dsList = dsList
720 722 self.tableDim = numpy.array(tableList, dtype = dtype0)
721 723 self.blockIndex = 0
722 724
723 725 timeTuple = time.localtime(dataOut.utctime)
724 726 self.currentDay = timeTuple.tm_yday
725 727 return 1
726 728
def putMetadata(self):
    '''
    Writes the metadata into a new, separate metadata file.
    '''
    fp = self.createMetadataFile()
    try:
        self.writeMetadata(fp)
    finally:
        # Close the file even when writing the metadata fails
        fp.close()
    return
733 735
def createMetadataFile(self):
    '''
    Creates the next metadata file of the day folder 'dYYYYDDD' under self.path.

    The file is named <metaoptchar>YYYYDDDSSS<ext>, where SSS is one more than
    the set number of the last metadata file already in the folder (000 when
    there is none). The file name is stored in self.metaFile.

    Return:
        the h5py.File opened for writing; the caller must close it.
    '''
    ext = self.ext
    path = self.path

    timeTuple = time.localtime(self.dataOut.utctime)

    # Make sure the base folder exists
    fullpath = os.path.join( path, '' )
    if not( os.path.exists(fullpath) ):
        os.mkdir(fullpath)

    subfolder = 'd%4.4d%3.3d' % (timeTuple.tm_year,timeTuple.tm_yday)
    fullpath = os.path.join( path, subfolder )

    setFile = -1    # set counter when there is no previous metadata file

    if not( os.path.exists(fullpath) ):
        os.mkdir(fullpath)
    else:
        # Filter BEFORE testing for emptiness: a folder holding only data
        # files must not be indexed with [-1]
        filesList = [k for k in os.listdir( fullpath ) if self.metaoptchar in k]
        filesList = sorted( filesList, key=str.lower )
        if len( filesList ) > 0:
            filen = filesList[-1]
            # the filename must have the following format
            # 0 1234 567 89A BCDE (hex)
            # x YYYY DDD SSS .ext
            if isNumber( filen[8:11] ):
                setFile = int( filen[8:11] )    # continue from the set of the last file

    setFile += 1

    file = '%s%4.4d%3.3d%3.3d%s' % (self.metaoptchar,
                                    timeTuple.tm_year,
                                    timeTuple.tm_yday,
                                    setFile,
                                    ext )

    filename = os.path.join( path, subfolder, file )
    self.metaFile = file
    #Setting HDF5 File
    fp = h5py.File(filename,'w')

    return fp
785 787
def writeMetadata(self, fp):
    '''
    Creates the 'Metadata' group in fp with the 'array dimensions' table and
    one dataset per attribute of self.metadataList, read from self.dataOut.
    '''
    grp = fp.create_group("Metadata")
    grp.create_dataset('array dimensions', data = self.tableDim, dtype = self.dtype)

    for name in self.metadataList:
        grp.create_dataset(name, data=getattr(self.dataOut, name))
    return
794 796
def timeFlag(self):
    '''
    Tells whether the current block must start a new file.

    Return:
        True when the block belongs to a different day of the year than the
        previous one, or when more than three hours passed since the previous
        block; False otherwise.

    Updates self.lastTime on every call and self.currentDay on a day change.
    '''
    currentTime = self.dataOut.utctime

    # First block: there is no previous time stamp to compare with
    if self.lastTime is None:
        self.lastTime = currentTime

    #Day
    timeTuple = time.localtime(currentTime)
    dataDay = timeTuple.tm_yday

    #Time
    timeDiff = currentTime - self.lastTime

    # Remember this block on every path; otherwise the block that follows a
    # day change is compared against a stale time stamp and splits the file again
    self.lastTime = currentTime

    #The day is different
    if dataDay != self.currentDay:
        self.currentDay = dataDay
        return True

    #The gap between contiguous data exceeds three hours
    if timeDiff > 3*60*60:
        return True

    return False
805 820
def setNextFile(self):
    '''
    Creates the next data file of the day folder 'dYYYYDDD' under self.path and
    prepares it to receive blocks.

    The file is named <optchar>YYYYDDDSSS<ext>, where SSS is one more than the
    set number of the last data file already in the folder (000 when there is
    none). The metadata is written into the file, and one dataset per entry of
    self.dsList is created inside the 'Data' group.

    Sets self.filename, self.ds, self.data, self.firsttime and self.blockIndex.
    The file is closed before returning; readBlock() reopens it.
    '''
    ext = self.ext
    path = self.path

    timeTuple = time.localtime(self.dataOut.utctime)
    subfolder = 'd%4.4d%3.3d' % (timeTuple.tm_year,timeTuple.tm_yday)

    fullpath = os.path.join( path, subfolder )

    setFile = -1    # set counter when there is no previous data file

    if os.path.exists(fullpath):
        filesList = [k for k in os.listdir( fullpath ) if 'D' in k]
        if len( filesList ) > 0:
            filen = sorted( filesList, key=str.lower )[-1]
            # the filename must have the following format
            # 0 1234 567 89A BCDE (hex)
            # x YYYY DDD SSS .ext
            if isNumber( filen[8:11] ):
                setFile = int( filen[8:11] )    # continue from the set of the last file
    else:
        os.mkdir(fullpath)

    setFile += 1

    file = '%s%4.4d%3.3d%3.3d%s' % (self.optchar,
                                    timeTuple.tm_year,
                                    timeTuple.tm_yday,
                                    setFile,
                                    ext )

    filename = os.path.join( path, subfolder, file )

    ds = []
    data = []
    dsList = self.dsList

    #Setting HDF5 File
    fp = h5py.File(filename,'w')
    try:
        #write metadata
        self.writeMetadata(fp)
        #Write data
        grp = fp.create_group("Data")

        i = 0
        while i < len(dsList):
            dsInfo = dsList[i]

            #One-dimension data
            if dsInfo['mode'] == 0:
                ds0 = grp.create_dataset(dsInfo['variable'], (1,1), maxshape=(1,self.blocksPerFile) , chunks = True, dtype=numpy.float64)
                ds.append(ds0)
                data.append([])
                i += 1

            elif dsInfo['mode'] == 2:
                nCols = int(dsInfo['shape'])
                grp0 = grp.create_group(dsInfo['variable'])
                ds0 = grp0.create_dataset(dsInfo['dsName'], (1,nCols), data = numpy.zeros((1,nCols)) , maxshape=(None,nCols), chunks=True)
                ds.append(ds0)
                data.append([])
                i += 1

            elif dsInfo['mode'] == 1:
                grp0 = grp.create_group(dsInfo['variable'])

                # The dsNumber entries that follow belong to this variable
                for j in range(dsInfo['dsNumber']):
                    dsInfo = dsList[i]
                    tableName = dsInfo['dsName']
                    shape = dsInfo['shape']

                    # Dimensions are cast to int: setup() may hand them over as floats
                    if dsInfo['nDim'] == 3:
                        nRows = int(shape[0])
                        nCols = int(shape[1])
                        ds0 = grp0.create_dataset(tableName, (nRows,nCols,1) , data = numpy.zeros((nRows,nCols,1)), maxshape = (None,nCols,None), chunks=True)
                    else:
                        nCols = int(shape)
                        ds0 = grp0.create_dataset(tableName, (1,nCols), data = numpy.zeros((1,nCols)) , maxshape=(None,nCols), chunks=True)

                    ds.append(ds0)
                    data.append([])
                    i += 1

            else:
                # An unknown mode would never advance the index
                raise ValueError("Unknown storing mode: %s" % dsInfo['mode'])

        fp.flush()
    finally:
        # Close the file even when a dataset can't be created
        fp.close()

    #Saving variables
    print('Writing the file: %s'%filename)
    self.filename = filename
    self.ds = ds
    self.data = data
    self.firsttime = True
    self.blockIndex = 0
    return
916 931
def putData(self):
    '''
    Writes the current block, starting a new file first when the current one
    is full or when timeFlag() reports a day change or a long gap in time.
    '''
    # Call timeFlag() on every block, not only when the file still has room:
    # it also records the day and time of the block, which would go stale if
    # the call were skipped by the short-circuit of 'or'
    newFileByTime = self.timeFlag()

    if self.blockIndex == self.blocksPerFile or newFileByTime:
        self.setNextFile()

    self.readBlock()
    self.setBlock()     #Prepare data to be written
    self.writeBlock()   #Write data

    return
928 943
def readBlock(self):
    '''
    Reopens the current file in 'r+' mode and refreshes the dataset handles.

    Sets self.fp, self.grp and self.ds; the file stays open until writeBlock()
    closes it.
    '''
    dsList = self.dsList
    ds = self.ds

    #Setting HDF5 File
    fp = h5py.File(self.filename,'r+')
    grp = fp["Data"]

    ind = 0
    while ind < len(dsList):
        dsInfo = dsList[ind]

        # Mode 0 variables are datasets stored directly in the 'Data' group
        if dsInfo['mode'] == 0:
            ds[ind] = grp[dsInfo['variable']]
            ind += 1
            continue

        # Other modes: a group per variable holding dsNumber datasets
        grp0 = grp[dsInfo['variable']]
        for j in range(dsInfo['dsNumber']):
            ds[ind] = grp0[dsList[ind]['dsName']]
            ind += 1

    self.fp = fp
    self.grp = grp
    self.ds = ds

    return
967 982
def setBlock(self):
    '''
    Splits the current values of self.dataOut into the pieces that go into
    each dataset, in the order of self.dsList.

    Sets self.data (one entry per dataset).
    '''
    dsList = self.dsList
    data = self.data

    ind = 0
    while ind < len(dsList):
        dsInfo = dsList[ind]
        dataAux = getattr(self.dataOut, dsInfo['variable'])

        mode = dsInfo['mode']
        nDim = dsInfo['nDim']

        if mode == 0 or mode == 2 or nDim == 1:
            # Stored whole in a single dataset
            pieces = [dataAux]
        elif nDim == 2:
            # One dataset per row
            pieces = [dataAux[j,:] for j in range(dsInfo['dsNumber'])]
        elif nDim == 3:
            # One dataset per index of the middle axis
            pieces = [dataAux[:,j,:] for j in range(dsInfo['dsNumber'])]
        else:
            pieces = []

        for piece in pieces:
            data[ind] = piece
            ind += 1

    self.data = data
    return
1004 1019
def writeBlock(self):
    '''
    Saves the block prepared by setBlock() in the HDF5 file opened by readBlock().

    The first block of a file overwrites the initial contents of every dataset;
    the following ones enlarge each dataset and fill the new space. The file is
    flushed and closed at the end, also when a write fails. self.firsttime and
    self.blockIndex are only updated after every dataset was written.
    '''
    dsList = self.dsList

    try:
        for i in range(len(self.ds)):
            dsInfo = dsList[i]
            nDim = dsInfo['nDim']
            mode = dsInfo['mode']

            # First time
            if self.firsttime:
                if isinstance(self.data[i], numpy.ndarray):

                    if nDim == 3:
                        # Blocks are stacked along a third axis
                        self.data[i] = self.data[i].reshape((self.data[i].shape[0],self.data[i].shape[1],1))
                        self.ds[i].resize(self.data[i].shape)
                    if mode == 2:
                        self.ds[i].resize(self.data[i].shape)
                self.ds[i][:] = self.data[i]
            else:

                # From second time
                # Meteors!
                if mode == 2:
                    dataShape = self.data[i].shape
                    dsShape = self.ds[i].shape
                    self.ds[i].resize((self.ds[i].shape[0] + dataShape[0],self.ds[i].shape[1]))
                    self.ds[i][dsShape[0]:,:] = self.data[i]
                # No dimension
                elif mode == 0:
                    self.ds[i].resize((self.ds[i].shape[0], self.ds[i].shape[1] + 1))
                    self.ds[i][0,-1] = self.data[i]
                # One dimension
                elif nDim == 1:
                    self.ds[i].resize((self.ds[i].shape[0] + 1, self.ds[i].shape[1]))
                    self.ds[i][-1,:] = self.data[i]
                # Two dimension
                elif nDim == 2:
                    self.ds[i].resize((self.ds[i].shape[0] + 1,self.ds[i].shape[1]))
                    self.ds[i][self.blockIndex,:] = self.data[i]
                # Three dimensions
                elif nDim == 3:
                    self.ds[i].resize((self.ds[i].shape[0],self.ds[i].shape[1],self.ds[i].shape[2]+1))
                    self.ds[i][:,:,-1] = self.data[i]

        self.firsttime = False
        self.blockIndex += 1

        self.fp.flush()
    finally:
        #Close to save changes, and to release the file when a write fails
        self.fp.close()
    return
1061 1076
def run(self, dataOut, **kwargs):
    '''
    Entry point of the operation: writes the current block of dataOut.

    On the first call with data available the writer is configured (kwargs
    are passed to setup()) and the first file is created. While setup()
    reports that there is no data yet, nothing is written.
    '''
    if not self.isConfig:
        if not self.setup(dataOut, **kwargs):
            return

        self.isConfig = True
        self.setNextFile()

    self.putData()
    return
1076 1091
1077 1092
General Comments 0
You need to be logged in to leave comments. Login now