##// END OF EJS Templates
Restore method __setDataArray in ParamReader
Juan C. Valdez -
r872:342bbf5dcbe3
parent child
Show More
@@ -1,1029 +1,1092
1 1 import numpy
2 2 import time
3 3 import os
4 4 import h5py
5 5 import re
6 6 import datetime
7 7
8 8 from schainpy.model.data.jrodata import *
9 9 from schainpy.model.proc.jroproc_base import ProcessingUnit, Operation
10 10 # from jroIO_base import *
11 11 from schainpy.model.io.jroIO_base import *
12 12 import schainpy
13 13
14 14
class ParamReader(ProcessingUnit):
    '''
    Reads HDF5 format files

    Keyword arguments read by setup():

    path

    startDate

    endDate

    startTime

    endTime

    walk

    ext (optional, defaults to '.hdf5')

    timezone (optional, defaults to 'lt')
    '''

    # Default file extension (setup() uses its own '.hdf5' fallback)
    ext = ".hdf5"

    # NOTE(review): not read by the reader code visible here; ParamWriter uses
    # the same value as data-file name prefix - confirm whether it is needed
    optchar = "D"

    # Set by setup() from kwargs['timezone']; 'lt' when the key is absent
    timezone = None

    # Time-of-day limits used to select blocks inside each file
    startTime = None

    endTime = None

    # Index (into filenameList) of the file currently being read
    fileIndex = None

    utcList = None       #To select data in the utctime list

    blockList = None    #List to blocks to be read from the file

    blocksPerFile = None    #Number of blocks to be read

    # Position of the next block to deliver inside the current file
    blockIndex = None

    path = None

    #List of Files

    filenameList = None

    datetimeList = None

    #Hdf5 File

    # Names and values of the datasets found in the 'Metadata' group
    listMetaname = None

    listMeta = None

    # Names and in-memory arrays of the entries found in the 'Data' group
    listDataname = None

    listData = None

    # Maps a data name to its row of the 'array dimensions' table
    listShapes = None

    # Handle of the HDF5 file currently open
    fp = None

    #dataOut reconstruction

    dataOut = None
75 75
76 76
def __init__(self):
    '''
    Creates the Parameters object that getData() fills in on every call.
    '''
    self.dataOut = Parameters()
80 80
81 81 def setup(self, **kwargs):
82 82
83 83 path = kwargs['path']
84 84 startDate = kwargs['startDate']
85 85 endDate = kwargs['endDate']
86 86 startTime = kwargs['startTime']
87 87 endTime = kwargs['endTime']
88 88 walk = kwargs['walk']
89 89 if kwargs.has_key('ext'):
90 90 ext = kwargs['ext']
91 91 else:
92 92 ext = '.hdf5'
93 93 if kwargs.has_key('timezone'):
94 94 self.timezone = kwargs['timezone']
95 95 else:
96 96 self.timezone = 'lt'
97 97
98 98 print "[Reading] Searching files in offline mode ..."
99 99 pathList, filenameList = self.__searchFilesOffLine(path, startDate=startDate, endDate=endDate,
100 100 startTime=startTime, endTime=endTime,
101 101 ext=ext, walk=walk)
102 102
103 103 if not(filenameList):
104 104 print "There is no files into the folder: %s"%(path)
105 105 sys.exit(-1)
106 106
107 107 self.fileIndex = -1
108 108 self.startTime = startTime
109 109 self.endTime = endTime
110 110
111 111 self.__readMetadata()
112 112
113 113 self.__setNextFileOffline()
114 114
115 115 return
116 116
def __searchFilesOffLine(self,
                         path,
                         startDate=None,
                         endDate=None,
                         startTime=datetime.time(0,0,0),
                         endTime=datetime.time(23,59,59),
                         ext='.hdf5',
                         walk=True):
    '''
    Builds the list of files to be read.

    Folders are obtained from JRODataReader.findDatafiles(); every file
    with the given extension is kept when it passes isFileInDateRange()
    and __isFileInTimeRange().

    Stores the selection in self.filenameList / self.datetimeList and
    returns (pathList, filenameList), or (None, None) when nothing was found.
    '''
    self.filenameList = []
    self.datetimeList = []

    reader = JRODataReader()
    dateList, pathList = reader.findDatafiles(path, startDate, endDate, '', ext, walk, include_path=True)

    if dateList == []:
        print("[Reading] No *%s files in %s from %s to %s)" % (ext, path,
                                                              datetime.datetime.combine(startDate,startTime).ctime(),
                                                              datetime.datetime.combine(endDate,endTime).ctime()))
        return None, None

    if len(dateList) > 1:
        print("[Reading] %d days were found in date range: %s - %s" % (len(dateList), startDate, endDate))
    else:
        print("[Reading] data was found for the date %s" % (dateList[0]))

    selectedFiles = []
    selectedTimes = []

    for folder in pathList:
        for basename in sorted(glob.glob1(folder, "*%s" % ext)):
            fullname = os.path.join(folder, basename)

            if not isFileInDateRange(fullname, startDate, endDate):
                continue

            fileDatetime = self.__isFileInTimeRange(fullname, startDate, endDate, startTime, endTime)
            if not fileDatetime:
                continue

            selectedFiles.append(fullname)
            selectedTimes.append(fileDatetime)

    if not selectedFiles:
        print("[Reading] Any file was found int time range %s - %s" % (datetime.datetime.combine(startDate,startTime).ctime(), datetime.datetime.combine(endDate,endTime).ctime()))
        return None, None

    print("[Reading] %d file(s) was(were) found in time range: %s - %s" % (len(selectedFiles), startTime, endTime))
    print("")

    for fullname, fileDatetime in zip(selectedFiles, selectedTimes):
        print("[Reading] %s -> [%s]" % (fullname, fileDatetime.ctime()))

    self.filenameList = selectedFiles
    self.datetimeList = selectedTimes

    return pathList, selectedFiles
187 187
def __isFileInTimeRange(self, filename, startDate, endDate, startTime, endTime):
    """
    Checks whether a data file has blocks inside the requested time range.

    Inputs:
        filename    :    full name of the HDF5 data file

        startDate   :    first date of the selected range (datetime.date)

        endDate     :    last date of the selected range (datetime.date)

        startTime   :    first time of day of the selected range (datetime.time)

        endTime     :    last time of day of the selected range (datetime.time)

    Return:
        The datetime.datetime derived from the first 'Data/utctime' value
        when the file is accepted, None otherwise.

    Exceptions:
        IOError if the file cannot be opened or read.
    """
    fp = None
    try:
        fp = h5py.File(filename, 'r')
        grp1 = fp['Data']
        # dataset[()] replaces Dataset.value, which newer h5py no longer has;
        # row 0 holds the time of every block
        thisUtcTime = grp1['utctime'][()][0]
    except IOError:
        traceback.print_exc()
        raise IOError("The file %s can't be opened" % (filename))
    finally:
        # Always release the handle, also when reading 'utctime' fails
        if fp is not None:
            fp.close()

    # NOTE(review): hard-coded 5-hour shift, presumably UTC-5 local time - confirm
    if self.timezone == 'lt':
        thisUtcTime -= 5*3600

    thisDatetime = datetime.datetime.fromtimestamp(thisUtcTime[0] + 5*3600)
    thisDate = thisDatetime.date()

    epoch = datetime.datetime(1970, 1, 1)
    startUtcTime = (datetime.datetime.combine(thisDate, startTime) - epoch).total_seconds()
    endUtcTime = (datetime.datetime.combine(thisDate, endTime) - epoch).total_seconds()

    #General case
    #           o>>>>>>>>>>>>>><<<<<<<<<<<<<<o
    #-----------o----------------------------o-----------
    #       startTime                    endTime

    if endTime >= startTime:
        thisUtcLog = numpy.logical_and(thisUtcTime > startUtcTime, thisUtcTime < endUtcTime)
        if numpy.any(thisUtcLog):    #If there is one block between the hours mentioned
            return thisDatetime
        return None

    #If endTime < startTime then endTime belongs to the next day
    #<<<<<<<<<<<o                            o>>>>>>>>>>>
    #-----------o----------------------------o-----------
    #        endTime                    startTime

    if (thisDate == startDate) and numpy.all(thisUtcTime < startUtcTime):
        return None

    if (thisDate == endDate) and numpy.all(thisUtcTime > endUtcTime):
        return None

    if numpy.all(thisUtcTime < startUtcTime) and numpy.all(thisUtcTime > endUtcTime):
        return None

    return thisDatetime
266 266
def __setNextFileOffline(self):
    '''
    Opens the next file of self.filenameList and loads its data.

    Updates self.fileIndex, self.filename, self.fp and self.blockIndex and
    calls __setBlockList() and __readData().

    Returns 1 when a file was set, 0 when there are no more files.
    '''
    self.fileIndex += 1
    idFile = self.fileIndex

    # Close the previous file; __readData() already copied its data to memory
    if self.fp is not None:
        self.fp.close()
        self.fp = None

    if not (idFile < len(self.filenameList)):
        print("No more Files")
        return 0

    filename = self.filenameList[idFile]

    filePointer = h5py.File(filename, 'r')

    self.filename = filename

    self.fp = filePointer

    print("Setting the file: %s" % self.filename)

    self.__setBlockList()
    self.__readData()
    self.blockIndex = 0
    return 1
293 293
def __setBlockList(self):
    '''
    Selects the blocks of the current file within the times defined

    Reads:
        self.fp
        self.startTime
        self.endTime
        self.timezone

    Sets:
        self.blockList      indexes of the blocks with startTime <= t < endTime
        self.blocksPerFile  number of selected blocks
    '''
    grp = self.fp['Data']
    # builtin float replaces the removed numpy.float alias; dataset[()]
    # replaces Dataset.value. Row 0 holds the time of every block.
    thisUtcTime = grp['utctime'][()].astype(float)[0]

    # NOTE(review): hard-coded 5-hour shift, presumably UTC-5 local time - confirm
    if self.timezone == 'lt':
        thisUtcTime -= 5*3600

    thisDatetime = datetime.datetime.fromtimestamp(thisUtcTime[0] + 5*3600)
    thisDate = thisDatetime.date()

    epoch = datetime.datetime(1970, 1, 1)
    startUtcTime = (datetime.datetime.combine(thisDate, self.startTime) - epoch).total_seconds()
    endUtcTime = (datetime.datetime.combine(thisDate, self.endTime) - epoch).total_seconds()

    ind = numpy.where(numpy.logical_and(thisUtcTime >= startUtcTime, thisUtcTime < endUtcTime))[0]

    self.blockList = ind
    self.blocksPerFile = len(ind)

    return
331 331
def __readMetadata(self):
    '''
    Reads the 'Metadata' group of the first file in self.filenameList

    Sets:
        self.listShapes     data name -> array with the five numeric columns
                            of its 'array dimensions' row
        self.listMetaname   names of the remaining metadata datasets
        self.listMeta       values of the remaining metadata datasets
    '''
    filename = self.filenameList[0]

    listMetaname = []
    listMetadata = []
    # Defined up front so a file without 'array dimensions' does not end in
    # an UnboundLocalError
    listShapes = {}

    fp = h5py.File(filename, 'r')
    try:
        gp = fp['Metadata']

        for name in gp.keys():
            if name == 'array dimensions':
                for shapes in gp[name][:]:
                    key = shapes[0]
                    # Under Python 3 the 'S20' field is bytes while group
                    # names are str; under Python 2 bytes is str (no-op)
                    if isinstance(key, bytes) and not isinstance(key, str):
                        key = key.decode()
                    listShapes[key] = numpy.array([shapes[1], shapes[2], shapes[3], shapes[4], shapes[5]])
            else:
                # dataset[()] replaces Dataset.value
                listMetaname.append(name)
                listMetadata.append(gp[name][()])
    finally:
        # Release the handle even when the metadata cannot be read
        fp.close()

    self.listShapes = listShapes
    self.listMetaname = listMetaname
    self.listMeta = listMetadata

    return
385 385
def __readData(self):
    '''
    Loads every entry of the 'Data' group of the current file.

    Sets self.listDataname (entry names) and self.listData (arrays returned
    by __setDataArray() for each entry).
    '''
    dataGroup = self.fp['Data']

    names = [entry[0] for entry in dataGroup.items()]
    arrays = [self.__setDataArray(dataGroup[key], self.listShapes[key]) for key in names]

    self.listDataname = names
    self.listData = arrays
    return
401 401
def __setDataArray(self, dataset, shapes):
    '''
    Loads the selected blocks (self.blockList) of one 'Data' entry.

    Inputs:
        dataset :   HDF5 dataset (mode 0) or group of 'tableN' datasets
        shapes  :   (nDimensions, dim2, dim1, dim0, mode) row of the
                    'array dimensions' table

    Return:
        mode 0  :   values of the selected blocks
        mode 2  :   rows of 'table0' from the first to the last selected block
        mode 1  :   array whose first axis is the block
    '''
    nDims = shapes[0]

    nDim2 = shapes[1]      #Dimension 0

    nDim1 = shapes[2]      #Dimension 1, number of Points or Parameters

    nDim0 = shapes[3]      #Dimension 2, number of samples or ranges

    mode = shapes[4]        #Mode of storing

    blockList = self.blockList

    blocksPerFile = self.blocksPerFile

    #Depending on what mode the data was stored
    if mode == 0:       #Divided in channels
        # builtin float replaces the removed numpy.float alias; dataset[()]
        # replaces Dataset.value
        return dataset[()].astype(float)[0][blockList]

    if mode == 2:       #Concatenated in a table
        arrayData = dataset['table0'][()]
        #Selecting part of the dataset
        utctime = arrayData[:,0]
        # First row of every distinct time, i.e. of every block
        indices = numpy.unique(utctime, return_index=True)[1]

        if blockList.size != indices.size:
            if blockList.size == 0:
                return arrayData[0:0,:]
            indMin = indices[blockList[0]]
            # The slice must end after the LAST selected block; the former
            # blockList[1] failed with one block and truncated with more
            lastBlock = blockList[-1]
            if lastBlock + 1 >= indices.size:
                arrayData = arrayData[indMin:,:]
            else:
                indMax = indices[lastBlock + 1]
                arrayData = arrayData[indMin:indMax,:]
        return arrayData

    #Divided in parameter
    strds = 'table'
    nDatas = nDim1

    # No dimension
    if nDims == 0:
        return dataset[()].astype(float)[0][blockList]

    # One dimension: a single 'table0' with one row per block. Previously
    # this case fell into the three-dimension branch and returned no data.
    if nDims == 1:
        return numpy.asarray(dataset[strds + '0'][()], dtype=float)[blockList,:]

    # Two dimensions
    if nDims == 2:
        arrayData = numpy.zeros((blocksPerFile,nDim1,nDim0))

        for i in range(nDatas):
            data = dataset[strds + str(i)][()]
            arrayData[:,i,:] = data[blockList,:]

        return arrayData

    # Three dimensions
    arrayData = numpy.zeros((blocksPerFile,nDim2,nDim1,nDim0))
    for i in range(nDatas):

        data = dataset[strds + str(i)][()]

        # In these tables the block is the last axis
        for b in range(blockList.size):
            arrayData[b,:,i,:] = data[:,:,blockList[b]]

    return arrayData
403 466
def __setDataOut(self):
    '''
    Copies the metadata and the block at self.blockIndex into self.dataOut.

    Every metadata value and every data entry becomes an attribute of
    self.dataOut named after it.
    '''
    output = self.dataOut
    current = self.blockIndex

    for metaName, metaValue in zip(self.listMetaname, self.listMeta):
        setattr(output, metaName, metaValue)

    for dataName, blocks in zip(self.listDataname, self.listData):
        shapeInfo = self.listShapes[dataName]
        nShapes = shapeInfo[0]
        mode = shapeInfo[4]

        if nShapes == 1:
            value = blocks[current]
        elif nShapes > 1:
            value = blocks[current,:]
        elif mode == 0:
            value = blocks[current]
        elif mode == 2:
            #Mode Meteors
            value = self.__selectDataMode2(blocks, current)
        else:
            # Nothing is set for any other combination
            continue

        setattr(output, dataName, value)
    return
431 494
def __selectDataMode2(self, data, blockIndex):
    '''
    Returns the rows of a table whose first column holds the
    blockIndex-th smallest distinct value of that column.
    '''
    # The inverse of numpy.unique numbers every row with the rank of its
    # first-column value
    blockOfRow = numpy.unique(data[:,0], return_inverse=True)[1]
    return data[blockOfRow == blockIndex,:]
439 502
def getData(self):
    '''
    Delivers the next block through self.dataOut.

    When the current file is exhausted the next one is opened. Files in
    which no block was selected are skipped; previously such a file made
    __setDataOut() index an empty array.

    Returns 0 (with dataOut.flagNoData = True) when there are no more
    files, None otherwise.
    '''
    while self.blockIndex == self.blocksPerFile:
        if not self.__setNextFileOffline():
            self.dataOut.flagNoData = True
            return 0

    self.__setDataOut()
    self.dataOut.flagNoData = False

    self.blockIndex += 1

    return
462 525
def run(self, **kwargs):
    '''
    Entry point of the unit: configures the reader with kwargs on the first
    call, then delivers one block per call through getData().
    '''
    needsSetup = not self.isConfig
    if needsSetup:
        self.setup(**kwargs)
        self.isConfig = True

    self.getData()
473 536
class ParamWriter(Operation):
    '''
    HDF5 Writer, stores parameters data in HDF5 format files

    path:           path where the files will be stored

    blocksPerFile:  number of blocks that will be saved in per HDF5 format file
                    (setup() uses 10 when it is not given)

    mode:           selects the data stacking mode: 0 single values,
                    1 parameters (default), 2 table (for meteors)

    metadataList:   list of attributes that will be stored as metadata

    dataList:       list of attributes that will be stores as data

    '''


    # Extension of the files that are created
    ext = ".hdf5"

    # First character of the data file names
    optchar = "D"

    # First character of the metadata file names
    metaoptchar = "M"

    # Name of the last metadata file created by createMetadataFile()
    metaFile = None

    # Full name of the data file being written
    filename = None

    path = None

    # NOTE(review): read by createMetadataFile()/setNextFile() but never
    # assigned on the instance in the code visible here
    setFile = None

    # Handle of the data file, opened by readBlock() and closed by writeBlock()
    fp = None

    grp = None

    # List of dataset handles, parallel to dsList
    ds = None

    # True until the first block has been written to the current file
    firsttime = True

    #Configurations

    blocksPerFile = None

    # Number of blocks already written to the current file
    blockIndex = None

    dataOut = None

    #Data Arrays

    dataList = None

    metadataList = None

    #     arrayDim = None

    dsList = None   #List of dictionaries with dataset properties

    # Structured array written as the 'array dimensions' metadata table
    tableDim = None

    #     dtype = [('arrayName', 'S20'),('nChannels', 'i'), ('nPoints', 'i'), ('nSamples', 'i'),('mode', 'b')]

    # Layout of one row of the 'array dimensions' table
    dtype = [('arrayName', 'S20'),('nDimensions', 'i'), ('dim2', 'i'), ('dim1', 'i'),('dim0', 'i'),('mode', 'b')]

    # Day of the year of the data being written (see timeFlag())
    currentDay = None

    # utctime of the last block seen by timeFlag()
    lastTime = None
540 603
def __init__(self):
    '''
    Initializes the base Operation and marks the writer as not configured,
    so that the first run() call executes setup().
    '''
    Operation.__init__(self)
    self.isConfig = False
546 609
547 610 def setup(self, dataOut, **kwargs):
548 611
549 612 self.path = kwargs['path']
550 613
551 614 if kwargs.has_key('blocksPerFile'):
552 615 self.blocksPerFile = kwargs['blocksPerFile']
553 616 else:
554 617 self.blocksPerFile = 10
555 618
556 619 self.metadataList = kwargs['metadataList']
557 620 self.dataList = kwargs['dataList']
558 621 self.dataOut = dataOut
559 622
560 623 if kwargs.has_key('mode'):
561 624 mode = kwargs['mode']
562 625
563 626 if type(mode) == int:
564 627 mode = numpy.zeros(len(self.dataList)) + mode
565 628 else:
566 629 mode = numpy.ones(len(self.dataList))
567 630
568 631 self.mode = mode
569 632
570 633 arrayDim = numpy.zeros((len(self.dataList),5))
571 634
572 635 #Table dimensions
573 636 dtype0 = self.dtype
574 637 tableList = []
575 638
576 639 #Dictionary and list of tables
577 640 dsList = []
578 641
579 642 for i in range(len(self.dataList)):
580 643 dsDict = {}
581 644 dataAux = getattr(self.dataOut, self.dataList[i])
582 645 dsDict['variable'] = self.dataList[i]
583 646 #--------------------- Conditionals ------------------------
584 647 #There is no data
585 648 if dataAux == None:
586 649 return 0
587 650
588 651 #Not array, just a number
589 652 #Mode 0
590 653 if type(dataAux)==float or type(dataAux)==int:
591 654 dsDict['mode'] = 0
592 655 dsDict['nDim'] = 0
593 656 arrayDim[i,0] = 0
594 657 dsList.append(dsDict)
595 658
596 659 #Mode 2: meteors
597 660 elif mode[i] == 2:
598 661 # dsDict['nDim'] = 0
599 662 dsDict['dsName'] = 'table0'
600 663 dsDict['mode'] = 2 # Mode meteors
601 664 dsDict['shape'] = dataAux.shape[-1]
602 665 dsDict['nDim'] = 0
603 666 dsDict['dsNumber'] = 1
604 667
605 668 arrayDim[i,3] = dataAux.shape[-1]
606 669 arrayDim[i,4] = mode[i] #Mode the data was stored
607 670
608 671 dsList.append(dsDict)
609 672
610 673 #Mode 1
611 674 else:
612 675 arrayDim0 = dataAux.shape #Data dimensions
613 676 arrayDim[i,0] = len(arrayDim0) #Number of array dimensions
614 677 arrayDim[i,4] = mode[i] #Mode the data was stored
615 678
616 679 strtable = 'table'
617 680 dsDict['mode'] = 1 # Mode parameters
618 681
619 682 # Three-dimension arrays
620 683 if len(arrayDim0) == 3:
621 684 arrayDim[i,1:-1] = numpy.array(arrayDim0)
622 685 nTables = int(arrayDim[i,2])
623 686 dsDict['dsNumber'] = nTables
624 687 dsDict['shape'] = arrayDim[i,2:4]
625 688 dsDict['nDim'] = 3
626 689
627 690 for j in range(nTables):
628 691 dsDict = dsDict.copy()
629 692 dsDict['dsName'] = strtable + str(j)
630 693 dsList.append(dsDict)
631 694
632 695 # Two-dimension arrays
633 696 elif len(arrayDim0) == 2:
634 697 arrayDim[i,2:-1] = numpy.array(arrayDim0)
635 698 nTables = int(arrayDim[i,2])
636 699 dsDict['dsNumber'] = nTables
637 700 dsDict['shape'] = arrayDim[i,3]
638 701 dsDict['nDim'] = 2
639 702
640 703 for j in range(nTables):
641 704 dsDict = dsDict.copy()
642 705 dsDict['dsName'] = strtable + str(j)
643 706 dsList.append(dsDict)
644 707
645 708 # One-dimension arrays
646 709 elif len(arrayDim0) == 1:
647 710 arrayDim[i,3] = arrayDim0[0]
648 711 dsDict['shape'] = arrayDim0[0]
649 712 dsDict['dsNumber'] = 1
650 713 dsDict['dsName'] = strtable + str(0)
651 714 dsDict['nDim'] = 1
652 715 dsList.append(dsDict)
653 716
654 717 table = numpy.array((self.dataList[i],) + tuple(arrayDim[i,:]),dtype = dtype0)
655 718 tableList.append(table)
656 719
657 720 # self.arrayDim = arrayDim
658 721 self.dsList = dsList
659 722 self.tableDim = numpy.array(tableList, dtype = dtype0)
660 723 self.blockIndex = 0
661 724
662 725 timeTuple = time.localtime(dataOut.utctime)
663 726 self.currentDay = timeTuple.tm_yday
664 727 return 1
665 728
def putMetadata(self):
    '''
    Creates a metadata file and writes the metadata into it.

    The file is closed even when writeMetadata() fails.
    '''
    fp = self.createMetadataFile()
    try:
        self.writeMetadata(fp)
    finally:
        fp.close()
    return
672 735
def createMetadataFile(self):
    '''
    Creates the next metadata file of the day folder and returns it open.

    The file is named <metaoptchar>YYYYDDDSSS<ext> inside the folder
    dYYYYDDD of self.path, where SSS is one more than the set number of the
    last metadata file already there (000 when there is none). The base
    name is stored in self.metaFile.
    '''
    ext = self.ext
    path = self.path

    timeTuple = time.localtime(self.dataOut.utctime)

    subfolder = 'd%4.4d%3.3d' % (timeTuple.tm_year,timeTuple.tm_yday)
    fullpath = os.path.join( path, subfolder )

    setFile = -1    #set counter; stays -1 when no previous file is found

    if not( os.path.exists(fullpath) ):
        # makedirs also creates self.path when it does not exist yet
        os.makedirs(fullpath)
    else:
        # Filter before testing for emptiness: a folder that held only data
        # files used to raise IndexError on filesList[-1]
        filesList = sorted( os.listdir( fullpath ), key=str.lower )
        filesList = [k for k in filesList if 'M' in k]
        if len( filesList ) > 0:
            filen = filesList[-1]
            # the filename must have the following format
            # 0 1234 567 89A BCDE (hex)
            # x YYYY DDD SSS .ext
            if isNumber( filen[8:11] ):
                setFile = int( filen[8:11] )    #continue from the set of the last file

    setFile += 1

    metaName = '%s%4.4d%3.3d%3.3d%s' % (self.metaoptchar,
                                        timeTuple.tm_year,
                                        timeTuple.tm_yday,
                                        setFile,
                                        ext )

    filename = os.path.join( path, subfolder, metaName )
    self.metaFile = metaName
    #Setting HDF5 File
    fp = h5py.File(filename,'w')

    return fp
724 787
def writeMetadata(self, fp):
    '''
    Writes the "Metadata" group into the open file fp: the
    'array dimensions' table (self.tableDim) plus one dataset per name in
    self.metadataList, taken from the attribute of self.dataOut with that name.
    '''
    metaGroup = fp.create_group("Metadata")
    metaGroup.create_dataset('array dimensions', data = self.tableDim, dtype = self.dtype)

    for attrName in self.metadataList:
        metaGroup.create_dataset(attrName, data=getattr(self.dataOut, attrName))
    return
733 796
def timeFlag(self):
    '''
    Tells whether a new file has to be started for the current block.

    Returns True when the local day of dataOut.utctime differs from
    self.currentDay or when more than three hours passed since the
    previous block, False otherwise. Updates self.currentDay and
    self.lastTime.
    '''
    currentTime = self.dataOut.utctime

    if self.lastTime is None:
        self.lastTime = currentTime

    #Day
    dataDay = time.localtime(currentTime).tm_yday

    #Time
    timeDiff = currentTime - self.lastTime

    # Refreshed on every call. It used to be skipped on a day change, so the
    # block after midnight was compared with a stale time and could trigger
    # a second, spurious split.
    self.lastTime = currentTime

    #New file if the day changes or if the gap between blocks exceeds 3 hours
    if dataDay != self.currentDay:
        self.currentDay = dataDay
        return True

    return bool(timeDiff > 3*60*60)
757 820
def setNextFile(self):
    '''
    Creates the next data file and its empty datasets.

    The file is named <optchar>YYYYDDDSSS<ext> inside the folder dYYYYDDD
    of self.path, where SSS is one more than the set number of the last
    data file already there (000 when there is none). The metadata and one
    dataset per entry of self.dsList are written, then the file is closed;
    readBlock() opens it again for every block.

    Sets self.filename, self.ds, self.data, self.firsttime and
    self.blockIndex.
    '''
    ext = self.ext
    path = self.path

    timeTuple = time.localtime(self.dataOut.utctime)
    subfolder = 'd%4.4d%3.3d' % (timeTuple.tm_year,timeTuple.tm_yday)

    fullpath = os.path.join( path, subfolder )

    setFile = -1    #set counter; stays -1 when no previous file is found

    if os.path.exists(fullpath):
        filesList = [k for k in os.listdir( fullpath ) if 'D' in k]
        if len( filesList ) > 0:
            filen = sorted( filesList, key=str.lower )[-1]
            # the filename must have the following format
            # 0 1234 567 89A BCDE (hex)
            # x YYYY DDD SSS .ext
            if isNumber( filen[8:11] ):
                setFile = int( filen[8:11] )    #continue from the set of the last file
    else:
        # makedirs also creates self.path when it does not exist yet
        os.makedirs(fullpath)

    setFile += 1

    dataName = '%s%4.4d%3.3d%3.3d%s' % (self.optchar,
                                        timeTuple.tm_year,
                                        timeTuple.tm_yday,
                                        setFile,
                                        ext )

    filename = os.path.join( path, subfolder, dataName )

    ds = []
    data = []
    dsList = self.dsList

    #Setting HDF5 File
    fp = h5py.File(filename,'w')
    try:
        #write metadata
        self.writeMetadata(fp)
        #Write data
        grp = fp.create_group("Data")

        i = 0
        while i < len(dsList):
            dsInfo = dsList[i]

            #Single values
            if dsInfo['mode'] == 0:
                ds0 = grp.create_dataset(dsInfo['variable'], (1,1), maxshape=(1,self.blocksPerFile) , chunks = True, dtype=numpy.float64)
                ds.append(ds0)
                data.append([])
                i += 1

            #Table (meteors)
            elif dsInfo['mode'] == 2:
                nCols = int(dsInfo['shape'])
                grp0 = grp.create_group(dsInfo['variable'])
                ds0 = grp0.create_dataset(dsInfo['dsName'], (1,nCols), data = numpy.zeros((1,nCols)) , maxshape=(None,nCols), chunks=True)
                ds.append(ds0)
                data.append([])
                i += 1

            #Parameters: the next dsNumber entries belong to this variable
            elif dsInfo['mode'] == 1:
                grp0 = grp.create_group(dsInfo['variable'])

                for j in range(dsInfo['dsNumber']):
                    dsInfo = dsList[i]
                    tableName = dsInfo['dsName']

                    # Dimensions are cast to int because they may have been
                    # stored as floats
                    if dsInfo['nDim'] == 3:
                        nRows = int(dsInfo['shape'][0])
                        nCols = int(dsInfo['shape'][1])
                        ds0 = grp0.create_dataset(tableName, (nRows,nCols,1) , data = numpy.zeros((nRows,nCols,1)), maxshape = (None,nCols,None), chunks=True)
                    else:
                        nCols = int(dsInfo['shape'])
                        ds0 = grp0.create_dataset(tableName, (1,nCols), data = numpy.zeros((1,nCols)) , maxshape=(None,nCols), chunks=True)

                    ds.append(ds0)
                    data.append([])
                    i += 1

            else:
                # An unknown mode used to leave the loop spinning forever
                raise ValueError("Unknown storage mode: %s" % (dsInfo['mode'],))

        fp.flush()
    finally:
        # Release the handle even when a dataset cannot be created
        fp.close()

    #Saving variables
    print('Writing the file: %s' % filename)
    self.filename = filename
    self.ds = ds
    self.data = data
    self.firsttime = True
    self.blockIndex = 0
    return
868 931
def putData(self):
    '''
    Writes the current block, starting a new file first when the current
    one is full or when timeFlag() asks for it.
    '''
    fileIsFull = self.blockIndex == self.blocksPerFile
    # timeFlag() is only evaluated when the file is not full
    if fileIsFull or self.timeFlag():
        self.setNextFile()

    self.readBlock()
    self.setBlock()         #Prepare data to be written
    self.writeBlock()       #Write data

    return
880 943
def readBlock(self):
    '''
    Opens the current data file for update and refreshes the dataset handles.

    Sets:
        self.fp     the open file (closed again by writeBlock())
        self.grp    its "Data" group
        self.ds     one dataset handle per entry of self.dsList
    '''
    entries = self.dsList
    handles = self.ds
    #Setting HDF5 File
    h5file = h5py.File(self.filename,'r+')
    dataGroup = h5file["Data"]

    pos = 0
    while pos < len(entries):
        entry = entries[pos]

        if entry['mode'] == 0:
            # Single values are stored directly inside "Data"
            handles[pos] = dataGroup[entry['variable']]
            pos += 1
            continue

        # The next dsNumber entries are tables of the same variable group
        varGroup = dataGroup[entry['variable']]
        for j in range(entry['dsNumber']):
            entry = entries[pos]
            handles[pos] = varGroup[entry['dsName']]
            pos += 1

    self.fp = h5file
    self.grp = dataGroup
    self.ds = handles

    return
919 982
def setBlock(self):
    '''
    Prepares the data of the current block, one item per dataset.

    Fills self.data (parallel to self.dsList) from the attributes of
    self.dataOut: whole values for mode 0, mode 2 and one-dimension data,
    one row per dataset for two-dimension arrays and one [:,j,:] slice per
    dataset for three-dimension arrays.
    '''
    entries = self.dsList
    blockData = self.data
    pos = 0

    while pos < len(entries):
        entry = entries[pos]
        value = getattr(self.dataOut, entry['variable'])

        mode = entry['mode']
        nDim = entry['nDim']

        if mode == 0 or mode == 2 or nDim == 1:
            blockData[pos] = value
            pos += 1
            continue

        if nDim == 2:
            for j in range(entry['dsNumber']):
                blockData[pos] = value[j,:]
                pos += 1
        elif nDim == 3:
            for j in range(entry['dsNumber']):
                blockData[pos] = value[:,j,:]
                pos += 1

    self.data = blockData
    return
956 1019
def writeBlock(self):
    '''
    Saves the block in the HDF5 file

    Writes every item of self.data into the dataset of self.ds at the same
    position, then increments self.blockIndex, clears self.firsttime and
    closes self.fp (opened by readBlock()).
    '''
    dsList = self.dsList

    for i in range(len(self.ds)):
        dsInfo = dsList[i]
        nDim = dsInfo['nDim']
        mode = dsInfo['mode']

        # First time
        # The datasets still have their creation size, so the first block
        # overwrites their whole content
        if self.firsttime:
            #                 self.ds[i].resize(self.data[i].shape)
            #                 self.ds[i][self.blockIndex,:] = self.data[i]
            if type(self.data[i]) == numpy.ndarray:

                if nDim == 3:
                    # A trailing axis of length 1 is added; later blocks are
                    # appended along that axis
                    self.data[i] = self.data[i].reshape((self.data[i].shape[0],self.data[i].shape[1],1))
                    self.ds[i].resize(self.data[i].shape)
                if mode == 2:
                    # The table takes the shape of the first block
                    self.ds[i].resize(self.data[i].shape)
            self.ds[i][:] = self.data[i]
        else:

            # From second time
            # Meteors!
            if mode == 2:
                # The rows of the block are appended below the existing ones
                dataShape = self.data[i].shape
                dsShape = self.ds[i].shape
                self.ds[i].resize((self.ds[i].shape[0] + dataShape[0],self.ds[i].shape[1]))
                self.ds[i][dsShape[0]:,:] = self.data[i]
            # No dimension
            elif mode == 0:
                # One more column, filled with the new value
                self.ds[i].resize((self.ds[i].shape[0], self.ds[i].shape[1] + 1))
                self.ds[i][0,-1] = self.data[i]
            # One dimension
            elif nDim == 1:
                self.ds[i].resize((self.ds[i].shape[0] + 1, self.ds[i].shape[1]))
                self.ds[i][-1,:] = self.data[i]
            # Two dimension
            elif nDim == 2:
                # NOTE(review): indexes the new row with blockIndex while the
                # other branches use -1 - presumably equivalent, confirm
                self.ds[i].resize((self.ds[i].shape[0] + 1,self.ds[i].shape[1]))
                self.ds[i][self.blockIndex,:] = self.data[i]
            # Three dimensions
            elif nDim == 3:
                # The block is appended along the last axis
                self.ds[i].resize((self.ds[i].shape[0],self.ds[i].shape[1],self.ds[i].shape[2]+1))
                self.ds[i][:,:,-1] = self.data[i]

    self.firsttime = False
    self.blockIndex += 1

    #Close to save changes
    self.fp.flush()
    self.fp.close()
    return
1013 1076
def run(self, dataOut, **kwargs):
    '''
    Entry point of the operation: stores the block held by dataOut.

    On the first call with data available, the writer is configured with
    kwargs and the first file is created. While setup() reports that there
    is no data yet, nothing is written.
    '''
    if not self.isConfig:
        if not self.setup(dataOut, **kwargs):
            return

        self.isConfig = True
        self.setNextFile()

    self.putData()
    return
1028 1091
1029 1092
General Comments 0
You need to be logged in to leave comments. Login now