##// END OF EJS Templates
Corrections to HDF5 Writer
Julio Valdez -
r515:87cf9df29125
parent child
Show More
@@ -1,656 +1,678
1 1 import numpy
2 2 import time
3 3 import os
4 4 import h5py
5 5 import re
6 6
7 7 from model.data.jrodata import *
8 8 from model.proc.jroproc_base import ProcessingUnit, Operation
9 9 from model.io.jroIO_base import *
10 10
11 11
class HDF5Reader(ProcessingUnit):
    """
    Processing unit that looks for HDF5 data files in one or more folders,
    opens them one after another and rebuilds a dataOut object from the
    metadata file referenced by each data file.

    NOTE(review): getData() still depends on members that are not defined
    anywhere in this class (__hasNotDataInBuffer, datablock, listDataname,
    listData). They have to be provided before the block-reading path can
    run end to end.
    """

    ext = ".hdf5"

    optchar = "D"

    timezone = None

    fileIndex = None

    blockIndex = None

    path = None

    # HDF5 file handling

    fpMetadata = None

    listMetaname = None

    listMetadata = None

    fp = None

    # dataOut reconstruction

    dataOut = None

    nChannels = None    # Dimension 0

    nPoints = None      # Dimension 1, number of points or parameters

    nSamples = None     # Dimension 2, number of samples or ranges

    def __init__(self):
        """
        Initialise the base unit and the state flags read by run()/getData().
        """
        # Same pattern as HDF5Writer.__init__: initialise the parent first.
        ProcessingUnit.__init__(self)

        self.isConfig = False
        self.flagNoMoreFiles = 0
        self.filenameList = []
        self.datetimeList = []

        return

    def setup(self, path=None,
              startDate=None,
              endDate=None,
              startTime=datetime.time(0,0,0),
              endTime=datetime.time(23,59,59),
              walk=True,
              timezone='ut',
              all=0,
              online=False,
              ext=None):
        """
        Search the data files and open the first one.

        Inputs:
            path        : folder (or comma separated folders) to search
            startDate   : first date of the search range (datetime.date)
            endDate     : last date of the search range (datetime.date)
            startTime   : first time of day accepted (datetime.time)
            endTime     : time of day upper limit, exclusive (datetime.time)
            walk        : if True look inside day-of-year sub folders
            timezone    : 'ut' or 'lt'
            all         : unused, kept for interface compatibility
            online      : online search is not implemented
            ext         : file extension, defaults to self.ext

        The process exits with status -1 when no file is found.
        """
        if ext is None:
            ext = self.ext

        self.timezone = timezone
        self.path = path

        # The search only assigns these lists on success, so give them a
        # defined value before they are tested below.
        self.filenameList = []
        self.datetimeList = []

        if online:
            # The original code called self.__searchFilesOnline(path, walk),
            # which does not exist in this class.
            raise NotImplementedError("Online search is not implemented in HDF5Reader")

        # Offline file search
        self.__searchFilesOffline(path, startDate, endDate, ext, startTime, endTime, walk)

        if not self.filenameList:
            print("There is no files into the folder: %s"%(path))
            sys.exit(-1)

        self.fileIndex = -1

        # Opens the first file and reads its metadata as well.
        self.__setNextFileOffline()

        self.blockIndex = 0

        return

    def __searchFilesOffline(self,
                             path,
                             startDate,
                             endDate,
                             ext,
                             startTime=datetime.time(0,0,0),
                             endTime=datetime.time(23,59,59),
                             walk=True):
        """
        Build the list of data files inside the date and time range.

        Inputs:
            path        : folder or comma separated list of folders
            startDate   : first date of the range (only used when walk is True)
            endDate     : last date of the range (only used when walk is True)
            ext         : file extension to look for
            startTime   : first time of day accepted
            endTime     : time of day upper limit, exclusive
            walk        : if True the files are looked for inside the
                          day-of-year sub folders of every path

        Return:
            (pathList, filenameList) on success, (None, None) when no folder
            or no file matches. On success self.filenameList and
            self.datetimeList are updated.
        """
        pathList = []
        multi_path = path.split(',')

        if not walk:
            pathList.extend(multi_path)
        else:
            for single_path in multi_path:
                dirList = []
                for thisPath in os.listdir(single_path):
                    if not os.path.isdir(os.path.join(single_path, thisPath)):
                        continue
                    if not isDoyFolder(thisPath):
                        continue

                    dirList.append(thisPath)

                if not dirList:
                    # One path without day folders must not cancel the
                    # search in the remaining paths.
                    continue

                thisDate = startDate

                while thisDate <= endDate:
                    year = thisDate.timetuple().tm_year
                    doy = thisDate.timetuple().tm_yday

                    matchlist = fnmatch.filter(dirList, '?' + '%4.4d%3.3d' % (year,doy) + '*')
                    for match in matchlist:
                        pathList.append(os.path.join(single_path, match))

                    thisDate += datetime.timedelta(1)

        if not pathList:
            print("Any folder was found for the date range: %s-%s" %(startDate, endDate))
            return None, None

        print("%d folder(s) was(were) found for the date range: %s - %s" %(len(pathList), startDate, endDate))

        # Folders are visited in the order of their first file name.
        # Folders without matching files are skipped (fileList[0] used to
        # raise IndexError for them).
        folderList = []
        for thisPath in pathList:
            fileList = glob.glob1(thisPath, "*%s" %ext)
            if not fileList:
                continue
            fileList.sort()
            folderList.append((fileList[0], thisPath, fileList))

        folderList.sort(key=lambda item: item[0])

        filenameList = []
        datetimeList = []

        for firstFile, thisPath, fileList in folderList:
            for thisFile in fileList:

                filename = os.path.join(thisPath, thisFile)
                thisDatetime = self.__isFileinThisTime(filename, startTime, endTime)

                if thisDatetime is None:
                    continue

                filenameList.append(filename)
                datetimeList.append(thisDatetime)

        if not filenameList:
            print("Any file was found for the time range %s - %s" %(startTime, endTime))
            return None, None

        print("%d file(s) was(were) found for the time range: %s - %s" %(len(filenameList), startTime, endTime))
        print("")

        for filename, thisDatetime in zip(filenameList, datetimeList):
            print("%s -> [%s]" %(filename, thisDatetime.ctime()))

        self.filenameList = filenameList
        self.datetimeList = datetimeList

        return pathList, filenameList

    def __isFileinThisTime(self, filename, startTime, endTime):
        """
        Check whether the first record of a data file falls inside the
        selected time of day range.

        Inputs:
            filename    : full name of the HDF5 data file
            startTime   : first time of day accepted (datetime.time)
            endTime     : time of day upper limit, exclusive (datetime.time)

        Return:
            datetime.datetime of the first record when startTime <= time < endTime,
            otherwise None.

        Exceptions:
            IOError if the file cannot be opened.
        """
        try:
            fp = h5py.File(filename,'r')
        except IOError:
            traceback.print_exc()
            raise IOError("The file %s can't be opened" %(filename))

        # Close the file even if the 'Data/time' dataset is missing.
        try:
            time0 = fp['Data']['time'][0]
        finally:
            fp.close()

        thisDatetime = datetime.datetime.utcfromtimestamp(float(time0))

        if self.timezone == 'lt':
            # Local time is taken as UTC minus 300 minutes.
            thisDatetime = thisDatetime - datetime.timedelta(minutes = 300)

        thisTime = thisDatetime.time()

        if not ((startTime <= thisTime) and (endTime > thisTime)):
            return None

        return thisDatetime

    def __checkPath(self):
        """
        Set self.status to 1 when self.path exists, otherwise to 0.
        """
        if os.path.exists(self.path):
            self.status = 1
        else:
            self.status = 0
            print('Path:%s does not exists'%self.path)

        return

    def __setNextFileOffline(self):
        """
        Open the next file of self.filenameList and read its metadata.

        Return:
            1 when a new file was opened, 0 when there are no more files
            (self.flagNoMoreFiles is set in that case).
        """
        idFile = self.fileIndex + 1

        if not (idFile < len(self.filenameList)):
            self.flagNoMoreFiles = 1
            print("No more Files")
            return 0

        filename = self.filenameList[idFile]

        filePointer = h5py.File(filename,'r')

        # Release the previous file, it used to stay open.
        if self.fp is not None:
            self.fp.close()

        self.flagIsNewFile = 1
        self.fileIndex = idFile
        self.filename = filename

        self.fp = filePointer

        print("Setting the file: %s"%self.filename)

        self.__readMetadata()

        return 1

    def __readMetadata(self):
        """
        Read the metadata file referenced by the 'metadata' attribute of the
        'Data' group of the current file and store its datasets in
        self.listMetaname / self.listMetadata.

        NOTE(review): the metadata file is looked for directly under
        self.path, so a comma separated multi-path is not supported here.
        """
        grp = self.fp['Data']
        self.pathMeta = os.path.join(self.path, grp.attrs['metadata'])

        listMetaname = []
        listMetadata = []

        filePointer = h5py.File(self.pathMeta,'r')
        try:
            groupPointer = filePointer['Metadata']

            for name in groupPointer:

                if name == 'data shape':
                    self.nSamples = 1
                    self.nPoints = 1
                    self.nChannels = 1
                    continue

                # [()] reads scalar datasets as well as arrays; [:] fails on
                # scalar datasets.
                data = groupPointer[name][()]
                listMetaname.append(name)
                listMetadata.append(data)

                if name == 'type':
                    # Pass the stored value, not the dataset name.
                    self.__initDataOut(data)
        finally:
            filePointer.close()

        # listMetaname used to be written to self.listMetadata by mistake.
        self.listMetaname = listMetaname
        self.listMetadata = listMetadata

        return

    def __initDataOut(self, type):
        """
        Create self.dataOut according to the data type name.

        Inputs:
            type    : 'Parameters', 'Spectra', 'Voltage' or 'Correlation'.
                      Any other value leaves self.dataOut untouched.
        """
        # The value may arrive as bytes or as a numpy string.
        if isinstance(type, bytes):
            typeName = type.decode()
        else:
            typeName = str(type)

        # The original compared the literal 'type', which never matched.
        if typeName == 'Parameters':
            self.dataOut = Parameters()
        elif typeName == 'Spectra':
            self.dataOut = Spectra()
        elif typeName == 'Voltage':
            self.dataOut = Voltage()
        elif typeName == 'Correlation':
            self.dataOut = Correlation()

        return

    def __setDataOut(self):
        """
        Copy the metadata and the current data block into self.dataOut.

        NOTE(review): self.listDataname and self.listData are not assigned
        anywhere in this class; confirm where they are meant to be loaded.
        """
        blockIndex = self.blockIndex

        for name, value in zip(self.listMetaname, self.listMetadata):
            setattr(self.dataOut, name, value)

        # The attribute name is the plain data name; it used to be indexed
        # like an array.
        for name, value in zip(self.listDataname, self.listData):
            setattr(self.dataOut, name, value[blockIndex,:])

        return

    def getData(self):
        """
        Load the next data block into self.dataOut.

        Return:
            self.dataOut.data when a block is available, 0 otherwise
            (self.dataOut.flagNoData is set to True in that case).
        """
        if self.flagNoMoreFiles:
            self.dataOut.flagNoData = True
            print('Process finished')
            return 0

        # NOTE(review): __hasNotDataInBuffer is not defined in this class.
        if self.__hasNotDataInBuffer():
            # The original called the undefined self.__setNextFile().
            if not self.__setNextFileOffline():
                self.dataOut.flagNoData = True
                return 0

        # NOTE(review): self.datablock is never assigned in this class.
        # "is None" avoids the element-wise comparison done by "== None"
        # on numpy arrays.
        if self.datablock is None:
            self.dataOut.flagNoData = True
            return 0

        self.__setDataOut()
        self.dataOut.flagNoData = False

        self.blockIndex += 1

        return self.dataOut.data

    def run(self, **kwargs):
        """
        Configure the reader on the first call and read one data block.

        Inputs:
            kwargs  : passed to setup() on the first call
        """
        if not self.isConfig:
            self.setup(**kwargs)
            # NOTE(review): setObjProperties is not defined in this class;
            # presumably it comes from the base class.
            self.setObjProperties()
            self.isConfig = True

        self.getData()

        return
378 378
class HDF5Writer(Operation):
    """
    Operation that stores selected arrays of dataOut in HDF5 files.

    One metadata file (prefix metaoptchar) is written in the output path when
    the operation is configured. The data goes into files (prefix optchar)
    placed in dYYYYDDD sub folders; a new file is started every
    blocksPerFile blocks.
    """

    ext = ".hdf5"

    optchar = "D"

    metaoptchar = "M"

    metaFile = None

    path = None

    setFile = None

    fp = None

    grp = None

    ds = None

    firsttime = True

    # Configurations

    blocksPerFile = None

    blockIndex = None

    dataOut = None

    # Data arrays

    dataList = None

    metadataList = None

    dataDim = None

    tableDim = None

    # Record layout of the 'array dimensions' table of the metadata file
    dtype = [('arrayName', 'S10'),('nChannels', 'i'), ('nPoints', 'i'), ('nSamples', 'i')]

    def __init__(self):
        """
        Initialise the base operation; the writer is configured lazily in run().
        """
        Operation.__init__(self)
        self.isConfig = False
        return

    def setup(self, dataOut, **kwargs):
        """
        Store the configuration and work out the dimensions of every array
        that is going to be written.

        Inputs:
            dataOut         : object holding the arrays named in self.dataList
            path            : (required keyword) output folder
            ext             : (keyword) file extension, default self.ext
            blocksPerFile   : (keyword) blocks stored per file, default 10

        Exceptions:
            KeyError if 'path' is not given.
        """
        self.path = kwargs['path']

        # A missing 'ext' used to set blocksPerFile instead of keeping the
        # default extension.
        self.ext = kwargs.get('ext', self.ext)
        self.blocksPerFile = int(kwargs.get('blocksPerFile', 10))

        self.dataOut = dataOut

        self.metadataList = ['type','inputUnit','abscissaRange','heightRange']

        self.dataList = ['data_param', 'data_error', 'data_SNR']

        # One row per array: (nChannels, nPoints, nSamples)
        self.dataDim = numpy.zeros((len(self.dataList),3))

        tableList = []

        for i, arrayName in enumerate(self.dataList):

            shape = getattr(self.dataOut, arrayName).shape

            if len(shape) == 3:
                self.dataDim[i,:] = numpy.array(shape)
            else:
                # 2-D arrays are (nChannels, nSamples): a single point
                self.dataDim[i,0] = shape[0]
                self.dataDim[i,2] = shape[1]
                self.dataDim[i,1] = 1

            # The table fields are integers, so convert explicitly.
            tableList.append((arrayName,) + tuple(int(n) for n in self.dataDim[i,:]))

        self.tableDim = numpy.array(tableList, dtype = self.dtype)
        self.blockIndex = 0

        return

    def putMetadata(self):
        """
        Create the metadata file, write the metadata and close the file.
        """
        fp = self.createMetadataFile()
        try:
            self.writeMetadata(fp)
        finally:
            # Do not leave the file open when the write fails.
            fp.close()
        return

    def __getNextSetNumber(self, fullpath):
        """
        Create fullpath when it does not exist and return the set number for
        the next file to be written in it.

        The set number is read from characters 8 to 10 of the last file name
        (sorted ignoring case), following the layout

            0 1234 567 89A BCDE (hex)
            x YYYY DDD SSS .ext

        It restarts at 0 when the folder is new or empty, or when the last
        name does not follow that layout.
        """
        if not os.path.exists(fullpath):
            os.makedirs(fullpath)
            return 0

        filesList = sorted(os.listdir(fullpath), key=str.lower)
        if not filesList:
            return 0

        setField = filesList[-1][8:11]
        if isNumber(setField):
            return int(setField) + 1

        return 0

    def createMetadataFile(self):
        """
        Create the metadata file directly under self.path.

        Return:
            The open h5py.File (write mode). Its name is kept in
            self.metaFile so the data files can reference it.
        """
        timeTuple = time.localtime(self.dataOut.utctime)

        setFile = self.__getNextSetNumber(self.path)

        metaName = '%s%4.4d%3.3d%3.3d%s' % (self.metaoptchar,
                                            timeTuple.tm_year,
                                            timeTuple.tm_yday,
                                            setFile,
                                            self.ext )

        filename = os.path.join( self.path, metaName )
        self.metaFile = metaName

        # Setting HDF5 file
        fp = h5py.File(filename,'w')

        return fp

    def writeMetadata(self, fp):
        """
        Write the 'Metadata' group: the array dimensions table plus one
        dataset per attribute named in self.metadataList.

        Inputs:
            fp  : open h5py.File where the group is created
        """
        grp = fp.create_group("Metadata")
        grp.create_dataset('array dimensions', data = self.tableDim, dtype = self.dtype)

        for metaName in self.metadataList:
            grp.create_dataset(metaName, data=getattr(self.dataOut, metaName))
        return

    def setNextFile(self):
        """
        Close the current data file, if any, and create the next one with
        one growable dataset per channel of every array plus the 'time'
        dataset.
        """
        if self.fp is not None:
            self.fp.close()

        timeTuple = time.localtime(self.dataOut.utctime)
        subfolder = 'd%4.4d%3.3d' % (timeTuple.tm_year,timeTuple.tm_yday)

        fullpath = os.path.join( self.path, subfolder )
        setFile = self.__getNextSetNumber(fullpath)

        dataName = '%s%4.4d%3.3d%3.3d%s' % (self.optchar,
                                            timeTuple.tm_year,
                                            timeTuple.tm_yday,
                                            setFile,
                                            self.ext )

        filename = os.path.join( fullpath, dataName )

        # Setting HDF5 file
        fp = h5py.File(filename,'w')
        grp = fp.create_group("Data")
        grp.attrs['metadata'] = self.metaFile

        # writeBlock() updates this value as an attribute, so it is created
        # as an attribute too (it used to be created as a dataset).
        grp.attrs['blocksPerFile'] = 0

        ds = []
        data = []

        for i, arrayName in enumerate(self.dataList):

            grp0 = grp.create_group(arrayName)

            for j in range(int(self.dataDim[i,0])):
                tableName = "channel" + str(j)

                if self.dataDim[i,1] != 1:
                    initShape = (1,1,1)
                else:
                    initShape = (1,1)

                # maxshape is required so writeBlock() can resize the dataset.
                ds0 = grp0.create_dataset(tableName, initShape,
                                          maxshape = (None,)*len(initShape),
                                          chunks = True)

                ds.append(ds0)
                data.append([])

        # The h5py default (single precision) cannot hold epoch seconds
        # accurately.
        ds0 = grp.create_dataset("time", (1,), maxshape = (None,),
                                 chunks = True, dtype = numpy.float64)
        ds.append(ds0)
        data.append([])

        # Saving variables
        print('Writing the file: %s'%filename)
        self.fp = fp
        self.grp = grp
        self.ds = ds
        self.data = data

        self.setFile = setFile
        self.firsttime = True
        self.blockIndex = 0
        return

    def putData(self):
        """
        Write the current block and start a new file once blocksPerFile
        blocks have been stored.
        """
        self.setBlock()
        self.writeBlock()

        if self.blockIndex == self.blocksPerFile:
            self.setNextFile()
        return

    def setBlock(self):
        """
        Build the arrays to be stored: the current block of every channel
        appended to what the file already holds.

        Arrays with more than one point are stacked along a third axis,
        single point arrays are stacked as rows and the time stamps are
        concatenated.
        """
        data = self.data
        ind = 0

        for i, arrayName in enumerate(self.dataList):
            dataAux = getattr(self.dataOut, arrayName)

            for j in range(int(self.dataDim[i,0])):
                block = dataAux[j,:]

                if self.dataDim[i,1] != 1:
                    block = block.reshape((block.shape[0], block.shape[1], 1))
                    if not self.firsttime:
                        block = numpy.dstack((self.ds[ind][:], block))
                else:
                    # -1 also accepts a (1, nSamples) slice coming from a
                    # 3-D array with a single point.
                    block = block.reshape((1, -1))
                    if not self.firsttime:
                        block = numpy.vstack((self.ds[ind][:], block))

                data[ind] = block
                ind += 1

        # The last entry is the time stamp series
        timeBlock = numpy.array([self.dataOut.utctime])
        if not self.firsttime:
            timeBlock = numpy.hstack((self.ds[ind][:], timeBlock))
        data[ind] = timeBlock

        self.data = data

        return

    def writeBlock(self):
        """
        Save the arrays prepared by setBlock() in the HDF5 file and update
        the block counter.
        """
        for dataset, block in zip(self.ds, self.data):
            dataset.resize(block.shape)
            dataset[...] = block

        self.blockIndex += 1

        self.grp.attrs.modify('blocksPerFile', self.blockIndex)

        self.firsttime = False
        return

    def run(self, dataOut, **kwargs):
        """
        Configure the writer on the first call (metadata file and first data
        file are created then) and store the current block.

        Inputs:
            dataOut : object with the data to be written
            kwargs  : passed to setup() on the first call
        """
        if not self.isConfig:
            self.setup(dataOut, **kwargs)
            self.isConfig = True
            self.putMetadata()
            self.setNextFile()

        self.putData()
        return
656 678
General Comments 0
You need to be logged in to leave comments. Login now