# jroIO_HDF5.py
# First Draft HDF5 IO module
# File last commit: r514:f095b959308c
# 656 lines | 18.7 KiB | text/x-python
import numpy
import time
import os
import h5py
import re
from model.data.jrodata import *
from model.proc.jroproc_base import ProcessingUnit, Operation
from model.io.jroIO_base import *
class HDF5Reader(ProcessingUnit):
    """
    Processing unit that reads data blocks back from HDF5 files.

    The expected layout is the one produced by HDF5Writer in this module:

    * a data file holding a "Data" group whose "metadata" attribute names the
      metadata file (relative to the root path),
    * one sub-group per data attribute, each with datasets "channel0",
      "channel1", ...; per-channel datasets are either
      (nPoints, nSamples, nBlocks) or (nBlocks, n),
    * a "time" dataset with one timestamp per block,
    * a metadata file holding a "Metadata" group with one dataset per
      attribute to copy onto dataOut.

    Only offline reading is implemented.
    """

    ext = ".hdf5"
    optchar = "D"

    timezone = None
    fileIndex = None
    blockIndex = None
    path = None

    # HDF5 file
    fpMetadata = None
    listMetaname = None
    listMetadata = None
    fp = None

    # dataOut reconstruction
    dataOut = None
    nChannels = None    # Dimension 0
    nPoints = None      # Dimension 1, number of points or parameters
    nSamples = None     # Dimension 2, number of samples or ranges

    # State read by the methods below; declared here so that it always exists.
    filenameList = None
    datetimeList = None
    listDataname = None
    listData = None
    timeList = None
    blocksPerFile = 0
    flagNoMoreFiles = 0
    flagIsNewFile = 0

    def __init__(self):
        """Initialise the base unit and the reader state (mirrors HDF5Writer.__init__)."""
        ProcessingUnit.__init__(self)
        self.isConfig = False
        self.flagNoMoreFiles = 0
        self.flagIsNewFile = 0
        self.filenameList = []
        self.datetimeList = []
        return

    def setup(self, path=None,
              startDate=None,
              endDate=None,
              startTime=datetime.time(0,0,0),
              endTime=datetime.time(23,59,59),
              walk=True,
              timezone='ut',
              all=0,
              online=False,
              ext=None):
        """
        Locate the data files and open the first one.

        Inputs:
            path      : root folder, or several folders separated by ','
            startDate : first date to read (datetime.date), used when walk is True
            endDate   : last date to read (datetime.date), used when walk is True
            startTime : start of the daily time window (datetime.time)
            endTime   : end of the daily time window (datetime.time), exclusive
            walk      : if True, look for day-of-year sub-folders inside path
            timezone  : 'ut' or 'lt'; 'lt' shifts file times by -300 minutes
            all       : unused
            online    : online reading is not implemented
            ext       : file extension; defaults to the class attribute

        Exits the process with status -1 when no file is found.
        """
        if path is None:
            raise ValueError("path is required")

        if ext is None:
            ext = self.ext

        self.timezone = timezone
        self.path = path
        self.filenameList = []
        self.datetimeList = []

        if online:
            # The original called a private online search that does not exist.
            raise NotImplementedError("HDF5Reader does not support online reading")

        # Offline file search
        self.__searchFilesOffline(path, startDate, endDate, ext, startTime, endTime, walk)

        if not self.filenameList:
            print("There is no files into the folder: %s"%(path))
            sys.exit(-1)

        self.fileIndex = -1
        self.blockIndex = 0
        # Opens the file and loads its metadata and data.
        self.__setNextFileOffline()
        return

    def __findFolders(self, path, startDate, endDate, walk):
        """Return the list of folders that may hold data files for the date range."""
        multi_path = path.split(',')

        if not walk:
            return list(multi_path)

        if startDate is None or endDate is None:
            raise ValueError("startDate and endDate are required when walk is True")

        pathList = []
        for single_path in multi_path:
            dirList = [thisPath for thisPath in os.listdir(single_path)
                       if os.path.isdir(os.path.join(single_path, thisPath))
                       and isDoyFolder(thisPath)]

            # A root without day folders must not abort the search in the others.
            if not dirList:
                continue

            thisDate = startDate
            while thisDate <= endDate:
                timeTuple = thisDate.timetuple()
                pattern = '?%4.4d%3.3d*' % (timeTuple.tm_year, timeTuple.tm_yday)
                for match in fnmatch.filter(dirList, pattern):
                    pathList.append(os.path.join(single_path, match))
                thisDate += datetime.timedelta(1)

        return pathList

    def __listDataFiles(self, folder, ext):
        """Return the sorted names of the non-hidden files in folder ending with ext."""
        try:
            names = os.listdir(folder)
        except OSError:
            return []

        return sorted(name for name in fnmatch.filter(names, "*%s" % ext)
                      if not name.startswith('.'))

    def __searchFilesOffline(self,
                             path,
                             startDate,
                             endDate,
                             ext,
                             startTime=datetime.time(0,0,0),
                             endTime=datetime.time(23,59,59),
                             walk=True):
        """
        Fill self.filenameList and self.datetimeList with the files whose first
        timestamp lies inside [startTime, endTime).

        Return:
            (pathList, filenameList), or (None, None) when nothing was found.
        """
        pathList = self.__findFolders(path, startDate, endDate, walk)

        if not pathList:
            print("Any folder was found for the date range: %s-%s" %(startDate, endDate))
            return None, None

        print("%d folder(s) was(were) found for the date range: %s - %s" %(len(pathList), startDate, endDate))

        # Folders are visited in the order of their first data file. Folders
        # without data files are skipped instead of raising IndexError.
        folders = []
        for thisPath in pathList:
            fileList = self.__listDataFiles(thisPath, ext)
            if fileList:
                folders.append((fileList[0], thisPath, fileList))
        folders.sort()

        filenameList = []
        datetimeList = []
        for firstFile, thisPath, fileList in folders:
            for thisFile in fileList:
                filename = os.path.join(thisPath, thisFile)
                thisDatetime = self.__isFileinThisTime(filename, startTime, endTime)

                if not thisDatetime:
                    continue

                filenameList.append(filename)
                datetimeList.append(thisDatetime)

        if not filenameList:
            print("Any file was found for the time range %s - %s" %(startTime, endTime))
            return None, None

        print("%d file(s) was(were) found for the time range: %s - %s" %(len(filenameList), startTime, endTime))
        print("")

        for filename, thisDatetime in zip(filenameList, datetimeList):
            print("%s -> [%s]" %(filename, thisDatetime.ctime()))

        self.filenameList = filenameList
        self.datetimeList = datetimeList

        return pathList, filenameList

    def __isFileinThisTime(self, filename, startTime, endTime):
        """
        Check whether a data file starts inside the selected time window.

        Inputs:
            filename  : full name of the HDF5 data file
            startTime : start of the window (datetime.time)
            endTime   : end of the window (datetime.time), exclusive

        Return:
            The datetime of the first block of the file when its time of day
            lies inside the window, otherwise None.

        Exceptions:
            IOError if the file cannot be opened.
        """
        try:
            fp = h5py.File(filename,'r')
        except IOError:
            traceback.print_exc()
            raise IOError("The file %s can't be opened" %(filename))

        # Close the file even when the "Data"/"time" lookup fails.
        try:
            time0 = fp['Data']['time'][0]
        finally:
            fp.close()

        thisDatetime = datetime.datetime.utcfromtimestamp(float(time0))

        if self.timezone == 'lt':
            thisDatetime = thisDatetime - datetime.timedelta(minutes = 300)

        thisTime = thisDatetime.time()

        if not ((startTime <= thisTime) and (endTime > thisTime)):
            return None

        return thisDatetime

    def __checkPath(self):
        """Set self.status to 1 when self.path exists, otherwise to 0."""
        if os.path.exists(self.path):
            self.status = 1
        else:
            self.status = 0
            print('Path:%s does not exists'%self.path)

        return

    def __setNextFileOffline(self):
        """
        Open the next file of self.filenameList and load its content.

        Return:
            1 when a file was opened, 0 when there are no more files.
        """
        idFile = self.fileIndex + 1

        if not (idFile < len(self.filenameList)):
            self.flagNoMoreFiles = 1
            if self.fp is not None:
                self.fp.close()
                self.fp = None
            print("No more Files")
            return 0

        filename = self.filenameList[idFile]
        filePointer = h5py.File(filename,'r')

        # Release the previous file before replacing the handle.
        if self.fp is not None:
            self.fp.close()

        self.flagIsNewFile = 1
        self.fileIndex = idFile
        self.filename = filename
        self.fp = filePointer
        self.blockIndex = 0

        print("Setting the file: %s"%self.filename)

        self.__readMetadata()
        self.__readData()

        return 1

    def __readMetadata(self):
        """Load the metadata file referenced by the current data file."""
        grp = self.fp['Data']
        self.pathMeta = os.path.join(self.path, grp.attrs['metadata'])

        listMetaname = []
        listMetadata = []
        dataType = None

        filePointer = h5py.File(self.pathMeta,'r')
        try:
            groupPointer = filePointer['Metadata']

            for name in groupPointer.keys():
                if name == 'data shape':
                    self.nSamples = 1
                    self.nPoints = 1
                    self.nChannels = 1
                    continue

                # [()] reads scalar and array datasets alike; [:] fails on scalars.
                data = groupPointer[name][()]
                listMetaname.append(name)
                listMetadata.append(data)

                if name == 'type':
                    dataType = data
        finally:
            filePointer.close()

        self.listMetaname = listMetaname
        self.listMetadata = listMetadata

        self.__initDataOut(dataType)
        return

    def __readData(self):
        """
        Load every block of the current data file into memory.

        Fills self.listDataname and self.listData (one block-major array per
        data group), self.timeList and self.blocksPerFile.
        """
        grp = self.fp['Data']

        timeList = grp['time'][()]

        if 'blocksPerFile' in grp:
            nBlocks = int(grp['blocksPerFile'][()])
        else:
            nBlocks = len(timeList)

        listDataname = []
        listData = []

        for name in grp.keys():
            item = grp[name]

            # "time" and "blocksPerFile" are datasets, not data groups.
            if not isinstance(item, h5py.Group):
                continue

            # Numeric sort: a plain sort would put "channel10" before "channel2".
            # assumes every dataset is named "channel<N>", as HDF5Writer does
            channelNames = sorted(item.keys(), key=lambda ch: int(ch[len('channel'):]))
            if not channelNames:
                continue

            channels = numpy.array([item[ch][()] for ch in channelNames])

            if channels.ndim == 4:
                # (nChannels, nPoints, nSamples, nBlocks) -> blocks first
                blocks = numpy.rollaxis(channels, 3, 0)
            else:
                # (nChannels, nBlocks, n) -> blocks first
                blocks = numpy.swapaxes(channels, 0, 1)

            listDataname.append(name)
            listData.append(blocks[:nBlocks])

        self.listDataname = listDataname
        self.listData = listData
        self.timeList = timeList[:nBlocks]
        self.blocksPerFile = nBlocks
        return

    def __initDataOut(self, type):
        """
        Create self.dataOut according to the data type stored in the metadata.

        Inputs:
            type : 'Parameters', 'Spectra', 'Voltage' or 'Correlation'. None
                   falls back to 'Parameters', which is what HDF5Writer in
                   this module stores (data_param, data_error, data_SNR).

        Exceptions:
            ValueError if the type is not one of the names above.
        """
        if type is None:
            typeName = 'Parameters'
        else:
            if hasattr(type, 'decode'):
                type = type.decode('utf-8')
            typeName = str(type)

        if typeName == 'Parameters':
            dataClass = Parameters
        elif typeName == 'Spectra':
            dataClass = Spectra
        elif typeName == 'Voltage':
            dataClass = Voltage
        elif typeName == 'Correlation':
            dataClass = Correlation
        else:
            raise ValueError("Unknown data type in metadata: %s" % typeName)

        # Keep the same object across files so that references to it stay valid.
        if not isinstance(self.dataOut, dataClass):
            self.dataOut = dataClass()

        return

    def __setDataOut(self):
        """Copy the metadata and the current block onto self.dataOut."""
        blockIndex = self.blockIndex

        for name, value in zip(self.listMetaname, self.listMetadata):
            setattr(self.dataOut, name, value)

        for name, blocks in zip(self.listDataname, self.listData):
            setattr(self.dataOut, name, blocks[blockIndex])

        self.dataOut.utctime = self.timeList[blockIndex]
        return

    def __hasNotDataInBuffer(self):
        """Return True when every block of the current file has been delivered."""
        return self.listData is None or self.blockIndex >= self.blocksPerFile

    def getData(self):
        """
        Put the next block into self.dataOut.

        Return:
            0 when there is no more data (dataOut.flagNoData is set to True),
            otherwise the "data" attribute of dataOut, or None if it has none.
        """
        if self.flagNoMoreFiles:
            self.dataOut.flagNoData = True
            print('Process finished')
            return 0

        # A loop, because a file may hold no blocks at all.
        while self.__hasNotDataInBuffer():
            if not self.__setNextFileOffline():
                self.dataOut.flagNoData = True
                return 0

        self.__setDataOut()
        self.dataOut.flagNoData = False

        self.blockIndex += 1

        return getattr(self.dataOut, 'data', None)

    def run(self, **kwargs):
        """Configure the reader on the first call, then read one block."""
        if not(self.isConfig):
            self.setup(**kwargs)
            # setObjProperties is not defined in this class; call it only if
            # the base class provides it -- TODO confirm.
            if hasattr(self, 'setObjProperties'):
                self.setObjProperties()
            self.isConfig = True

        self.getData()

        return
class HDF5Writer(Operation):
    """
    Operation that stores the data of a dataOut object into HDF5 files.

    A metadata file "M<YYYY><DDD><SSS><ext>" is written once into the root
    path. Data files "D<YYYY><DDD><SSS><ext>" go into "d<YYYY><DDD>"
    sub-folders and hold:

    * a "Data" group whose "metadata" attribute is the metadata file name,
    * one sub-group per entry of dataList with datasets "channel0",
      "channel1", ...; blocks are stacked along the last axis for 3D data
      and along the first axis for 2D data,
    * a "time" dataset with dataOut.utctime of every block,
    * a "blocksPerFile" dataset with the number of blocks written so far.
    """

    ext = ".hdf5"
    optchar = "D"
    metaoptchar = "M"

    metaFile = None
    path = None
    setFile = None
    fp = None
    grp = None
    ds = None
    firsttime = True

    # Configurations
    blocksPerFile = None
    blockIndex = None
    dataOut = None

    # Data arrays
    dataList = None
    metadataList = None
    dataDim = None
    data = None

    def __init__(self):
        Operation.__init__(self)
        self.isConfig = False
        return

    def setup(self, dataOut, **kwargs):
        """
        Store the configuration and the dimensions of the data to write.

        Inputs:
            dataOut : object holding the attributes listed in dataList and
                      metadataList, plus utctime
            kwargs  : 'path' (required), 'ext' and 'blocksPerFile' (default 10)

        Exceptions:
            ValueError if a data attribute is neither 2D nor 3D.
        """
        self.path = kwargs['path']
        self.ext = kwargs.get('ext', self.ext)
        self.blocksPerFile = kwargs.get('blocksPerFile', 10)

        self.dataOut = dataOut

        self.metadataList = ['inputUnit','abscissaRange','heightRange']
        self.dataList = ['data_param', 'data_error', 'data_SNR']

        # One row per data attribute; the third column is NaN for 2D data.
        self.dataDim = numpy.zeros((len(self.dataList),3))

        for i, name in enumerate(self.dataList):
            dataDim = getattr(self.dataOut, name).shape

            if len(dataDim) == 3:
                self.dataDim[i,:] = numpy.array(dataDim)
            elif len(dataDim) == 2:
                self.dataDim[i,:-1] = numpy.array(dataDim)
                self.dataDim[i,-1] = numpy.nan
            else:
                raise ValueError("%s must be 2D or 3D, got shape %s" % (name, dataDim))

        self.blockIndex = 0

        return

    def __getNextSetNumber(self, fullpath, prefix):
        """
        Return the set number for the next file to create in fullpath.

        The folder is created when missing. File names follow the format

            0 1234 567 89A BCDE (hex)
            x YYYY DDD SSS .ext

        Only files with the given prefix character and the configured
        extension are considered, so unrelated entries cannot reset the counter.
        """
        if not os.path.exists(fullpath):
            os.makedirs(fullpath)
            return 0

        ext = self.ext.lower()
        prefix = prefix.lower()

        filesList = sorted((name for name in os.listdir(fullpath)
                            if name.lower().endswith(ext)
                            and name[:1].lower() == prefix),
                           key=lambda name: name.lower())

        if not filesList:
            return 0

        # Continue after the set number of the last file.
        try:
            return int(filesList[-1][8:11]) + 1
        except ValueError:
            return 0

    def putMetadata(self):
        """Create the metadata file and write the metadata into it."""
        fp = self.createMetadataFile()
        try:
            self.writeMetadata(fp)
        finally:
            fp.close()
        return

    def createMetadataFile(self):
        """
        Create the metadata file in the root path.

        Return:
            The open h5py.File; its name is kept in self.metaFile.
        """
        timeTuple = time.localtime(self.dataOut.utctime)

        setFile = self.__getNextSetNumber(self.path, self.metaoptchar)

        file = '%s%4.4d%3.3d%3.3d%s' % (self.metaoptchar,
                                        timeTuple.tm_year,
                                        timeTuple.tm_yday,
                                        setFile,
                                        self.ext )

        filename = os.path.join( self.path, file )
        self.metaFile = file

        # Setting HDF5 file
        fp = h5py.File(filename,'w')
        return fp

    def writeMetadata(self, fp):
        """
        Write the attributes listed in metadataList into a "Metadata" group.

        A "type" entry with the class name of dataOut is added, unless
        metadataList already has one, so a reader can rebuild the object.
        """
        grp = fp.create_group("Metadata")

        for name in self.metadataList:
            grp.create_dataset(name, data=getattr(self.dataOut, name))

        if 'type' not in self.metadataList:
            grp.create_dataset('type', data=self.dataOut.__class__.__name__)

        return

    def setNextFile(self):
        """Close the current data file, if any, and create the next one."""
        if self.fp is not None:
            self.fp.close()
            self.fp = None

        timeTuple = time.localtime(self.dataOut.utctime)
        subfolder = 'd%4.4d%3.3d' % (timeTuple.tm_year,timeTuple.tm_yday)

        fullpath = os.path.join( self.path, subfolder )
        setFile = self.__getNextSetNumber(fullpath, self.optchar)

        file = '%s%4.4d%3.3d%3.3d%s' % (self.optchar,
                                        timeTuple.tm_year,
                                        timeTuple.tm_yday,
                                        setFile,
                                        self.ext )

        filename = os.path.join( fullpath, file )

        # Setting HDF5 file
        fp = h5py.File(filename,'w')
        grp = fp.create_group("Data")
        grp.attrs['metadata'] = self.metaFile

        ds = []

        # maxshape makes the datasets resizable; without it they cannot grow.
        for i, name in enumerate(self.dataList):
            grp0 = grp.create_group(name)
            is3D = not numpy.isnan(self.dataDim[i,2])

            for j in range(int(self.dataDim[i,0])):
                tableName = "channel" + str(j)

                if is3D:
                    ds0 = grp0.create_dataset(tableName, (1,1,1), maxshape=(None,None,None), chunks=True)
                else:
                    ds0 = grp0.create_dataset(tableName, (1,1), maxshape=(None,None), chunks=True)

                ds.append(ds0)

        # float64: the default float32 cannot hold epoch seconds accurately.
        ds0 = grp.create_dataset("time", (1,), maxshape=(None,), chunks=True, dtype=numpy.float64)
        ds.append(ds0)

        # Created once and updated in place by writeBlock.
        grp.create_dataset('blocksPerFile', data=0)

        # Saving variables
        print('Writing the file: %s'%filename)
        self.fp = fp
        self.grp = grp
        self.ds = ds
        self.data = []

        self.setFile = setFile
        self.firsttime = True
        self.blockIndex = 0
        return

    def putData(self):
        """
        Write the current block of dataOut.

        A new file is opened only when a block is about to be written, so no
        empty file is left behind after the last block.
        """
        if self.fp is None or self.blockIndex >= self.blocksPerFile:
            self.setNextFile()

        self.setBlock()
        self.writeBlock()

        return

    def setBlock(self):
        """Collect the current block: one array per dataset of self.ds, in the same order."""
        data = []

        for i, name in enumerate(self.dataList):
            dataAux = getattr(self.dataOut, name)

            for j in range(int(self.dataDim[i,0])):
                data.append(numpy.asarray(dataAux[j]))

        data.append(numpy.array([self.dataOut.utctime]))

        self.data = data
        return

    def writeBlock(self):
        """
        Append the block prepared by setBlock to the datasets.

        Each dataset is grown by one block and only the new slice is written.
        """
        blockIndex = self.blockIndex

        for dataset, block in zip(self.ds, self.data):
            rank = len(dataset.shape)

            if rank == 3:
                # 3D data: blocks are stacked along the last axis.
                dataset.resize(tuple(block.shape) + (blockIndex + 1,))
                dataset[:, :, blockIndex] = block
            elif rank == 2:
                # 2D data: blocks are stacked along the first axis.
                dataset.resize((blockIndex + 1,) + tuple(block.shape))
                dataset[blockIndex, :] = block
            else:
                # Time of the block.
                dataset.resize((blockIndex + 1,))
                dataset[blockIndex] = block[0]

        self.blockIndex += 1

        # Update in place: assigning to an existing name in a group fails.
        self.grp['blocksPerFile'][...] = self.blockIndex

        self.firsttime = False
        return

    def close(self):
        """Close the current data file, if any."""
        if self.fp is not None:
            self.fp.close()
            self.fp = None
        return

    def run(self, dataOut, **kwargs):
        """Configure the writer on the first call, then write one block."""
        if not(self.isConfig):
            self.setup(dataOut, **kwargs)
            self.isConfig = True
            self.putMetadata()

        self.putData()
        return