##// END OF EJS Templates
Se comenta el retorno para evitar la interrupción del programa debido al uso del header antiguo; en la última versión de Signal Chain se genera un error.

File last commit:

r724:fb11662812fb
r725:55bd4dcd7f84
Show More
jroIO_HDF5.py
1026 lines | 30.5 KiB | text/x-python | PythonLexer
import numpy
import time
import os
import h5py
import re
from schainpy.model.data.jrodata import *
from schainpy.model.proc.jroproc_base import ProcessingUnit, Operation
from schainpy.model.io.jroIO_base import *
class HDF5Reader(ProcessingUnit):
    """Processing unit that reads schainpy data back from HDF5 files.

    Workflow (see setup/run): locate the data files for a date/time range,
    open them one at a time, load the companion metadata file referenced by
    the 'Data' group attribute 'metadata', and rebuild a Parameters object
    (self.dataOut) one block (record) per getData() call.
    """

    ext = ".hdf5"               # data file extension
    optchar = "D"               # data-file type character used in filenames
    timezone = None             # 'ut' or 'lt'; 'lt' shifts file times by -5 h
    secStart = None             # selection start, seconds since 1970-01-01
    secEnd = None               # selection end, seconds since 1970-01-01
    fileIndex = None            # index of the current file in filenameList
    blockIndex = None           # index of the current block within the file
    blocksPerFile = None        # number of selected blocks in the current file
    path = None

    # List of files
    filenameList = None
    datetimeList = None

    # HDF5 file
    fpMetadata = None
    pathMeta = None             # path of the metadata file currently loaded
    listMetaname = None         # attribute names read from the metadata file
    listMeta = None             # values matching listMetaname
    listDataname = None         # dataOut attribute names read from 'Data'
    listData = None             # arrays matching listDataname
    listShapes = None           # dataset name -> stored dimensions (4 values)
    fp = None                   # h5py.File of the current data file

    # dataOut reconstruction
    dataOut = None
    nRecords = None             # 'nRecords' attribute of the current file

    def __init__(self):
        self.dataOut = self.__createObjByDefault()
        return

    def __createObjByDefault(self):
        """Create the default output object (always a Parameters instance)."""
        dataObj = Parameters()
        return dataObj

    def setup(self,path=None,
                    startDate=None,
                    endDate=None,
                    startTime=datetime.time(0,0,0),
                    endTime=datetime.time(23,59,59),
                    walk=True,
                    timezone='ut',
                    all=0,
                    online=False,
                    ext=None):
        """Configure the reader and build the list of files to process.

        Parameters:
            path      : base directory (comma-separated list allowed)
            startDate : datetime.date of the first day to read
            endDate   : datetime.date of the last day to read
            startTime : datetime.time lower bound of the selection
            endTime   : datetime.time upper bound of the selection
            walk      : when True, search inside day folders below each path
            timezone  : 'ut' or 'lt'; 'lt' shifts stored times by -5 hours
            all       : unused here (kept for interface compatibility)
            online    : select the online search branch
            ext       : file extension; defaults to self.ext ('.hdf5')

        Exits the process (sys.exit) when no file matches the selection.
        """
        if ext==None:
            ext = self.ext
        self.timezone = timezone
#         self.all = all
#         self.online = online
        self.path = path

        # Express the selection window as seconds since the Unix epoch.
        startDateTime = datetime.datetime.combine(startDate,startTime)
        endDateTime = datetime.datetime.combine(endDate,endTime)
        secStart = (startDateTime-datetime.datetime(1970,1,1)).total_seconds()
        secEnd = (endDateTime-datetime.datetime(1970,1,1)).total_seconds()
        self.secStart = secStart
        self.secEnd = secEnd

        if not(online):
            # Offline file search
            self.__searchFilesOffline(path, startDate, endDate, ext, startTime, endTime, secStart, secEnd, walk)
        else:
            # NOTE(review): __searchFilesOnline is not defined in this class;
            # online mode would raise AttributeError — confirm before using.
            self.__searchFilesOnline(path, walk)

        if not(self.filenameList):
            print "There is no files into the folder: %s"%(path)
            sys.exit(-1)

#         self.__getExpParameters()

        self.fileIndex = -1

        self.__setNextFileOffline()

        self.__readMetadata()

        self.blockIndex = 0

        return

    def __searchFilesOffline(self,
                            path,
                            startDate,
                            endDate,
                            ext,
                            startTime=datetime.time(0,0,0),
                            endTime=datetime.time(23,59,59),
                            secStart = 0,
                            secEnd = numpy.inf,
                            walk=True):
        """Collect into self.filenameList/self.datetimeList every file whose
        time vector intersects [secStart, secEnd).

        Returns (pathList, filenameList), or (None, None) when no folder or
        no file matches the date range.
        """
#         self.__setParameters(path, startDate, endDate, startTime, endTime, walk)
#
#         self.__checkPath()
#
#         self.__findDataForDates()
#
#         self.__selectDataForTimes()
#
#         for i in range(len(self.filenameList)):
#             print "%s" %(self.filenameList[i])

        pathList = []

        if not walk:
            # Each comma-separated path is used as-is.
            #pathList.append(path)
            multi_path = path.split(',')
            for single_path in multi_path:
                pathList.append(single_path)
        else:
            # Search 'dYYYYDDD' day folders under each comma-separated path.
            #dirList = []
            multi_path = path.split(',')
            for single_path in multi_path:
                dirList = []
                for thisPath in os.listdir(single_path):
                    if not os.path.isdir(os.path.join(single_path,thisPath)):
                        continue
                    if not isDoyFolder(thisPath):
                        continue
                    dirList.append(thisPath)

                if not(dirList):
                    return None, None

                # Keep only the day folders inside [startDate, endDate].
                thisDate = startDate
                while(thisDate <= endDate):
                    year = thisDate.timetuple().tm_year
                    doy = thisDate.timetuple().tm_yday

                    matchlist = fnmatch.filter(dirList, '?' + '%4.4d%3.3d' % (year,doy) + '*')
                    if len(matchlist) == 0:
                        thisDate += datetime.timedelta(1)
                        continue
                    for match in matchlist:
                        pathList.append(os.path.join(single_path,match))
                    thisDate += datetime.timedelta(1)

        if pathList == []:
            print "Any folder was found for the date range: %s-%s" %(startDate, endDate)
            return None, None

        print "%d folder(s) was(were) found for the date range: %s - %s" %(len(pathList), startDate, endDate)

        filenameList = []
        datetimeList = []
        pathDict = {}
        filenameList_to_sort = []

        # Sort the folders by the name of their first file, so files are
        # visited in chronological order across folders.
        for i in range(len(pathList)):
            thisPath = pathList[i]

            fileList = glob.glob1(thisPath, "*%s" %ext)
            fileList.sort()
            pathDict.setdefault(fileList[0])
            pathDict[fileList[0]] = i
            filenameList_to_sort.append(fileList[0])

        filenameList_to_sort.sort()

        for file in filenameList_to_sort:
            thisPath = pathList[pathDict[file]]

            fileList = glob.glob1(thisPath, "*%s" %ext)
            fileList.sort()

            for file in fileList:
                filename = os.path.join(thisPath,file)
                thisDatetime = self.__isFileinThisTime(filename, secStart, secEnd)
                if not(thisDatetime):
                    continue
                filenameList.append(filename)
                datetimeList.append(thisDatetime)

        if not(filenameList):
            print "Any file was found for the time range %s - %s" %(startTime, endTime)
            return None, None

        print "%d file(s) was(were) found for the time range: %s - %s" %(len(filenameList), startTime, endTime)
        print

        for i in range(len(filenameList)):
            print "%s -> [%s]" %(filenameList[i], datetimeList[i].ctime())

        self.filenameList = filenameList
        self.datetimeList = datetimeList

        return pathList, filenameList

    def __isFileinThisTime(self, filename, startSeconds, endSeconds):
        """
        Check whether a data file contains samples inside the selected range.

        Inputs:
            filename     : full path of the HDF5 data file
            startSeconds : range start, seconds since the Unix epoch
            endSeconds   : range end, seconds since the Unix epoch

        Return:
            The file's first timestamp as a datetime when any value of its
            'Data/time' vector falls in [startSeconds, endSeconds); otherwise
            None.

        Exceptions:
            IOError when the file does not exist or cannot be opened.
        """
        try:
            # NOTE(review): "fp = fp =" is a harmless duplicated assignment.
            fp = fp = h5py.File(filename,'r')
        except IOError:
            traceback.print_exc()
            raise IOError, "The file %s can't be opened" %(filename)

        grp = fp['Data']
        timeAux = grp['time']
        time0 = timeAux[:][0].astype(numpy.float) #Time Vector
        fp.close()

        if self.timezone == 'lt':
            # Local time: shift the stored times by -5 hours (UTC-5).
            time0 -= 5*3600

        boolTimer = numpy.logical_and(time0 >= startSeconds,time0 < endSeconds)
        if not (numpy.any(boolTimer)):
            return None

        thisDatetime = datetime.datetime.utcfromtimestamp(time0[0])
        return thisDatetime

    def __checkPath(self):
        """Set self.status to 1 when self.path exists, 0 otherwise."""
        if os.path.exists(self.path):
            self.status = 1
        else:
            self.status = 0
            print 'Path:%s does not exists'%self.path

        return

    def __setNextFileOffline(self):
        """Open the next file of filenameList, reload metadata and block list.

        Returns 1 on success, 0 when there are no more files.
        """
        idFile = self.fileIndex
        idFile += 1

        if not(idFile < len(self.filenameList)):
            print "No more Files"
            return 0

        filename = self.filenameList[idFile]
        filePointer = h5py.File(filename,'r')

        self.flagIsNewFile = 1
        self.fileIndex = idFile
        self.filename = filename

        self.fp = filePointer

        print "Setting the file: %s"%self.filename

        self.__readMetadata()
        self.__setBlockList()
#         self.nRecords = self.fp['Data'].attrs['blocksPerFile']
        self.nRecords = self.fp['Data'].attrs['nRecords']
        self.blockIndex = 0
        return 1

    def __setBlockList(self):
        '''
        Select which records of the current file fall in the requested range.

        Reads:
            self.fp, self.secStart, self.secEnd, self.timezone
        Sets:
            self.blockList     : indices of the selected records
            self.blocksPerFile : number of selected records
        '''
        filePointer = self.fp
        secStart = self.secStart
        secEnd = self.secEnd

        grp = filePointer['Data']
        timeVector = grp['time'].value.astype(numpy.float)[0]

        if self.timezone == 'lt':
            timeVector -= 5*3600

        ind = numpy.where(numpy.logical_and(timeVector >= secStart , timeVector < secEnd))[0]

        self.blockList = ind
        self.blocksPerFile = len(ind)

        return

    def __readMetadata(self):
        '''
        Load the metadata file referenced by the current data file.

        The metadata path is cached: when the 'metadata' attribute points to
        the file already loaded (self.pathMeta), nothing is re-read.

        Sets:
            self.pathMeta, self.listShapes, self.listMetaname, self.listMeta
        '''
        grp = self.fp['Data']
        pathMeta = os.path.join(self.path, grp.attrs['metadata'])

        if pathMeta == self.pathMeta:
            return
        else:
            self.pathMeta = pathMeta

        filePointer = h5py.File(self.pathMeta,'r')
        groupPointer = filePointer['Metadata']

        listMetaname = []
        listMetadata = []
        for item in groupPointer.items():
            name = item[0]

            if name=='array dimensions':
                # Table written by HDF5Writer: one row per stored array with
                # its dimension sizes (see HDF5Writer.dtype).
                table = groupPointer[name][:]
                listShapes = {}
                for shapes in table:
                    listShapes[shapes[0]] = numpy.array([shapes[1],shapes[2],shapes[3],shapes[4]])
            else:
                data = groupPointer[name].value
                listMetaname.append(name)
                listMetadata.append(data)

                if name=='type':
                    self.__initDataOut(data)

        filePointer.close()

        self.listShapes = listShapes
        self.listMetaname = listMetaname
        self.listMeta = listMetadata

        return

    def __readData(self):
        """Read every dataset of the current file's 'Data' group into
        self.listDataname/self.listData ('time' is exposed as 'utctime')."""
        grp = self.fp['Data']
        listdataname = []
        listdata = []

        for item in grp.items():
            name = item[0]

            if name == 'time':
                listdataname.append('utctime')
                timeAux = grp[name].value.astype(numpy.float)[0]
                listdata.append(timeAux)
                continue

            listdataname.append(name)
            array = self.__setDataArray(self.nRecords, grp[name],self.listShapes[name])
            listdata.append(array)

        self.listDataname = listdataname
        self.listData = listdata
        return

    def __setDataArray(self, nRecords, dataset, shapes):
        """Rebuild one full array from its per-channel/per-parameter datasets.

        Parameters:
            nRecords : number of records in the file
            dataset  : HDF5 group holding 'channel<i>' or 'param<i>' datasets
            shapes   : stored dimensions [nChannels, nPoints, nSamples, mode]

        Returns an array shaped (nRecords, nChannels, nPoints, nSamples),
        squeezed along the singleton axis when nPoints or nChannels is 1.
        """
        nChannels = shapes[0] #Dimension 0
        nPoints = shapes[1] #Dimension 1, number of Points or Parameters
        nSamples = shapes[2] #Dimension 2, number of samples or ranges
        mode = shapes[3]

#         if nPoints>1:
#             arrayData = numpy.zeros((nRecords,nChannels,nPoints,nSamples))
#         else:
#             arrayData = numpy.zeros((nRecords,nChannels,nSamples))
#
#         chn = 'channel'
#
#         for i in range(nChannels):
#
#             data = dataset[chn + str(i)].value
#
#             if nPoints>1:
#                 data = numpy.rollaxis(data,2)
#
#             arrayData[:,i,:] = data

        arrayData = numpy.zeros((nRecords,nChannels,nPoints,nSamples))

        doSqueeze = False
        if mode == 0:
            # mode 0: datasets are split per channel.
            strds = 'channel'
            nDatas = nChannels
            newShapes = (nRecords,nPoints,nSamples)
            if nPoints == 1:
                doSqueeze = True
                axisSqueeze = 2
        else:
            # any other mode: datasets are split per parameter.
            strds = 'param'
            nDatas = nPoints
            newShapes = (nRecords,nChannels,nSamples)
            if nChannels == 1:
                doSqueeze = True
                axisSqueeze = 1

        for i in range(nDatas):
            data = dataset[strds + str(i)].value
            data = data.reshape(newShapes)

            if mode == 0:
                arrayData[:,i,:,:] = data
            else:
                arrayData[:,:,i,:] = data

        if doSqueeze:
            arrayData = numpy.squeeze(arrayData, axis=axisSqueeze)

        return arrayData

    def __initDataOut(self, type):
        """Placeholder: the per-type output construction is disabled and
        self.dataOut stays the Parameters object built in __init__."""
#         if type =='Parameters':
#             self.dataOut = Parameters()
#         elif type =='Spectra':
#             self.dataOut = Spectra()
#         elif type =='Voltage':
#             self.dataOut = Voltage()
#         elif type =='Correlation':
#             self.dataOut = Correlation()

        return

    def __setDataOut(self):
        """Copy the current block's metadata and data into self.dataOut.

        The 'utctime' value is stored as dataOut.utctimeInit; every other
        dataset contributes the slice selected by blockList[blockIndex].
        """
        listMeta = self.listMeta
        listMetaname = self.listMetaname
        listDataname = self.listDataname
        listData = self.listData
        blockIndex = self.blockIndex
        blockList = self.blockList

        for i in range(len(listMeta)):
            setattr(self.dataOut,listMetaname[i],listMeta[i])

        for j in range(len(listData)):
            if listDataname[j]=='utctime':
#                 setattr(self.dataOut,listDataname[j],listData[j][blockList[blockIndex]])
                setattr(self.dataOut,'utctimeInit',listData[j][blockList[blockIndex]])
                continue

            setattr(self.dataOut,listDataname[j],listData[j][blockList[blockIndex],:])

        return self.dataOut.data_param

    def getData(self):
        """Advance one block: move to the next file when the current one is
        exhausted, then fill self.dataOut. Returns 0 when no data is left."""
#         if self.flagNoMoreFiles:
#             self.dataOut.flagNoData = True
#             print 'Process finished'
#             return 0
#
        if self.blockIndex==self.blocksPerFile:
            if not( self.__setNextFileOffline() ):
                self.dataOut.flagNoData = True
                return 0

#         if self.datablock == None: # set this condition when there is no more data to read
#             self.dataOut.flagNoData = True
#             return 0

        self.__readData()
        self.__setDataOut()
        self.dataOut.flagNoData = False

        self.blockIndex += 1

        return

    def run(self, **kwargs):
        """Entry point called by the framework: configure once, then deliver
        one block per call through getData()."""
        if not(self.isConfig):
            self.setup(**kwargs)
#             self.setObjProperties()
            self.isConfig = True

        self.getData()

        return
class HDF5Writer(Operation):
    """Operation that saves selected dataOut attributes to HDF5 files.

    A metadata file ('M' prefix) describes the stored arrays (see
    writeMetadata); data files ('D' prefix) hold one 'Data' group whose
    datasets grow by one record per putData() call, up to blocksPerFile
    records per file (a new file is also started on a day change).
    """

    ext = ".hdf5"               # output file extension
    optchar = "D"               # data-file type character in the filename
    metaoptchar = "M"           # metadata-file type character in the filename
    metaFile = None             # name of the last metadata file written
    filename = None             # full path of the current data file
    path = None
    setFile = None              # set (sequence) number used in the filename
    fp = None                   # h5py.File of the current data file
    grp = None                  # 'Data' group of the current file
    ds = None                   # list of HDF5 datasets, one per stored array
    firsttime = True            # True until the first block has been written

    #Configurations
    blocksPerFile = None        # records per file before rolling to a new one
    blockIndex = None           # records written into the current file
    dataOut = None

    #Data Arrays
    dataList = None             # dataOut attribute names to store as data
    metadataList = None         # dataOut attribute names to store as metadata
    arrayDim = None             # per-array dimension table (len(dataList) x 5)
    tableDim = None             # arrayDim rows packed with self.dtype
#     dtype = [('arrayName', 'S20'),('nChannels', 'i'), ('nPoints', 'i'), ('nSamples', 'i'),('mode', 'b')]
    dtype = [('arrayName', 'S20'),('nDimensions', 'i'), ('dim2', 'i'), ('dim1', 'i'),('dim0', 'i'),('mode', 'b')]
    mode = None                 # per-array split mode: 0 = per channel, else per param
    nDatas = None #Number of datasets to be stored per array
    nDims = None #Number Dimensions in each dataset
    nDimsForDs = None           # dimension count per created dataset
    currentDay = None           # day-of-year of the data being written

    def __init__(self):
        Operation.__init__(self)
        self.isConfig = False
        return

    def setup(self, dataOut, **kwargs):
        """Configure the writer from keyword arguments.

        kwargs:
            path          : output base directory (required)
            ext           : file extension (default self.ext)
            blocksPerFile : records per file (default 10)
            metadataList  : dataOut attribute names saved as metadata (required)
            dataList      : dataOut attribute names saved as data (required)
            mode          : int or sequence; per-array split mode (default 0)

        Builds the dimension table (arrayDim/tableDim) from the current
        shapes of the listed attributes. Returns 0 (not configured) when any
        listed array is still None, 1 on success.
        """
        self.path = kwargs['path']

        if kwargs.has_key('ext'):
            self.ext = kwargs['ext']

        if kwargs.has_key('blocksPerFile'):
            self.blocksPerFile = kwargs['blocksPerFile']
        else:
            self.blocksPerFile = 10

        self.metadataList = kwargs['metadataList']
        self.dataList = kwargs['dataList']
        self.dataOut = dataOut

        if kwargs.has_key('mode'):
            mode = kwargs['mode']

            if type(mode) == int:
                # A single int applies the same mode to every array.
                mode = numpy.zeros(len(self.dataList)) + mode
        else:
            mode = numpy.zeros(len(self.dataList))

        self.mode = mode

        arrayDim = numpy.zeros((len(self.dataList),5))

        #Table dimensions
        dtype0 = self.dtype
        tableList = []

        for i in range(len(self.dataList)):

            dataAux = getattr(self.dataOut, self.dataList[i])

            if type(dataAux)==float or type(dataAux)==int:
                arrayDim[i,0] = 1
            else:
                if dataAux == None:
                    # The attribute is not ready yet; postpone configuration.
                    return 0
                arrayDim0 = dataAux.shape
                arrayDim[i,0] = len(arrayDim0)
                arrayDim[i,4] = mode[i]

                if len(arrayDim0) == 3:
                    arrayDim[i,1:-1] = numpy.array(arrayDim0)
                elif len(arrayDim0) == 2:
                    arrayDim[i,2:-1] = numpy.array(arrayDim0) #nHeights
                elif len(arrayDim0) == 1:
                    arrayDim[i,3] = arrayDim0
                elif len(arrayDim0) == 0:
                    arrayDim[i,0] = 1
                    arrayDim[i,3] = 1

            table = numpy.array((self.dataList[i],) + tuple(arrayDim[i,:]),dtype = dtype0)
            tableList.append(table)

        self.arrayDim = arrayDim
        self.tableDim = numpy.array(tableList, dtype = dtype0)
        self.blockIndex = 0

        timeTuple = time.localtime(dataOut.utctime)
        self.currentDay = timeTuple.tm_yday
        return 1

    def putMetadata(self):
        """Write a standalone metadata file (create, fill, close)."""
        fp = self.createMetadataFile()
        self.writeMetadata(fp)
        fp.close()
        return

    def createMetadataFile(self):
        """Create a new metadata file ('M' prefix) inside the day subfolder,
        continuing the set number of the last 'M' file found there.

        Returns the open h5py.File (caller closes it).
        """
        ext = self.ext
        path = self.path
        setFile = self.setFile

        timeTuple = time.localtime(self.dataOut.utctime)

        subfolder = ''
        fullpath = os.path.join( path, subfolder )

        if not( os.path.exists(fullpath) ):
            os.mkdir(fullpath)
            setFile = -1 # initialize the set-number counter

        subfolder = 'd%4.4d%3.3d' % (timeTuple.tm_year,timeTuple.tm_yday)
        fullpath = os.path.join( path, subfolder )

        if not( os.path.exists(fullpath) ):
            os.mkdir(fullpath)
            setFile = -1 # initialize the set-number counter

        else:
            filesList = os.listdir( fullpath )
            filesList = sorted( filesList, key=str.lower )
            if len( filesList ) > 0:
                filesList = [k for k in filesList if 'M' in k]
                filen = filesList[-1]
                # the filename is expected to look like:
                # 0    1234 567 89A BCDE (hex)
                # x    YYYY DDD SSS .ext
                if isNumber( filen[8:11] ):
                    setFile = int( filen[8:11] ) # continue from the last file's set number
                else:
                    setFile = -1
            else:
                setFile = -1 # initialize the set-number counter

        setFile += 1

        file = '%s%4.4d%3.3d%3.3d%s' % (self.metaoptchar,
                                        timeTuple.tm_year,
                                        timeTuple.tm_yday,
                                        setFile,
                                        ext )

        filename = os.path.join( path, subfolder, file )
        self.metaFile = file
        #Setting HDF5 File
        fp = h5py.File(filename,'w')

        return fp

    def writeMetadata(self, fp):
        """Fill the 'Metadata' group: the 'array dimensions' table plus one
        dataset per attribute named in self.metadataList."""
        grp = fp.create_group("Metadata")
        grp.create_dataset('array dimensions', data = self.tableDim, dtype = self.dtype)

        for i in range(len(self.metadataList)):
            grp.create_dataset(self.metadataList[i], data=getattr(self.dataOut, self.metadataList[i]))
        return

    def dateFlag(self):
        """Return True (and update currentDay) when dataOut.utctime falls on
        a different day-of-year than the one currently being written."""
        timeTuple = time.localtime(self.dataOut.utctime)
        dataDay = timeTuple.tm_yday

        if dataDay == self.currentDay:
            return False

        self.currentDay = dataDay
        return True

    def setNextFile(self):
        """Create the next data file ('D' prefix) and its empty datasets.

        The set number continues from the last 'D' file in the day subfolder.
        Metadata is written into the same file, then one dataset (or one
        group of 'channel<j>'/'param<j>' datasets, per self.mode) is created
        for each array in self.dataList. Resets blockIndex and firsttime.
        """
        ext = self.ext
        path = self.path
        setFile = self.setFile
        mode = self.mode

        timeTuple = time.localtime(self.dataOut.utctime)
        subfolder = 'd%4.4d%3.3d' % (timeTuple.tm_year,timeTuple.tm_yday)

        fullpath = os.path.join( path, subfolder )

        if os.path.exists(fullpath):
            filesList = os.listdir( fullpath )
            filesList = [k for k in filesList if 'D' in k]
            if len( filesList ) > 0:
                filesList = sorted( filesList, key=str.lower )
                filen = filesList[-1]
                # the filename is expected to look like:
                # 0    1234 567 89A BCDE (hex)
                # x    YYYY DDD SSS .ext
                if isNumber( filen[8:11] ):
                    setFile = int( filen[8:11] ) # continue from the last file's set number
                else:
                    setFile = -1
            else:
                setFile = -1 # initialize the set-number counter
        else:
            os.mkdir(fullpath)
            setFile = -1 # initialize the set-number counter

        setFile += 1

        file = '%s%4.4d%3.3d%3.3d%s' % (self.optchar,
                                        timeTuple.tm_year,
                                        timeTuple.tm_yday,
                                        setFile,
                                        ext )

        filename = os.path.join( path, subfolder, file )

        #Setting HDF5 File
        fp = h5py.File(filename,'w')
        #writemetadata
        self.writeMetadata(fp)
        grp = fp.create_group("Data")
#         grp.attrs['metadata'] = self.metaFile
#         grp.attrs['blocksPerFile'] = 0

        ds = []
        data = []
        nDimsForDs = []

        nDatas = numpy.zeros(len(self.dataList))
        nDims = self.arrayDim[:,0]

        nDim1 = self.arrayDim[:,2]
        nDim0 = self.arrayDim[:,3]

        for i in range(len(self.dataList)):

            if nDims[i]==1:
                # Scalar value: one (1, n) dataset that grows along axis 1.
#                 ds0 = grp.create_dataset(self.dataList[i], (1,1), maxshape=(1,self.blocksPerFile) , chunks = True, dtype='S20')
                ds0 = grp.create_dataset(self.dataList[i], (1,1), maxshape=(1,self.blocksPerFile) , chunks = True, dtype=numpy.float64)
                ds.append(ds0)
                data.append([])
                nDimsForDs.append(nDims[i])
            else:
                # Multidimensional array: one sub-dataset per channel/param.
                if mode[i]==0:
                    strMode = "channel"
                    nDatas[i] = self.arrayDim[i,1]
                else:
                    strMode = "param"
                    nDatas[i] = self.arrayDim[i,2]

                if nDims[i]==2:
                    nDatas[i] = self.arrayDim[i,2]

                grp0 = grp.create_group(self.dataList[i])

                for j in range(int(nDatas[i])):
                    tableName = strMode + str(j)

                    if nDims[i] == 3:
                        ds0 = grp0.create_dataset(tableName, (nDim1[i],nDim0[i],1) , data = numpy.zeros((nDim1[i],nDim0[i],1)) ,maxshape=(None,nDim0[i],None), chunks=True)
                    else:
                        ds0 = grp0.create_dataset(tableName, (1,nDim0[i]), data = numpy.zeros((1,nDim0[i])) , maxshape=(None,nDim0[i]), chunks=True)

                    ds.append(ds0)
                    data.append([])
                    nDimsForDs.append(nDims[i])

        self.nDatas = nDatas
        self.nDims = nDims
        self.nDimsForDs = nDimsForDs

        #Saving variables
        print 'Writing the file: %s'%filename
        self.filename = filename
        self.fp = fp
        self.grp = grp
        self.grp.attrs.modify('nRecords', 1)
        self.ds = ds
        self.data = data

        self.setFile = setFile
        self.firsttime = True
        self.blockIndex = 0
        return

    def putData(self):
        """Append one record: reopen the current file (after the first
        block), roll to a new file when full or on day change, then stage
        and write the block and close the file again."""
        if not self.firsttime:
            self.readBlock()

        if self.blockIndex == self.blocksPerFile or self.dateFlag():
            self.setNextFile()

        self.setBlock()
        self.writeBlock()

        self.fp.flush()
        self.fp.close()
        return

    def readBlock(self):
        '''
        Reopen the current file in read/write mode and rebind every dataset
        handle in self.ds (the file was closed at the end of putData).
        '''
        ds = self.ds
        #Setting HDF5 File
        fp = h5py.File(self.filename,'r+')
        grp = fp["Data"]
        ind = 0

#         grp.attrs['blocksPerFile'] = 0
        for i in range(len(self.dataList)):

            if self.nDims[i]==1:
                ds0 = grp[self.dataList[i]]
                ds[ind] = ds0
                ind += 1
            else:
                if self.mode[i]==0:
                    strMode = "channel"
                else:
                    strMode = "param"

                grp0 = grp[self.dataList[i]]

                for j in range(int(self.nDatas[i])):
                    tableName = strMode + str(j)
                    ds0 = grp0[tableName]
                    ds[ind] = ds0
                    ind += 1

        self.fp = fp
        self.grp = grp
        self.ds = ds

        return

    def setBlock(self):
        '''
        Stage the current dataOut values into self.data, one entry per
        dataset, split per channel (mode 0) or per parameter otherwise.
        '''
        #Creating Arrays
        data = self.data
        nDatas = self.nDatas
        nDims = self.nDims
        mode = self.mode
        ind = 0

        for i in range(len(self.dataList)):
            dataAux = getattr(self.dataOut,self.dataList[i])

            if nDims[i] == 1:
#                 data[ind] = numpy.array([str(dataAux)]).reshape((1,1))
                data[ind] = dataAux
#                 if not self.firsttime:
#                     data[ind] = numpy.hstack((self.ds[ind][:], self.data[ind]))
                ind += 1
            else:
                for j in range(int(nDatas[i])):
                    if (mode[i] == 0) or (nDims[i] == 2): #In case division per channel or Dimensions is only 1
                        data[ind] = dataAux[j,:]
                    else:
                        data[ind] = dataAux[:,j,:]

#                     if nDims[i] == 3:
#                         data[ind] = data[ind].reshape((data[ind].shape[0],data[ind].shape[1],1))
#                         if not self.firsttime:
#                             data[ind] = numpy.dstack((self.ds[ind][:], data[ind]))
#                     else:
#                         data[ind] = data[ind].reshape((1,data[ind].shape[0]))
#                         if not self.firsttime:
#                             data[ind] = numpy.vstack((self.ds[ind][:], data[ind]))
                    ind += 1

        self.data = data
        return

    def writeBlock(self):
        '''
        Saves the block in the HDF5 file: on the first block, size each
        dataset to the staged data; afterwards, grow each dataset by one
        record along the axis matching its dimensionality.
        '''
        for i in range(len(self.ds)):
            if self.firsttime:
#                 self.ds[i].resize(self.data[i].shape)
#                 self.ds[i][self.blockIndex,:] = self.data[i]
                if type(self.data[i]) == numpy.ndarray:
                    nDims1 = len(self.ds[i].shape)

                    if nDims1 == 3:
                        # 3-D datasets keep the record axis last.
                        self.data[i] = self.data[i].reshape((self.data[i].shape[0],self.data[i].shape[1],1))
                    self.ds[i].resize(self.data[i].shape)
                self.ds[i][:] = self.data[i]
            else:
                if self.nDimsForDs[i] == 1:
                    # Scalar per record: grow along axis 1.
                    self.ds[i].resize((self.ds[i].shape[0], self.ds[i].shape[1] + 1))
                    self.ds[i][0,-1] = self.data[i]
                elif self.nDimsForDs[i] == 2:
                    # One row per record: grow along axis 0.
                    self.ds[i].resize((self.ds[i].shape[0] + 1,self.ds[i].shape[1]))
                    self.ds[i][self.blockIndex,:] = self.data[i]
                elif self.nDimsForDs[i] == 3:
                    dataShape = self.data[i].shape
                    dsShape = self.ds[i].shape

                    if dataShape[0]==dsShape[0]:
                        # Same leading size: append one slab along axis 2.
                        self.ds[i].resize((self.ds[i].shape[0],self.ds[i].shape[1],self.ds[i].shape[2]+1))
                        self.ds[i][:,:,-1] = self.data[i]
                    else:
                        # Leading size changed: extend axis 0 instead.
                        self.ds[i].resize((self.ds[i].shape[0] + dataShape[0],self.ds[i].shape[1],self.ds[i].shape[2]))
                        self.ds[i][dsShape[0]:,:,0] = self.data[i]

        self.blockIndex += 1
        self.firsttime = False
        return

    def run(self, dataOut, **kwargs):
        """Entry point called by the framework: configure on first call
        (creating the first file), then append one record per call."""
        if not(self.isConfig):
            flagdata = self.setup(dataOut, **kwargs)

            if not(flagdata):
                # setup returned 0: required arrays not ready yet.
                return

            self.isConfig = True
#             self.putMetadata()
            self.setNextFile()

        self.putData()
        return