# jroIO_madrigal.py  (source-browser header residue removed: 594 lines, 19.9 KiB, text/x-python)
'''
Created on Aug 1, 2017

@author: Juan C. Espinoza
'''

import os
import sys
import time
import json
import glob
import datetime

import numpy
import h5py

import schainpy.admin
from schainpy.model.io.jroIO_base import LOCALTIME, Reader
from schainpy.model.proc.jroproc_base import ProcessingUnit, Operation, MPDecorator
from schainpy.model.data.jrodata import Parameters
from schainpy.utils import log

try:
    import madrigal.cedar
except ImportError:
    # madrigal is an optional dependency: reading/writing CEDAR files will
    # fail at use time if the package is not installed.  Narrowed from a
    # bare `except:` so unrelated errors during import are not hidden.
    pass
|
# Python 2/3 compatibility: `basestring` exists only in Python 2.  On
# Python 3 the lookup raises NameError and we alias it to `str` so the
# isinstance() checks in load_json() work on both versions.  Narrowed
# from a bare `except:` to `except NameError:` so only the expected
# missing-name case is handled.
try:
    basestring
except NameError:
    basestring = str
# Default Madrigal catalog metadata used when the user supplies none.
DEF_CATALOG = {
    'principleInvestigator': 'Marco Milla',
    'expPurpose': '',
    'cycleTime': '',
    'correlativeExp': '',
    'sciRemarks': '',
    'instRemarks': ''
}

# Default Madrigal header metadata used when the user supplies none.
DEF_HEADER = {
    'kindatDesc': '',
    'analyst': 'Jicamarca User',
    'comments': '',
    'history': ''
}

# Madrigal instrument code (kinst) -> file-name mnemonic.
# TODO noted in setFile(): ideally these would be queried from madrigal.
MNEMONICS = {
    10: 'jro',
    11: 'jbr',
    840: 'jul',
    13: 'jas',
    1000: 'pbr',
    1001: 'hbr',
    1002: 'obr',
    400: 'clr'
}

# Unix epoch expressed in local wall-clock time (epoch shifted by the
# host's timezone offset).  NOTE(review): uses time.timezone, so the
# value depends on the machine's TZ setting at import time.
UT1970 = datetime.datetime(1970, 1, 1) - datetime.timedelta(seconds=time.timezone)
|
def load_json(obj):
    '''
    Parse json as string instead of unicode
    '''
    # Accept either a JSON-encoded string or an already-parsed object.
    if isinstance(obj, str):
        parsed = json.loads(obj)
    else:
        parsed = obj

    if isinstance(parsed, dict):
        # Coerce keys to str; recurse only into dict values, and coerce
        # string values through str() (lists nested in dicts are kept as-is,
        # matching the original behavior).
        converted = {}
        for key, val in parsed.items():
            if isinstance(val, dict):
                converted[str(key)] = load_json(val)
            elif isinstance(val, basestring):
                converted[str(key)] = str(val)
            else:
                converted[str(key)] = val
        return converted

    if isinstance(parsed, (list, tuple)):
        return [str(item) if isinstance(item, basestring) else item
                for item in parsed]

    # Scalars (numbers, None, bool) pass through untouched.
    return parsed
|
r1287 | ||
class MADReader(Reader, ProcessingUnit):
    '''
    Reading unit for Madrigal files (plain-text '.txt' or HDF5 '.hdf5').

    Records sharing a timestamp are grouped into blocks; each call to
    getData() reads one block and maps the configured 1D/2D parameters
    onto the `Parameters` dataOut object.  File search / open helpers
    (set_kwargs, searchFilesOnLine, setNextFile, ...) come from the
    Reader base class, which is not visible in this file.
    '''

    def __init__(self):

        ProcessingUnit.__init__(self)

        self.dataOut = Parameters()
        self.counter_records = 0    # index of the next record to read
        self.nrecords = None        # total records in the current file
        self.flagNoMoreFiles = 0
        self.filename = None
        self.intervals = set()      # observed gaps (seconds) between consecutive blocks
        # Sentinel date far in the past so the first real record always
        # compares as "newer" in readBlock().
        self.datatime = datetime.datetime(1900, 1, 1)
        self.format = None
        self.filefmt = "***%Y%m%d*******"

    def setup(self, **kwargs):
        '''
        Validate kwargs, resolve the file format/extension and locate the
        input files (online or offline mode), then open the first file.

        Raises:
            ValueError: when `path` is missing or `format` is not given.
            schainpy.admin.SchainError: online mode found no valid file.
        '''

        self.set_kwargs(**kwargs)
        # These may arrive as JSON strings from the controller; normalize.
        self.oneDDict = load_json(self.oneDDict)
        self.twoDDict = load_json(self.twoDDict)
        self.ind2DList = load_json(self.ind2DList)
        # First independent 2D parameter (e.g. altitude) drives the grid.
        self.independentParam = self.ind2DList[0]

        if self.path is None:
            raise ValueError('The path is not valid')

        self.open_file = open
        self.open_mode = 'rb'
        if self.format is None:
            raise ValueError('The format is not valid choose simple or hdf5')
        elif self.format.lower() in ('simple', 'txt'):
            self.ext = '.txt'
        elif self.format.lower() in ('cedar',):
            self.ext = '.001'
        else:
            # Any other value falls through to HDF5.
            self.ext = '.hdf5'
            self.open_file = h5py.File
            self.open_mode = 'r'

        if self.online:
            log.log("Searching files in online mode...", self.name)

            for nTries in range(self.nTries):
                fullpath = self.searchFilesOnLine(self.path, self.startDate,
                    self.endDate, self.expLabel, self.ext, self.walk,
                    self.filefmt, self.folderfmt)

                try:
                    # searchFilesOnLine returns a generator; take the first hit.
                    fullpath = next(fullpath)
                except:
                    fullpath = None
                if fullpath:
                    break

                log.warning(
                    'Waiting {} sec for a valid file in {}: try {} ...'.format(
                        self.delay, self.path, nTries + 1),
                    self.name)
                time.sleep(self.delay)

            if not(fullpath):
                raise schainpy.admin.SchainError(
                    'There isn\'t any valid file in {}'.format(self.path))
        else:
            log.log("Searching files in {}".format(self.path), self.name)
            self.filenameList = self.searchFilesOffLine(self.path, self.startDate,
                self.endDate, self.expLabel, self.ext, self.walk, self.filefmt, self.folderfmt)

        self.setNextFile()

    def readFirstHeader(self):
        '''Read header and data'''

        self.parseHeader()
        self.parseData()
        self.blockIndex = 0
        return

    def parseHeader(self):
        '''
        Extract the parameter names from the open file and prune the
        oneDDict/twoDDict mappings down to parameters actually present.
        Also pre-creates the self.output lists for multi-dim outputs.
        '''
        self.output = {}
        self.version = '2'
        s_parameters = None
        if self.ext == '.txt':
            # First line of the text file is the space-separated header row.
            self.parameters = [s.strip().lower() for s in self.fp.readline().decode().strip().split(' ') if s]
        elif self.ext == '.hdf5':
            self.metadata = self.fp['Metadata']
            # '_record_layout' marks the Madrigal-3 layout with explicit
            # independent spatial parameters.
            if '_record_layout' in self.metadata:
                s_parameters = [s[0].lower().decode() for s in self.metadata['Independent Spatial Parameters']]
                self.version = '3'
            self.parameters = [s[0].lower().decode() for s in self.metadata['Data Parameters']]

        log.success('Parameters found: {}'.format(self.parameters),
                    'MADReader')
        if s_parameters:
            log.success('Spatial parameters found: {}'.format(s_parameters),
                        'MADReader')

        # Drop requested 1D parameters that the file does not contain.
        for param in list(self.oneDDict.keys()):
            if param.lower() not in self.parameters:
                log.warning(
                    'Parameter {} not found will be ignored'.format(
                        param),
                    'MADReader')
                self.oneDDict.pop(param, None)

        # Same for 2D parameters; list-valued entries get an output slot.
        for param, value in list(self.twoDDict.items()):
            if param.lower() not in self.parameters:
                log.warning(
                    'Parameter {} not found, it will be ignored'.format(
                        param),
                    'MADReader')
                self.twoDDict.pop(param, None)
                continue
            if isinstance(value, list):
                if value[0] not in self.output:
                    self.output[value[0]] = []
                self.output[value[0]].append([])

    def parseData(self):
        '''
        Load the data table and derive the independent-parameter grid
        (self.ranges), record counters and, for HDF5, the time axis.
        '''
        if self.ext == '.txt':
            self.data = numpy.genfromtxt(self.fp, missing_values=('missing'))
            self.nrecords = self.data.shape[0]
            self.ranges = numpy.unique(self.data[:,self.parameters.index(self.independentParam.lower())])
            self.counter_records = 0
        elif self.ext == '.hdf5':
            self.data = self.fp['Data']
            self.ranges = numpy.unique(self.data['Table Layout'][self.independentParam.lower()])
            self.times = numpy.unique(self.data['Table Layout']['ut1_unix'])
            # recno is used directly as the record counter in HDF5 files.
            self.counter_records = int(self.data['Table Layout']['recno'][0])
            self.nrecords = int(self.data['Table Layout']['recno'][-1])

    def readNextBlock(self):
        '''
        Read blocks until one falls inside [startDate+startTime,
        endDate+endTime]; out-of-range blocks are logged and skipped.
        Returns 1 when a valid block is buffered.
        '''
        while True:
            self.flagDiscontinuousBlock = 0
            if self.counter_records == self.nrecords:
                # Current file exhausted; advance (Reader base class).
                self.setNextFile()

            self.readBlock()
            if (self.datatime < datetime.datetime.combine(self.startDate, self.startTime)) or \
               (self.datatime > datetime.datetime.combine(self.endDate, self.endTime)):
                log.warning(
                    'Reading Record No. {}/{} -> {} [Skipping]'.format(
                        self.counter_records,
                        self.nrecords,
                        self.datatime.ctime()),
                    'MADReader')
                continue
            break

        log.log(
            'Reading Record No. {}/{} -> {}'.format(
                self.counter_records,
                self.nrecords,
                self.datatime.ctime()),
            'MADReader')

        return 1

    def readBlock(self):
        '''
        Gather into self.buffer all rows sharing the next timestamp.
        Sets flagDiscontinuousBlock when the calendar date jumps forward,
        and accumulates the inter-block interval into self.intervals.
        '''
        dum = []
        if self.ext == '.txt':
            # First six columns of each row are Y, M, D, h, m, s.
            dt = self.data[self.counter_records][:6].astype(int)
            if datetime.datetime(dt[0], dt[1], dt[2], dt[3], dt[4], dt[5]).date() > self.datatime.date():
                self.flagDiscontinuousBlock = 1
            self.datatime = datetime.datetime(dt[0], dt[1], dt[2], dt[3], dt[4], dt[5])
            while True:
                dt = self.data[self.counter_records][:6].astype(int)
                datatime = datetime.datetime(dt[0], dt[1], dt[2], dt[3], dt[4], dt[5])
                if datatime == self.datatime:
                    dum.append(self.data[self.counter_records])
                    self.counter_records += 1
                    if self.counter_records == self.nrecords:
                        break
                    continue
                # First row of the *next* block: record the time gap.
                self.intervals.add((datatime-self.datatime).seconds)
                break
        elif self.ext == '.hdf5':
            datatime = datetime.datetime.utcfromtimestamp(
                self.times[self.counter_records])
            # All table rows with the current record number form the block.
            dum = self.data['Table Layout'][self.data['Table Layout']['recno']==self.counter_records]
            self.intervals.add((datatime-self.datatime).seconds)
            if datatime.date() > self.datatime.date():
                self.flagDiscontinuousBlock = 1
            self.datatime = datatime
            self.counter_records += 1

        self.buffer = numpy.array(dum)
        return

    def set_output(self):
        '''
        Storing data from buffer to dataOut object
        '''
        parameters = [None for __ in self.parameters]

        # 1D parameters: one scalar per block, taken from the first row.
        for param, attr in list(self.oneDDict.items()):
            x = self.parameters.index(param.lower())
            setattr(self.dataOut, attr, self.buffer[0][x])

        # 2D parameters: align block rows onto the full self.ranges grid,
        # filling missing heights with NaN.
        for param, value in list(self.twoDDict.items()):
            dummy = numpy.zeros(self.ranges.shape) + numpy.nan
            if self.ext == '.txt':
                x = self.parameters.index(param.lower())
                y = self.parameters.index(self.independentParam.lower())
                ranges = self.buffer[:,y]
                #if self.ranges.size == ranges.size:
                #    continue
                index = numpy.where(numpy.in1d(self.ranges, ranges))[0]
                dummy[index] = self.buffer[:,x]
            else:
                # HDF5 buffer is a structured array indexed by field name.
                ranges = self.buffer[self.independentParam.lower()]
                index = numpy.where(numpy.in1d(self.ranges, ranges))[0]
                dummy[index] = self.buffer[param.lower()]

            if isinstance(value, str):
                if value not in self.independentParam:
                    setattr(self.dataOut, value, dummy.reshape(1,-1))
            elif isinstance(value, list):
                # [name, position]: slot into the multi-dim output array.
                self.output[value[0]][value[1]] = dummy
                parameters[value[1]] = param

        for key, value in list(self.output.items()):
            setattr(self.dataOut, key, numpy.array(value))

        self.dataOut.parameters = [s for s in parameters if s]
        self.dataOut.heightList = self.ranges
        # Block time as UTC seconds since the Unix epoch.
        self.dataOut.utctime = (self.datatime - datetime.datetime(1970, 1, 1)).total_seconds()
        self.dataOut.utctimeInit = self.dataOut.utctime
        self.dataOut.paramInterval = min(self.intervals)
        self.dataOut.useLocalTime = False
        self.dataOut.flagNoData = False
        self.dataOut.nrecords = self.nrecords
        self.dataOut.flagDiscontinuousBlock = self.flagDiscontinuousBlock

    def getData(self):
        '''
        Storing data from databuffer to dataOut object
        '''
        if not self.readNextBlock():
            self.dataOut.flagNoData = True
            return 0
        self.set_output()
        return 1

    def run(self, **kwargs):
        '''Entry point: configure once, then deliver one block per call.'''
        if not(self.isConfig):
            self.setup(**kwargs)
            self.isConfig = True

        self.getData()

        return
|
@MPDecorator
class MADWriter(Operation):
    '''Writing module for Madrigal files
    type: external
    Inputs:
        path        path where files will be created
        oneDDict    json of one-dimensional parameters in record where keys
                    are Madrigal codes (integers or mnemonics) and values the corresponding
                    dataOut attribute e.g: {
                        'gdlatr': 'lat',
                        'gdlonr': 'lon',
                        'gdlat2':'lat',
                        'glon2':'lon'}
        ind2DList   list of independent spatial two-dimensional parameters e.g:
                    ['heigthList']
        twoDDict    json of two-dimensional parameters in record where keys
                    are Madrigal codes (integers or mnemonics) and values the corresponding
                    dataOut attribute if multidimensional array specify as tupple
                    ('attr', pos) e.g: {
                        'gdalt': 'heightList',
                        'vn1p2': ('data_output', 0),
                        'vn2p2': ('data_output', 1),
                        'vn3': ('data_output', 2),
                        'snl': ('data_SNR', 'db')
                    }
        metadata    json of madrigal metadata (kinst, kindat, catalog and header)
        format      hdf5, cedar
        blocks      number of blocks per file'''

    __attrs__ = ['path', 'oneDDict', 'ind2DList', 'twoDDict','metadata', 'format', 'blocks']
    # Madrigal fill value used to mark invalid/NaN samples.
    missing = -32767

    def __init__(self):

        Operation.__init__(self)
        self.dataOut = Parameters()
        self.counter = 0            # blocks written to the current file
        self.path = None
        self.fp = None              # madrigal.cedar.MadrigalCedarFile handle

    def run(self, dataOut, path, oneDDict, ind2DList='[]', twoDDict='{}',
            metadata='{}', format='cedar', **kwargs):

        if not self.isConfig:
            self.setup(path, oneDDict, ind2DList, twoDDict, metadata, format, **kwargs)
            self.isConfig = True

        self.dataOut = dataOut
        self.putData()
        return 1

    def setup(self, path, oneDDict, ind2DList, twoDDict, metadata, format, **kwargs):
        '''
        Configure Operation
        '''

        self.path = path
        # blocks=None means "never rotate on block count" in putData().
        self.blocks = kwargs.get('blocks', None)
        self.counter = 0
        self.oneDDict = load_json(oneDDict)
        self.twoDDict = load_json(twoDDict)
        self.ind2DList = load_json(ind2DList)
        meta = load_json(metadata)
        self.kinst = meta.get('kinst')
        self.kindat = meta.get('kindat')
        self.catalog = meta.get('catalog', DEF_CATALOG)
        self.header = meta.get('header', DEF_HEADER)
        if format == 'cedar':
            self.ext = '.dat'
            self.extra_args = {}
        elif format == 'hdf5':
            self.ext = '.hdf5'
            # Madrigal-3 HDF5 records need the independent 2D parameter list.
            self.extra_args = {'ind2DList': self.ind2DList}

        # 2D keys used to detect all-NaN rows; range/gdalt are the
        # independent axes and are excluded from that check.
        self.keys = [k.lower() for k in self.twoDDict]
        if 'range' in self.keys:
            self.keys.remove('range')
        if 'gdalt' in self.keys:
            self.keys.remove('gdalt')

    def setFile(self):
        '''
        Create new cedar file object
        '''

        self.mnemonic = MNEMONICS[self.kinst]   #TODO get mnemonic from madrigal
        date = datetime.datetime.utcfromtimestamp(self.dataOut.utctime)

        filename = '{}{}{}'.format(self.mnemonic,
                                   date.strftime('%Y%m%d_%H%M%S'),
                                   self.ext)

        self.fullname = os.path.join(self.path, filename)

        if os.path.isfile(self.fullname):
            log.warning(
                'Destination file {} already exists, previous file deleted.'.format(
                    self.fullname),
                'MADWriter')
            os.remove(self.fullname)

        try:
            log.success(
                'Creating file: {}'.format(self.fullname),
                'MADWriter')
            if not os.path.exists(self.path):
                os.makedirs(self.path)
            self.fp = madrigal.cedar.MadrigalCedarFile(self.fullname, True)
        except ValueError as e:
            log.error(
                'Impossible to create a cedar object with "madrigal.cedar.MadrigalCedarFile"',
                'MADWriter')
            # NOTE(review): returns None on failure; putData() does not
            # check this return value.
            return

        return 1

    def writeBlock(self):
        '''
        Add data records to cedar file taking data from oneDDict and twoDDict
        attributes.
        Allowed parameters in: parcodes.tab
        '''

        startTime = datetime.datetime.utcfromtimestamp(self.dataOut.utctime)
        endTime = startTime + datetime.timedelta(seconds=self.dataOut.paramInterval)
        heights = self.dataOut.heightList

        if self.ext == '.dat':
            # CEDAR text format: replace NaNs in-place with the fill value.
            for key, value in list(self.twoDDict.items()):
                if isinstance(value, str):
                    data = getattr(self.dataOut, value)
                    invalid = numpy.isnan(data)
                    data[invalid] = self.missing
                elif isinstance(value, (tuple, list)):
                    # NOTE(review): `key` is rebound here by unpacking;
                    # it is not used afterwards in this loop.
                    attr, key = value
                    data = getattr(self.dataOut, attr)
                    invalid = numpy.isnan(data)
                    data[invalid] = self.missing

        # Build one flat array per 2D Madrigal code, truncated to the
        # number of heights.
        out = {}
        for key, value in list(self.twoDDict.items()):
            key = key.lower()
            if isinstance(value, str):
                if 'db' in value.lower():
                    # '<attr>_db' convention: average over channels and
                    # convert to decibels.
                    tmp = getattr(self.dataOut, value.replace('_db', ''))
                    SNRavg = numpy.average(tmp, axis=0)
                    tmp = 10*numpy.log10(SNRavg)
                else:
                    tmp = getattr(self.dataOut, value)
                out[key] = tmp.flatten()[:len(heights)]
            elif isinstance(value, (tuple, list)):
                attr, x = value
                data = getattr(self.dataOut, attr)
                out[key] = data[int(x)][:len(heights)]

        # Keep only heights where at least one dependent parameter is valid.
        a = numpy.array([out[k] for k in self.keys])
        nrows = numpy.array([numpy.isnan(a[:, x]).all() for x in range(len(heights))])
        index = numpy.where(nrows == False)[0]

        # NOTE(review): microsecond/10000 is float division under Python 3;
        # confirm madrigal.cedar accepts a float centisecond value.
        rec = madrigal.cedar.MadrigalDataRecord(
            self.kinst,
            self.kindat,
            startTime.year,
            startTime.month,
            startTime.day,
            startTime.hour,
            startTime.minute,
            startTime.second,
            startTime.microsecond/10000,
            endTime.year,
            endTime.month,
            endTime.day,
            endTime.hour,
            endTime.minute,
            endTime.second,
            endTime.microsecond/10000,
            list(self.oneDDict.keys()),
            list(self.twoDDict.keys()),
            len(index),
            **self.extra_args
        )

        # Setting 1d values
        for key in self.oneDDict:
            rec.set1D(key, getattr(self.dataOut, self.oneDDict[key]))

        # Setting 2d values
        nrec = 0
        for n in index:
            for key in out:
                rec.set2D(key, nrec, out[key][n])
            nrec += 1

        self.fp.append(rec)
        # Flush HDF5 output periodically to bound memory use.
        if self.ext == '.hdf5' and self.counter % 500 == 0 and self.counter > 0:
            self.fp.dump()
        if self.counter % 20 == 0 and self.counter > 0:
            log.log(
                'Writing {} records'.format(
                    self.counter),
                'MADWriter')

    def setHeader(self):
        '''
        Create an add catalog and header to cedar file
        '''

        log.success('Closing file {}'.format(self.fullname), 'MADWriter')
        if self.ext == '.dat':
            self.fp.write()
        else:
            self.fp.dump()
            self.fp.close()

        header = madrigal.cedar.CatalogHeaderCreator(self.fullname)
        header.createCatalog(**self.catalog)
        header.createHeader(**self.header)
        header.write()

    def putData(self):
        # Nothing to write for empty blocks.
        if self.dataOut.flagNoData:
            return 0

        # Rotate the output file on a time discontinuity or when the
        # configured block count is reached.
        if self.dataOut.flagDiscontinuousBlock or self.counter == self.blocks:
            if self.counter > 0:
                self.setHeader()
            self.counter = 0

        if self.counter == 0:
            self.setFile()

        self.writeBlock()
        self.counter += 1

    def close(self):
        # Finalize the last open file, if any blocks were written.
        if self.counter > 0:
            self.setHeader()