#!/usr/bin/env python
# -*- coding:utf-8 -*-
from __future__ import unicode_literals
from datetime import datetime, timedelta
from multiprocessing.pool import Pool
from functools import partial
import numpy as np
from functools import wraps
from path import Path
from shutil import SameFileError
from tempfile import mkdtemp
from dypy.tools.dyntools import DynToolsException, run_cmd
from .formats import from_netcdf, to_ascii, from_ascii, to_netcdf
# BUG FIX: was `_all__` (single leading underscore), which is ignored by
# `from module import *` and documentation tools.
__all__ = ['Tra', 'LagrantoRun']
class Tra(object):
    """
    Class to work with LAGRANTO output.

    Read trajectories from a LAGRANTO file and return a structured
    numpy array.

    Args:
        filename (string): File containing lagranto trajectories
        usedatetime (bool): Read times as datetime objects, default True
        typefile: Deprecated, kept for backward compatibility

    Returns:
        structured array (Tra): trajs(ntra, ntime) with variables as tuple.

    Examples
    --------
    >>> filename = 'mylslfile.nc'
    >>> trajs = Tra()
    >>> trajs.load_netcdf(filename)
    >>> trajs['lon'][0, :]  # return the longitudes for the first trajectory.

    Author : Nicolas Piaget, ETH Zurich , 2014
             Sebastiaan Crezee, ETH Zurich , 2014
    """

    # Simulation start date cached by the loaders; see the `startdate`
    # property.
    _startdate = None

    def __init__(self, filename='', typefile=None, usedatetime=True):
        """Load `filename`, trying netcdf first and falling back to ascii."""
        if not filename:
            # Empty container; use load_ascii()/load_netcdf() later.
            self._array = None
            return
        # The loaders can fail in many ways on a wrong format, so a broad
        # Exception catch is deliberate here (was a bare `except:`, which
        # also swallowed KeyboardInterrupt/SystemExit).
        try:
            self.load_netcdf(filename, usedatetime=usedatetime)
        except Exception:
            try:
                self.load_ascii(filename, usedatetime=usedatetime)
            except Exception:
                raise IOError("Unknown fileformat. Known formats "
                              "are ascii or netcdf")

    def __len__(self):
        return len(self._array)

    def __getattr__(self, attr):
        # Called only when normal attribute lookup fails: delegate to the
        # underlying structured array (shape, ndim, dtype, ...).
        if attr == '_array':
            # Guard against infinite recursion when _array itself is not
            # set yet (e.g. during unpickling).
            raise AttributeError(attr)
        return getattr(self._array, attr)

    def __getitem__(self, key):
        return self._array[key]

    def __setitem__(self, key, item):
        if isinstance(key, slice):
            # tra[:] = arr replaces the whole underlying array.
            self._array = item
        elif key in self.dtype.names:
            self._array[key] = item
        else:
            # New variable: extend the dtype and copy all data over.
            dtypes = self._array.dtype.descr
            dtypes.append((key, item.dtype.descr[0][1]))
            dtypes = [(str(d[0]), d[1]) for d in dtypes]
            newarr = np.zeros(self._array.shape, dtype=dtypes)
            for var in self.variables:
                newarr[var] = self._array[var]
            newarr[key] = item
            self._array = newarr

    def __repr__(self):
        try:
            string = (" {} trajectories with {} time steps. \n "
                      "Available fields: {}\n "
                      "total duration: {} minutes").format(
                self.ntra, self.ntime,
                "/".join(self.variables),
                self.duration
            )
        except AttributeError:
            # Assume it's an empty Tra()
            string = ("Empty trajectories container.\n"
                      "Hint: use load_ascii() or load_netcdf()\n"
                      "to load data")
        return string

    @property
    def ntra(self):
        """Number of trajectories, or None if the array is not 2-D."""
        if self.ndim < 2:
            print(" Be careful with the dimensions, "
                  "you may want to change the shape: \n "
                  "either shape + (1,) or (1,)+shape ")
            return None
        return self.shape[0]

    @property
    def ntime(self):
        """Number of time steps, or None if the array is not 2-D."""
        if self.ndim < 2:
            print(" Be careful with the dimensions, "
                  "you may want to change the shape: \n "
                  "either shape + (1,) or (1,)+shape ")
            return None
        return self.shape[1]

    @property
    def variables(self):
        """List of variable names stored in the structured array."""
        return list(self.dtype.names)

    @property
    def duration(self):
        """ time duration in minutes """
        end = self['time'][0, -1]
        start = self['time'][0, 0]
        delta = end - start
        if isinstance(delta, np.timedelta64):
            return delta.astype(timedelta).total_seconds() / 60.
        # plain numeric times are assumed to be in hours
        return delta * 60.

    @property
    def initial(self):
        """ give the initial time of the trajectories """
        starttime = self['time'][0, 0]
        return starttime.astype(datetime)

    @property
    def startdate(self):
        """Start date of the trajectories; 1900-01-01 for numeric times."""
        if self._startdate is None:
            time0 = self['time'][0, 0]
            if isinstance(time0, np.datetime64):
                self._startdate = time0.astype(datetime)
            else:
                self._startdate = datetime(1900, 1, 1, 0)
        return self._startdate

    def set_array(self, array):
        """ To change the trajectories array """
        self._array = array

    def concatenate(self, trajs, time=False):
        """ To concatenate trajectories together

        Returns
        -------
        Tra
            Return a new Tra (trajectories) object

        Parameters
        ----------
        time: bool, default False
            if True concatenate along the time dimension
        """
        if not isinstance(trajs, tuple):
            trajs = (trajs,)
        if time:
            # concatenate along the time axis by transposing first
            trajstuple = tuple(tra._array.T for tra in trajs)
            trajstuple += (self._array.T,)
            test = np.concatenate(trajstuple).T
        else:
            trajstuple = tuple(tra._array for tra in trajs)
            trajstuple += (self._array,)
            test = np.concatenate(trajstuple)
        newtrajs = Tra()
        newtrajs.set_array(test)
        return newtrajs

    def write(self, filename, fileformat='netcdf'):
        """Write the trajectories to `filename` in `fileformat`."""
        # NOTE(review): expects a module-level `_write_<fileformat>` helper;
        # none is visible in this file (only write_netcdf/write_ascii
        # methods exist) — verify these helpers exist elsewhere.
        globals()['_write_{}'.format(fileformat)](self, filename)

    def load_netcdf(self, filename, usedatetime=True, msv=-999, unit='hours',
                    exclude=None, date=None):
        self._array, self._startdate = from_netcdf(filename,
                                                   usedatetime=usedatetime,
                                                   msv=msv,
                                                   unit=unit,
                                                   exclude=exclude,
                                                   date=date)
    load_netcdf.__doc__ = from_netcdf.__doc__

    def write_netcdf(self, filename, exclude=None, unit='hours'):
        # `exclude=None` avoids the shared-mutable-default pitfall of the
        # original `exclude=[]`; passing an explicit list still works.
        exclude = [] if exclude is None else exclude
        to_netcdf(self, filename, exclude=exclude, unit=unit)
    write_netcdf.__doc__ = to_netcdf.__doc__

    def write_ascii(self, filename):
        to_ascii(self, filename)
    write_ascii.__doc__ = to_ascii.__doc__

    def load_ascii(self, filename, usedatetime=True, msv=-999.999, gz=False):
        self._array, self._startdate = from_ascii(filename,
                                                  usedatetime=usedatetime,
                                                  msv=msv,
                                                  gz=gz)
    load_ascii.__doc__ = from_ascii.__doc__
def intmpdir(f):
    """Decorator: run `f` inside a temporary directory.

    If the working directory (first positional argument of `f`) is not
    already a directory directly under /tmp, the model files are linked
    into a fresh tmpdir, `f` is executed there and the tmpdir is removed
    afterwards. Otherwise `f` runs in place (e.g. when called from
    LagrantoRun._single_run, which manages its own tmpdir).
    """
    @wraps(f)
    def wrapper(*args, **kwds):
        workingdir = args[0]
        if Path(workingdir).parent == '/tmp':
            # already inside a tmpdir: run in place
            return f(*args, **kwds)
        tmpdir = Path(mkdtemp())
        # forward only the keywords link_files understands
        lwkds = {key: value for key, value in kwds.items()
                 if key in ['outputdir', 'version', 'sdate']}
        link_files(workingdir, tmpdir, **lwkds)
        nargs = (tmpdir,) + args[1:]
        out = f(*nargs, **kwds)
        # BUG FIX: was tmpdir.rmdir_p(), which silently fails on the
        # (always populated) tmpdir and leaked it; rmtree_p matches the
        # cleanup used in LagrantoRun._single_run.
        tmpdir.rmtree_p()
        return out
    return wrapper
@intmpdir
def create_startf(workingdir, date, filename, specifier, version='cosmo',
                  outputdir='', sdate=None, tolist=False, cmd_header=None):
    """creates a startfile for LAGRANTO

    Runs ``startf.<version>`` in `workingdir`, optionally converts the
    result to a list-type startfile with ``lsl2list``, and copies the
    final file to `outputdir`. Returns the command output.
    """
    outputdir = Path(outputdir)
    workingdir = Path(workingdir)
    create = "startf.{version} "
    # '{filename}' placeholder restored; it was lost in the documentation
    # extraction but is required by the .format() call below.
    create += "{date:%Y%m%d_%H} {filename} {specifier}"
    if tolist:
        # startf writes an lsl file first; give it a .4 extension
        filename = Path(filename).splitext()[0] + '.4'
    create = create.format(version=version, date=date,
                           filename=filename, specifier=specifier)
    out = run_cmd(create, workingdir, cmd_header=cmd_header)
    if tolist:
        # convert the lsl file into a list-type startfile
        lslname = filename
        filename = Path(lslname).splitext()[0] + '.startf'
        lsl2list = 'lsl2list {} {}'.format(lslname, filename)
        out = run_cmd(lsl2list, workingdir, cmd_header=cmd_header)
    try:
        Path.copy(workingdir / filename, outputdir / filename)
    except SameFileError:
        # working dir and output dir are the same place; nothing to do
        pass
    return out
@intmpdir
def select(workingdir, inpfile, outfile, crit,
           outputdir='', sdate=None, version='cosmo',
           cmd_header=None, **kwargs):
    """Select trajectories from `inpfile` matching `crit` into `outfile`."""
    fmt = kwargs.pop('netcdf_format', 'CF')
    cmd = 'select.{version} {inpfile} {outfile} "{crit}"'.format(
        version=version, inpfile=inpfile, outfile=outfile, crit=crit)
    result = run_cmd(cmd, workingdir,
                     netcdf_format=fmt, cmd_header=cmd_header)
    destination = Path(outputdir)
    try:
        # publish the result next to the other outputs
        Path.copy(workingdir / outfile, destination / outfile)
    except SameFileError:
        pass
    return result
@intmpdir
def trace(workingdir, filename, outputdir='', outfile='', tracevars='',
          tracevars_content='', field='', sdate=None, version='cosmo',
          cmd_header=None, **kwargs):
    """trace variables along a trajectory

    Runs ``trace.<version>`` on `filename` in `workingdir`, optionally
    with a tracevars file (written on the fly if `tracevars_content` is
    given) or a single `field`, and copies the result to `outputdir`.
    """
    netcdf_format = kwargs.pop('netcdf_format', 'CF')
    workingdir = Path(workingdir)
    outputdir = Path(outputdir)
    if not outfile:
        # trace in place by default
        outfile = filename
    # '{filename}' placeholder restored; it was lost in the documentation
    # extraction but is required by the .format() call below.
    trace_cmd = 'trace.{version} {filename} {outfile}'
    tracevars_file = 'tracevars'
    if tracevars:
        tracevars_file = tracevars
        trace_cmd += ' -v ' + tracevars
    if tracevars_content:
        # write the tracevars file on the fly
        with (workingdir / tracevars_file).open('w') as f:
            f.write(tracevars_content)
    if field:
        trace_cmd += ' -f ' + field
    trace_cmd = trace_cmd.format(filename=filename, outfile=outfile,
                                 version=version)
    out = run_cmd(trace_cmd, workingdir,
                  netcdf_format=netcdf_format, cmd_header=cmd_header)
    try:
        Path.copy(workingdir / outfile, outputdir / outfile)
    except SameFileError:
        pass
    return out
@intmpdir
def caltra(workingdir, startdate, enddate, startfile, filename,
           jump=True, outputdir='', sdate=None, version='cosmo',
           cmd_header=None, **kwargs):
    """Calculate trajectories for air parcels starting at positions
    specified in startfile.

    Extra keyword arguments (besides `netcdf_format`) are forwarded to
    ``caltra.<version>`` as ``-key value`` command-line flags.
    """
    outputdir = Path(outputdir)
    netcdf_format = kwargs.pop('netcdf_format', 'CF')
    caltra_cmd = 'caltra.{version} {startdate:%Y%m%d_%H} {enddate:%Y%m%d_%H}'
    # '{filename}' placeholder restored; it was lost in the documentation
    # extraction but is required by the .format() call below.
    caltra_cmd += ' {startfile} {filename}'
    if jump:
        caltra_cmd += ' -j'
    # forward remaining keywords as command-line flags
    for key, value in kwargs.items():
        caltra_cmd += ' -{key} {value}'.format(key=key, value=value)
    caltra_cmd = caltra_cmd.format(startdate=startdate, enddate=enddate,
                                   startfile=startfile, filename=filename,
                                   version=version)
    out = run_cmd(caltra_cmd, workingdir,
                  netcdf_format=netcdf_format, cmd_header=cmd_header)
    try:
        Path.copy(workingdir / filename, outputdir / filename)
    except SameFileError:
        pass
    return out
@intmpdir
def density(workingdir, inpfile, outfile,
            outputdir='', sdate=None, version='cosmo',
            cmd_header=None, **kwargs):
    """Calculate trajectories density

    Extra keyword arguments (besides `netcdf_format`) are appended to the
    ``density.<version>`` command as ``-key value`` flags.
    """
    target = Path(outputdir)
    fmt = kwargs.pop('netcdf_format', 'CF')
    # build the full command template first, then substitute, exactly as
    # the flags are appended before formatting
    template = 'density.{version} {inpfile} {outfile}' + ''.join(
        ' -{0} {1}'.format(key, value) for key, value in kwargs.items())
    cmd = template.format(version=version, inpfile=inpfile, outfile=outfile)
    out = run_cmd(cmd, workingdir, netcdf_format=fmt, cmd_header=cmd_header)
    try:
        Path.copy(workingdir / outfile, target / outfile)
    except SameFileError:
        pass
    return out
class LagrantoRun:
    """Perform Lagranto calculation

    Parameters
    ----------
    dates: list
        list of (startdate, enddate) tuple
    workingdir: string, optional
        path to the model output directory, default to current
    outputdir: string, optional
        path to the trajectory output directory, default to current
    startf: string, optional
        name of the startf to use (or to create), default to startf.4
    lslname: string, optional
        name of the lsl file, define its type, default to lsl_{:%Y%m%d%H}.4
    tracevars: string, optional
        name of a tracevars file as used by trace, default to none
    field: string, optional
        name of a single field to trace, default to none
    version: string, optional
        name of the model version to use, currently only cosmo (default)
    linkfiles: function, optional
        function used to overwrite link_files in run.
        Should be used if COSMO output is not standard netcdf
    nprocesses: int, optional
        Number of processes used when running in parallel, default to 10
    sdate: datetime object,
        Starting date of the simulation;
        useful if files are named in forecast mode
    fresh: bool, optional
        Fresh start. Remove output directory first.
    """

    def __init__(self, dates, workingdir='.', outputdir='.',
                 startf='startf.4', lslname='lsl_{:%Y%m%d%H}.4',
                 tracevars='', field='', version='cosmo', linkfiles=None,
                 nprocesses=10, sdate=None, fresh=False):
        self.dates = dates
        self.workingdir = Path(workingdir)
        self.outputdir = Path(outputdir)
        if fresh:
            # remove any previous results before re-creating the directory
            self.clear()
        self.outputdir.makedirs_p()
        self.startf = Path(startf)
        self.lslname = lslname
        self.tracevars = tracevars
        self.field = field
        self.version = version
        self.link_files = linkfiles if linkfiles else link_files
        self.nprocesses = nprocesses
        self.sdate = sdate

    def create_startf(self, date, specifier, filename='', **kwargs):
        """Create a LAGRANTO startfile for `date` (see create_startf)."""
        if filename:
            self.startf = Path(filename)
        return create_startf(self.workingdir, date, self.startf,
                             specifier, outputdir=self.outputdir,
                             sdate=self.sdate, version=self.version,
                             **kwargs)

    def caltra(self, startdate, enddate, filename='', **kwargs):
        """Run caltra between `startdate` and `enddate`."""
        if filename:
            self.lslname = filename
        # remember the output name so select/density can default to it
        self.caltra_out = self.lslname.format(startdate)
        return caltra(self.workingdir, startdate, enddate, self.startf,
                      self.caltra_out, outputdir=self.outputdir,
                      sdate=self.sdate, version=self.version, **kwargs)

    def trace(self, date, filename='', tracevars='', tracevars_content='',
              field='', **kwargs):
        """trace variable along a trajectory

        filename has a similar form as lslname in LagrantoRun
        """
        tracef = tracevars if tracevars else self.tracevars
        fieldv = field if field else self.field
        filename = filename if filename else self.lslname
        filename = filename.format(date)
        return trace(self.workingdir, filename, outputdir=self.outputdir,
                     tracevars=tracef, tracevars_content=tracevars_content,
                     field=fieldv, sdate=self.sdate,
                     version=self.version, **kwargs)

    def density(self, inpfile=None, outfile=None, **kwargs):
        """Compute a trajectory density, defaulting to the last caltra
        output as input."""
        if inpfile is None:
            try:
                inpfile = self.caltra_out
            except AttributeError:
                raise ValueError('Provide an input file or run caltra')
        self.density_out = 'density.4' if outfile is None else outfile
        return density(self.workingdir, inpfile=inpfile,
                       outfile=self.density_out, outputdir=self.outputdir,
                       sdate=self.sdate, version=self.version,
                       **kwargs)

    def select(self, inpfile=None, outfile=None, **kwargs):
        """Select trajectories, defaulting to the last caltra output as
        input."""
        if inpfile is None:
            try:
                inpfile = self.caltra_out
            except AttributeError:
                raise ValueError('Provide an input file or run caltra')
        self.select_out = 'selected.4' if outfile is None else outfile
        return select(self.workingdir, inpfile=inpfile,
                      outfile=self.select_out, outputdir=self.outputdir,
                      sdate=self.sdate, version=self.version,
                      **kwargs)

    def run(self, caltra_kw=None, trace_kw=None, **kwargs):
        """run caltra and trace

        if kwargs are provided they are passed to the link_files function
        """
        for sd, ed in self.dates:
            self._single_run(sd, ed, caltra_kw=caltra_kw, trace_kw=trace_kw,
                             **kwargs)

    def run_parallel(self, caltra_kw=None, trace_kw=None, **kwargs):
        """ run caltra and trace in parallel

        Similar to run() but using multiprocessing.Pool
        """
        single_run = partial(self._single_run, caltra_kw=caltra_kw,
                             trace_kw=trace_kw, **kwargs)
        with Pool(processes=min(self.nprocesses, len(self.dates))) as pool:
            results = pool.starmap(single_run, self.dates)
        return results

    def _single_run(self, sd, ed, caltra_kw=None, trace_kw=None, type='both',
                    debug=False, **kwargs):
        """Run caltra and/or trace for one (sd, ed) pair in a fresh tmpdir.

        `type` is one of 'both', 'caltra' or 'trace' (the name shadows the
        builtin but is kept for backward compatibility). On a
        DynToolsException the tmpdir is kept for inspection and its path
        appended to the exception args; the exception is returned unless
        `debug` is True, in which case it is re-raised.
        """
        # None defaults avoid the shared-mutable-default pitfall of the
        # original `caltra_kw={}` / `trace_kw={}`.
        caltra_kw = {} if caltra_kw is None else caltra_kw
        trace_kw = {} if trace_kw is None else trace_kw
        workingdir = self.workingdir
        tmpdir = Path(mkdtemp())
        self.workingdir = tmpdir
        nkwargs = {'version': self.version,
                   'outputdir': self.outputdir,
                   'sdate': self.sdate}
        nkwargs.update(kwargs)
        self.link_files(workingdir, tmpdir, **nkwargs)
        try:
            if type == 'both':
                out = self.caltra(sd, ed, **caltra_kw)
                out += self.trace(sd, **trace_kw)
            elif type == 'caltra':
                out = self.caltra(sd, ed, **caltra_kw)
            elif type == 'trace':
                out = self.trace(sd, **trace_kw)
        except DynToolsException as err:
            err.args += (tmpdir,)
            if debug:
                raise
            return err
        finally:
            # BUG FIX: the original restored self.workingdir only on
            # success, leaving it pointing at the tmpdir after a failure.
            self.workingdir = workingdir
        tmpdir.rmtree_p()
        return out

    def clear(self):
        """Remove the output directory and all its content."""
        self.outputdir.rmtree_p()
def link_files(folder, tmpdir, outputdir='', version='cosmo',
               sdate=None, sfiles=True):
    """link data from <folder> to <tmpdir> as P,S files

    Parameters
    ----------
    folder: string,
        path to the origin folder
    tmpdir: string,
        path to the destination folder
    outputdir: string,
        path to the folder where results are saved
    version: string,
        only cosmo is available for now
    sdate: datetime object,
        useful if the cosmo files are named in forecast mode
    sfiles: bool,
        also link each cosmo file as an S file (default True)
    """
    folder = Path(folder).abspath()
    tmpdir = Path(tmpdir)
    files = set(folder.files())
    if version == 'cosmo':
        constant_file = folder.files('l*c.nc')
        if constant_file:
            constant_file[0].symlink(tmpdir / 'LMCONSTANTS')
        cosmofiles = set(folder.files('lff[df]*[0-9].nc'))
        for fname in cosmofiles:
            try:
                # analysis mode: lffdYYYYMMDDHH.nc
                date = datetime.strptime(fname.name, 'lffd%Y%m%d%H.nc')
            except ValueError:
                # forecast mode: lff?DDHHMMSS.nc, offsets relative to sdate
                sfname = fname.name[4:-3]
                dd, hh, mm, ss = [int(sfname[i:i + 2])
                                  for i in range(0, len(sfname), 2)]
                # BUG FIX: the original passed minutes=hh (hours used
                # twice) and never used the parsed minutes value `mm`.
                date = sdate + timedelta(days=dd, hours=hh,
                                         minutes=mm, seconds=ss)
            pfile = 'P{:%Y%m%d_%H}'.format(date)
            fname.symlink(tmpdir / pfile)
            if sfiles:
                sfile = 'S{:%Y%m%d_%H}'.format(date)
                fname.symlink(tmpdir / sfile)
        # the cosmo files are already linked as P/S files
        files = files.difference(cosmofiles)
    for fname in files:
        fname.symlink(tmpdir / fname.name)
    if outputdir:
        # make previously produced results visible in the tmpdir
        outputdir = Path(outputdir).abspath()
        for fname in outputdir.files():
            fname.symlink(tmpdir / fname.name)