# Source code for dypy.lagranto.lagranto

#!/usr/bin/env python
# -*- coding:utf-8 -*-
from __future__ import unicode_literals

from datetime import datetime, timedelta
from multiprocessing.pool import Pool

from functools import partial

import numpy as np
from functools import wraps
from path import Path
from shutil import SameFileError
from tempfile import mkdtemp

from dypy.tools.dyntools import DynToolsException, run_cmd
from .formats import from_netcdf, to_ascii, from_ascii, to_netcdf

# Public API of this module. BUGFIX: was `_all__` (single leading
# underscore), which Python's star-import machinery silently ignores.
__all__ = ['Tra', 'LagrantoRun']


class Tra(object):
    """Class to work with LAGRANTO output.

    Read trajectories from a LAGRANTO file and return a structured
    numpy array.

    Args:
        filename (string): File containing lagranto trajectories
        usedatetime (bool): Read times as datetime objects, default True
        typefile: Deprecated, kept for backward compatibility

    Returns:
        structured array (Tra): trajs(ntra, ntime) with variables as tuple.

    Examples
    --------
    >>> filename = 'mylslfile.nc'
    >>> trajs = Tra()
    >>> trajs.load_netcdf(filename)
    >>> trajs['lon'][0, :]  # return the longitudes for the first trajectory.

    Author : Nicolas Piaget, ETH Zurich , 2014
             Sebastiaan Crezee, ETH Zurich , 2014
    """

    # Cached simulation start date; lazily resolved by the `startdate`
    # property when not set by a loader.
    _startdate = None

    def __init__(self, filename='', typefile=None, usedatetime=True):
        if not filename:
            self._array = None
            return
        # Try netcdf first, fall back to ascii.  Narrowed from bare
        # `except:` so KeyboardInterrupt/SystemExit still propagate.
        try:
            self.load_netcdf(filename, usedatetime=usedatetime)
        except Exception:
            try:
                self.load_ascii(filename, usedatetime=usedatetime)
            except Exception:
                raise IOError("Unknown fileformat. Known formats "
                              "are ascii or netcdf")

    def __len__(self):
        return len(self._array)

    def __getattr__(self, attr):
        # Only reached when normal attribute lookup fails, so the old
        # `attr in self.__dict__` branch was dead code and was removed.
        # Guard against infinite recursion when `_array` itself is
        # missing (e.g. during unpickling or copy).
        if attr == '_array':
            raise AttributeError(attr)
        # Delegate everything else (shape, ndim, dtype, ...) to the
        # underlying structured array.
        return getattr(self._array, attr)

    def __getitem__(self, key):
        return self._array[key]

    def __setitem__(self, key, item):
        if type(key) is slice:
            # Assigning to a slice replaces the whole array.
            self._array = item
        elif key in self.dtype.names:
            self._array[key] = item
        else:
            # New variable: grow the dtype with one extra field and copy
            # the existing data into the enlarged array.
            dtypes = self._array.dtype.descr
            dtypes.append((key, item.dtype.descr[0][1]))
            dtypes = [(str(d[0]), d[1]) for d in dtypes]
            newarr = np.zeros(self._array.shape, dtype=dtypes)
            for var in self.variables:
                newarr[var] = self._array[var]
            newarr[key] = item
            self._array = newarr

    def __repr__(self):
        try:
            string = ("{} trajectories with {} time steps. \n"
                      "Available fields: {}\n"
                      "total duration: {} minutes").format(
                self.ntra, self.ntime, "/".join(self.variables),
                self.duration)
        except AttributeError:
            # Assume it's an empty Tra()
            string = ("Empty trajectories container.\n"
                      "Hint: use load_ascii() or load_netcdf() "
                      "to load data")
        return string

    @property
    def ntra(self):
        """Number of trajectories; None (with a warning) if not 2D."""
        if self.ndim < 2:
            print("Be careful with the dimensions, "
                  "you may want to change the shape:\n"
                  "either shape + (1,) or (1,) + shape")
            return None
        return self.shape[0]

    @property
    def ntime(self):
        """Number of time steps; None (with a warning) if not 2D."""
        if self.ndim < 2:
            print("Be careful with the dimensions, "
                  "you may want to change the shape:\n"
                  "either shape + (1,) or (1,) + shape")
            return None
        return self.shape[1]

    @property
    def variables(self):
        """List of variable names present in the trajectories."""
        return list(self.dtype.names)

    @property
    def duration(self):
        """Time duration in minutes."""
        end = self['time'][0, -1]
        start = self['time'][0, 0]
        delta = end - start
        if isinstance(delta, np.timedelta64):
            return delta.astype(timedelta).total_seconds() / 60.
        # Relative times are stored in hours; convert to minutes.
        return delta * 60.

    @property
    def initial(self):
        """Give the initial time of the trajectories."""
        starttime = self['time'][0, 0]
        return starttime.astype(datetime)

    @property
    def startdate(self):
        """Starting date; 1900-01-01 00:00 when times are relative."""
        if self._startdate is None:
            time0 = self['time'][0, 0]
            if isinstance(time0, np.datetime64):
                self._startdate = time0.astype(datetime)
            else:
                self._startdate = datetime(1900, 1, 1, 0)
        return self._startdate

    def set_array(self, array):
        """To change the trajectories array."""
        self._array = array

    def concatenate(self, trajs, time=False):
        """To concatenate trajectories together.

        Parameters
        ----------
        trajs: Tra or tuple of Tra
            trajectories to append to this one
        time: bool, default False
            if True concatenate along the time dimension

        Returns
        -------
        Tra
            Return a new Tra (trajectories) object
        """
        if type(trajs) is not tuple:
            trajs = (trajs,)
        if time:
            # Transpose so that np.concatenate stacks along time.
            trajstuple = tuple(tra._array.T for tra in trajs)
            trajstuple += (self._array.T,)
            combined = np.concatenate(trajstuple).T
        else:
            trajstuple = tuple(tra._array for tra in trajs)
            trajstuple += (self._array,)
            combined = np.concatenate(trajstuple)
        newtrajs = Tra()
        newtrajs.set_array(combined)
        return newtrajs

    def write(self, filename, fileformat='netcdf'):
        """Write the trajectories to <filename> ('netcdf' or 'ascii').

        BUGFIX: previously looked up a module-level `_write_<format>`
        function that does not exist (only `to_netcdf`/`to_ascii` are
        imported), so this always raised KeyError.  Dispatch to the
        corresponding `write_<format>` method instead.
        """
        getattr(self, 'write_{}'.format(fileformat))(filename)

    def load_netcdf(self, filename, usedatetime=True, msv=-999,
                    unit='hours', exclude=None, date=None):
        self._array, self._startdate = from_netcdf(
            filename, usedatetime=usedatetime, msv=msv, unit=unit,
            exclude=exclude, date=date)
    load_netcdf.__doc__ = from_netcdf.__doc__

    def write_netcdf(self, filename, exclude=None, unit='hours'):
        # Mutable default `exclude=[]` replaced by None sentinel.
        to_netcdf(self, filename,
                  exclude=[] if exclude is None else exclude, unit=unit)
    write_netcdf.__doc__ = to_netcdf.__doc__

    def write_ascii(self, filename):
        to_ascii(self, filename)
    write_ascii.__doc__ = to_ascii.__doc__

    def load_ascii(self, filename, usedatetime=True, msv=-999.999,
                   gz=False):
        self._array, self._startdate = from_ascii(
            filename, usedatetime=usedatetime, msv=msv, gz=gz)
    load_ascii.__doc__ = from_ascii.__doc__
def intmpdir(f):
    """Decorator: run the LAGRANTO command in a temporary directory.

    If the working directory (first positional argument) is not already
    under /tmp, a temp dir is created, the model files are linked into
    it, <f> runs there, and the temp dir is removed afterwards.
    """
    @wraps(f)
    def wrapper(*args, **kwds):
        workingdir = args[0]
        if Path(workingdir).parent == '/tmp':
            return f(*args, **kwds)
        tmpdir = Path(mkdtemp())
        # Forward only the keywords link_files understands.
        lwkds = {key: value for key, value in kwds.items()
                 if key in ['outputdir', 'version', 'sdate']}
        link_files(workingdir, tmpdir, **lwkds)
        nargs = (tmpdir,) + args[1:]
        out = f(*nargs, **kwds)
        # BUGFIX: rmdir_p cannot remove a non-empty directory, so the
        # populated temp dirs were leaking; rmtree_p removes the links
        # too (consistent with LagrantoRun._single_run).
        tmpdir.rmtree_p()
        return out
    return wrapper


@intmpdir
def create_startf(workingdir, date, filename, specifier, version='cosmo',
                  outputdir='', sdate=None, tolist=False, cmd_header=None):
    """Create a startfile for LAGRANTO.

    Runs ``startf.<version>`` and copies the result to <outputdir>.
    When `tolist` is True the startfile is additionally converted with
    ``lsl2list``.
    """
    outputdir = Path(outputdir)
    workingdir = Path(workingdir)
    create = "startf.{version} "
    # BUGFIX: the '{filename}' placeholder had been lost ("(unknown)"),
    # so the command contained a literal placeholder instead of the file.
    create += "{date:%Y%m%d_%H} {filename} {specifier}"
    if tolist:
        filename = Path(filename).splitext()[0] + '.4'
    create = create.format(version=version, date=date, filename=filename,
                           specifier=specifier)
    out = run_cmd(create, workingdir, cmd_header=cmd_header)
    if tolist:
        lslname = filename
        filename = Path(lslname).splitext()[0] + '.startf'
        lsl2list = 'lsl2list {} {}'.format(lslname, filename)
        out = run_cmd(lsl2list, workingdir, cmd_header=cmd_header)
    try:
        Path.copy(workingdir / filename, outputdir / filename)
    except SameFileError:
        # working dir and output dir are the same; nothing to copy
        pass
    return out


@intmpdir
def select(workingdir, inpfile, outfile, crit, outputdir='', sdate=None,
           version='cosmo', cmd_header=None, **kwargs):
    """Select trajectories from <inpfile> fulfilling <crit>."""
    netcdf_format = kwargs.pop('netcdf_format', 'CF')
    select_cmd = 'select.{version} {inpfile} {outfile} "{crit}"'
    select_cmd = select_cmd.format(version=version, inpfile=inpfile,
                                   outfile=outfile, crit=crit)
    out = run_cmd(select_cmd, workingdir, netcdf_format=netcdf_format,
                  cmd_header=cmd_header)
    outputdir = Path(outputdir)
    try:
        Path.copy(workingdir / outfile, outputdir / outfile)
    except SameFileError:
        pass
    return out


@intmpdir
def trace(workingdir, filename, outputdir='', outfile='', tracevars='',
          tracevars_content='', field='', sdate=None, version='cosmo',
          cmd_header=None, **kwargs):
    """Trace variables along a trajectory."""
    netcdf_format = kwargs.pop('netcdf_format', 'CF')
    workingdir = Path(workingdir)
    outputdir = Path(outputdir)
    if not outfile:
        # In-place trace: output overwrites the input file name.
        outfile = filename
    # BUGFIX: restored the lost '{filename}' placeholder ("(unknown)").
    trace_cmd = 'trace.{version} {filename} {outfile}'
    tracevars_file = 'tracevars'
    if tracevars:
        tracevars_file = tracevars
        trace_cmd += ' -v ' + tracevars
    if tracevars_content:
        # Write the tracevars file content into the working directory.
        with (Path(workingdir) / tracevars_file).open('w') as f:
            f.write(tracevars_content)
    if field:
        trace_cmd += ' -f ' + field
    trace_cmd = trace_cmd.format(filename=filename, outfile=outfile,
                                 version=version)
    out = run_cmd(trace_cmd, workingdir, netcdf_format=netcdf_format,
                  cmd_header=cmd_header)
    try:
        Path.copy(workingdir / outfile, outputdir / outfile)
    except SameFileError:
        pass
    return out


@intmpdir
def caltra(workingdir, startdate, enddate, startfile, filename, jump=True,
           outputdir='', sdate=None, version='cosmo', cmd_header=None,
           **kwargs):
    """Calculate trajectories for air parcels starting at positions
    specified in startfile."""
    outputdir = Path(outputdir)
    netcdf_format = kwargs.pop('netcdf_format', 'CF')
    caltra_cmd = 'caltra.{version} {startdate:%Y%m%d_%H} {enddate:%Y%m%d_%H}'
    # BUGFIX: restored the lost '{filename}' placeholder ("(unknown)").
    caltra_cmd += ' {startfile} {filename}'
    if jump:
        caltra_cmd += ' -j'
    # Any remaining keyword is forwarded as a `-key value` flag.
    for key, value in kwargs.items():
        caltra_cmd += ' -{key} {value}'.format(key=key, value=value)
    caltra_cmd = caltra_cmd.format(startdate=startdate, enddate=enddate,
                                   startfile=startfile, filename=filename,
                                   version=version)
    out = run_cmd(caltra_cmd, workingdir, netcdf_format=netcdf_format,
                  cmd_header=cmd_header)
    try:
        Path.copy(workingdir / filename, outputdir / filename)
    except SameFileError:
        pass
    return out


@intmpdir
def density(workingdir, inpfile, outfile, outputdir='', sdate=None,
            version='cosmo', cmd_header=None, **kwargs):
    """Calculate trajectories density."""
    outputdir = Path(outputdir)
    netcdf_format = kwargs.pop('netcdf_format', 'CF')
    density_cmd = 'density.{version} {inpfile} {outfile}'
    for key, value in kwargs.items():
        density_cmd += ' -{key} {value}'.format(key=key, value=value)
    density_cmd = density_cmd.format(inpfile=inpfile, outfile=outfile,
                                     version=version)
    out = run_cmd(density_cmd, workingdir, netcdf_format=netcdf_format,
                  cmd_header=cmd_header)
    try:
        Path.copy(workingdir / outfile, outputdir / outfile)
    except SameFileError:
        pass
    return out
class LagrantoRun:
    """Perform Lagranto calculation.

    Parameters
    ----------
    dates: list
        list of (startdate, enddate) tuple
    workingdir: string, optional
        path to the model output directory, default to current
    outputdir: string, optional
        path to the trajectory output directory, default to current
    startf: string, optional
        name of the startf to use (or to create), default to startf.4
    lslname: string, optional
        name of the lsl file, define its type,
        default to lsl_{:%Y%m%d%H}.4
    tracevars: string, optional
        name of a tracevars file as used by trace, default to none
    field: string, optional
        name of a single field to trace, default to none
    version: string, optional
        name of the model version to use, currently only cosmo (default)
    linkfiles: function, optional
        function used to overwrite link_files in run.
        Should be used if COSMO output is not standard netcdf
    nprocesses: int, optional
        Number of processes used when running in parallel, default to 10
    sdate: datetime object,
        Starting date of the simulation;
        useful if files are named in forecast mode
    fresh: bool, optional
        Fresh start. Remove output directory first.
    """

    def __init__(self, dates, workingdir='.', outputdir='.',
                 startf='startf.4', lslname='lsl_{:%Y%m%d%H}.4',
                 tracevars='', field='', version='cosmo', linkfiles=None,
                 nprocesses=10, sdate=None, fresh=False):
        self.dates = dates
        self.workingdir = Path(workingdir)
        self.outputdir = Path(outputdir)
        if fresh:
            # Wipe any previous results before (re)creating outputdir.
            self.clear()
        self.outputdir.makedirs_p()
        self.startf = Path(startf)
        self.lslname = lslname
        self.tracevars = tracevars
        self.field = field
        self.version = version
        # Allow callers to inject a custom linking function.
        self.link_files = linkfiles if linkfiles else link_files
        self.nprocesses = nprocesses
        self.sdate = sdate

    def create_startf(self, date, specifier, filename='', **kwargs):
        """Create the startfile for <date> using <specifier>."""
        if filename:
            self.startf = Path(filename)
        return create_startf(self.workingdir, date, self.startf, specifier,
                             outputdir=self.outputdir, sdate=self.sdate,
                             version=self.version, **kwargs)

    def caltra(self, startdate, enddate, filename='', **kwargs):
        """Compute trajectories between <startdate> and <enddate>."""
        if filename:
            self.lslname = filename
        # Remember the produced file name for trace/select/density.
        self.caltra_out = self.lslname.format(startdate)
        return caltra(self.workingdir, startdate, enddate, self.startf,
                      self.caltra_out, outputdir=self.outputdir,
                      sdate=self.sdate, version=self.version, **kwargs)

    def trace(self, date, filename='', tracevars='', tracevars_content='',
              field='', **kwargs):
        """Trace variable along a trajectory.

        filename as a similar form as lslname in LagrantoRun
        """
        # Per-call arguments override the instance-level defaults.
        tracef = tracevars if tracevars else self.tracevars
        fieldv = field if field else self.field
        filename = filename if filename else self.lslname
        filename = filename.format(date)
        return trace(self.workingdir, filename, outputdir=self.outputdir,
                     tracevars=tracef, tracevars_content=tracevars_content,
                     field=fieldv, sdate=self.sdate, version=self.version,
                     **kwargs)

    def density(self, inpfile=None, outfile=None, **kwargs):
        """Compute a trajectory density; input defaults to caltra output."""
        if inpfile is None:
            try:
                inpfile = self.caltra_out
            except AttributeError:
                raise ValueError('Provide an input file or run caltra')
        self.density_out = 'density.4' if outfile is None else outfile
        return density(self.workingdir, inpfile=inpfile,
                       outfile=self.density_out, outputdir=self.outputdir,
                       sdate=self.sdate, version=self.version, **kwargs)

    def select(self, inpfile=None, outfile=None, **kwargs):
        """Select trajectories; input defaults to caltra output."""
        if inpfile is None:
            try:
                inpfile = self.caltra_out
            except AttributeError:
                raise ValueError('Provide an input file or run caltra')
        self.select_out = 'selected.4' if outfile is None else outfile
        return select(self.workingdir, inpfile=inpfile,
                      outfile=self.select_out, outputdir=self.outputdir,
                      sdate=self.sdate, version=self.version, **kwargs)

    def run(self, caltra_kw=None, trace_kw=None, **kwargs):
        """Run caltra and trace sequentially for every date pair.

        If kwargs are provided they are passed to the link_files function.
        """
        # Mutable-default fix: {} defaults replaced by None sentinels.
        caltra_kw = {} if caltra_kw is None else caltra_kw
        trace_kw = {} if trace_kw is None else trace_kw
        for sd, ed in self.dates:
            self._single_run(sd, ed, caltra_kw=caltra_kw,
                             trace_kw=trace_kw, **kwargs)

    def run_parallel(self, caltra_kw=None, trace_kw=None, **kwargs):
        """Run caltra and trace in parallel.

        Similar to run() but using multiprocessing.Pool
        """
        caltra_kw = {} if caltra_kw is None else caltra_kw
        trace_kw = {} if trace_kw is None else trace_kw
        single_run = partial(self._single_run, caltra_kw=caltra_kw,
                             trace_kw=trace_kw, **kwargs)
        with Pool(processes=min(self.nprocesses, len(self.dates))) as pool:
            results = pool.starmap(single_run, self.dates)
        return results

    def _single_run(self, sd, ed, caltra_kw=None, trace_kw=None,
                    type='both', debug=False, **kwargs):
        """Run caltra and/or trace for a single (startdate, enddate) pair.

        `type` is 'both' (default), 'caltra' or 'trace'.  On a
        DynToolsException the temp dir is kept for inspection and the
        exception is returned (or re-raised when `debug` is True).
        """
        caltra_kw = {} if caltra_kw is None else caltra_kw
        trace_kw = {} if trace_kw is None else trace_kw
        workingdir = self.workingdir
        tmpdir = Path(mkdtemp())
        self.workingdir = tmpdir
        nkwargs = {'version': self.version, 'outputdir': self.outputdir,
                   'sdate': self.sdate}
        nkwargs.update(kwargs)
        self.link_files(workingdir, tmpdir, **nkwargs)
        try:
            if type == 'both':
                out = self.caltra(sd, ed, **caltra_kw)
                out += self.trace(sd, **trace_kw)
            elif type == 'caltra':
                out = self.caltra(sd, ed, **caltra_kw)
            elif type == 'trace':
                out = self.trace(sd, **trace_kw)
        except DynToolsException as err:
            # Attach the temp dir location so the failure can be debugged.
            err.args += (tmpdir,)
            if debug:
                raise
            return err
        finally:
            # BUGFIX: always restore the original working directory;
            # previously it stayed pointing at tmpdir on error.
            self.workingdir = workingdir
        tmpdir.rmtree_p()
        return out

    def clear(self):
        """Remove the output directory and everything in it."""
        self.outputdir.rmtree_p()
def link_files(folder, tmpdir, outputdir='', version='cosmo', sdate=None,
               sfiles=True):
    """Link data from <folder> to <tmpdir> as P,S files.

    Parameters
    ----------
    folder: string,
        path to the origin folder
    tmpdir: string,
        path to the destination folder
    outputdir: string,
        path to the folder where results are saved
    version: string,
        only cosmo is available for now
    sdate: datetime object,
        useful if the cosmo files are named in forecast mode
    sfiles: bool,
        also create the S<date> links, default True
    """
    folder = Path(folder).abspath()
    tmpdir = Path(tmpdir)
    files = set(folder.files())
    if version == 'cosmo':
        constant_file = folder.files('l*c.nc')
        if constant_file:
            constant_file[0].symlink(tmpdir / 'LMCONSTANTS')
        cosmofiles = set(folder.files('lff[df]*[0-9].nc'))
        for fname in cosmofiles:
            try:
                # Analysis-mode name: lffdYYYYMMDDHH.nc
                date = datetime.strptime(fname.name, 'lffd%Y%m%d%H.nc')
            except ValueError:
                # Forecast-mode name: offsets DDHHMMSS from sdate.
                sfname = fname.name[4:-3]
                dd, hh, mm, ss = [int(sfname[i:i + 2])
                                  for i in range(0, len(sfname), 2)]
                # BUGFIX: minutes were taken from <hh> instead of <mm>.
                date = sdate + timedelta(days=dd, hours=hh, minutes=mm,
                                         seconds=ss)
            pfile = 'P{:%Y%m%d_%H}'.format(date)
            fname.symlink(tmpdir / pfile)
            if sfiles:
                sfile = 'S{:%Y%m%d_%H}'.format(date)
                fname.symlink(tmpdir / sfile)
        files = files.difference(cosmofiles)
    # Link every remaining (non-COSMO) file under its own name.
    for fname in files:
        fname.symlink(tmpdir / fname.name)
    if outputdir:
        # Also expose previous results to the LAGRANTO commands.
        outputdir = Path(outputdir).abspath()
        for fname in outputdir.files():
            fname.symlink(tmpdir / fname.name)