Source code for mhkit.dolfyn.io.api

import numpy as np
import scipy.io as sio
import xarray as xr
from os.path import abspath, dirname, join, normpath, relpath
from .nortek import read_nortek
from .nortek2 import read_signature
from .rdi import read_rdi
from .base import _create_dataset, _get_filetype
from ..rotate.base import _set_coords
from ..time import (
    date2matlab,
    matlab2date,
    date2dt64,
    dt642date,
    date2epoch,
    epoch2date,
)


def _check_file_ext(path, ext):
    filename = path.replace("\\", "/").rsplit("/")[-1]  # windows/linux
    # for a filename like mcrl.water_velocity-1s.b1.20200813.150000.nc
    file_ext = filename.rsplit(".")[-1]
    if "." in filename:
        if file_ext != ext:
            raise IOError("File extension must be of the type {}".format(ext))
        if file_ext == ext:
            return path

    return path + "." + ext


def _decode_cf(dataset: xr.Dataset) -> xr.Dataset:
    """
    Wrapper around `xarray.decode_cf()` which handles additional edge cases.

    This helps ensure that the dataset is formatted and encoded correctly after it has
    been constructed or modified. Handles edge cases for units and data type encodings
    on datetime variables.

    Args:
        dataset (xr.Dataset): The dataset to decode.

    Returns:
        xr.Dataset: The decoded dataset.
    """

    # We have to make sure that time variables do not have units set as attrs, and
    # instead have units set on the encoding or else xarray will crash when trying
    # to save: https://github.com/pydata/xarray/issues/3739
    for variable in dataset.variables.values():
        if (
            np.issubdtype(variable.data.dtype, np.datetime64)
            and "units" in variable.attrs
        ):
            units = variable.attrs["units"]
            del variable.attrs["units"]
            variable.encoding["units"] = units

        # If the _FillValue is already encoded, remove it since it can't be overwritten per xarray
        if "_FillValue" in variable.encoding:
            del variable.encoding["_FillValue"]

    # Leaving the "dtype" entry in the encoding for datetime64 variables causes a crash
    # when saving the dataset. Not fixed by: https://github.com/pydata/xarray/pull/4684
    ds: xr.Dataset = xr.decode_cf(dataset)
    for variable in ds.variables.values():
        if variable.data.dtype.type == np.datetime64:
            if "dtype" in variable.encoding:
                del variable.encoding["dtype"]
    return ds


[docs] def read(fname, userdata=True, nens=None, **kwargs): """ Read a binary Nortek (e.g., .VEC, .wpr, .ad2cp, etc.) or RDI (.000, .PD0, .ENX, etc.) data file. Parameters ---------- filename : string Filename of instrument file to read. userdata : True, False, or string of userdata.json filename (default ``True``) Whether to read the '<base-filename>.userdata.json' file. nens : None, int or 2-element tuple (start, stop) Number of pings or ensembles to read from the file. Default is None, read entire file **kwargs : dict Passed to instrument-specific parser. Returns ------- ds : xarray.Dataset An xarray dataset from instrument datafile. """ file_type = _get_filetype(fname) if file_type == "<GIT-LFS pointer>": raise IOError( "File '{}' looks like a git-lfs pointer. You may need to " "install and initialize git-lfs. See https://git-lfs.github.com" " for details.".format(fname) ) elif file_type is None: raise IOError( "File '{}' is not recognized as a file-type that is readable by " "DOLfYN. If you think it should be readable, try using the " "appropriate read function (`read_rdi`, `read_nortek`, or " "`read_signature`) found in dolfyn.io.api.".format(fname) ) else: func_map = dict(RDI=read_rdi, nortek=read_nortek, signature=read_signature) func = func_map[file_type] return func(fname, userdata=userdata, nens=nens, **kwargs)
[docs] def read_example(name, **kwargs): """ Read an ADCP or ADV datafile from the examples directory. Parameters ---------- name : str A few available files: AWAC_test01.wpr BenchFile01.ad2cp RDI_test01.000 vector_burst_mode01.VEC vector_data01.VEC vector_data_imu01.VEC winriver01.PD0 winriver02.PD0 Returns ------- ds : xarray.Dataset An xarray dataset from the binary instrument data. """ testdir = dirname(abspath(__file__)) exdir = normpath(join(testdir, relpath("../../../examples/data/dolfyn/"))) filename = exdir + "/" + name return read(filename, **kwargs)
[docs] def save(ds, filename, format="NETCDF4", engine="netcdf4", compression=False, **kwargs): """ Save xarray dataset as netCDF (.nc). Parameters ---------- ds : xarray.Dataset Dataset to save filename : str Filename and/or path with the '.nc' extension compression : bool When true, compress all variables with zlib complevel=1. Default is False **kwargs : dict These are passed directly to :func:`xarray.Dataset.to_netcdf` Notes ----- Drops 'config' lines. Rewrites variable encoding dict More detailed compression options can be specified by specifying 'encoding' in kwargs. The values in encoding will take precedence over whatever is set according to the compression option above. See the xarray.to_netcdf documentation for more details. """ filename = _check_file_ext(filename, "nc") # Handling complex values for netCDF4 ds.attrs["complex_vars"] = [] for var in ds.data_vars: if np.iscomplexobj(ds[var]): ds[var + "_real"] = ds[var].real ds[var + "_imag"] = ds[var].imag ds = ds.drop_vars(var) ds.attrs["complex_vars"].append(var) # For variables that get rewritten to float64 elif ds[var].dtype == np.float64: ds[var] = ds[var].astype("float32") # Write variable encoding enc = dict() if "encoding" in kwargs: enc.update(kwargs["encoding"]) for ky in ds.variables: # Save prior encoding enc[ky] = ds[ky].encoding # Remove unexpected netCDF4 encoding parameters # https://github.com/pydata/xarray/discussions/5709 params = [ "szip", "zstd", "bzip2", "blosc", "contiguous", "chunksizes", "preferred_chunks", ] [enc[ky].pop(p) for p in params if p in enc[ky]] if compression: # New netcdf4-c cannot compress variable length strings if ds[ky].size <= 1 or isinstance(ds[ky].data[0], str): continue enc[ky].update(dict(zlib=True, complevel=1)) kwargs["encoding"] = enc # Fix encoding on datetime64 variables. ds = _decode_cf(ds) ds.to_netcdf(filename, format=format, engine=engine, **kwargs)
[docs] def load(filename): """ Load xarray dataset from netCDF (.nc) Parameters ---------- filename : str Filename and/or path with the '.nc' extension Returns ------- ds : xarray.Dataset An xarray dataset from the binary instrument data. """ filename = _check_file_ext(filename, "nc") ds = xr.load_dataset(filename, engine="netcdf4") # Convert numpy arrays and strings back to lists for nm in ds.attrs: if isinstance(ds.attrs[nm], np.ndarray) and ds.attrs[nm].size > 1: # Convert list items from numpy to generic python type ds.attrs[nm] = list(ds.attrs[nm].astype(list)) elif isinstance(ds.attrs[nm], str) and nm in ["rotate_vars"]: ds.attrs[nm] = [ds.attrs[nm]] # Rejoin complex numbers if hasattr(ds, "complex_vars"): if len(ds.complex_vars): if len(ds.complex_vars[0]) == 1: ds.attrs["complex_vars"] = [ds.complex_vars] for var in ds.complex_vars: ds[var] = ds[var + "_real"] + ds[var + "_imag"] * 1j ds = ds.drop_vars([var + "_real", var + "_imag"]) ds.attrs.pop("complex_vars") return ds
[docs] def save_mat(ds, filename, datenum=True): """ Save xarray dataset as a MATLAB (.mat) file Parameters ---------- ds : xarray.Dataset Dataset to save filename : str Filename and/or path with the '.mat' extension datenum : bool If true, converts time to datenum. If false, time will be saved in "epoch time". Notes ----- The xarray data format is saved as a MATLAB structure with the fields 'vars, coords, config, units'. Converts time to datenum See Also -------- scipy.io.savemat() """ def copy_attrs(matfile, ds, key): if hasattr(ds[key], "units"): matfile["units"][key] = ds[key].units if hasattr(ds[key], "long_name"): matfile["long_name"][key] = ds[key].long_name if hasattr(ds[key], "standard_name"): matfile["standard_name"][key] = ds[key].standard_name filename = _check_file_ext(filename, "mat") # Convert time to datenum t_coords = [t for t in ds.coords if np.issubdtype(ds[t].dtype, np.datetime64)] t_data = [t for t in ds.data_vars if np.issubdtype(ds[t].dtype, np.datetime64)] if datenum: func = date2matlab else: func = date2epoch for ky in t_coords: dt = func(dt642date(ds[ky])) ds = ds.assign_coords({ky: (ky, dt, ds[ky].attrs)}) for ky in t_data: dt = func(dt642date(ds[ky])) ds[ky].data = dt ds.attrs["time_coords"] = t_coords ds.attrs["time_data_vars"] = t_data # Save xarray structure with more descriptive structure names matfile = { "vars": {}, "coords": {}, "config": {}, "units": {}, "long_name": {}, "standard_name": {}, } for ky in ds.data_vars: matfile["vars"][ky] = ds[ky].values copy_attrs(matfile, ds, ky) for ky in ds.coords: matfile["coords"][ky] = ds[ky].values copy_attrs(matfile, ds, ky) matfile["config"] = ds.attrs sio.savemat(filename, matfile)
[docs] def load_mat(filename, datenum=True): """ Load xarray dataset from MATLAB (.mat) file, complimentary to `save_mat()` A .mat file must contain the fields: {vars, coords, config, units}, where 'coords' contain the dimensions of all variables in 'vars'. Parameters ---------- filename : str Filename and/or path with the '.mat' extension datenum : bool If true, converts time from datenum. If false, converts time from "epoch time". Returns ------- ds : xarray.Dataset An xarray dataset from the binary instrument data. See Also -------- scipy.io.loadmat() """ filename = _check_file_ext(filename, "mat") data = sio.loadmat(filename, struct_as_record=False, squeeze_me=True) ds_dict = { "vars": {}, "coords": {}, "config": {}, "units": {}, "long_name": {}, "standard_name": {}, } for nm in ds_dict: key_list = data[nm]._fieldnames for ky in key_list: ds_dict[nm][ky] = getattr(data[nm], ky) ds_dict["data_vars"] = ds_dict.pop("vars") ds_dict["attrs"] = ds_dict.pop("config") # Recreate dataset ds = _create_dataset(ds_dict) ds = _set_coords(ds, ds.coord_sys) # Convert numpy arrays and strings back to lists for nm in ds.attrs: if isinstance(ds.attrs[nm], np.ndarray) and ds.attrs[nm].size > 1: try: ds.attrs[nm] = [x.strip(" ") for x in list(ds.attrs[nm].astype(list))] except: ds.attrs[nm] = list(ds.attrs[nm].astype(list)) elif isinstance(ds.attrs[nm], str) and nm in [ "time_coords", "time_data_vars", "rotate_vars", ]: ds.attrs[nm] = [ds.attrs[nm]] if hasattr(ds, "orientation_down"): ds["orientation_down"] = ds["orientation_down"].astype(bool) if datenum: func = matlab2date else: func = epoch2date # Restore datnum to np.dt64 if hasattr(ds, "time_coords"): for ky in ds.attrs["time_coords"]: dt = date2dt64(func(ds[ky].values)) ds = ds.assign_coords({ky: dt}) ds.attrs.pop("time_coords") if hasattr(ds, "time_data_vars"): for ky in ds.attrs["time_data_vars"]: dt = date2dt64(func(ds[ky].values)) ds[ky].data = dt ds.attrs.pop("time_data_vars") return ds