Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Xarray support #23

Merged
merged 28 commits into from
Oct 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
3d458f3
io for xarray
elbeejay Sep 4, 2020
17496b4
make CubeVariable an xarray accessor
elbeejay Sep 4, 2020
3a1408e
minor changes
elbeejay Sep 5, 2020
b43829c
minor tweaks...
elbeejay Sep 5, 2020
1211f6b
use arrays for sectioning
elbeejay Sep 6, 2020
f17289d
Simplify IO for netCDF4/HDF5 handling
elbeejay Sep 10, 2020
927e434
type check in io, more flexible x,y setting in cube
elbeejay Sep 10, 2020
b7de74b
more flexible data shape assignment
elbeejay Sep 11, 2020
18d0e39
alter tests for xarray accessor
elbeejay Sep 12, 2020
9432fdf
io rearrange "known"-things, other small changes for xarray
elbeejay Sep 12, 2020
943e56c
addtl io tests
elbeejay Sep 12, 2020
dffd541
make stratacube[var] a cubevariable, proper access for cube["x"] and …
elbeejay Sep 12, 2020
edd7a2f
crude fixes to work with api / rough 3d plot via pyvista
elbeejay Sep 14, 2020
f082bd2
rebase
elbeejay Oct 19, 2020
cdcae67
remove slow copy.deepcopy and use built-in xarray.copy instead
elbeejay Sep 15, 2020
c26f553
rebase pt2
elbeejay Oct 19, 2020
5a998af
minor rollbacks
elbeejay Sep 15, 2020
de5c4a6
marked the wrong test
elbeejay Sep 15, 2020
9186aeb
add small hdf5 example and corresponding tests
elbeejay Sep 16, 2020
29a4807
minor doc switches to reference xarray instead of numpy
elbeejay Sep 29, 2020
d0c3a27
add getitem, allows consistent api slicing ops
Oct 16, 2020
38c3c2f
add docstring
Oct 16, 2020
54ab721
remove .data from all mask instantiations in tests
Oct 16, 2020
9118d98
update other tests
Oct 16, 2020
3abb033
rebase pt3
elbeejay Oct 19, 2020
04d7b43
fix tests that broke due to dimensionality mis-match
elbeejay Oct 19, 2020
e243d37
minor updates to cube.py
elbeejay Oct 19, 2020
94a8a66
remove redundant mess
elbeejay Oct 19, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .coveragerc
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[run]
source = deltametrics

omit =
*__init__*
*/sample_data/*
*_version*
193 changes: 124 additions & 69 deletions deltametrics/cube.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import os
import copy
import warnings
import time
import abc

import numpy as np
import scipy as sp
import xarray as xr

import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D # noqa: F401 unused import
Expand All @@ -17,13 +19,15 @@
from . import plot


class CubeVariable(np.ndarray):
@xr.register_dataarray_accessor("cubevar")
class CubeVariable():
elbeejay marked this conversation as resolved.
Show resolved Hide resolved
"""Slice of a Cube.

Slicing an :obj:`~deltametrics.cube.Cube` returns an object of this type.
The ``CubeVariable`` is essentially a thin wrapper around the numpy
``ndarray``, enabling additional iniformation to be augmented, a
lighter-weight __repr__, and flexibility for development.
The ``CubeVariable`` is an accessor of the `xarray.DataArray` class,
enabling additional functions to be added to the object, while retaining
the `xarray` object and it's native functionality under
``CubeVariable.data``.

.. warning::
You probably should not instantiate objects of this type directly.
Expand All @@ -38,55 +42,47 @@ class CubeVariable(np.ndarray):
>>> type(rcm8cube['velocity'])
<class 'deltametrics.cube.CubeVariable'>

>>> type(rcm8cube['velocity'].base)
<class 'numpy.ndarray'>
>>> type(rcm8cube['velocity'].data)
<class 'xarray.DataArray'>

>>> rcm8cube['velocity'].variable
'velocity'
"""

def __new__(cls, *args, **kwargs):
"""Initialize the ndarray.
"""
def __init__(self, xarray_obj):
"""Initialize the ``CubeVariable`` object."""
self.data = xarray_obj

def initialize(self, **kwargs):
"""Initialize with **kwargs."""
self.shape = self.data.shape
self.ndim = len(self.shape)
variable = kwargs.pop('variable', None)
self.variable = variable
coords = kwargs.pop('coords', None)
obj = np.array(*args, **kwargs)
obj = np.asarray(obj).view(cls)
obj.variable = variable
if not coords:
obj.t, obj.x, obj.y = [np.arange(l) for l in obj.shape]
self.t, self.x, self.y = [np.arange(itm) for itm in self.shape]
else:
obj.t, obj.x, obj.y = coords['t'], coords['x'], coords['y']
return obj
self.t, self.x, self.y = coords['t'], coords['x'], coords['y']

def __array_finalize__(self, obj):
"""Place things that must always happen here.
def as_frozen(self):
"""Export variable values as a `numpy.ndarray`."""
return self.data.values

This method is called when any new array is cast. The ``__new__``
method is not called when views are cast of an existing
`CubeVariable`, so anything special about the `CubeVariable` needs to
implemented here in addition to in ``__new__``.
def __getitem__(self, slc):
"""Get items from the underlying data.

This method could be configured to return a standard ndarray if a view
is cast, but currently, it will return another CubeVariable instance.
"""
if obj is None:
return
self.variable = getattr(obj, 'variable', None)
self.t, self.x, self.y = getattr(obj, 'coords', range(3))
Takes a numpy slicing style and slices data from the underlying data.
Note that the underlying data is stored in an :obj:`xarray.DataArray`,
and this method returns a :obj:`xarray.DataArray`.

def __repr__(self):
"""Lighterweight repr
Parameters
----------
slc : a `numpy` slice
A valid `numpy` style slice. For example, :code:`[10, :, :]`.
Dimension validation is not performed before slicing.
"""
if self.size > 5000:
return str(type(self)) + ' variable: `' + self.variable \
+ '`; dtype: ' + str(self.dtype) + ' size: ' + str(self.size)
else:
return str(self)

def as_frozen(self):
"""Export variable as `ndarray`."""
return self.view(np.ndarray)
return self.data[slc]


class BaseCube(abc.ABC):
Expand Down Expand Up @@ -168,9 +164,9 @@ def _connect_to_file(self, data_path):
"""
_, ext = os.path.splitext(data_path)
if ext == '.nc':
self._dataio = io.NetCDFIO(data_path)
elif ext == '.hf5': # ?????
self._dataio = io.HDFIO(data_path)
self._dataio = io.NetCDFIO(data_path, 'netcdf')
elif ext == '.hdf5':
self._dataio = io.NetCDFIO(data_path, 'hdf5')
else:
raise ValueError(
'Invalid file extension for "data_path": %s' % data_path)
Expand All @@ -181,22 +177,30 @@ def _read_meta_from_file(self):
This method is used internally to gather some useful info for
navigating the variable trees in the stored files.
"""
self._variables = self._dataio.keys
self._X = self._dataio['x'] # mesh grid of x values of cube
self._Y = self._dataio['y'] # mesh grid of y values of cube
self._x = np.copy(self._X[0, :].squeeze()) # array of x values of cube
self._y = np.copy(self._Y[:, 0].squeeze()) # array of y values of cube
self._coords = self._dataio.known_coords
self._variables = self._dataio.known_variables
# if x is 2-D then we assume x and y are mesh grid values
if np.ndim(self._dataio['x']) == 2:
self._X = self._dataio['x'] # mesh grid of x values of cube
self._Y = self._dataio['y'] # mesh grid of y values of cube
self._x = np.copy(self._X[0, :].squeeze()) # array of xval of cube
self._y = np.copy(self._Y[:, 0].squeeze()) # array of yval of cube
# if x is 1-D we do mesh-gridding
elif np.ndim(self._dataio['x']) == 1:
self._x = self._dataio['x'] # array of xval of cube
self._y = self._dataio['y'] # array of yval of cube
self._X, self._Y = np.meshgrid(self._x, self._y) # mesh grids x&y

def read(self, variables):
"""Read variable into memory
"""Read variable into memory.

Parameters
----------
variables : :obj:`list` of :obj:`str`, :obj:`str`
Which variables to read into memory.
"""
if variables is True: # special case, read all variables
variables = self._dataio.variables
variables = self.variables
elif type(variables) is str:
variables = [variables]
else:
Expand Down Expand Up @@ -226,7 +230,8 @@ def varset(self, var):
def data_path(self):
""":obj:`str` : Path connected to for file IO.

Returns a string if connected to file, or None if cube initialized from ``dict``.
Returns a string if connected to file, or None if cube initialized
from ``dict``.
"""
return self._data_path

Expand All @@ -236,6 +241,11 @@ def dataio(self):
"""
return self._dataio

@property
def coords(self):
"""`list` : List of coordinate names as strings."""
return self._coords

@property
def variables(self):
"""`list` : List of variable names as strings.
Expand Down Expand Up @@ -375,12 +385,23 @@ def export_frozen_variable(self, var, return_cube=False):
if return_cube:
raise NotImplementedError
else:
return self.__getitem__(var).view(np.ndarray)
return self[var].data

def show_cube(self, var, t=-1, x=-1, y=-1, ax=None):
"""Show the cube in a 3d axis.

3d visualization via `pyvista`.

.. warning::
Implementation is crude and should be revisited.
"""
raise NotImplementedError
try:
import pyvista as pv
except ImportError:
ImportError('3d plotting dependency, pyvista, was not found.')

_grid = pv.wrap(self[var].data.values)
_grid.plot()

def show_plan(self, var, t=-1, ax=None, title=None, ticks=False):
"""Show planform image.
Expand All @@ -389,7 +410,7 @@ def show_plan(self, var, t=-1, ax=None, title=None, ticks=False):
NEEDS TO BE PORTED OVER TO WRAP THE .show() METHOD OF PLAN!
"""

_plan = self[var][t] # REPLACE WITH OBJECT RETURNED FROM PLAN
_plan = self[var].data[t] # REPLACE WITH OBJECT RETURNED FROM PLAN

if not ax:
ax = plt.gca()
Expand Down Expand Up @@ -483,7 +504,18 @@ def __init__(self, data, read=[], varset=None, stratigraphy_from=None):

self._t = np.array(self._dataio['time'], copy=True)
_, self._T, _ = np.meshgrid(self.y, self.t, self.x)
self._H, self._L, self._W = self['eta'].shape

# get shape from a variable that is not x, y, or time
i = 0
while i < len(self.variables):
if self.variables[i] == 'x' or self.variables[i] == 'y' \
or self.variables[i] == 'time':
i += 1
else:
_var = self.variables[i]
i = len(self.variables)

self._H, self._L, self._W = self[_var].data.shape

self._knows_stratigraphy = False

Expand All @@ -506,13 +538,27 @@ def __getitem__(self, var):
CubeVariable : `~deltametrics.cube.CubeVariable`
The instantiated CubeVariable.
"""
if var == 'time':
# a special attribute we add, which matches eta.shape
_t = np.expand_dims(self.dataio['time'], axis=(1, 2))
return CubeVariable(np.tile(_t, (1, *self.shape[1:])),
variable='time')
if var in self._coords:
# ensure coords can be called by cube[var]
_coords = {}
_coords['t'] = self.T
_coords['x'] = self.X
_coords['y'] = self.Y
if var == 'time': # special case for time
_t = np.expand_dims(self.dataio.dataset['time'].values,
axis=(1, 2))
_xrt = xr.DataArray(np.tile(_t, (1, *self.shape[1:])))
_obj = _xrt.cubevar
else:
_obj = self._dataio.dataset[var].cubevar
_obj.initialize(variable=var, coords=_coords)
return _obj

elif var in self._variables:
return CubeVariable(self.dataio[var], variable=var)
_obj = self._dataio.dataset[var].cubevar
_obj.initialize(variable=var)
return _obj

else:
raise AttributeError('No variable of {cube} named {var}'.format(
cube=str(self), var=var))
Expand All @@ -536,9 +582,13 @@ def stratigraphy_from(self, variable='eta', style='mesh', **kwargs):
initializers.
"""
if style == 'mesh':
self.strat_attr = strat.MeshStratigraphyAttributes(elev=self[variable], **kwargs)
self.strat_attr = \
strat.MeshStratigraphyAttributes(elev=self[variable],
**kwargs)
elif style == 'boxy':
self.strat_attr = strat.BoxyStratigraphyAttributes(elev=self[variable], **kwargs)
self.strat_attr = \
strat.BoxyStratigraphyAttributes(elev=self[variable],
**kwargs)
else:
raise ValueError('Bad "style" argument supplied: %s' % str(style))
self._knows_stratigraphy = True
Expand Down Expand Up @@ -635,22 +685,23 @@ def __init__(self, data, read=[], varset=None,
supplied, a new default VariableSet instance is created.
"""
super().__init__(data, read, varset)

if isinstance(data, str):
raise NotImplementedError('Precomputed NetCDF?')
elif isinstance(data, np.ndarray):
raise NotImplementedError('Precomputed numpy array?')
elif isinstance(data, DataCube):
# i.e., creating from a DataCube
_elev = np.array(self.dataio[stratigraphy_from], copy=True)
_elev = copy.deepcopy(data[stratigraphy_from])

# set up coordinates of the array
self._z = strat._determine_strat_coordinates(_elev, dz=dz)
self._z = strat._determine_strat_coordinates(_elev.data, dz=dz)
self._H = len(self.z)
self._L, self._W = _elev.shape[1:]
self._Z = np.tile(self.z, (self.W, self.L, 1)).T

_out = strat.compute_boxy_stratigraphy_coordinates(_elev, z=self.z, return_strata=True)
_out = strat.compute_boxy_stratigraphy_coordinates(_elev,
z=self.z,
return_strata=True)
self.strata_coords, self.data_coords, self.strata = _out
else:
raise TypeError('No other input types implemented yet.')
Expand Down Expand Up @@ -685,9 +736,13 @@ def __getitem__(self, var):
cube=str(self), var=var))

# the following lines apply the data to stratigraphy mapping
_cut = _var[self.data_coords[:, 0], self.data_coords[:, 1], self.data_coords[:, 2]]
_arr[self.strata_coords[:, 0], self.strata_coords[:, 1], self.strata_coords[:, 2]] = _cut
return CubeVariable(_arr, variable=var)
_cut = _var[self.data_coords[:, 0], self.data_coords[:, 1],
self.data_coords[:, 2]]
_arr[self.strata_coords[:, 0], self.strata_coords[:, 1],
self.strata_coords[:, 2]] = _cut
_obj = xr.DataArray(_arr).cubevar
_obj.initialize(variable=var)
return _obj

@property
def strata(self):
Expand Down
Loading