From 7586589e71dbf9dd10be0a7512da31ac68f933cc Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Wed, 20 Apr 2016 10:08:01 +0100 Subject: [PATCH] Added interface class for iris Cube datasets --- .travis.yml | 1 + holoviews/core/data/__init__.py | 12 +- holoviews/core/data/interface.py | 2 +- holoviews/core/data/iris.py | 261 +++++++++++++++++++++++++++++++ tests/testirisinterface.py | 104 ++++++++++++ 5 files changed, 378 insertions(+), 2 deletions(-) create mode 100644 holoviews/core/data/iris.py create mode 100644 tests/testirisinterface.py diff --git a/.travis.yml b/.travis.yml index 329399a786..a74cae635b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -24,6 +24,7 @@ install: # Useful for debugging any issues with conda - conda info -a - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION scipy numpy=1.9.3 freetype=2.5.2 nose matplotlib bokeh pandas jupyter ipython param + - conda install -c scitools iris=1.9.2 - source activate test-environment - if [[ "$TRAVIS_PYTHON_VERSION" == "3.4" ]]; then conda install python=3.4.3; diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index f58f28d34f..8430cfe469 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -15,13 +15,23 @@ from .grid import GridInterface from .ndelement import NdElementInterface +datatypes = ['array', 'dictionary', 'grid', 'ndelement'] + try: import pandas as pd # noqa (Availability import) from .pandas import PandasInterface + datatypes = ['array', 'dataframe', 'dictionary', 'grid', 'ndelement'] DFColumns = PandasInterface except ImportError: pass +try: + import iris # noqa (Availability import) + from .iris import CubeInterface + datatypes.append('cube') +except ImportError: + pass + from ..dimension import Dimension from ..element import Element from ..spaces import HoloMap @@ -86,7 +96,7 @@ class Dataset(Element): of aggregating or collapsing the data with a supplied function. 
from __future__ import absolute_import

import datetime
from itertools import product

try:
    import iris
    from iris.util import guess_coord_axis
except ImportError:
    # Re-raise as ImportError so the optional-dependency guard in
    # core.data.__init__ ('except ImportError: pass') can skip this
    # interface cleanly.  The previous code instantiated
    # unittest.SkipTest(...) without raising it, which did nothing and
    # let the module fail later with a NameError on the missing `iris`
    # name.
    raise ImportError('The iris interface requires the iris library, '
                      'which is not available.')

import numpy as np

from .interface import Interface
from .grid import GridInterface
from ..ndmapping import (NdMapping, item_check, sorted_context)
from ..spaces import HoloMap, DynamicMap
from .. import util
from holoviews.core.dimension import Dimension


def get_date_format(coord):
    """
    Returns a value formatter for a time-reference coordinate, which
    renders numeric time values as dates using the coordinate's units.
    """
    def date_formatter(val, pos=None):
        # Convert the numeric value to a datetime via the coordinate's
        # calendar-aware units.
        converted = coord.units.num2date(val)
        fmt = Dimension.type_formatters.get(datetime.datetime, None)
        return converted.strftime(fmt) if fmt else converted

    return date_formatter


def coord_to_dimension(coord):
    """
    Converts an iris coordinate to a HoloViews dimension.
    """
    if coord.units.is_time_reference():
        params = dict(value_format=get_date_format(coord))
    else:
        params = dict(unit=str(coord.units))
    return Dimension(coord.name(), **params)


def sort_coords(coord):
    """
    Sort key for DimCoords, trying to ensure that dates and pressure
    levels appear first and longitude and latitude appear last in the
    correct order.
    """
    axis_priority = {'T': -2, 'Z': -1, 'X': 1, 'Y': 2}
    axis = guess_coord_axis(coord)
    name = coord and coord.name()
    return (axis_priority.get(axis, 0), name)
class CubeInterface(GridInterface):
    """
    The CubeInterface allows HoloViews to interact with iris Cube
    data. When passing an iris Cube to a HoloViews Element the init
    method will infer the dimensions of the Cube from its coordinates.
    Currently the interface only provides the basic methods required
    for HoloViews to work with an object.
    """

    # Raw data types this interface accepts.
    types = (iris.cube.Cube,)

    # Identifier used in Dataset.datatype priority lists.
    datatype = 'cube'

    @classmethod
    def init(cls, eltype, data, kdims, vdims):
        """
        Coerces tuple or dictionary data into an iris Cube and infers
        any undeclared key and value dimensions from the Cube's
        coordinates.  Returns the Cube along with the finalized key
        and value dimensions.  Raises TypeError if the data cannot be
        converted and ValueError if a declared kdim is missing.
        """
        if kdims:
            kdim_names = [kd.name if isinstance(kd, Dimension) else kd
                          for kd in kdims]
        else:
            kdim_names = [kd.name for kd in eltype.kdims]

        if not isinstance(data, iris.cube.Cube):
            if isinstance(data, tuple):
                vdim = vdims[0].name if isinstance(vdims[0], Dimension) else vdims[0]
                value_array = data[-1]
                # dim_coords_and_dims requires (coordinate, dimension)
                # pairs; zip truncates at the key dimensions, skipping
                # the trailing value array.
                coords = [(iris.coords.DimCoord(vals, long_name=kd), idx)
                          for idx, (kd, vals) in enumerate(zip(kdim_names, data))]
            elif isinstance(data, dict):
                vdim = vdims[0].name if isinstance(vdims[0], Dimension) else vdims[0]
                # Iterate over kdim_names rather than data.items() so
                # coordinate order matches the declared key dimension
                # order and string keys are compared against names
                # (kdims may contain Dimension objects or be None).
                coords = [(iris.coords.DimCoord(data[kd], long_name=kd), idx)
                          for idx, kd in enumerate(kdim_names) if kd in data]
                value_array = data[vdim]
            try:
                data = iris.cube.Cube(value_array, long_name=vdim,
                                      dim_coords_and_dims=coords)
            except Exception:
                # Best-effort construction: any failure falls through
                # to the TypeError below.
                pass
        if not isinstance(data, iris.cube.Cube):
            raise TypeError('Data must be an iris dataset type.')

        if kdims:
            coords = []
            for kd in kdims:
                coord = data.coords(kd.name if isinstance(kd, Dimension) else kd)
                if len(coord) == 0:
                    raise ValueError('Key dimension %s not found in '
                                     'Iris cube.' % kd)
                coords.append(coord[0])
        else:
            # Order inferred coordinates so that time/pressure lead and
            # longitude/latitude trail (see sort_coords).
            coords = sorted(data.dim_coords, key=sort_coords)
        kdims = [coord_to_dimension(crd) for crd in coords]
        if vdims is None:
            vdims = [Dimension(data.name(), unit=str(data.units))]

        return data, kdims, vdims


    @classmethod
    def validate(cls, dataset):
        # Cube data requires no additional validation.
        pass
+ """ + dim = dataset.get_dimension(dim) + if dim in dataset.vdims: + data = dataset.data.copy().data + coord_names = [c.name() for c in dataset.data.dim_coords + if c.name() in dataset.kdims] + dim_inds = [coord_names.index(d.name) for d in dataset.kdims] + dim_inds += [i for i in range(len(dataset.data.dim_coords)) + if i not in dim_inds] + data = data.transpose(dim_inds) + elif expanded: + idx = dataset.get_dimension_index(dim) + data = util.cartesian_product([dataset.data.coords(d.name)[0].points + for d in dataset.kdims])[idx] + else: + data = dataset.data.coords(dim.name)[0].points + return data.flatten() if flat else data + + + @classmethod + def reindex(cls, dataset, kdims=None, vdims=None): + """ + Since cubes are never indexed directly the data itself + does not need to be reindexed, the Element can simply + reorder its key dimensions. + """ + return dataset.data + + + @classmethod + def groupby(cls, dataset, dims, container_type=HoloMap, group_type=None, **kwargs): + """ + Groups the data by one or more dimensions returning a container + indexed by the grouped dimensions containing slices of the + cube wrapped in the group_type. This makes it very easy to + break up a high-dimensional dataset into smaller viewable chunks. 
+ """ + if not isinstance(dims, list): dims = [dims] + dynamic = kwargs.pop('dynamic', False) + dims = [dataset.get_dimension(d) for d in dims] + constraints = [d.name for d in dims] + slice_dims = [d for d in dataset.kdims if d not in dims] + + if dynamic: + def load_subset(*args): + constraint = iris.Constraint(**dict(zip(constraints, args))) + return dataset.clone(dataset.data.extract(constraint), + new_type=group_type, + **dict(kwargs, kdims=slice_dims)) + dynamic_dims = [d(values=list(cls.values(dataset, d, False))) for d in dims] + return DynamicMap(load_subset, kdims=dynamic_dims) + + unique_coords = product(*[cls.values(dataset, d, expanded=False) + for d in dims]) + data = [] + for key in unique_coords: + constraint = iris.Constraint(**dict(zip(constraints, key))) + cube = dataset.clone(dataset.data.extract(constraint), + new_type=group_type, + **dict(kwargs, kdims=slice_dims)) + data.append((key, cube)) + if issubclass(container_type, NdMapping): + with item_check(False), sorted_context(False): + return container_type(data, kdims=dims) + else: + return container_type(data) + + + @classmethod + def range(cls, dataset, dimension): + """ + Computes the range along a particular dimension. + """ + dim = dataset.get_dimension(dimension) + values = dataset.dimension_values(dim, False) + return (np.nanmin(values), np.nanmax(values)) + + + @classmethod + def length(cls, dataset): + """ + Returns the total number of samples in the dataset. + """ + return np.product([len(d.points) for d in dataset.data.coords()]) + + + @classmethod + def sort(cls, columns, by=[]): + """ + Cubes are assumed to be sorted by default. + """ + return columns + + + @classmethod + def aggregate(cls, columns, kdims, function, **kwargs): + """ + Aggregation currently not implemented. + """ + raise NotImplementedError + + + @classmethod + def select_to_constraint(cls, selection): + """ + Transform a selection dictionary to an iris Constraint. 
+ """ + constraint_kwargs = {} + for dim, constraint in selection.items(): + if isinstance(constraint, slice): + constraint = (constraint.start, constraint.stop) + if isinstance(constraint, tuple): + constraint = iris.util.between(*constraint) + constraint_kwargs[dim] = constraint + return iris.Constraint(**constraint_kwargs) + + + @classmethod + def select(cls, dataset, selection_mask=None, **selection): + """ + Apply a selection to the data. + """ + constraint = cls.select_to_constraint(selection) + pre_dim_coords = [c.name() for c in dataset.data.dim_coords] + extracted = dataset.data.extract(constraint) + if not extracted.dim_coords: + return extracted.data.item() + post_dim_coords = [c.name() for c in extracted.dim_coords] + dropped = [c for c in pre_dim_coords if c not in post_dim_coords] + for d in dropped: + extracted = iris.util.new_axis(extracted, d) + return extracted + + +Interface.register(CubeInterface) diff --git a/tests/testirisinterface.py b/tests/testirisinterface.py new file mode 100644 index 0000000000..1345565039 --- /dev/null +++ b/tests/testirisinterface.py @@ -0,0 +1,104 @@ +import numpy as np +import unittest + +try: + from iris.tests.stock import lat_lon_cube +except ImportError: + raise unittest.SkipTest("Could not import iris, skipping iris interface " + "tests.") + +from holoviews.core.data import Dataset +from holoviews.core.data.iris import coord_to_dimension +from holoviews.element.comparison import ComparisonTestCase + +class TestCube(ComparisonTestCase): + + def setUp(self): + self.cube = lat_lon_cube() + + def test_dim_to_coord(self): + dim = coord_to_dimension(self.cube.coords()[0]) + self.assertEqual(dim.name, 'latitude') + self.assertEqual(dim.unit, 'degrees') + + def test_initialize_cube(self): + cube = Dataset(self.cube) + self.assertEqual(cube.dimensions(label=True), + ['longitude', 'latitude', 'unknown']) + + def test_initialize_cube_with_kdims(self): + cube = Dataset(self.cube, kdims=['longitude', 'latitude']) + 
self.assertEqual(cube.dimensions('key', True), + ['longitude', 'latitude']) + + def test_initialize_cube_with_vdims(self): + cube = Dataset(self.cube, vdims=['Quantity']) + self.assertEqual(cube.dimensions('value', True), + ['Quantity']) + + def test_dimension_values_kdim_expanded(self): + cube = Dataset(self.cube, kdims=['longitude', 'latitude']) + self.assertEqual(cube.dimension_values('longitude'), + np.array([-1, -1, -1, 0, 0, 0, + 1, 1, 1, 2, 2, 2], dtype=np.int32)) + + def test_dimension_values_kdim(self): + cube = Dataset(self.cube, kdims=['longitude', 'latitude']) + self.assertEqual(cube.dimension_values('longitude', expanded=False), + np.array([-1, 0, 1, 2], dtype=np.int32)) + + def test_dimension_values_vdim(self): + cube = Dataset(self.cube, kdims=['longitude', 'latitude']) + self.assertEqual(cube.dimension_values('unknown', flat=False), + np.array([[ 0, 4, 8], + [ 1, 5, 9], + [ 2, 6, 10], + [ 3, 7, 11]], dtype=np.int32)) + + def test_range_kdim(self): + cube = Dataset(self.cube, kdims=['longitude', 'latitude']) + self.assertEqual(cube.range('longitude'), (-1, 2)) + + def test_range_vdim(self): + cube = Dataset(self.cube, kdims=['longitude', 'latitude']) + self.assertEqual(cube.range('unknown'), (0, 11)) + + def test_select_index(self): + cube = Dataset(self.cube) + self.assertEqual(cube.select(longitude=0).data.data, + np.array([[1, 5, 9]], dtype=np.int32)) + + def test_select_slice(self): + cube = Dataset(self.cube) + self.assertEqual(cube.select(longitude=(0, 1)).data.data, + np.array([[1, 2], [5, 6], [9, 10]], dtype=np.int32)) + + def test_select_set(self): + cube = Dataset(self.cube) + self.assertEqual(cube.select(longitude={0, 1}).data.data, + np.array([[1, 2], [5, 6], [9, 10]], dtype=np.int32)) + + def test_select_multi_index(self): + cube = Dataset(self.cube) + self.assertEqual(cube.select(longitude=0, latitude=0), 5) + + def test_select_multi_slice(self): + cube = Dataset(self.cube) + self.assertEqual(cube.select(longitude=(0, 1), + latitude=(0, 
1)).data.data, + np.array([[5, 6], [9, 10]], dtype=np.int32)) + + def test_select_multi_slice(self): + cube = Dataset(self.cube) + self.assertEqual(cube.select(longitude={0, 2}, + latitude={0, 2}).data.data, + np.array([[5, 7]], dtype=np.int32)) + + def test_getitem_index(self): + cube = Dataset(self.cube) + self.assertEqual(cube[0].data.data, + np.array([[1, 5, 9]], dtype=np.int32)) + + def test_getitem_scalar(self): + cube = Dataset(self.cube) + self.assertEqual(cube[0, 0], 5)