From 56cff411bc4d0d21e7cfddccd27405fc9c149f75 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 4 Jun 2016 18:24:53 +0100 Subject: [PATCH 1/4] Added support for dynamic groupby on all data interfaces --- holoviews/core/data/__init__.py | 34 ++++++++++++++++++++++----------- holoviews/core/data/grid.py | 4 +++- holoviews/core/data/iris.py | 9 --------- tests/testdataset.py | 16 +++++++++++++++- 4 files changed, 41 insertions(+), 22 deletions(-) diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index 8058d5528e..8126fb683e 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -48,7 +48,7 @@ from ..dimension import Dimension from ..element import Element -from ..spaces import HoloMap +from ..spaces import HoloMap, DynamicMap from .. import util @@ -369,26 +369,38 @@ def aggregate(self, dimensions=None, function=None, spreadfn=None, **kwargs): - def groupby(self, dimensions=[], container_type=HoloMap, group_type=None, **kwargs): - """ - Return the results of a groupby operation over the specified + def groupby(self, dimensions=[], container_type=HoloMap, group_type=None, + dynamic=False, **kwargs): + """Return the results of a groupby operation over the specified dimensions as an object of type container_type (expected to be dictionary-like). Keys vary over the columns (dimensions) and the corresponding - values are collections of group_type (e.g list, tuple) + values are collections of group_type (e.g an Element, list, tuple) constructed with kwargs (if supplied). + + If dynamic is requested container_type is automatically set to + a DynamicMap, allowing dynamic exploration of large + datasets. If the data does not represent a full cartesian grid + of the requested dimensions some Elements will be empty. """ if not isinstance(dimensions, list): dimensions = [dimensions] if not len(dimensions): dimensions = self.dimensions('key', True) if group_type is None: group_type = type(self) - dimensions = [self.get_dimension(d, strict=True).name for d in dimensions] - invalid_dims = list(set(dimensions) - set(self.dimensions('key', True))) - if invalid_dims: - raise Exception('Following dimensions could not be found:\n%s.' - % invalid_dims) - return self.interface.groupby(self, dimensions, container_type, + dimensions = [self.get_dimension(d, strict=True) for d in dimensions] + dim_names = [d.name for d in dimensions] + + if dynamic: + group_dims = [d.name for d in self.kdims if d not in dimensions] + def load_subset(*args): + constraint = dict(zip(dim_names, args)) + return group_type(self.select(**constraint).reindex(group_dims)) + dynamic_dims = [d(values=list(self.interface.values(self, d.name, False))) + for d in dimensions] + return DynamicMap(load_subset, kdims=dynamic_dims) + + return self.interface.groupby(self, dim_names, container_type, group_type, **kwargs) def __len__(self): diff --git a/holoviews/core/data/grid.py b/holoviews/core/data/grid.py index 22265b1396..683007c86c 100644 --- a/holoviews/core/data/grid.py +++ b/holoviews/core/data/grid.py @@ -291,7 +291,9 @@ def reindex(cls, dataset, kdims, vdims): vdata = data[vdim.name] if dropped_axes: vdata = vdata.squeeze(axis=dropped_axes) - data[vdim.name] = np.transpose(vdata, axes) + if len(axes) > 1: + vdata = np.transpose(vdata, axes) + data[vdim.name] = vdata return data diff --git a/holoviews/core/data/iris.py b/holoviews/core/data/iris.py index 00c47fab18..887582014d 100644 --- a/holoviews/core/data/iris.py +++ b/holoviews/core/data/iris.py @@ -165,15 +165,6 @@ def groupby(cls, dataset, dims, container_type=HoloMap, group_type=None, **kwarg constraints = [d.name for d in dims] slice_dims = [d for d in dataset.kdims if d not in dims] - if dynamic: - def load_subset(*args): - constraint = iris.Constraint(**dict(zip(constraints, args))) - return dataset.clone(dataset.data.extract(constraint), - new_type=group_type, - **dict(kwargs, kdims=slice_dims)) - dynamic_dims = [d(values=list(cls.values(dataset, d, False))) for d in dims] - return DynamicMap(load_subset, kdims=dynamic_dims) - unique_coords = product(*[cls.values(dataset, d, expanded=False) for d in dims]) data = [] diff --git a/tests/testdataset.py b/tests/testdataset.py index 868d92e57a..058a015eca 100644 --- a/tests/testdataset.py +++ b/tests/testdataset.py @@ -280,6 +280,12 @@ def test_dataset_groupby(self): kdims=['Gender']) self.assertEqual(self.table.groupby(['Gender']), grouped) + def test_dataset_groupby_dynamic(self): + grouped_dataset = self.table.groupby('Gender', dynamic=True) + self.assertEqual(grouped_dataset['M'], + self.table.select(Gender='M').reindex(['Age'])) + self.assertEqual(grouped_dataset['F'], + self.table.select(Gender='F').reindex(['Age'])) def test_dataset_add_dimensions_value_ht(self): table = self.dataset_ht.add_dimension('z', 1, 0) @@ -491,6 +497,15 @@ def test_dataset_sort_vdim_hm(self): def test_dataset_groupby(self): self.assertEqual(self.dataset_hm.groupby('x').keys(), list(self.xs)) + def test_dataset_groupby_dynamic(self): + array = np.random.rand(11, 11) + dataset = Dataset({'x':self.xs, 'y':self.y_ints, 'z': array}, + kdims=['x', 'y'], vdims=['z']) + grouped = dataset.groupby('x', dynamic=True) + first = Dataset({'y': self.y_ints, 'z': array[:, 0]}, + kdims=['y'], vdims=['z']) + self.assertEqual(grouped[0], first) + class IrisDatasetTest(GridDatasetTest): @@ -525,7 +540,6 @@ def test_dataset_sample_hm(self): pass - class XArrayDatasetTest(GridDatasetTest): """ Tests for Iris interface From ae47ae99f7a809502dd502e5aca69fee7c84f9a7 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 4 Jun 2016 18:40:14 +0100 Subject: [PATCH 2/4] Implemented dropping static dimensions using iris reindex --- holoviews/core/data/iris.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/holoviews/core/data/iris.py b/holoviews/core/data/iris.py index 887582014d..7f7169a39a 100644 --- a/holoviews/core/data/iris.py +++ b/holoviews/core/data/iris.py @@ -148,6 +148,14 @@ def reindex(cls, dataset, kdims=None, vdims=None): does not need to be reindexed, the Element can simply reorder its key dimensions. """ + if kdims and len(kdims) != dataset.ndims: + drop_dims = [kd for kd in dataset.kdims if kd not in kdims] + constraints = {} + for d in drop_dims: + vals = cls.values(dataset, d, False) + if len(vals): + constraints[d.name] = vals[0] + return dataset.data.extract(iris.Constraint(**constraints)) return dataset.data From 0478e023c3be989e3085337e8181a7931643afa6 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Sat, 4 Jun 2016 20:35:19 +0100 Subject: [PATCH 3/4] Handled scalar values in dynamic Dataset.groupby --- holoviews/core/data/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index 8126fb683e..9ed78bad17 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -395,7 +395,11 @@ def groupby(self, dimensions=[], container_type=HoloMap, group_type=None, group_dims = [d.name for d in self.kdims if d not in dimensions] def load_subset(*args): constraint = dict(zip(dim_names, args)) - return group_type(self.select(**constraint).reindex(group_dims)) + group = self.select(**constraint) + if np.isscalar(group): + return group_type(([group],), group=self.group, + label=self.label, vdims=self.vdims) + return group_type(group).reindex(group_dims) dynamic_dims = [d(values=list(self.interface.values(self, d.name, False))) for d in dimensions] return DynamicMap(load_subset, kdims=dynamic_dims) From 733d329a9dc49813e18ffbed416c53a32b8d4241 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 6 Jun 2016 20:50:32 +0100 Subject: [PATCH 4/4] Fix for dynamic Dataset.groupby --- holoviews/core/data/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py index 9ed78bad17..9e00a837c1 100644 --- a/holoviews/core/data/__init__.py +++ b/holoviews/core/data/__init__.py @@ -399,7 +399,7 @@ def load_subset(*args): if np.isscalar(group): return group_type(([group],), group=self.group, label=self.label, vdims=self.vdims) - return group_type(group).reindex(group_dims) + return group_type(group.reindex(group_dims)) dynamic_dims = [d(values=list(self.interface.values(self, d.name, False))) for d in dimensions] return DynamicMap(load_subset, kdims=dynamic_dims)