From 56cff411bc4d0d21e7cfddccd27405fc9c149f75 Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Sat, 4 Jun 2016 18:24:53 +0100
Subject: [PATCH 1/4] Added support for dynamic groupby on all data interfaces
---
holoviews/core/data/__init__.py | 34 ++++++++++++++++++++++-----------
holoviews/core/data/grid.py | 4 +++-
holoviews/core/data/iris.py | 9 ---------
tests/testdataset.py | 16 +++++++++++++++-
4 files changed, 41 insertions(+), 22 deletions(-)
diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py
index 8058d5528e..8126fb683e 100644
--- a/holoviews/core/data/__init__.py
+++ b/holoviews/core/data/__init__.py
@@ -48,7 +48,7 @@
from ..dimension import Dimension
from ..element import Element
-from ..spaces import HoloMap
+from ..spaces import HoloMap, DynamicMap
from .. import util
@@ -369,26 +369,38 @@ def aggregate(self, dimensions=None, function=None, spreadfn=None, **kwargs):
- def groupby(self, dimensions=[], container_type=HoloMap, group_type=None, **kwargs):
- """
- Return the results of a groupby operation over the specified
+ def groupby(self, dimensions=[], container_type=HoloMap, group_type=None,
+ dynamic=False, **kwargs):
+ """Return the results of a groupby operation over the specified
dimensions as an object of type container_type (expected to be
dictionary-like).
Keys vary over the columns (dimensions) and the corresponding
- values are collections of group_type (e.g list, tuple)
+ values are collections of group_type (e.g. an Element, list, tuple)
constructed with kwargs (if supplied).
+
+ If dynamic is requested, container_type is automatically set to
+ a DynamicMap, allowing dynamic exploration of large
+ datasets. If the data does not represent a full Cartesian grid
+ of the requested dimensions, some Elements will be empty.
"""
if not isinstance(dimensions, list): dimensions = [dimensions]
if not len(dimensions): dimensions = self.dimensions('key', True)
if group_type is None: group_type = type(self)
- dimensions = [self.get_dimension(d, strict=True).name for d in dimensions]
- invalid_dims = list(set(dimensions) - set(self.dimensions('key', True)))
- if invalid_dims:
- raise Exception('Following dimensions could not be found:\n%s.'
- % invalid_dims)
- return self.interface.groupby(self, dimensions, container_type,
+ dimensions = [self.get_dimension(d, strict=True) for d in dimensions]
+ dim_names = [d.name for d in dimensions]
+
+ if dynamic:
+ group_dims = [d.name for d in self.kdims if d not in dimensions]
+ def load_subset(*args):
+ constraint = dict(zip(dim_names, args))
+ return group_type(self.select(**constraint).reindex(group_dims))
+ dynamic_dims = [d(values=list(self.interface.values(self, d.name, False)))
+ for d in dimensions]
+ return DynamicMap(load_subset, kdims=dynamic_dims)
+
+ return self.interface.groupby(self, dim_names, container_type,
group_type, **kwargs)
def __len__(self):
diff --git a/holoviews/core/data/grid.py b/holoviews/core/data/grid.py
index 22265b1396..683007c86c 100644
--- a/holoviews/core/data/grid.py
+++ b/holoviews/core/data/grid.py
@@ -291,7 +291,9 @@ def reindex(cls, dataset, kdims, vdims):
vdata = data[vdim.name]
if dropped_axes:
vdata = vdata.squeeze(axis=dropped_axes)
- data[vdim.name] = np.transpose(vdata, axes)
+ if len(axes) > 1:
+ vdata = np.transpose(vdata, axes)
+ data[vdim.name] = vdata
return data
diff --git a/holoviews/core/data/iris.py b/holoviews/core/data/iris.py
index 00c47fab18..887582014d 100644
--- a/holoviews/core/data/iris.py
+++ b/holoviews/core/data/iris.py
@@ -165,15 +165,6 @@ def groupby(cls, dataset, dims, container_type=HoloMap, group_type=None, **kwarg
constraints = [d.name for d in dims]
slice_dims = [d for d in dataset.kdims if d not in dims]
- if dynamic:
- def load_subset(*args):
- constraint = iris.Constraint(**dict(zip(constraints, args)))
- return dataset.clone(dataset.data.extract(constraint),
- new_type=group_type,
- **dict(kwargs, kdims=slice_dims))
- dynamic_dims = [d(values=list(cls.values(dataset, d, False))) for d in dims]
- return DynamicMap(load_subset, kdims=dynamic_dims)
-
unique_coords = product(*[cls.values(dataset, d, expanded=False)
for d in dims])
data = []
diff --git a/tests/testdataset.py b/tests/testdataset.py
index 868d92e57a..058a015eca 100644
--- a/tests/testdataset.py
+++ b/tests/testdataset.py
@@ -280,6 +280,12 @@ def test_dataset_groupby(self):
kdims=['Gender'])
self.assertEqual(self.table.groupby(['Gender']), grouped)
+ def test_dataset_groupby_dynamic(self):
+ grouped_dataset = self.table.groupby('Gender', dynamic=True)
+ self.assertEqual(grouped_dataset['M'],
+ self.table.select(Gender='M').reindex(['Age']))
+ self.assertEqual(grouped_dataset['F'],
+ self.table.select(Gender='F').reindex(['Age']))
def test_dataset_add_dimensions_value_ht(self):
table = self.dataset_ht.add_dimension('z', 1, 0)
@@ -491,6 +497,15 @@ def test_dataset_sort_vdim_hm(self):
def test_dataset_groupby(self):
self.assertEqual(self.dataset_hm.groupby('x').keys(), list(self.xs))
+ def test_dataset_groupby_dynamic(self):
+ array = np.random.rand(11, 11)
+ dataset = Dataset({'x':self.xs, 'y':self.y_ints, 'z': array},
+ kdims=['x', 'y'], vdims=['z'])
+ grouped = dataset.groupby('x', dynamic=True)
+ first = Dataset({'y': self.y_ints, 'z': array[:, 0]},
+ kdims=['y'], vdims=['z'])
+ self.assertEqual(grouped[0], first)
+
class IrisDatasetTest(GridDatasetTest):
@@ -525,7 +540,6 @@ def test_dataset_sample_hm(self):
pass
-
class XArrayDatasetTest(GridDatasetTest):
"""
Tests for Iris interface
From ae47ae99f7a809502dd502e5aca69fee7c84f9a7 Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Sat, 4 Jun 2016 18:40:14 +0100
Subject: [PATCH 2/4] Implemented dropping static dimensions using iris reindex
---
holoviews/core/data/iris.py | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/holoviews/core/data/iris.py b/holoviews/core/data/iris.py
index 887582014d..7f7169a39a 100644
--- a/holoviews/core/data/iris.py
+++ b/holoviews/core/data/iris.py
@@ -148,6 +148,14 @@ def reindex(cls, dataset, kdims=None, vdims=None):
does not need to be reindexed, the Element can simply
reorder its key dimensions.
"""
+ if kdims and len(kdims) != dataset.ndims:
+ drop_dims = [kd for kd in dataset.kdims if kd not in kdims]
+ constraints = {}
+ for d in drop_dims:
+ vals = cls.values(dataset, d, False)
+ if len(vals):
+ constraints[d.name] = vals[0]
+ return dataset.data.extract(iris.Constraint(**constraints))
return dataset.data
From 0478e023c3be989e3085337e8181a7931643afa6 Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Sat, 4 Jun 2016 20:35:19 +0100
Subject: [PATCH 3/4] Handled scalar values in dynamic Dataset.groupby
---
holoviews/core/data/__init__.py | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py
index 8126fb683e..9ed78bad17 100644
--- a/holoviews/core/data/__init__.py
+++ b/holoviews/core/data/__init__.py
@@ -395,7 +395,11 @@ def groupby(self, dimensions=[], container_type=HoloMap, group_type=None,
group_dims = [d.name for d in self.kdims if d not in dimensions]
def load_subset(*args):
constraint = dict(zip(dim_names, args))
- return group_type(self.select(**constraint).reindex(group_dims))
+ group = self.select(**constraint)
+ if np.isscalar(group):
+ return group_type(([group],), group=self.group,
+ label=self.label, vdims=self.vdims)
+ return group_type(group).reindex(group_dims)
dynamic_dims = [d(values=list(self.interface.values(self, d.name, False)))
for d in dimensions]
return DynamicMap(load_subset, kdims=dynamic_dims)
From 733d329a9dc49813e18ffbed416c53a32b8d4241 Mon Sep 17 00:00:00 2001
From: Philipp Rudiger
Date: Mon, 6 Jun 2016 20:50:32 +0100
Subject: [PATCH 4/4] Fix for dynamic Dataset.groupby
---
holoviews/core/data/__init__.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/holoviews/core/data/__init__.py b/holoviews/core/data/__init__.py
index 9ed78bad17..9e00a837c1 100644
--- a/holoviews/core/data/__init__.py
+++ b/holoviews/core/data/__init__.py
@@ -399,7 +399,7 @@ def load_subset(*args):
if np.isscalar(group):
return group_type(([group],), group=self.group,
label=self.label, vdims=self.vdims)
- return group_type(group).reindex(group_dims)
+ return group_type(group.reindex(group_dims))
dynamic_dims = [d(values=list(self.interface.values(self, d.name, False)))
for d in dimensions]
return DynamicMap(load_subset, kdims=dynamic_dims)