Skip to content

Commit

Permalink
Merge pull request #711 from ioam/dataset_dynamic_groupby
Browse files Browse the repository at this point in the history
Added support for dynamic groupby on all data interfaces
  • Loading branch information
jlstevens authored Jul 14, 2016
2 parents 9032f88 + 733d329 commit 124019e
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 22 deletions.
38 changes: 27 additions & 11 deletions holoviews/core/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@

from ..dimension import Dimension
from ..element import Element
from ..spaces import HoloMap
from ..spaces import HoloMap, DynamicMap
from .. import util


Expand Down Expand Up @@ -369,26 +369,42 @@ def aggregate(self, dimensions=None, function=None, spreadfn=None, **kwargs):



def groupby(self, dimensions=[], container_type=HoloMap, group_type=None,
            dynamic=False, **kwargs):
    """Return the results of a groupby operation over the specified
    dimensions as an object of type container_type (expected to be
    dictionary-like).

    Keys vary over the columns (dimensions) and the corresponding
    values are collections of group_type (e.g. an Element, list,
    tuple) constructed with kwargs (if supplied).

    If dynamic is requested container_type is automatically set to
    a DynamicMap, allowing dynamic exploration of large
    datasets. If the data does not represent a full cartesian grid
    of the requested dimensions some Elements will be empty.
    """
    # NOTE: the list default is only ever rebound below, never mutated,
    # so sharing it between calls is harmless.
    if not isinstance(dimensions, list): dimensions = [dimensions]
    if not len(dimensions): dimensions = self.dimensions('key', True)
    if group_type is None: group_type = type(self)

    # Resolve every requested dimension to its Dimension object; the
    # plain names are what select() and the interface groupby consume.
    dimensions = [self.get_dimension(d, strict=True) for d in dimensions]
    dim_names = [d.name for d in dimensions]

    if dynamic:
        # Key dimensions that are not grouped over remain on each group.
        group_dims = [d.name for d in self.kdims if d not in dimensions]
        def load_subset(*args):
            # args arrive in the same order as the DynamicMap kdims,
            # i.e. the order of dim_names.
            constraint = dict(zip(dim_names, args))
            group = self.select(**constraint)
            if np.isscalar(group):
                # The selection collapsed to a single value; wrap it
                # so that a group_type instance is still returned.
                return group_type(([group],), group=self.group,
                                  label=self.label, vdims=self.vdims)
            # Forward the user supplied kwargs so that dynamic groups
            # are constructed like the non-dynamic ones.
            return group_type(group.reindex(group_dims), **kwargs)
        # Declare the unique values of each grouped dimension on the
        # DynamicMap key dimensions.
        dynamic_dims = [d(values=list(self.interface.values(self, d.name, False)))
                        for d in dimensions]
        return DynamicMap(load_subset, kdims=dynamic_dims)

    return self.interface.groupby(self, dim_names, container_type,
                                  group_type, **kwargs)

def __len__(self):
Expand Down
4 changes: 3 additions & 1 deletion holoviews/core/data/grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,9 @@ def reindex(cls, dataset, kdims, vdims):
vdata = data[vdim.name]
if dropped_axes:
vdata = vdata.squeeze(axis=dropped_axes)
data[vdim.name] = np.transpose(vdata, axes)
if len(axes) > 1:
vdata = np.transpose(vdata, axes)
data[vdim.name] = vdata
return data


Expand Down
17 changes: 8 additions & 9 deletions holoviews/core/data/iris.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,14 @@ def reindex(cls, dataset, kdims=None, vdims=None):
does not need to be reindexed, the Element can simply
reorder its key dimensions.
"""
if kdims and len(kdims) != dataset.ndims:
drop_dims = [kd for kd in dataset.kdims if kd not in kdims]
constraints = {}
for d in drop_dims:
vals = cls.values(dataset, d, False)
if len(vals):
constraints[d.name] = vals[0]
return dataset.data.extract(iris.Constraint(**constraints))
return dataset.data


Expand All @@ -165,15 +173,6 @@ def groupby(cls, dataset, dims, container_type=HoloMap, group_type=None, **kwarg
constraints = [d.name for d in dims]
slice_dims = [d for d in dataset.kdims if d not in dims]

if dynamic:
def load_subset(*args):
constraint = iris.Constraint(**dict(zip(constraints, args)))
return dataset.clone(dataset.data.extract(constraint),
new_type=group_type,
**dict(kwargs, kdims=slice_dims))
dynamic_dims = [d(values=list(cls.values(dataset, d, False))) for d in dims]
return DynamicMap(load_subset, kdims=dynamic_dims)

unique_coords = product(*[cls.values(dataset, d, expanded=False)
for d in dims])
data = []
Expand Down
16 changes: 15 additions & 1 deletion tests/testdataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,12 @@ def test_dataset_groupby(self):
kdims=['Gender'])
self.assertEqual(self.table.groupby(['Gender']), grouped)

def test_dataset_groupby_dynamic(self):
    # Each key of the dynamic groupby must match the equivalent
    # explicit selection, reindexed to the remaining Age dimension.
    by_gender = self.table.groupby('Gender', dynamic=True)
    for gender in ('M', 'F'):
        actual = by_gender[gender]
        expected = self.table.select(Gender=gender).reindex(['Age'])
        self.assertEqual(actual, expected)

def test_dataset_add_dimensions_value_ht(self):
table = self.dataset_ht.add_dimension('z', 1, 0)
Expand Down Expand Up @@ -491,6 +497,15 @@ def test_dataset_sort_vdim_hm(self):
def test_dataset_groupby(self):
    # Grouping over x should produce exactly one key per x value.
    grouped_keys = self.dataset_hm.groupby('x').keys()
    expected_keys = list(self.xs)
    self.assertEqual(grouped_keys, expected_keys)

def test_dataset_groupby_dynamic(self):
    # Build a gridded dataset over (x, y) and check that the dynamic
    # group at key 0 equals the first column of z indexed by y alone.
    zs = np.random.rand(11, 11)
    columns = {'x': self.xs, 'y': self.y_ints, 'z': zs}
    full = Dataset(columns, kdims=['x', 'y'], vdims=['z'])
    by_x = full.groupby('x', dynamic=True)
    expected = Dataset({'y': self.y_ints, 'z': zs[:, 0]},
                       kdims=['y'], vdims=['z'])
    self.assertEqual(by_x[0], expected)



class IrisDatasetTest(GridDatasetTest):
Expand Down Expand Up @@ -525,7 +540,6 @@ def test_dataset_sample_hm(self):
pass



class XArrayDatasetTest(GridDatasetTest):
"""
Tests for XArray interface
Expand Down

0 comments on commit 124019e

Please sign in to comment.