Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dataset aliases #1075

Closed
wants to merge 14 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 17 additions & 8 deletions holoviews/core/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,9 +148,16 @@ def __init__(self, data, **kwargs):
pvals = util.get_param_values(data)
kwargs.update([(l, pvals[l]) for l in ['group', 'label']
if l in pvals and l not in kwargs])
initialized = Interface.initialize(type(self), data,
kwargs.get('kdims'),
kwargs.get('vdims'),

kdims, vdims = None, None
if 'kdims' in kwargs:
kdims = [kd if isinstance(kd, Dimension) else Dimension(kd)
for kd in kwargs['kdims']]
if 'vdims' in kwargs:
vdims = [kd if isinstance(kd, Dimension) else Dimension(kd)
for kd in kwargs['vdims']]

initialized = Interface.initialize(type(self), data, kdims, vdims,
datatype=kwargs.get('datatype'))
(data, self.interface, dims, extra_kws) = initialized
super(Dataset, self).__init__(data, **dict(extra_kws, **dict(kwargs, **dims)))
Expand Down Expand Up @@ -195,6 +202,8 @@ def sort(self, by=[]):
Sorts the data by the values along the supplied dimensions.
"""
if not by: by = self.kdims
if not isinstance(by, list): by = [by]

sorted_columns = self.interface.sort(self, by)
return self.clone(sorted_columns)

Expand Down Expand Up @@ -234,7 +243,7 @@ def add_dimension(self, dimension, dim_pos, dim_val, vdim=False, **kwargs):
dimensions and a key value scalar or sequence of the same length
as the existing keys.
"""
if isinstance(dimension, str):
if isinstance(dimension, (util.basestring, tuple)):
dimension = Dimension(dimension)

if dimension.name in self.kdims:
Expand Down Expand Up @@ -369,10 +378,10 @@ def aggregate(self, dimensions=None, function=None, spreadfn=None, **kwargs):
raise ValueError("The aggregate method requires a function to be specified")
if dimensions is None: dimensions = self.kdims
elif not isinstance(dimensions, list): dimensions = [dimensions]
aggregated = self.interface.aggregate(self, dimensions, function, **kwargs)
kdims = [self.get_dimension(d) for d in dimensions]
aggregated = self.interface.aggregate(self, kdims, function, **kwargs)
aggregated = self.interface.unpack_scalar(self, aggregated)

kdims = [self.get_dimension(d) for d in dimensions]
vdims = self.vdims
if spreadfn:
error = self.interface.aggregate(self, dimensions, spreadfn)
Expand Down Expand Up @@ -466,7 +475,7 @@ def redim(self, specs=None, **dimensions):
kdims = replace_dimensions(self.kdims, dimensions)
vdims = replace_dimensions(self.vdims, dimensions)
zipped_dims = zip(self.kdims+self.vdims, kdims+vdims)
renames = {pk.name: nk for pk, nk in zipped_dims if pk != nk}
renames = {pk.key: nk for pk, nk in zipped_dims if pk != nk}
data = self.data
if renames:
data = self.interface.redim(self, renames)
Expand All @@ -478,7 +487,7 @@ def dimension_values(self, dim, expanded=True, flat=True):
Returns the values along a particular dimension. If unique
values are requested will return only unique values.
"""
dim = self.get_dimension(dim, strict=True).name
dim = self.get_dimension(dim, strict=True)
return self.interface.values(self, dim, expanded, flat)


Expand Down
2 changes: 1 addition & 1 deletion holoviews/core/data/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def init(cls, eltype, data, kdims, vdims):
if vdims is None:
vdims = eltype.vdims

dimensions = [d.name if isinstance(d, Dimension) else
dimensions = [d.key if isinstance(d, Dimension) else
d for d in kdims + vdims]
if ((isinstance(data, dict) or util.is_dataframe(data)) and
all(d in data for d in dimensions)):
Expand Down
31 changes: 17 additions & 14 deletions holoviews/core/data/dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def shape(cls, dataset):

@classmethod
def range(cls, columns, dimension):
    """Return the (start, end) value range of the given dimension.

    The dimension is resolved through ``get_dimension`` so alias keys
    are honoured before looking up the dask column.

    NOTE(review): for non-object dtypes the first/last entries are
    returned without sorting — presumably the column is sorted;
    confirm against the callers.
    """
    column = columns.data[columns.get_dimension(dimension).key]
    if column.dtype.kind == 'O':
        # Object columns cannot be reduced lazily: drop nulls, compute
        # eagerly and sort so the endpoints are the true extremes.
        column = np.sort(column[column.notnull()].compute())
    return column[0], column[-1]
Expand All @@ -70,7 +70,8 @@ def sort(cls, columns, by=[]):

@classmethod
def values(cls, columns, dim, expanded=True, flat=True):
    """Return the values of *dim* as a NumPy array.

    The dimension is resolved through ``get_dimension`` so alias keys
    are honoured, then the dask column is computed.  With
    ``expanded=False`` only the unique values are returned.  ``flat``
    is accepted for interface compatibility but unused here.
    """
    dim = columns.get_dimension(dim)
    data = columns.data[dim.key]
    if not expanded:
        data = data.unique()
    return data.compute().values
Expand All @@ -88,7 +89,8 @@ def select_mask(cls, dataset, selection):
if isinstance(k, tuple):
k = slice(*k)
masks = []
series = dataset.data[dim]
alias = dataset.get_dimension(dim).key
series = dataset.data[alias]
if isinstance(k, slice):
if k.start is not None:
masks.append(k.start <= series)
Expand Down Expand Up @@ -123,7 +125,7 @@ def select(cls, columns, selection_mask=None, **selection):
indexed = cls.indexed(columns, selection)
df = df if selection_mask is None else df[selection_mask]
if indexed and len(df) == 1:
return df[columns.vdims[0].name].compute().iloc[0]
return df[columns.vdims[0].key].compute().iloc[0]
return df

@classmethod
Expand All @@ -139,15 +141,16 @@ def groupby(cls, columns, dimensions, container_type, group_type, **kwargs):
group_kwargs.update(kwargs)

data = []
groupby = columns.data.groupby(dimensions)
if len(dimensions) == 1:
column = columns.data[dimensions[0]]
group_by = [d.key for d in index_dims]
groupby = columns.data.groupby(group_by)
if len(group_by) == 1:
column = columns.data[group_by[0]]
if column.dtype.name == 'category':
indices = ((ind,) for ind in column.cat.categories)
else:
indices = ((ind,) for ind in column.unique().compute())
else:
group_tuples = columns.data[dimensions].itertuple()
group_tuples = columns.data[group_by].itertuple()
indices = util.unique_iterator(ind[1:] for ind in group_tuples)
for coord in indices:
if any(isinstance(c, float) and np.isnan(c) for c in coord):
Expand All @@ -161,12 +164,12 @@ def groupby(cls, columns, dimensions, container_type, group_type, **kwargs):
return container_type(data, kdims=index_dims)
else:
return container_type(data)

@classmethod
def aggregate(cls, columns, dimensions, function, **kwargs):
data = columns.data
cols = [d.name for d in columns.kdims if d in dimensions]
vdims = columns.dimensions('value', True)
cols = [d.key for d in columns.kdims if d in dimensions]
vdims = columns.dimensions('value', label='key')
dtypes = data.dtypes
numeric = [c for c, dtype in zip(dtypes.index, dtypes.values)
if dtype.kind in 'iufc' and c in vdims]
Expand Down Expand Up @@ -203,7 +206,7 @@ def unpack_scalar(cls, columns, data):
@classmethod
def sample(cls, columns, samples=[]):
data = columns.data
dims = columns.dimensions('key', label=True)
dims = columns.dimensions('key', label='key')
mask = None
for sample in samples:
if np.isscalar(sample): sample = [sample]
Expand All @@ -218,12 +221,12 @@ def sample(cls, columns, samples=[]):
@classmethod
def add_dimension(cls, columns, dimension, dim_pos, values, vdim):
    """Return the data with a new scalar column for *dimension*.

    ``dim_pos`` and ``vdim`` are accepted for interface compatibility
    but ignored: dask appends the column via ``assign`` under the
    dimension's key alias.  If the column already exists the data is
    returned unchanged.

    Raises:
        NotImplementedError: if *values* is non-scalar, which the lazy
            dask assignment used here does not support.
    """
    data = columns.data
    if dimension.key not in data.columns:
        if not np.isscalar(values):
            err = ('Dask dataframe does not support assigning '
                   'non-scalar value.')
            raise NotImplementedError(err)
        data = data.assign(**{dimension.key: values})
    return data

@classmethod
Expand Down
39 changes: 22 additions & 17 deletions holoviews/core/data/dictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class DictInterface(Interface):

@classmethod
def dimension_type(cls, dataset, dim):
    """Return the NumPy scalar type of the column backing *dim*.

    The dimension is resolved via ``get_dimension`` so alias keys are
    honoured before looking up the column in the data dict.
    """
    name = dataset.get_dimension(dim).key
    return dataset.data[name].dtype.type

@classmethod
Expand All @@ -40,7 +40,7 @@ def init(cls, eltype, data, kdims, vdims):
if vdims is None:
vdims = eltype.vdims

dimensions = [d.name if isinstance(d, Dimension) else
dimensions = [d.key if isinstance(d, Dimension) else
d for d in kdims + vdims]
if isinstance(data, tuple):
data = {d: v for d, v in zip(dimensions, data)}
Expand Down Expand Up @@ -81,7 +81,7 @@ def init(cls, eltype, data, kdims, vdims):

@classmethod
def validate(cls, dataset):
dimensions = dataset.dimensions(label=True)
dimensions = dataset.dimensions(label='key')
not_found = [d for d in dimensions if d not in dataset.data]
if not_found:
raise ValueError('Following dimensions not found in data: %s' % not_found)
Expand Down Expand Up @@ -113,12 +113,15 @@ def length(cls, dataset):

@classmethod
def array(cls, dataset, dimensions):
    """Stack the requested dimensions' columns into a 2D array.

    If *dimensions* is empty or None, all of the dataset's dimensions
    are used.  Supplied dimensions are resolved to their key aliases
    via ``get_dimension`` so both branches yield plain string keys
    that index the data dict directly.
    """
    if not dimensions:
        dimensions = dataset.dimensions(label='key')
    else:
        # BUG FIX: was `dataset.get_dimensions(d)` (no such method) and
        # the stack below indexed with `dim.key` on plain strings.
        dimensions = [dataset.get_dimension(d).key for d in dimensions]
    # Pass a list (not a generator) — newer NumPy rejects generators.
    return np.column_stack([dataset.data[dim] for dim in dimensions])

@classmethod
def add_dimension(cls, dataset, dimension, dim_pos, values, vdim):
dim = dimension.name if isinstance(dimension, Dimension) else dimension
dim = dimension.key if isinstance(dimension, Dimension) else dimension
data = list(dataset.data.items())
if isinstance(values, util.basestring) or not hasattr(values, '__iter__'):
values = np.array([values]*len(dataset))
Expand All @@ -127,7 +130,7 @@ def add_dimension(cls, dataset, dimension, dim_pos, values, vdim):

@classmethod
def redim(cls, dataset, dimensions):
    """Return the data dict with columns renamed per *dimensions*.

    *dimensions* maps existing keys to replacement Dimension objects;
    keys absent from the mapping keep the key alias of their current
    dimension.  Column order and values are preserved.
    """
    return OrderedDict([(dimensions.get(k, dataset.get_dimension(k)).key, v)
                        for k, v in dataset.data.items()])

@classmethod
Expand All @@ -145,6 +148,7 @@ def concat(cls, dataset_objs):

@classmethod
def sort(cls, dataset, by=[]):
by = [dataset.get_dimension(d).key for d in by]
if len(by) == 1:
sorting = cls.values(dataset, by[0]).argsort()
else:
Expand All @@ -155,17 +159,18 @@ def sort(cls, dataset, by=[]):

@classmethod
def values(cls, dataset, dim, expanded=True, flat=True):
    """Return the values for *dim* as a NumPy array.

    The dimension is resolved to its key alias before the data-dict
    lookup.  With ``expanded=False`` duplicates are removed via
    ``util.unique_array``.  ``flat`` is accepted for interface
    compatibility but unused here.

    NOTE(review): a missing column yields ``np.array(None)`` rather
    than raising — confirm that is intended.
    """
    dim = dataset.get_dimension(dim).key
    values = np.array(dataset.data.get(dim))
    if not expanded:
        return util.unique_array(values)
    return values


@classmethod
def reindex(cls, dataset, kdims, vdims):
# DataFrame based tables don't need to be reindexed
return OrderedDict([(d.name, dataset.dimension_values(d))
for d in kdims+vdims])
dimensions = [dataset.get_dimension(d).key for d in kdims+vdims]
return OrderedDict([(d, dataset.dimension_values(d))
for d in dimensions])


@classmethod
Expand All @@ -184,14 +189,14 @@ def groupby(cls, dataset, dimensions, container_type, group_type, **kwargs):
group_kwargs.update(kwargs)

# Find all the keys along supplied dimensions
keys = [tuple(dataset.data[d.name][i] for d in dimensions)
keys = [tuple(dataset.data[d.key][i] for d in dimensions)
for i in range(len(dataset))]

# Iterate over the unique entries applying selection masks
grouped_data = []
for unique_key in util.unique_iterator(keys):
mask = cls.select_mask(dataset, dict(zip(dimensions, unique_key)))
group_data = OrderedDict(((d.name, dataset[d.name][mask]) for d in kdims+vdims))
group_data = OrderedDict(((d.key, dataset[d.key][mask]) for d in kdims+vdims))
group_data = group_type(group_data, **group_kwargs)
grouped_data.append((unique_key, group_data))

Expand All @@ -210,7 +215,7 @@ def select(cls, dataset, selection_mask=None, **selection):
data = OrderedDict((k, list(compress(v, selection_mask)))
for k, v in dataset.data.items())
if indexed and len(list(data.values())[0]) == 1:
return data[dataset.vdims[0].name][0]
return data[dataset.vdims[0].key][0]
return data


Expand All @@ -221,7 +226,7 @@ def sample(cls, dataset, samples=[]):
sample_mask = True
if np.isscalar(sample): sample = [sample]
for i, v in enumerate(sample):
name = dataset.get_dimension(i).name
name = dataset.get_dimension(i).key
sample_mask &= (np.array(dataset.data[name])==v)
mask |= sample_mask
return {k: np.array(col)[mask]
Expand All @@ -230,8 +235,8 @@ def sample(cls, dataset, samples=[]):

@classmethod
def aggregate(cls, dataset, kdims, function, **kwargs):
kdims = [dataset.get_dimension(d).name for d in kdims]
vdims = dataset.dimensions('value', True)
kdims = [dataset.get_dimension(d).key for d in kdims]
vdims = dataset.dimensions('value', label='key')
groups = cls.groupby(dataset, kdims, list, OrderedDict)
aggregated = OrderedDict([(k, []) for k in kdims+vdims])

Expand Down
Loading