Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make HeatMap more general #849

Merged
merged 22 commits into from
Jan 9, 2017
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
bec8024
Added is_nan utility
philippjfr Sep 5, 2016
339f988
Added functions to generate dense 2D aggregate from coordinates
philippjfr Sep 5, 2016
1d3d57e
Simplified HeatMap and allowed any number of value dimensions
philippjfr Sep 5, 2016
69a9793
Fixes for HeatMap implementations
philippjfr Sep 19, 2016
efd4bd9
Fixed missing imports
philippjfr Jan 8, 2017
17651f0
Added backward compatible raster property on HeatMap
philippjfr Jan 8, 2017
f3543e6
HeatMap now pre-computes gridded representation
philippjfr Jan 8, 2017
843387c
Fixes for HeatMap aggregation
philippjfr Jan 8, 2017
29f47c9
Made the get_2d_aggregate helper function general
philippjfr Jan 8, 2017
3f4b073
Fixed bug in HeatmapPlot
philippjfr Jan 8, 2017
d68485f
Added unit tests for HeatMap aggregation
philippjfr Jan 8, 2017
143c301
Retain global ordering of y-value dimensions
philippjfr Jan 8, 2017
0a91dce
Made categorical_aggregate2d an ElementOperation
philippjfr Jan 8, 2017
03cebf6
Small optimizations for categorical_aggregate2D
philippjfr Jan 8, 2017
844c1ad
Cleaned up HeatMap plotting classes
philippjfr Jan 8, 2017
fb4b207
Improved formatting for NaNs in HeatMap hover and annotations
philippjfr Jan 8, 2017
fcac23e
Removed depth on HeatMap
philippjfr Jan 8, 2017
d380d08
Removed unused variable
philippjfr Jan 8, 2017
dcae11f
Fixes for categorical_aggregate2d ordering
philippjfr Jan 8, 2017
9082070
Fixed and simplified one-to-one mapping function
philippjfr Jan 8, 2017
f5998f2
Added docstrings for graph utility functions
philippjfr Jan 9, 2017
050c4c7
Split categorical_aggregate2d into a few methods
philippjfr Jan 9, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 37 additions & 3 deletions holoviews/core/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -607,14 +607,14 @@ def sort_topologically(graph):
}

sort_topologically(graph)
[set([1, 2]), set([3, 4]), set([5, 6])]
[[1, 2], [3, 4], [5, 6]]
"""
levels_by_name = {}
names_by_level = defaultdict(set)
names_by_level = defaultdict(list)

def add_level_to_name(name, level):
levels_by_name[name] = level
names_by_level[level].add(name)
names_by_level[level].append(name)


def walk_depth_first(name):
Expand Down Expand Up @@ -647,6 +647,30 @@ def walk_depth_first(name):
(names_by_level.get(i, None)
for i in itertools.count())))


def is_cyclic(graph):
"""Return True if the directed graph g has a cycle."""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the representation of the graph? A list of edges as tuples? Would be good to mention in the docstring.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm guessing the representation is the same as in one_to_one... even so, it's probably worth mentioning.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, all three functions here (sort_topologically, is_cyclic and one_to_one) use the same representation, which is a mapping between nodes and edges; I will add the docstring.

path = set()

def visit(vertex):
path.add(vertex)
for neighbour in graph.get(vertex, ()):
if neighbour in path or visit(neighbour):
return True
path.remove(vertex)
return False

return any(visit(v) for v in graph)


def one_to_one(graph, nodes):
    """
    Return True if graph contains only one to one mappings.

    The graph is passed as a dictionary mapping each node to its edges,
    alongside a list of all nodes.
    """
    edge_lists = graph.values()
    # Every node must have its own entry in the graph
    if len(graph) != len(nodes):
        return False
    # The distinct edge targets must be as numerous as the nodes
    targets = set().union(*edge_lists)
    return len(targets) == len(nodes)


def get_overlay_spec(o, k, v):
"""
Gets the type.group.label + key spec from an Element in an Overlay.
Expand Down Expand Up @@ -996,3 +1020,13 @@ def dt64_to_dt(dt64):
"""
ts = (dt64 - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
return dt.datetime.utcfromtimestamp(ts)


def is_nan(x):
    """
    Checks whether value is NaN on arbitrary types.

    Returns the result of ``np.isnan(x)`` when NumPy is able to
    evaluate it and False for any value NumPy cannot test (for
    example strings, None or arbitrary objects).
    """
    try:
        return np.isnan(x)
    except Exception:
        # np.isnan raises on input it cannot handle; such values are
        # not NaN. Catching Exception instead of using a bare except
        # lets KeyboardInterrupt and SystemExit propagate.
        return False
91 changes: 10 additions & 81 deletions holoviews/element/raster.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from ..core.util import pd
from .chart import Curve
from .tabular import Table
from .util import compute_edges, toarray
from .util import compute_edges, toarray, categorical_aggregate2d

try:
from ..core.data import PandasInterface
Expand Down Expand Up @@ -365,16 +365,14 @@ def dimension_values(self, dimension, expanded=True, flat=True):
return super(QuadMesh, self).dimension_values(idx)



class HeatMap(Dataset, Element2D):
"""
HeatMap is an atomic Element used to visualize two dimensional
parameter spaces. It supports sparse or non-linear spaces, dynamically
upsampling them to a dense representation, which can be visualized.

A HeatMap can be initialized with any dict or NdMapping type with
two-dimensional keys. Once instantiated the dense representation is
available via the .data property.
two-dimensional keys.
"""

group = param.String(default='HeatMap', constant=True)
Expand All @@ -383,85 +381,16 @@ class HeatMap(Dataset, Element2D):

vdims = param.List(default=[Dimension('z')])

def __init__(self, data, extents=None, **params):
def __init__(self, data, **params):
super(HeatMap, self).__init__(data, **params)
data, self.raster = self._compute_raster()
self.data = data.data
self.interface = data.interface
self.depth = 1
if extents is None:
(d1, d2) = self.raster.shape[:2]
self.extents = (0, 0, d2, d1)
else:
self.extents = extents


def _compute_raster(self):
if self.interface.gridded:
return self, np.flipud(self.dimension_values(2, flat=False))
d1keys = self.dimension_values(0, False)
d2keys = self.dimension_values(1, False)
coords = [(d1, d2, np.NaN) for d1 in d1keys for d2 in d2keys]
dtype = 'dataframe' if pd else 'dictionary'
dense_data = Dataset(coords, kdims=self.kdims, vdims=self.vdims, datatype=[dtype])
concat_data = self.interface.concatenate([dense_data, Dataset(self)], datatype=dtype)
with warnings.catch_warnings():
warnings.filterwarnings('ignore', r'Mean of empty slice')
data = concat_data.aggregate(self.kdims, np.nanmean)
array = data.dimension_values(2).reshape(len(d1keys), len(d2keys))
return data, np.flipud(array.T)


def __setstate__(self, state):
if '_data' in state:
data = state['_data']
if isinstance(data, NdMapping):
items = [tuple(k)+((v,) if np.isscalar(v) else tuple(v))
for k, v in data.items()]
kdims = state['kdims'] if 'kdims' in state else self.kdims
vdims = state['vdims'] if 'vdims' in state else self.vdims
data = Dataset(items, kdims=kdims, vdims=vdims).data
elif isinstance(data, Dataset):
data = data.data
kdims = data.kdims
vdims = data.vdims
state['data'] = data
state['kdims'] = kdims
state['vdims'] = vdims
self.__dict__ = state

if isinstance(self.data, NdElement):
self.interface = NdElementInterface
elif isinstance(self.data, np.ndarray):
self.interface = ArrayInterface
elif util.is_dataframe(self.data):
self.interface = PandasInterface
elif isinstance(self.data, dict):
self.interface = DictInterface
self.depth = 1
data, self.raster = self._compute_raster()
self.interface = data.interface
self.data = data.data
if 'extents' not in state:
(d1, d2) = self.raster.shape[:2]
self.extents = (0, 0, d2, d1)

super(HeatMap, self).__setstate__(state)

def dense_keys(self):
d1keys = self.dimension_values(0, False)
d2keys = self.dimension_values(1, False)
return list(zip(*[(d1, d2) for d1 in d1keys for d2 in d2keys]))


def dframe(self, dense=False):
if dense:
keys1, keys2 = self.dense_keys()
dense_map = self.clone({(k1, k2): self._data.get((k1, k2), np.NaN)
for k1, k2 in product(keys1, keys2)})
return dense_map.dframe()
return super(HeatMap, self).dframe()
self.gridded = categorical_aggregate2d(self)

@property
def raster(self):
self.warning("The .raster attribute on HeatMap is deprecated, "
"the 2D aggregate is now computed dynamically "
"during plotting.")
return self.gridded.dimension_values(2, flat=False)


class Image(SheetCoordinateSystem, Raster):
Expand Down
109 changes: 109 additions & 0 deletions holoviews/element/util.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,24 @@
import itertools

import param
import numpy as np

from ..core import Dataset, OrderedDict
from ..core.operation import ElementOperation
from ..core.util import (pd, is_nan, sort_topologically,
cartesian_product, is_cyclic, one_to_one)

try:
import dask
except:
dask = None

try:
import xarray as xr
except:
xr = None


def toarray(v, index_value=False):
"""
Interface helper function to turn dask Arrays into numpy arrays as
Expand All @@ -30,3 +44,98 @@ def compute_edges(edges):
raise ValueError('Centered bins have to be of equal width.')
edges -= width/2.
return np.concatenate([edges, [edges[-1]+width]])


def reduce_fn(x):
    """
    Aggregation function to get the first non-NaN value.

    Accepts a pandas Series (when pandas is available) or any other
    iterable of values and returns the first entry for which is_nan
    is false. Returns NaN if no such entry exists.
    """
    # Unwrap a Series to its underlying values before iterating
    values = x.values if pd and isinstance(x, pd.Series) else x
    for v in values:
        if not is_nan(v):
            return v
    # np.nan is used instead of the np.NaN alias, which NumPy 2.0 removed
    return np.nan


class categorical_aggregate2d(ElementOperation):
Copy link
Contributor

@jlstevens jlstevens Jan 9, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks great! I was just wondering if you want to keep this class in util or move it to operation.element?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's imported there but can't be moved; circular imports again.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, having it available for operation.element is fine.

"""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps this would be better expressed as an operation? Then maybe it could have a minimal docstring example in the class docstring?

Generates a gridded Dataset of 2D aggregate arrays indexed by the
first two dimensions of the passed Element, turning all remaining
dimensions into value dimensions. The key dimensions of the
gridded array are treated as categorical indices. Useful for data
indexed by two independent categorical variables such as a table
of population values indexed by country and year. Data that is
indexed by continuous dimensions should be binned before
aggregation. The aggregation will retain the global sorting order
of both dimensions.

>> table = Table([('USA', 2000, 282.2), ('UK', 2005, 58.89)],
kdims=['Country', 'Year'], vdims=['Population'])
>> categorical_aggregate2d(table)
Dataset({'Country': ['USA', 'UK'], 'Year': [2000, 2005],
'Population': [[ 282.2 , np.NaN], [np.NaN, 58.89]]},
kdims=['Country', 'Year'], vdims=['Population'])
"""

datatype = param.List(['xarray', 'grid'] if xr else ['grid'], doc="""
The grid interface types to use when constructing the gridded Dataset.""")

def _process(self, obj, key=None):
"""
Generates a categorical 2D aggregate by inserting NaNs at all
cross-product locations that do not already have a value assigned.
Returns a 2D gridded Dataset object.
"""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Quite a long method...if you see chunks that could be split up into helper methods, that might be sensible. Up to you though!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Happy to split it up.

if isinstance(obj, Dataset) and obj.interface.gridded:
return obj
elif obj.ndims > 2:
raise ValueError("Cannot aggregate more than two dimensions")
elif len(obj.dimensions()) < 3:
raise ValueError("Must have at two dimensions to aggregate over"
"and one value dimension to aggregate on.")

dim_labels = obj.dimensions(label=True)
dims = obj.dimensions()
kdims, vdims = dims[:2], dims[2:]
xdim, ydim = dim_labels[:2]
nvdims = len(dims) - 2
d1keys = obj.dimension_values(xdim, False)
d2keys = obj.dimension_values(ydim, False)
shape = (len(d2keys), len(d1keys))
nsamples = np.product(shape)

# Determine global orderings of y-values using topological sort
grouped = obj.groupby(xdim, container_type=OrderedDict,
group_type=Dataset).values()
orderings = OrderedDict()
for group in grouped:
vals = group.dimension_values(ydim)
if len(vals) == 1:
orderings[vals[0]] = [vals[0]]
else:
for i in range(len(vals)-1):
p1, p2 = vals[i:i+2]
orderings[p1] = [p2]
if one_to_one(orderings, d2keys):
d2keys = np.sort(d2keys)
elif not is_cyclic(orderings):
d2keys = list(itertools.chain(*sort_topologically(orderings)))

# Pad data with NaNs
ys, xs = cartesian_product([d2keys, d1keys])
data = {xdim: xs.flatten(), ydim: ys.flatten()}
for vdim in vdims:
values = np.empty(nsamples)
values[:] = np.NaN
data[vdim.name] = values
dtype = 'dataframe' if pd else 'dictionary'
dense_data = Dataset(data, kdims=obj.kdims, vdims=obj.vdims, datatype=[dtype])
concat_data = obj.interface.concatenate([dense_data, Dataset(obj)], datatype=dtype)
agg = concat_data.reindex([xdim, ydim]).aggregate([xdim, ydim], reduce_fn)

# Convert data to a gridded dataset
grid_data = {xdim: d1keys, ydim: d2keys}
for vdim in vdims:
grid_data[vdim.name] = agg.dimension_values(vdim).reshape(shape)
return agg.clone(grid_data, datatype=self.p.datatype)

1 change: 1 addition & 0 deletions holoviews/operation/element.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from ..element.chart import Histogram, Scatter
from ..element.raster import Raster, Image, RGB, QuadMesh
from ..element.path import Contours, Polygons
from ..element.util import categorical_aggregate2d
from ..streams import RangeXY

column_interfaces = [ArrayInterface, DictInterface]
Expand Down
30 changes: 20 additions & 10 deletions holoviews/plotting/bokeh/raster.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
import numpy as np
import param

from ...core.util import cartesian_product
from bokeh.models.mappers import LinearColorMapper
try:
from bokeh.models.mappers import LogColorMapper
except ImportError:
LogColorMapper = None

from ...core.util import cartesian_product, is_nan, unique_array
from ...element import Image, Raster, RGB
from ..renderer import SkipRendering
from ..util import map_colors
Expand Down Expand Up @@ -130,27 +136,31 @@ class HeatmapPlot(ColorbarPlot):
def _axes_props(self, plots, subplots, element, ranges):
dims = element.dimensions()
labels = self._get_axis_labels(dims)
xvals, yvals = [element.dimension_values(i, False)
for i in range(2)]
agg = element.gridded
xvals, yvals = [agg.dimension_values(i, False) for i in range(2)]
if self.invert_yaxis: yvals = yvals[::-1]
plot_ranges = {'x_range': [str(x) for x in xvals],
'y_range': [str(y) for y in yvals]}
return ('auto', 'auto'), labels, plot_ranges


def get_data(self, element, ranges=None, empty=False):
x, y, z = element.dimensions(label=True)
x, y, z = element.dimensions(label=True)[:3]
aggregate = element.gridded
style = self.style[self.cyclic_index]
cmapper = self._get_colormapper(element.vdims[0], element, ranges, style)
if empty:
data = {x: [], y: [], z: [], 'color': []}
data = {x: [], y: [], z: []}
else:
zvals = np.rot90(element.raster, 3).flatten()
xvals, yvals = [[str(v) for v in element.dimension_values(i)]
zvals = aggregate.dimension_values(z)
xvals, yvals = [[str(v) for v in aggregate.dimension_values(i)]
for i in range(2)]
data = {x: xvals, y: yvals, z: zvals}
data = {x: xvals, y: yvals, 'zvalues': zvals}

return (data, {'x': x, 'y': y, 'fill_color': {'field': z, 'transform': cmapper},
if 'hover' in self.tools+self.default_tools:
for vdim in element.vdims:
data[vdim.name] = ['-' if is_nan(v) else vdim.pprint_value(v)
for v in aggregate.dimension_values(vdim)]
return (data, {'x': x, 'y': y, 'fill_color': {'field': 'zvalues', 'transform': cmapper},
'height': 1, 'width': 1})


Expand Down
Loading