holoviz · jlstevens · Jan 9, 2017 · Sep 5, 2016 · Sep 5, 2016 · Sep 5, 2016
diff --git a/holoviews/core/util.py b/holoviews/core/util.py
@@ -607,14 +607,14 @@ def sort_topologically(graph):
     }
 
     sort_topologically(graph)
-    [set([1, 2]), set([3, 4]), set([5, 6])]
+    [[1, 2], [3, 4], [5, 6]]
     """
     levels_by_name = {}
-    names_by_level = defaultdict(set)
+    names_by_level = defaultdict(list)
 
     def add_level_to_name(name, level):
         levels_by_name[name] = level
-        names_by_level[level].add(name)
+        names_by_level[level].append(name)
 
 
     def walk_depth_first(name):
@@ -647,6 +647,35 @@ def walk_depth_first(name):
                                     (names_by_level.get(i, None)
                                      for i in itertools.count())))
 
+
+def is_cyclic(graph):
+    """
+    Return True if the directed graph has a cycle. The graph should be a
+    dictionary mapping each node to the nodes its outgoing edges point to.
+    """
+    path, done = set(), set()  # nodes on the active walk; nodes proven cycle-free
+    def visit(vertex):
+        path.add(vertex)
+        for neighbour in graph.get(vertex, ()):
+            if neighbour in path or (neighbour not in done and visit(neighbour)):
+                return True
+        path.remove(vertex)
+        done.add(vertex)  # fully explored: never re-walk it (keeps DAGs linear)
+        return False
+
+    return any(visit(v) for v in graph if v not in done)
+
+
+def one_to_one(graph, nodes):
+    """
+    Return True if graph (a dictionary mapping each node to its edges)
+    has one entry per item of the nodes list and as many distinct targets.
+    """
+    expected = len(nodes)
+    targets = (edge for edges in graph.values() for edge in edges)
+    return expected == len(graph) and expected == len(set(targets))
+
+
 def get_overlay_spec(o, k, v):
     """
     Gets the type.group.label + key spec from an Element in an Overlay.
@@ -996,3 +1025,13 @@ def dt64_to_dt(dt64):
     """
     ts = (dt64 - np.datetime64('1970-01-01T00:00:00Z')) / np.timedelta64(1, 's')
     return dt.datetime.utcfromtimestamp(ts)
+
+
+def is_nan(x):
+    """
+    Checks whether value is NaN; types np.isnan cannot handle give False
+    """
+    try:
+        return np.isnan(x)
+    except Exception:  # not bare: let KeyboardInterrupt/SystemExit propagate
+        return False
diff --git a/holoviews/element/raster.py b/holoviews/element/raster.py
@@ -14,7 +14,7 @@
 from ..core.util import pd
 from .chart import Curve
 from .tabular import Table
-from .util import compute_edges, toarray
+from .util import compute_edges, toarray, categorical_aggregate2d
 
 try:
     from ..core.data import PandasInterface
@@ -365,16 +365,14 @@ def dimension_values(self, dimension, expanded=True, flat=True):
             return super(QuadMesh, self).dimension_values(idx)
 
 
-
 class HeatMap(Dataset, Element2D):
     """
     HeatMap is an atomic Element used to visualize two dimensional
     parameter spaces. It supports sparse or non-linear spaces, dynamically
     upsampling them to a dense representation, which can be visualized.
 
     A HeatMap can be initialized with any dict or NdMapping type with
-    two-dimensional keys. Once instantiated the dense representation is
-    available via the .data property.
+    two-dimensional keys.
     """
 
     group = param.String(default='HeatMap', constant=True)
@@ -383,85 +381,16 @@ class HeatMap(Dataset, Element2D):
 
     vdims = param.List(default=[Dimension('z')])
 
-    def __init__(self, data, extents=None, **params):
+    def __init__(self, data, **params):
         super(HeatMap, self).__init__(data, **params)
-        data, self.raster = self._compute_raster()
-        self.data = data.data
-        self.interface = data.interface
-        self.depth = 1
-        if extents is None:
-            (d1, d2) = self.raster.shape[:2]
-            self.extents = (0, 0, d2, d1)
-        else:
-            self.extents = extents
-
-
-    def _compute_raster(self):
-        if self.interface.gridded:
-            return self, np.flipud(self.dimension_values(2, flat=False))
-        d1keys = self.dimension_values(0, False)
-        d2keys = self.dimension_values(1, False)
-        coords = [(d1, d2, np.NaN) for d1 in d1keys for d2 in d2keys]
-        dtype = 'dataframe' if pd else 'dictionary'
-        dense_data = Dataset(coords, kdims=self.kdims, vdims=self.vdims, datatype=[dtype])
-        concat_data = self.interface.concatenate([dense_data, Dataset(self)], datatype=dtype)
-        with warnings.catch_warnings():
-            warnings.filterwarnings('ignore', r'Mean of empty slice')
-            data = concat_data.aggregate(self.kdims, np.nanmean)
-        array = data.dimension_values(2).reshape(len(d1keys), len(d2keys))
-        return data, np.flipud(array.T)
-
-
-    def __setstate__(self, state):
-        if '_data' in state:
-            data = state['_data']
-            if isinstance(data, NdMapping):
-                items = [tuple(k)+((v,) if np.isscalar(v) else tuple(v))
-                         for k, v in data.items()]
-                kdims = state['kdims'] if 'kdims' in state else self.kdims
-                vdims = state['vdims'] if 'vdims' in state else self.vdims
-                data = Dataset(items, kdims=kdims, vdims=vdims).data
-            elif isinstance(data, Dataset):
-                data = data.data
-                kdims = data.kdims
-                vdims = data.vdims
-            state['data'] = data
-            state['kdims'] = kdims
-            state['vdims'] = vdims
-        self.__dict__ = state
-
-        if isinstance(self.data, NdElement):
-            self.interface = NdElementInterface
-        elif isinstance(self.data, np.ndarray):
-            self.interface = ArrayInterface
-        elif util.is_dataframe(self.data):
-            self.interface = PandasInterface
-        elif isinstance(self.data, dict):
-            self.interface = DictInterface
-        self.depth = 1
-        data, self.raster = self._compute_raster()
-        self.interface = data.interface
-        self.data = data.data
-        if 'extents' not in state:
-            (d1, d2) = self.raster.shape[:2]
-            self.extents = (0, 0, d2, d1)
-
-        super(HeatMap, self).__setstate__(state)
-
-    def dense_keys(self):
-        d1keys = self.dimension_values(0, False)
-        d2keys = self.dimension_values(1, False)
-        return list(zip(*[(d1, d2) for d1 in d1keys for d2 in d2keys]))
-
-
-    def dframe(self, dense=False):
-        if dense:
-            keys1, keys2 = self.dense_keys()
-            dense_map = self.clone({(k1, k2): self._data.get((k1, k2), np.NaN)
-                                 for k1, k2 in product(keys1, keys2)})
-            return dense_map.dframe()
-        return super(HeatMap, self).dframe()
+        self.gridded = categorical_aggregate2d(self)
 
+    @property
+    def raster(self):  # deprecated accessor: warns on every access
+        self.warning("The .raster attribute on HeatMap is deprecated, "
+                     "the 2D aggregate is now computed dynamically "
+                     "during plotting.")
+        return self.gridded.dimension_values(2, flat=False)  # NOTE(review): removed _compute_raster applied np.flipud; confirm orientation
 
 
 class Image(SheetCoordinateSystem, Raster):

diff --git a/holoviews/element/util.py b/holoviews/element/util.py
@@ -1,10 +1,24 @@
+import itertools
+
+import param
 import numpy as np
 
+from ..core import Dataset, OrderedDict
+from ..core.operation import ElementOperation
+from ..core.util import (pd, is_nan, sort_topologically,
+                         cartesian_product, is_cyclic, one_to_one)
+
 try:
     import dask
 except:
     dask = None
 
+try:
+    import xarray as xr
+except:
+    xr = None
+
+
 def toarray(v, index_value=False):
     """
     Interface helper function to turn dask Arrays into numpy arrays as
@@ -30,3 +44,114 @@ def compute_edges(edges):
         raise ValueError('Centered bins have to be of equal width.')
     edges -= width/2.
     return np.concatenate([edges, [edges[-1]+width]])
+
+
+def reduce_fn(x):
+    """
+    Aggregation function returning the first non-NaN value, or NaN if none.
+    """
+    values = x.values if pd and isinstance(x, pd.Series) else x
+    for v in values:
+        if not is_nan(v):
+            return v
+    return np.nan  # lowercase alias: np.NaN was removed in NumPy 2.0
+
+
+class categorical_aggregate2d(ElementOperation):
+    """
+    Generates a gridded Dataset of 2D aggregate arrays indexed by the
+    first two dimensions of the passed Element, turning all remaining
+    dimensions into value dimensions. The key dimensions of the
+    gridded array are treated as categorical indices. Useful for data
+    indexed by two independent categorical variables such as a table
+    of population values indexed by country and year. Data that is
+    indexed by continuous dimensions should be binned before
+    aggregation. The aggregation will retain the global sorting order
+    of both dimensions.
+
+    >> table = Table([('USA', 2000, 282.2), ('UK', 2005, 58.89)],
+                     kdims=['Country', 'Year'], vdims=['Population'])
+    >> categorical_aggregate2d(table)
+    Dataset({'Country': ['USA', 'UK'], 'Year': [2000, 2005],
+             'Population': [[ 282.2 , np.nan], [np.nan,   58.89]]},
+            kdims=['Country', 'Year'], vdims=['Population'])
+    """
+
+    datatype = param.List(['xarray', 'grid'] if xr else ['grid'], doc="""
+        The grid interface types to use when constructing the gridded Dataset.""")
+
+    def _get_coords(self, obj):
+        """
+        Get the coordinates of the 2D aggregate, maintaining the correct
+        sorting order. Returns the (xcoords, ycoords) pair.
+        """
+        xdim, ydim = obj.dimensions(label=True)[:2]
+        xcoords = obj.dimension_values(xdim, False)
+        ycoords = obj.dimension_values(ydim, False)
+
+        # Determine global orderings of y-values using topological sort
+        grouped = obj.groupby(xdim, container_type=OrderedDict,
+                              group_type=Dataset).values()
+        orderings = OrderedDict()
+        for group in grouped:
+            vals = group.dimension_values(ydim)
+            if len(vals) == 1:
+                orderings[vals[0]] = [vals[0]]
+            else:
+                # Record each value's immediate successor within this group
+                for p1, p2 in zip(vals[:-1], vals[1:]):
+                    orderings[p1] = [p2]
+        if one_to_one(orderings, ycoords):
+            ycoords = np.sort(ycoords)
+        elif not is_cyclic(orderings):  # cyclic orderings leave ycoords as-is
+            ycoords = list(itertools.chain(*sort_topologically(orderings)))
+        return xcoords, ycoords
+
+
+    def _aggregate_dataset(self, obj, xcoords, ycoords):
+        """
+        Generates a gridded Dataset from a column-based dataset and
+        lists of xcoords and ycoords
+        """
+        dim_labels = obj.dimensions(label=True)
+        vdims = obj.dimensions()[2:]
+        xdim, ydim = dim_labels[:2]
+        shape = (len(ycoords), len(xcoords))
+        nsamples = np.prod(shape)  # np.product was removed in NumPy 2.0
+
+        ys, xs = cartesian_product([ycoords, xcoords])
+        data = {xdim: xs.flatten(), ydim: ys.flatten()}
+        for vdim in vdims:
+            # NaN-filled placeholder column: grid cells that obj has no
+            # sample for keep NaN after the aggregation below
+            data[vdim.name] = np.full(nsamples, np.nan)
+        dtype = 'dataframe' if pd else 'dictionary'
+        dense_data = Dataset(data, kdims=obj.kdims, vdims=obj.vdims, datatype=[dtype])
+        concat_data = obj.interface.concatenate([dense_data, Dataset(obj)], datatype=dtype)
+        agg = concat_data.reindex([xdim, ydim]).aggregate([xdim, ydim], reduce_fn)
+
+        # Convert data to a gridded dataset
+        grid_data = {xdim: xcoords, ydim: ycoords}
+        for vdim in vdims:
+            grid_data[vdim.name] = agg.dimension_values(vdim).reshape(shape)
+        return agg.clone(grid_data, datatype=self.p.datatype)
+
+
+    def _process(self, obj, key=None):
+        """
+        Generates a categorical 2D aggregate by inserting NaNs at all
+        cross-product locations that do not already have a value assigned.
+        Returns a 2D gridded Dataset object.
+        """
+        if isinstance(obj, Dataset) and obj.interface.gridded:
+            return obj  # already gridded: passed through unchanged
+        elif obj.ndims > 2:
+            raise ValueError("Cannot aggregate more than two dimensions")
+        elif len(obj.dimensions()) < 3:
+            raise ValueError("Must have at least two dimensions to aggregate "
+                             "over and one value dimension to aggregate on.")
+
+        if not isinstance(obj, Dataset):
+            obj = Dataset(obj)
+        xcoords, ycoords = self._get_coords(obj)
+        return self._aggregate_dataset(obj, xcoords, ycoords)
diff --git a/holoviews/operation/element.py b/holoviews/operation/element.py
@@ -15,6 +15,7 @@
 from ..element.chart import Histogram, Scatter
 from ..element.raster import Raster, Image, RGB, QuadMesh
 from ..element.path import Contours, Polygons
+from ..element.util import categorical_aggregate2d
 from ..streams import RangeXY
 
 column_interfaces = [ArrayInterface, DictInterface]

diff --git a/holoviews/plotting/bokeh/raster.py b/holoviews/plotting/bokeh/raster.py
@@ -1,7 +1,13 @@
 import numpy as np
 import param
 
-from ...core.util import cartesian_product
+from bokeh.models.mappers import LinearColorMapper
+try:
+    from bokeh.models.mappers import LogColorMapper
+except ImportError:
+    LogColorMapper = None
+
+from ...core.util import cartesian_product, is_nan, unique_array
 from ...element import Image, Raster, RGB
 from ..renderer import SkipRendering
 from ..util import map_colors
@@ -130,27 +136,31 @@ class HeatmapPlot(ColorbarPlot):
     def _axes_props(self, plots, subplots, element, ranges):
         dims = element.dimensions()
         labels = self._get_axis_labels(dims)
-        xvals, yvals = [element.dimension_values(i, False)
-                        for i in range(2)]
+        agg = element.gridded  # gridded aggregate that HeatMap.__init__ stores on the element
+        xvals, yvals = [agg.dimension_values(i, False) for i in range(2)]  # stringified below for x_range/y_range
         if self.invert_yaxis: yvals = yvals[::-1]
         plot_ranges = {'x_range': [str(x) for x in xvals],
                        'y_range': [str(y) for y in yvals]}
         return ('auto', 'auto'), labels, plot_ranges
 
-
     def get_data(self, element, ranges=None, empty=False):
-        x, y, z = element.dimensions(label=True)
+        x, y, z = element.dimensions(label=True)[:3]
+        aggregate = element.gridded  # gridded aggregate that HeatMap.__init__ stores on the element
         style = self.style[self.cyclic_index]
         cmapper = self._get_colormapper(element.vdims[0], element, ranges, style)
         if empty:
-            data = {x: [], y: [], z: [], 'color': []}
+            data = {x: [], y: [], z: [], 'zvalues': []}  # fill_color maps to 'zvalues', so it must exist when empty
         else:
-            zvals = np.rot90(element.raster, 3).flatten()
-            xvals, yvals = [[str(v) for v in element.dimension_values(i)]
+            zvals = aggregate.dimension_values(z)
+            xvals, yvals = [[str(v) for v in aggregate.dimension_values(i)]
                             for i in range(2)]
-            data = {x: xvals, y: yvals, z: zvals}
+            data = {x: xvals, y: yvals, 'zvalues': zvals}
 
-        return (data, {'x': x, 'y': y, 'fill_color': {'field': z, 'transform': cmapper},
+        if 'hover' in self.tools+self.default_tools:  # formatted value columns for hover; NaN shown as '-'
+            for vdim in element.vdims:
+                data[vdim.name] = ['-' if is_nan(v) else vdim.pprint_value(v)
+                                   for v in ([] if empty else aggregate.dimension_values(vdim))]
+        return (data, {'x': x, 'y': y, 'fill_color': {'field': 'zvalues', 'transform': cmapper},
                        'height': 1, 'width': 1})