Allow aggregate to rasterize to target Image gridding (#1513)

* Allow aggregate to rasterize to target Image gridding
* Datashader coordinates now handled correctly
* Added optional datashader unit tests
* Install datashader from bokeh channel
holoviz · Jun 4, 2017 · 03b6edc · 03b6edc
1 parent 1e06adb
commit 03b6edc
Show file tree

Hide file tree

Showing 3 changed files with 107 additions and 28 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -27,9 +27,10 @@ install:
   - conda update -q conda
   # Useful for debugging any issues with conda
   - conda info -a
-  - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION scipy=0.18.1 numpy freetype nose bokeh=0.12.5 pandas=0.19.2 jupyter ipython=4.2.0 param pyqt=4 matplotlib=1.5.1 xarray datashader dask=0.13
+  - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION scipy=0.18.1 numpy freetype nose bokeh=0.12.5 pandas=0.19.2 jupyter ipython=4.2.0 param pyqt=4 matplotlib=1.5.1 xarray
   - source activate test-environment
   - conda install -c conda-forge  iris sip=4.18 plotly flexx
+  - conda install -c bokeh datashader dask=0.13
   - if [[ "$TRAVIS_PYTHON_VERSION" == "3.4" ]]; then
       conda install python=3.4.3;
     fi

diff --git a/holoviews/operation/datashader.py b/holoviews/operation/datashader.py
@@ -109,6 +109,11 @@ class aggregate(Operation):
     y_sampling = param.Number(default=None, doc="""
         Specifies the smallest allowed sampling interval along the y-axis.""")
 
+    target = param.ClassSelector(class_=Image, doc="""
+        A target Image which defines the desired x_range, y_range,
+        width and height.
+    """)
+
     streams = param.List(default=[PlotSize, RangeXY], doc="""
         List of streams that are applied if dynamic=True, allowing
         for dynamic interaction with the plot.""")
@@ -185,6 +190,34 @@ def get_agg_data(cls, obj, category=None):
         return x, y, Dataset(df, kdims=kdims, vdims=vdims), glyph
 
 
+    def _get_sampling(self, element, x, y):
+        target = self.p.target
+        if target:
+            x_range, y_range = target.range(x), target.range(y)
+            height, width = target.dimension_values(2, flat=False).shape
+        else:
+            if x is None or y is None:
+                x_range = self.p.x_range or (-0.5, 0.5)
+                y_range = self.p.y_range or (-0.5, 0.5)
+            else:
+                x_range = self.p.x_range or element.range(x)
+                y_range = self.p.y_range or element.range(y)
+            width, height = self.p.width, self.p.height
+        (xstart, xend), (ystart, yend) = x_range, y_range
+
+        # Compute highest allowed sampling density
+        xspan = xend - xstart
+        yspan = yend - ystart
+        if self.p.x_sampling:
+            width = int(min([(xspan/self.p.x_sampling), width]))
+        if self.p.y_sampling:
+            height = int(min([(yspan/self.p.y_sampling), height]))
+        xunit, yunit = float(xspan)/width, float(yspan)/height
+        xs, ys = (np.linspace(xstart+xunit/2., xend-xunit/2., width),
+                  np.linspace(ystart+yunit/2., yend-yunit/2., height))
+        return (x_range, y_range), (xs, ys), (width, height)
+
+
     def _aggregate_ndoverlay(self, element, agg_fn):
         """
         Optimized aggregation for NdOverlay objects by aggregating each
@@ -197,10 +230,9 @@ def _aggregate_ndoverlay(self, element, agg_fn):
         """
         # Compute overall bounds
         x, y = element.last.dimensions()[0:2]
-        xstart, xend = self.p.x_range if self.p.x_range else element.range(x)
-        ystart, yend = self.p.y_range if self.p.y_range else element.range(y)
+        (x_range, y_range), (xs, ys), (width, height) = self._get_sampling(element, x, y)
         agg_params = dict({k: v for k, v in self.p.items() if k in aggregate.params()},
-                          x_range=(xstart, xend), y_range=(ystart, yend))
+                          x_range=x_range, y_range=y_range)
 
         # Optimize categorical counts by aggregating them individually
         if isinstance(agg_fn, ds.count_cat):
@@ -267,30 +299,15 @@ def _process(self, element, key=None):
             return self._aggregate_ndoverlay(element, agg_fn)
 
         x, y, data, glyph = self.get_agg_data(element, category)
+        (x_range, y_range), (xs, ys), (width, height) = self._get_sampling(element, x, y)
 
         if x is None or y is None:
-            x0, x1 = self.p.x_range or (-0.5, 0.5)
-            y0, y1 = self.p.y_range or (-0.5, 0.5)
-            xc = np.linspace(x0, x1, self.p.width)
-            yc = np.linspace(y0, y1, self.p.height)
-            xarray = xr.DataArray(np.full((self.p.height, self.p.width), np.NaN, dtype=np.float32),
-                                  dims=['y', 'x'], coords={'x': xc, 'y': yc})
+            xarray = xr.DataArray(np.full((height, width), np.NaN, dtype=np.float32),
+                                  dims=['y', 'x'], coords={'x': xs, 'y': ys})
             return self.p.element_type(xarray)
 
-        xstart, xend = self.p.x_range if self.p.x_range else data.range(x)
-        ystart, yend = self.p.y_range if self.p.y_range else data.range(y)
-
-        # Compute highest allowed sampling density
-        width, height = self.p.width, self.p.height
-        if self.p.x_sampling:
-            x_range = xend - xstart
-            width = int(min([(x_range/self.p.x_sampling), width]))
-        if self.p.y_sampling:
-            y_range = yend - ystart
-            height = int(min([(y_range/self.p.y_sampling), height]))
-
         cvs = ds.Canvas(plot_width=width, plot_height=height,
-                        x_range=(xstart, xend), y_range=(ystart, yend))
+                        x_range=x_range, y_range=y_range)
 
         column = agg_fn.column
         if column and isinstance(agg_fn, ds.count_cat):
@@ -304,12 +321,14 @@ def _process(self, element, key=None):
 
         agg = getattr(cvs, glyph)(data, x, y, self.p.aggregator)
         if agg.ndim == 2:
-            return self.p.element_type(agg, **params)
+            # Replacing x and y coordinates to avoid numerical precision issues
+            return self.p.element_type((xs, ys, agg.data), **params)
         else:
-            return NdOverlay({c: self.p.element_type(agg.sel(**{column: c}),
-                                                     **params)
-                              for c in agg.coords[column].data},
-                             kdims=[data.get_dimension(column)])
+            layers = {}
+            for c in agg.coords[column].data:
+                cagg = agg.sel(**{column: c})
+                layers[c] = self.p.element_type((xs, ys, cagg.data), **params)
+            return NdOverlay(layers, kdims=[data.get_dimension(column)])
 
 
 

diff --git a/tests/testdatashader.py b/tests/testdatashader.py
@@ -0,0 +1,59 @@
+from nose.plugins.attrib import attr
+
+import pandas as pd
+import numpy as np
+
+from holoviews import Curve, Scatter, Points, Image, Dataset
+from holoviews.element.comparison import ComparisonTestCase
+
+try:
+    from holoviews.operation.datashader import aggregate
+except:
+    aggregate = None
+
+
+@attr(optional=1)
+class DatashaderTests(ComparisonTestCase):
+    """
+    Tests for datashader aggregation
+    """
+
+    def test_aggregate_points(self):
+        points = Points([(0.2, 0.3), (0.4, 0.7), (0, 0.99)])
+        img = aggregate(points, dynamic=False,  x_range=(0, 1), y_range=(0, 1),
+                        width=2, height=2)
+        expected = Image(([0.25, 0.75], [0.25, 0.75], [[1, 0], [2, 0]]),
+                         vdims=['Count'])
+        self.assertEqual(img, expected)
+
+    def test_aggregate_points_target(self):
+        points = Points([(0.2, 0.3), (0.4, 0.7), (0, 0.99)])
+        expected = Image(([0.25, 0.75], [0.25, 0.75], [[1, 0], [2, 0]]),
+                         vdims=['Count'])
+        img = aggregate(points, dynamic=False,  target=expected)
+        self.assertEqual(img, expected)
+
+    def test_aggregate_points_sampling(self):
+        points = Points([(0.2, 0.3), (0.4, 0.7), (0, 0.99)])
+        expected = Image(([0.25, 0.75], [0.25, 0.75], [[1, 0], [2, 0]]),
+                         vdims=['Count'])
+        img = aggregate(points, dynamic=False,  x_range=(0, 1), y_range=(0, 1),
+                        x_sampling=0.5, y_sampling=0.5)
+        self.assertEqual(img, expected)
+
+    def test_aggregate_curve(self):
+        curve = Curve([(0.2, 0.3), (0.4, 0.7), (0.8, 0.99)])
+        expected = Image(([0.25, 0.75], [0.25, 0.75], [[1, 0], [1, 1]]),
+                         vdims=['Count'])
+        img = aggregate(curve, dynamic=False,  x_range=(0, 1), y_range=(0, 1),
+                        width=2, height=2)
+        self.assertEqual(img, expected)
+
+    def test_aggregate_ndoverlay(self):
+        ds = Dataset([(0.2, 0.3, 0), (0.4, 0.7, 1), (0, 0.99, 2)], kdims=['x', 'y', 'z'])
+        ndoverlay = ds.to(Points, ['x', 'y'], [], 'z').overlay()
+        expected = Image(([0.25, 0.75], [0.25, 0.75], [[1, 0], [2, 0]]),
+                         vdims=['Count'])
+        img = aggregate(ndoverlay, dynamic=False,  x_range=(0, 1), y_range=(0, 1),
+                        width=2, height=2)
+        self.assertEqual(img, expected)