Skip to content

Commit

Permalink
Allow aggregate to rasterize to target Image gridding (#1513)
Browse files Browse the repository at this point in the history
* Allow aggregate to rasterize to target Image gridding

* Datashader coordinates now handled correctly

* Added optional datashader unit tests

* Install datashader from bokeh channel
  • Loading branch information
philippjfr authored and jlstevens committed Jun 4, 2017
1 parent 1e06adb commit 03b6edc
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 28 deletions.
3 changes: 2 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,10 @@ install:
- conda update -q conda
# Useful for debugging any issues with conda
- conda info -a
- conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION scipy=0.18.1 numpy freetype nose bokeh=0.12.5 pandas=0.19.2 jupyter ipython=4.2.0 param pyqt=4 matplotlib=1.5.1 xarray datashader dask=0.13
- conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION scipy=0.18.1 numpy freetype nose bokeh=0.12.5 pandas=0.19.2 jupyter ipython=4.2.0 param pyqt=4 matplotlib=1.5.1 xarray
- source activate test-environment
- conda install -c conda-forge iris sip=4.18 plotly flexx
- conda install -c bokeh datashader dask=0.13
- if [[ "$TRAVIS_PYTHON_VERSION" == "3.4" ]]; then
conda install python=3.4.3;
fi
Expand Down
73 changes: 46 additions & 27 deletions holoviews/operation/datashader.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,11 @@ class aggregate(Operation):
y_sampling = param.Number(default=None, doc="""
Specifies the smallest allowed sampling interval along the y-axis.""")

target = param.ClassSelector(class_=Image, doc="""
A target Image which defines the desired x_range, y_range,
width and height.
""")

streams = param.List(default=[PlotSize, RangeXY], doc="""
List of streams that are applied if dynamic=True, allowing
for dynamic interaction with the plot.""")
Expand Down Expand Up @@ -185,6 +190,34 @@ def get_agg_data(cls, obj, category=None):
return x, y, Dataset(df, kdims=kdims, vdims=vdims), glyph


def _get_sampling(self, element, x, y):
    """
    Compute the ranges, sample coordinates and grid shape to aggregate onto.

    If ``self.p.target`` is set, the ranges and the grid shape are read
    from it. Otherwise they come from ``self.p.x_range``/``self.p.y_range``
    (falling back to the element's own ranges, or to ``(-0.5, 0.5)`` when
    ``x`` or ``y`` is None) and ``self.p.width``/``self.p.height``.

    ``self.p.x_sampling`` and ``self.p.y_sampling``, when set, cap the
    number of samples so that the sampling interval is not smaller than
    the requested value. At least one sample is always kept per axis.

    Returns a tuple ``((x_range, y_range), (xs, ys), (width, height))``
    where ``xs`` and ``ys`` are the bin-centre coordinates along each axis.
    """
    target = self.p.target
    # Explicit None check: whether a target was supplied must not depend
    # on how the element type happens to define truthiness.
    if target is not None:
        x_range, y_range = target.range(x), target.range(y)
        height, width = target.dimension_values(2, flat=False).shape
    else:
        if x is None or y is None:
            x_range = self.p.x_range or (-0.5, 0.5)
            y_range = self.p.y_range or (-0.5, 0.5)
        else:
            x_range = self.p.x_range or element.range(x)
            y_range = self.p.y_range or element.range(y)
        width, height = self.p.width, self.p.height
    (xstart, xend), (ystart, yend) = x_range, y_range

    # Compute highest allowed sampling density
    xspan = xend - xstart
    yspan = yend - ystart
    if self.p.x_sampling:
        # Clamp to one sample: a sampling interval larger than the span
        # would otherwise truncate to zero and divide by zero below.
        width = max(1, int(min(xspan/self.p.x_sampling, width)))
    if self.p.y_sampling:
        height = max(1, int(min(yspan/self.p.y_sampling, height)))

    # Place the coordinates at the centre of each bin, i.e. half a bin
    # in from either edge of the range.
    xunit, yunit = float(xspan)/width, float(yspan)/height
    xs = np.linspace(xstart+xunit/2., xend-xunit/2., width)
    ys = np.linspace(ystart+yunit/2., yend-yunit/2., height)
    return (x_range, y_range), (xs, ys), (width, height)


def _aggregate_ndoverlay(self, element, agg_fn):
"""
Optimized aggregation for NdOverlay objects by aggregating each
Expand All @@ -197,10 +230,9 @@ def _aggregate_ndoverlay(self, element, agg_fn):
"""
# Compute overall bounds
x, y = element.last.dimensions()[0:2]
xstart, xend = self.p.x_range if self.p.x_range else element.range(x)
ystart, yend = self.p.y_range if self.p.y_range else element.range(y)
(x_range, y_range), (xs, ys), (width, height) = self._get_sampling(element, x, y)
agg_params = dict({k: v for k, v in self.p.items() if k in aggregate.params()},
x_range=(xstart, xend), y_range=(ystart, yend))
x_range=x_range, y_range=y_range)

# Optimize categorical counts by aggregating them individually
if isinstance(agg_fn, ds.count_cat):
Expand Down Expand Up @@ -267,30 +299,15 @@ def _process(self, element, key=None):
return self._aggregate_ndoverlay(element, agg_fn)

x, y, data, glyph = self.get_agg_data(element, category)
(x_range, y_range), (xs, ys), (width, height) = self._get_sampling(element, x, y)

if x is None or y is None:
x0, x1 = self.p.x_range or (-0.5, 0.5)
y0, y1 = self.p.y_range or (-0.5, 0.5)
xc = np.linspace(x0, x1, self.p.width)
yc = np.linspace(y0, y1, self.p.height)
xarray = xr.DataArray(np.full((self.p.height, self.p.width), np.NaN, dtype=np.float32),
dims=['y', 'x'], coords={'x': xc, 'y': yc})
xarray = xr.DataArray(np.full((height, width), np.NaN, dtype=np.float32),
dims=['y', 'x'], coords={'x': xs, 'y': ys})
return self.p.element_type(xarray)

xstart, xend = self.p.x_range if self.p.x_range else data.range(x)
ystart, yend = self.p.y_range if self.p.y_range else data.range(y)

# Compute highest allowed sampling density
width, height = self.p.width, self.p.height
if self.p.x_sampling:
x_range = xend - xstart
width = int(min([(x_range/self.p.x_sampling), width]))
if self.p.y_sampling:
y_range = yend - ystart
height = int(min([(y_range/self.p.y_sampling), height]))

cvs = ds.Canvas(plot_width=width, plot_height=height,
x_range=(xstart, xend), y_range=(ystart, yend))
x_range=x_range, y_range=y_range)

column = agg_fn.column
if column and isinstance(agg_fn, ds.count_cat):
Expand All @@ -304,12 +321,14 @@ def _process(self, element, key=None):

agg = getattr(cvs, glyph)(data, x, y, self.p.aggregator)
if agg.ndim == 2:
return self.p.element_type(agg, **params)
# Replacing x and y coordinates to avoid numerical precision issues
return self.p.element_type((xs, ys, agg.data), **params)
else:
return NdOverlay({c: self.p.element_type(agg.sel(**{column: c}),
**params)
for c in agg.coords[column].data},
kdims=[data.get_dimension(column)])
layers = {}
for c in agg.coords[column].data:
cagg = agg.sel(**{column: c})
layers[c] = self.p.element_type((xs, ys, cagg.data), **params)
return NdOverlay(layers, kdims=[data.get_dimension(column)])



Expand Down
59 changes: 59 additions & 0 deletions tests/testdatashader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from nose.plugins.attrib import attr

import pandas as pd
import numpy as np

from holoviews import Curve, Scatter, Points, Image, Dataset
from holoviews.element.comparison import ComparisonTestCase

try:
    from holoviews.operation.datashader import aggregate
except ImportError:
    # datashader is an optional dependency. Keep a None placeholder so this
    # module can still be imported when it is not installed. Only
    # ImportError is caught so that KeyboardInterrupt/SystemExit and
    # unrelated failures are not silently swallowed.
    aggregate = None


@attr(optional=1)
class DatashaderTests(ComparisonTestCase):
    """
    Tests for datashader aggregation
    """

    def setUp(self):
        # When the optional import of the aggregate operation failed the
        # module-level name is None; skip instead of erroring with
        # "'NoneType' object is not callable" in every test.
        if aggregate is None:
            from unittest import SkipTest
            raise SkipTest("datashader is not available")
        super(DatashaderTests, self).setUp()

    def test_aggregate_points(self):
        # Three points binned onto an explicit 2x2 grid over the unit square.
        points = Points([(0.2, 0.3), (0.4, 0.7), (0, 0.99)])
        img = aggregate(points, dynamic=False, x_range=(0, 1), y_range=(0, 1),
                        width=2, height=2)
        expected = Image(([0.25, 0.75], [0.25, 0.75], [[1, 0], [2, 0]]),
                         vdims=['Count'])
        self.assertEqual(img, expected)

    def test_aggregate_points_target(self):
        # The grid is taken from the target Image rather than from
        # explicit range/width/height arguments.
        points = Points([(0.2, 0.3), (0.4, 0.7), (0, 0.99)])
        expected = Image(([0.25, 0.75], [0.25, 0.75], [[1, 0], [2, 0]]),
                         vdims=['Count'])
        img = aggregate(points, dynamic=False, target=expected)
        self.assertEqual(img, expected)

    def test_aggregate_points_sampling(self):
        # No width/height given: the 0.5 sampling interval over a unit
        # range is expected to yield the same 2x2 grid.
        points = Points([(0.2, 0.3), (0.4, 0.7), (0, 0.99)])
        expected = Image(([0.25, 0.75], [0.25, 0.75], [[1, 0], [2, 0]]),
                         vdims=['Count'])
        img = aggregate(points, dynamic=False, x_range=(0, 1), y_range=(0, 1),
                        x_sampling=0.5, y_sampling=0.5)
        self.assertEqual(img, expected)

    def test_aggregate_curve(self):
        curve = Curve([(0.2, 0.3), (0.4, 0.7), (0.8, 0.99)])
        expected = Image(([0.25, 0.75], [0.25, 0.75], [[1, 0], [1, 1]]),
                         vdims=['Count'])
        img = aggregate(curve, dynamic=False, x_range=(0, 1), y_range=(0, 1),
                        width=2, height=2)
        self.assertEqual(img, expected)

    def test_aggregate_ndoverlay(self):
        # The same three points as above, split into one Points element
        # per 'z' value and overlaid; the expected counts are unchanged.
        ds = Dataset([(0.2, 0.3, 0), (0.4, 0.7, 1), (0, 0.99, 2)], kdims=['x', 'y', 'z'])
        ndoverlay = ds.to(Points, ['x', 'y'], [], 'z').overlay()
        expected = Image(([0.25, 0.75], [0.25, 0.75], [[1, 0], [2, 0]]),
                         vdims=['Count'])
        img = aggregate(ndoverlay, dynamic=False, x_range=(0, 1), y_range=(0, 1),
                        width=2, height=2)
        self.assertEqual(img, expected)

0 comments on commit 03b6edc

Please sign in to comment.