Skip to content

Commit

Permalink
Fix histogram operation on Ibis data (#5929)
Browse files Browse the repository at this point in the history
  • Loading branch information
maximlt authored Oct 9, 2023
1 parent adbfe2c commit 8ba691a
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 4 deletions.
11 changes: 8 additions & 3 deletions holoviews/core/data/ibis.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ def ibis4():
return ibis_version() >= Version("4.0")


@lru_cache
def ibis5():
    """Return True when the installed Ibis version is at least 5.0.

    The result is memoised by ``lru_cache`` so the version comparison
    is only evaluated on the first call.
    """
    installed = ibis_version()
    return installed >= Version("5.0")


class IbisInterface(Interface):

types = ()
Expand Down Expand Up @@ -163,16 +168,16 @@ def histogram(cls, expr, bins, density=True, weights=None):
else:
# sort_by will be removed in Ibis 5.0
hist_bins = binned.value_counts().sort_by('bucket').execute()

for b, v in zip(hist_bins['bucket'], hist_bins['count']):
metric_name = 'bucket_count' if ibis5() else 'count'
for b, v in zip(hist_bins['bucket'], hist_bins[metric_name]):
if np.isnan(b):
continue
hist[int(b)] = v
if weights is not None:
raise NotImplementedError("Weighted histograms currently "
"not implemented for IbisInterface.")
if density:
hist = hist/expr.count().execute()
hist = hist/expr.count().execute()/np.diff(bins)
return hist, bins

@classmethod
Expand Down
8 changes: 7 additions & 1 deletion holoviews/operation/element.py
Original file line number Diff line number Diff line change
Expand Up @@ -737,10 +737,16 @@ def _process(self, element, key=None):

# Mask data
if is_ibis_expr(data):
from ..core.data.ibis import ibis5

mask = data.notnull()
if self.p.nonzero:
mask = mask & (data != 0)
data = data.to_projection()
if ibis5():
data = data.as_table()
else:
# to_projection removed in ibis 5.0.0
data = data.to_projection()
data = data[mask]
no_data = not len(data.head(1).execute())
data = data[dim.name]
Expand Down
14 changes: 14 additions & 0 deletions holoviews/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import pytest

from panel.tests.conftest import server_cleanup, port, pytest_addoption, pytest_configure, optional_markers # noqa


Expand Down Expand Up @@ -26,3 +28,15 @@ def pytest_collection_modifyitems(config, items):
dask.config.set({"dataframe.convert-string": False})
except Exception:
pass


@pytest.fixture
def ibis_sqlite_backend():
    # Fixture that makes SQLite the default Ibis backend for the duration
    # of a test and clears the default again afterwards. When Ibis cannot
    # be imported the fixture simply yields None and does nothing else.
    try:
        import ibis
    except ImportError:
        yield None
        return

    ibis.set_backend('sqlite')
    yield
    # Teardown: reset the default backend so later tests are unaffected.
    ibis.set_backend(None)
43 changes: 43 additions & 0 deletions holoviews/tests/operation/test_operation.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@
except ImportError:
da = None

try:
import ibis
except ImportError:
ibis = None

from holoviews import (HoloMap, NdOverlay, NdLayout, GridSpace, Image,
Contours, Polygons, Points, Histogram, Curve, Area,
QuadMesh, Dataset, renderer)
Expand All @@ -20,6 +25,7 @@
interpolate_curve, decimate)

da_skip = skipIf(da is None, "dask.array is not available")
ibis_skip = skipIf(ibis is None, "ibis is not available")


class OperationTests(ComparisonTestCase):
Expand Down Expand Up @@ -179,6 +185,43 @@ def test_dataset_weighted_histogram_dask(self):
self.assertIsInstance(op_hist.data['y'], da.Array)
self.assertEqual(op_hist, hist)

@ibis_skip
@pytest.mark.usefixtures('ibis_sqlite_backend')
def test_dataset_histogram_ibis(self):
    # Ten integers 0..9 split into three bins with edges 0, 3, 6, 9.
    frame = pd.DataFrame({'x': np.arange(10)})
    table = ibis.memtable(frame, name='t')
    dataset = Dataset(table, vdims='x')

    computed = histogram(dataset, dimension='x', num_bins=3, normed=True)

    # Expected values are the normalised frequencies per bin.
    edges = [0, 3, 6, 9]
    frequencies = [0.1, 0.1, 0.133333]
    expected = Histogram(
        (edges, frequencies),
        vdims=('x_frequency', 'Frequency'),
    )
    self.assertEqual(computed, expected)

@ibis_skip
@pytest.mark.usefixtures('ibis_sqlite_backend')
def test_dataset_cumulative_histogram_ibis(self):
    # Same ten-value table as the plain histogram test, but requesting
    # the cumulative, normalised histogram (no explicit dimension given).
    frame = pd.DataFrame({'x': np.arange(10)})
    table = ibis.memtable(frame, name='t')
    dataset = Dataset(table, vdims='x')

    computed = histogram(dataset, num_bins=3, cumulative=True, normed=True)

    # The cumulative frequencies accumulate to 1 in the last bin.
    edges = [0, 3, 6, 9]
    cumulative = [0.3, 0.6, 1]
    expected = Histogram(
        (edges, cumulative),
        vdims=('x_frequency', 'Frequency'),
    )
    self.assertEqual(computed, expected)

@ibis_skip
@pytest.mark.usefixtures('ibis_sqlite_backend')
def test_dataset_histogram_explicit_bins_ibis(self):
    # Explicit bin edges and raw (un-normalised) counts.
    frame = pd.DataFrame({'x': np.arange(10)})
    table = ibis.memtable(frame, name='t')
    dataset = Dataset(table, vdims='x')

    edges = [0, 1, 3]
    computed = histogram(dataset, bins=[0, 1, 3], normed=False)

    expected = Histogram(
        (edges, [1, 3]),
        vdims=('x_count', 'Count'),
    )
    self.assertEqual(computed, expected)


def test_points_histogram_bin_range(self):
points = Points([float(i) for i in range(10)])
op_hist = histogram(points, num_bins=3, bin_range=(0, 3), normed=True)
Expand Down

0 comments on commit 8ba691a

Please sign in to comment.