Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix histogram operation on Ibis data #5929

Merged
merged 9 commits into from
Oct 9, 2023
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions holoviews/core/data/ibis.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ def ibis4():
return ibis_version() >= Version("4.0")


@lru_cache
def ibis5():
    """Report whether the installed ibis release is 5.0 or newer.

    The answer is memoised by ``lru_cache``, so the version lookup
    happens only on the first call.
    """
    installed = ibis_version()
    return installed >= Version("5.0")
hoxbro marked this conversation as resolved.
Show resolved Hide resolved


class IbisInterface(Interface):

types = ()
Expand Down Expand Up @@ -163,16 +168,16 @@ def histogram(cls, expr, bins, density=True, weights=None):
else:
# sort_by will be removed in Ibis 5.0
hist_bins = binned.value_counts().sort_by('bucket').execute()

for b, v in zip(hist_bins['bucket'], hist_bins['count']):
metric_name = 'bucket_count' if ibis5() else 'count'
for b, v in zip(hist_bins['bucket'], hist_bins[metric_name]):
if np.isnan(b):
continue
hist[int(b)] = v
if weights is not None:
raise NotImplementedError("Weighted histograms currently "
"not implemented for IbisInterface.")
if density:
hist = hist/expr.count().execute()
hist = hist/expr.count().execute()/np.diff(bins)
hoxbro marked this conversation as resolved.
Show resolved Hide resolved
return hist, bins

@classmethod
Expand Down
8 changes: 7 additions & 1 deletion holoviews/operation/element.py
Original file line number Diff line number Diff line change
Expand Up @@ -737,10 +737,16 @@ def _process(self, element, key=None):

# Mask data
if is_ibis_expr(data):
from ..core.data.ibis import ibis5

mask = data.notnull()
if self.p.nonzero:
mask = mask & (data != 0)
data = data.to_projection()
if ibis5():
data = data.as_table()
else:
# to_projection removed in ibis 5.0.0
data = data.to_projection()
data = data[mask]
no_data = not len(data.head(1).execute())
data = data[dim.name]
Expand Down
11 changes: 11 additions & 0 deletions holoviews/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import pytest

from panel.tests.conftest import server_cleanup, port, pytest_addoption, pytest_configure, optional_markers # noqa


Expand Down Expand Up @@ -26,3 +28,12 @@ def pytest_collection_modifyitems(config, items):
dask.config.set({"dataframe.convert-string": False})
except Exception:
pass


@pytest.fixture
def ibis_sqlite_backend():
    """Make SQLite the default ibis backend for the duration of a test.

    The backend that was active beforehand is put back once the test
    has finished.
    """
    import ibis

    previous_backend = ibis.get_backend()
    ibis.set_backend('sqlite')
    yield
    # Teardown: restore whatever backend was the default before the test.
    ibis.set_backend(previous_backend)
41 changes: 41 additions & 0 deletions holoviews/tests/operation/test_operation.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@
except ImportError:
da = None

try:
import ibis
except ImportError:
ibis = None

from holoviews import (HoloMap, NdOverlay, NdLayout, GridSpace, Image,
Contours, Polygons, Points, Histogram, Curve, Area,
QuadMesh, Dataset, renderer)
Expand All @@ -20,6 +25,7 @@
interpolate_curve, decimate)

# Skip decorators for tests that rely on optional dependencies; the
# corresponding module name is None when its import failed.
da_skip = skipIf(da is None, "dask.array is not available")
ibis_skip = skipIf(ibis is None, "ibis is not available")


class OperationTests(ComparisonTestCase):
Expand Down Expand Up @@ -179,6 +185,41 @@
self.assertIsInstance(op_hist.data['y'], da.Array)
self.assertEqual(op_hist, hist)

@ibis_skip
@pytest.mark.usefixtures('ibis_sqlite_backend')
def test_dataset_histogram_ibis(self):
    """Normalised 3-bin histogram of an ibis-backed Dataset."""
    frame = pd.DataFrame({'x': np.arange(10)})
    table = ibis.memtable(frame, name='t')
    dataset = Dataset(table, vdims='x')

    # Expected density: counts (3, 3, 4) / 10 samples / bin width 3.
    expected = Histogram(
        ([0, 3, 6, 9], [0.1, 0.1, 0.133333]),
        vdims=('x_frequency', 'Frequency'),
    )
    computed = histogram(dataset, dimension='x', num_bins=3, normed=True)
    self.assertEqual(computed, expected)

@ibis_skip
@pytest.mark.usefixtures('ibis_sqlite_backend')
def test_dataset_cumulative_histogram_ibis(self):
    """Cumulative, normalised 3-bin histogram of an ibis-backed Dataset."""
    frame = pd.DataFrame({'x': np.arange(10)})
    table = ibis.memtable(frame, name='t')
    dataset = Dataset(table, vdims='x')

    expected = Histogram(
        ([0, 3, 6, 9], [0.3, 0.6, 1]),
        vdims=('x_frequency', 'Frequency'),
    )
    computed = histogram(dataset, num_bins=3, cumulative=True, normed=True)
    self.assertEqual(computed, expected)

@ibis_skip
@pytest.mark.usefixtures('ibis_sqlite_backend')
def test_dataset_histogram_explicit_bins_ibis(self):
    """Un-normalised histogram of an ibis-backed Dataset with explicit bin edges."""
    # The skip marker and the SQLite backend fixture are both required here,
    # matching the other ibis histogram tests. Without them CI failed with
    # "AttributeError: 'NoneType' object has no attribute 'memtable'" when
    # ibis is not installed, and with an IbisError when ibis is installed
    # but its default backend (DuckDB) is not.
    df = pd.DataFrame(dict(x=np.arange(10)))
    t = ibis.memtable(df, name='t')
    ds = Dataset(t, vdims='x')
    op_hist = histogram(ds, bins=[0, 1, 3], normed=False)

    hist = Histogram(([0, 1, 3], [1, 3]),
                     vdims=('x_count', 'Count'))
    self.assertEqual(op_hist, hist)


def test_points_histogram_bin_range(self):
points = Points([float(i) for i in range(10)])
op_hist = histogram(points, num_bins=3, bin_range=(0, 3), normed=True)
Expand Down
Loading