diff --git a/holoviews/core/data/ibis.py b/holoviews/core/data/ibis.py index 6213b81e6d..a06b987742 100644 --- a/holoviews/core/data/ibis.py +++ b/holoviews/core/data/ibis.py @@ -41,12 +41,22 @@ def has_rowid(cls): return hasattr(ibis.expr.operations, "RowID") @classmethod - def is_rowid_zero_indexed(cls, data): + def _get_backend(cls, data): try: from ibis.client import find_backends, validate_backends (backend,) = validate_backends(list(find_backends(data))) + return backend except Exception: - backend = data._find_backend() + pass + + try: + return data._find_backend() + except ibis.common.exceptions.IbisError: + return "ibis.backends.not_found" + + @classmethod + def is_rowid_zero_indexed(cls, data): + backend = cls._get_backend(data) return type(backend).__module__ in cls.zero_indexed_backend_modules @classmethod @@ -111,7 +121,14 @@ def nonzero(cls, dataset): @cached def range(cls, dataset, dimension): dimension = dataset.get_dimension(dimension, strict=True) - if cls.dtype(dataset, dimension).kind in 'SUO': + dtype_kind = cls.dtype(dataset, dimension).kind + if dtype_kind == 'O': + # Can this be done more efficiently? + column = dataset.data[dimension.name].execute() + first = column.iloc[0] + last = column.iloc[-1] + return first, last + if dtype_kind in 'SU': return None, None if dimension.nodata is not None: return Interface.range(dataset, dimension) @@ -147,9 +164,23 @@ def values( def histogram(cls, expr, bins, density=True, weights=None): bins = numpy.asarray(bins) bins = [int(v) if bins.dtype.kind in 'iu' else float(v) for v in bins] - binned = expr.bucket(bins).name('bucket') + + # See https://github.com/ibis-project/ibis/issues/4940#issuecomment-1334181645 + df = expr.to_projection() + try: + hist_bins = ( + df + .mutate(bucket=expr.bucket(bins)) + .bucket + .value_counts() + .sort_by('bucket') + ).execute() + except NotImplementedError: + # See https://github.com/ibis-project/ibis/issues/4939 + array = expr.execute() + return numpy.histogram(array, bins=bins, density=density, weights=weights) + hist = numpy.zeros(len(bins)-1) - hist_bins = binned.value_counts().sort_by('bucket').execute() for b, v in zip(hist_bins['bucket'], hist_bins['count']): if numpy.isnan(b): continue @@ -172,7 +203,9 @@ def dtype(cls, dataset, dimension): dimension = dataset.get_dimension(dimension) return dataset.data.head(0).execute().dtypes[dimension.name] - dimension_type = dtype + @classmethod + def dimension_type(cls, dataset, dim): + return cls.dtype(dataset, dim).type @classmethod def sort(cls, dataset, by=[], reverse=False): diff --git a/holoviews/core/util.py b/holoviews/core/util.py index b1133a28dd..7fecf1eb63 100644 --- a/holoviews/core/util.py +++ b/holoviews/core/util.py @@ -891,7 +891,6 @@ def isfinite(val): return finite & (~pd.isna(val)) return finite - def isdatetime(value): """ Whether the array or scalar is recognized datetime type. diff --git a/holoviews/tests/core/data/test_ibisinterface.py b/holoviews/tests/core/data/test_ibisinterface.py index b5b192d464..9a1e6df132 100644 --- a/holoviews/tests/core/data/test_ibisinterface.py +++ b/holoviews/tests/core/data/test_ibisinterface.py @@ -1,7 +1,6 @@ import sqlite3 -from unittest import SkipTest - from tempfile import NamedTemporaryFile +from unittest import SkipTest try: import ibis @@ -9,14 +8,25 @@ except: raise SkipTest("Could not import ibis, skipping IbisInterface tests.") +try: + import duckdb +except: + raise SkipTest("Could not import duckdb, skipping IbisInterface tests.") + +from pathlib import Path + import numpy as np import pandas as pd - +import param +import pytest +from bokeh.models import axes as bokeh_axes +from holoviews import render from holoviews.core.data import Dataset -from holoviews.core.spaces import HoloMap from holoviews.core.data.ibis import IbisInterface +from holoviews.core.spaces import HoloMap +from holoviews.element.chart import Curve -from .base import HeterogeneousColumnTests, ScalarColumnTests, InterfaceTests +from .base import HeterogeneousColumnTests, InterfaceTests, ScalarColumnTests def create_temp_db(df, name, index=False): @@ -303,3 +313,99 @@ def test_dataset_iloc_ellipsis_list_cols(self): def test_dataset_boolean_index(self): raise SkipTest("Not supported") + +def pandas_data(df: pd.DataFrame, *args, **kwargs): + return ibis.pandas.connect({"df": df}) + +def ibis_duckdb_data(df: pd.DataFrame, *args, **kwargs): + tmpdir = kwargs["tmpdir"] + filename = str(Path(tmpdir)/"db.db") + duckdb_con = duckdb.connect(filename) + duckdb_con.execute("CREATE TABLE df AS SELECT * FROM df") + + return ibis.duckdb.connect(filename) + +def ibis_sqlite_data(df: pd.DataFrame, *args, **kwargs): + return create_temp_db(df, "df") + +class IbisMemConnection(param.Parameterized): + def __init__(self, df): + super().__init__() + self._table = ibis.memtable(df) + + def table(self, df): + return self._table + +def ibis_mem_table(df: pd.DataFrame, *args, **kwargs): + return IbisMemConnection(df=df) + +@pytest.fixture +def reference_df(): + return pd.DataFrame( + { + "actual": [100, 150, 125, 140, 145, 135, 123], + "forecast": [90, 160, 125, 150, 141, 141, 120], + "numerical": [1.1, 1.9, 3.2, 3.8, 4.3, 5.0, 5.5], + "date": pd.date_range("2022-01-03", "2022-01-09"), + "string": ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"], + }, + ) + +@pytest.fixture(params=[pandas_data, ibis_duckdb_data, ibis_sqlite_data, ibis_mem_table]) +def connection(request, reference_df, tmpdir): + return request.param(reference_df, tmpdir=tmpdir) + +@pytest.fixture +def data(connection): + return connection.table("df") + +@pytest.fixture +def dataset(data): + return Dataset(data, kdims=["numerical", "date", "string"], vdims=["actual", "forecast"]) + +def test_get_backend(data): + assert IbisInterface._get_backend(data) + +def test_index_ibis_table(data): + table = IbisInterface._index_ibis_table(data) + table.execute() + +@pytest.mark.parametrize(["dimension", "expected"], [ + ("date", (np.datetime64('2022-01-03'), np.datetime64('2022-01-09'))), + ("string", ('Mon', 'Sun')), + ("numerical",(np.float64(1.1), np.float64(5.5))), +]) +def test_range(dimension, expected, dataset): + assert IbisInterface.range(dataset, dimension) == expected + +@pytest.mark.parametrize(["dimension", "expected"], [ + ("date", np.datetime64), + ("string", np.object_), + ("numerical", np.float64), +]) +def test_dimension_type(dimension, expected, dataset): + assert IbisInterface.dimension_type(dataset, dimension) is expected + +def test_histogram(data): + expr = data[data.actual.notnull()].actual + bins = [90.0, 113.33333333333333, 136.66666666666666, 160.0] + result = IbisInterface.histogram(expr, bins, density=False) + np.testing.assert_array_equal(result[0], np.array([1, 3, 3])) + np.testing.assert_array_equal(result[1], np.array(bins)) + +@pytest.mark.parametrize(["kdims", "vdims", "xaxis_type", "yaxis_type"], [ + ("date", "actual", bokeh_axes.DatetimeAxis, bokeh_axes.LinearAxis), + ("string", "actual", bokeh_axes.CategoricalAxis, bokeh_axes.LinearAxis), + ("numerical", "actual", bokeh_axes.LinearAxis, bokeh_axes.LinearAxis), + ("numerical", "date", bokeh_axes.LinearAxis, bokeh_axes.DatetimeAxis), + ("numerical", "string", bokeh_axes.LinearAxis, bokeh_axes.CategoricalAxis), + ]) +def test_bokeh_axis(data, kdims, vdims, xaxis_type, yaxis_type): + """Test to make sure the right axis can be identified for the bokeh backend""" + plot_ibis = Curve(data, kdims=kdims, vdims=vdims) + # When + plot_bokeh = render(plot_ibis, "bokeh") + xaxis, yaxis = plot_bokeh.axis + # Then + assert isinstance(xaxis, xaxis_type) + assert isinstance(yaxis, yaxis_type)