Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix the ibis histogram #5531

Draft
wants to merge 14 commits into
base: main
Choose a base branch
from
45 changes: 39 additions & 6 deletions holoviews/core/data/ibis.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,22 @@ def has_rowid(cls):
return hasattr(ibis.expr.operations, "RowID")

@classmethod
def is_rowid_zero_indexed(cls, data):
def _get_backend(cls, data):
try:
from ibis.client import find_backends, validate_backends
(backend,) = validate_backends(list(find_backends(data)))
return backend
except Exception:
backend = data._find_backend()
pass

try:
return data._find_backend()
except ibis.common.exceptions.IbisError:
return "ibis.backends.not_found"

@classmethod
def is_rowid_zero_indexed(cls, data):
    """
    Report whether the backend resolved for ``data`` lives in one of
    the modules listed in ``cls.zero_indexed_backend_modules``.
    """
    backend_module = type(cls._get_backend(data)).__module__
    return backend_module in cls.zero_indexed_backend_modules

@classmethod
Expand Down Expand Up @@ -111,7 +121,14 @@ def nonzero(cls, dataset):
@cached
def range(cls, dataset, dimension):
dimension = dataset.get_dimension(dimension, strict=True)
if cls.dtype(dataset, dimension).kind in 'SUO':
dtype_kind = cls.dtype(dataset, dimension).kind
if dtype_kind == 'O':
# Can this be done more efficiently?
column = dataset.data[dimension.name].execute()
first = column.iloc[0]
last = column.iloc[-1]
return first, last
if dtype_kind in 'SU':
return None, None
if dimension.nodata is not None:
return Interface.range(dataset, dimension)
Expand Down Expand Up @@ -147,9 +164,23 @@ def values(
def histogram(cls, expr, bins, density=True, weights=None):
bins = numpy.asarray(bins)
bins = [int(v) if bins.dtype.kind in 'iu' else float(v) for v in bins]
binned = expr.bucket(bins).name('bucket')

# See https://github.com/ibis-project/ibis/issues/4940#issuecomment-1334181645
df = expr.to_projection()
try:
hist_bins = (
df
.mutate(bucket=expr.bucket(bins))
.bucket
.value_counts()
.sort_by('bucket')
).execute()
except NotImplementedError:
# See https://github.com/ibis-project/ibis/issues/4939
array = expr.execute()
return numpy.histogram(array, bins=bins, density=density, weights=weights)

hist = numpy.zeros(len(bins)-1)
hist_bins = binned.value_counts().sort_by('bucket').execute()
for b, v in zip(hist_bins['bucket'], hist_bins['count']):
if numpy.isnan(b):
continue
Expand All @@ -172,7 +203,9 @@ def dtype(cls, dataset, dimension):
dimension = dataset.get_dimension(dimension)
return dataset.data.head(0).execute().dtypes[dimension.name]

dimension_type = dtype
@classmethod
def dimension_type(cls, dataset, dim):
    """
    Return the ``type`` attribute of the dtype that ``cls.dtype``
    reports for dimension ``dim`` of ``dataset``.
    """
    dim_dtype = cls.dtype(dataset, dim)
    return dim_dtype.type

@classmethod
def sort(cls, dataset, by=[], reverse=False):
Expand Down
1 change: 0 additions & 1 deletion holoviews/core/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -891,7 +891,6 @@ def isfinite(val):
return finite & (~pd.isna(val))
return finite


def isdatetime(value):
"""
Whether the array or scalar is recognized datetime type.
Expand Down
116 changes: 111 additions & 5 deletions holoviews/tests/core/data/test_ibisinterface.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,32 @@
import sqlite3
from unittest import SkipTest

from tempfile import NamedTemporaryFile
from unittest import SkipTest

try:
import ibis
from ibis import sqlite
except:
raise SkipTest("Could not import ibis, skipping IbisInterface tests.")

try:
import duckdb
except:
raise SkipTest("Could not import duckdb, skipping IbisInterface tests.")

from pathlib import Path

import numpy as np
import pandas as pd

import param
import pytest
from bokeh.models import axes as bokeh_axes
from holoviews import render
from holoviews.core.data import Dataset
from holoviews.core.spaces import HoloMap
from holoviews.core.data.ibis import IbisInterface
from holoviews.core.spaces import HoloMap
from holoviews.element.chart import Curve

from .base import HeterogeneousColumnTests, ScalarColumnTests, InterfaceTests
from .base import HeterogeneousColumnTests, InterfaceTests, ScalarColumnTests


def create_temp_db(df, name, index=False):
Expand Down Expand Up @@ -303,3 +313,99 @@ def test_dataset_iloc_ellipsis_list_cols(self):

def test_dataset_boolean_index(self):
    """Skip unconditionally; the test body only raises ``SkipTest``."""
    raise SkipTest("Not supported")

def pandas_data(df: pd.DataFrame, *args, **kwargs):
    """
    Build an ibis pandas-backend connection exposing ``df`` under the
    table name ``"df"``. Extra positional/keyword arguments are ignored.
    """
    tables = {"df": df}
    return ibis.pandas.connect(tables)

def ibis_duckdb_data(df: pd.DataFrame, *args, **kwargs):
    """
    Write ``df`` into a DuckDB database file and return an ibis DuckDB
    connection to that file.

    The database is created as ``db.db`` inside ``kwargs["tmpdir"]``; a
    ``KeyError`` is raised when ``tmpdir`` is not supplied. Extra
    positional arguments are ignored.
    """
    tmpdir = kwargs["tmpdir"]
    filename = str(Path(tmpdir) / "db.db")
    duckdb_con = duckdb.connect(filename)
    try:
        # NOTE(review): the ``df`` on the right-hand side is presumably
        # resolved by DuckDB to the local pandas DataFrame of that name
        # (replacement scan) -- confirm against the DuckDB Python docs.
        duckdb_con.execute("CREATE TABLE df AS SELECT * FROM df")
    finally:
        # Release the raw DuckDB handle before ibis opens the same file,
        # so the connection is not leaked and the table is flushed.
        duckdb_con.close()

    return ibis.duckdb.connect(filename)

def ibis_sqlite_data(df: pd.DataFrame, *args, **kwargs):
    """
    Delegate to ``create_temp_db`` to store ``df`` under the table name
    ``"df"`` and return whatever that helper returns. Extra arguments
    are ignored.
    """
    table_name = "df"
    return create_temp_db(df, table_name)

class IbisMemConnection(param.Parameterized):
    """
    Connection-like wrapper around a single ``ibis.memtable`` so that an
    in-memory table can be fetched through a ``.table(...)`` call.
    """

    def __init__(self, df):
        super().__init__()
        memtable = ibis.memtable(df)
        self._table = memtable

    def table(self, df):
        """Return the wrapped memtable; the ``df`` argument is not used."""
        return self._table

def ibis_mem_table(df: pd.DataFrame, *args, **kwargs):
    """
    Wrap ``df`` in an ``IbisMemConnection``. Extra positional/keyword
    arguments are ignored.
    """
    mem_connection = IbisMemConnection(df=df)
    return mem_connection

@pytest.fixture
def reference_df():
    """
    Seven-row reference frame with integer, float, datetime and string
    columns used as the source data for the backend fixtures.
    """
    columns = {
        "actual": [100, 150, 125, 140, 145, 135, 123],
        "forecast": [90, 160, 125, 150, 141, 141, 120],
        "numerical": [1.1, 1.9, 3.2, 3.8, 4.3, 5.0, 5.5],
        "date": pd.date_range("2022-01-03", "2022-01-09"),
        "string": ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"],
    }
    return pd.DataFrame(columns)

@pytest.fixture(params=[pandas_data, ibis_duckdb_data, ibis_sqlite_data, ibis_mem_table])
def connection(request, reference_df, tmpdir):
    """
    Parametrized fixture: call each backend factory in ``params`` with
    the reference frame and the per-test ``tmpdir``.
    """
    factory = request.param
    return factory(reference_df, tmpdir=tmpdir)

@pytest.fixture
def data(connection):
    """Fetch the table named ``"df"`` from the current ``connection``."""
    table_name = "df"
    return connection.table(table_name)

@pytest.fixture
def dataset(data):
    """
    Wrap ``data`` in a ``Dataset`` with three key dimensions and two
    value dimensions.
    """
    key_dimensions = ["numerical", "date", "string"]
    value_dimensions = ["actual", "forecast"]
    return Dataset(data, kdims=key_dimensions, vdims=value_dimensions)

def test_get_backend(data):
    """``IbisInterface._get_backend`` must return a truthy value for ``data``."""
    backend = IbisInterface._get_backend(data)
    assert backend

def test_index_ibis_table(data):
table = IbisInterface._index_ibis_table(data)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indentation seems off.

table.execute()

@pytest.mark.parametrize(["dimension", "expected"], [
    ("date", (np.datetime64('2022-01-03'), np.datetime64('2022-01-09'))),
    ("string", ('Mon', 'Sun')),
    ("numerical", (np.float64(1.1), np.float64(5.5))),
])
def test_range(dimension, expected, dataset):
    """``IbisInterface.range`` must equal the expected pair for each dimension."""
    computed = IbisInterface.range(dataset, dimension)
    assert computed == expected

@pytest.mark.parametrize(["dimension", "expected"], [
    ("date", np.datetime64),
    ("string", np.object_),
    ("numerical", np.float64),
])
def test_dimension_type(dimension, expected, dataset):
    """``IbisInterface.dimension_type`` must return exactly the expected NumPy type."""
    resolved = IbisInterface.dimension_type(dataset, dimension)
    assert resolved is expected

def test_histogram(data):
    """
    Histogram the non-null ``actual`` values over three bins and check
    both the counts and the returned bin edges.
    """
    non_null_rows = data[data.actual.notnull()]
    expr = non_null_rows.actual
    bins = [90.0, 113.33333333333333, 136.66666666666666, 160.0]
    result = IbisInterface.histogram(expr, bins, density=False)
    counts = result[0]
    edges = result[1]
    np.testing.assert_array_equal(counts, np.array([1, 3, 3]))
    np.testing.assert_array_equal(edges, np.array(bins))

@pytest.mark.parametrize(["kdims", "vdims", "xaxis_type", "yaxis_type"], [
    ("date", "actual", bokeh_axes.DatetimeAxis, bokeh_axes.LinearAxis),
    ("string", "actual", bokeh_axes.CategoricalAxis, bokeh_axes.LinearAxis),
    ("numerical", "actual", bokeh_axes.LinearAxis, bokeh_axes.LinearAxis),
    ("numerical", "date", bokeh_axes.LinearAxis, bokeh_axes.DatetimeAxis),
    ("numerical", "string", bokeh_axes.LinearAxis, bokeh_axes.CategoricalAxis),
])
def test_bokeh_axis(data, kdims, vdims, xaxis_type, yaxis_type):
    """Check that rendering a ``Curve`` with bokeh yields the expected axis classes."""
    # Given
    curve = Curve(data, kdims=kdims, vdims=vdims)
    # When
    figure = render(curve, "bokeh")
    xaxis, yaxis = figure.axis
    # Then
    assert isinstance(xaxis, xaxis_type)
    assert isinstance(yaxis, yaxis_type)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's remove this one for now.