Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lazy load dask.dataframe in datashader.py #6309

Merged
merged 5 commits into from
Jul 4, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions holoviews/operation/datashader.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import sys
import warnings
from collections.abc import Callable, Iterable
from functools import partial

import dask.dataframe as dd
import datashader as ds
import datashader.reductions as rd
import datashader.transfer_functions as tf
Expand Down Expand Up @@ -69,6 +69,13 @@
from ..streams import PointerXY
from .resample import LinkableOperation, ResampleOperation2D


def _lazy_dask_dataframe():
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a bit of a nit, but I tend to think this doesn't do what people usually mean by a lazy import (i.e. delaying the import of a module until it is actually used). I just remembered that hvPlot has a few utilities that do this together with the isinstance check, for example:

def is_dask(data):
    if not check_library(data, 'dask'):
        return False
    import dask.dataframe as dd

    return isinstance(data, (dd.DataFrame, dd.Series))

def is_xarray_dataarray(data):
    if not check_library(data, 'xarray'):
        return False
    from xarray import DataArray

    return isinstance(data, DataArray)

if "dask" in sys.modules:
import dask.dataframe as dd
return dd
return None

ds_version = Version(ds.__version__)
ds15 = ds_version >= Version('0.15.1')
ds16 = ds_version >= Version('0.16.0')
Expand Down Expand Up @@ -300,15 +307,16 @@ def get_agg_data(cls, obj, category=None):
else:
x, y = dims

dd = _lazy_dask_dataframe()
if len(paths) > 1:
if glyph == 'line':
path = paths[0][:1]
if isinstance(path, dd.DataFrame):
if dd and isinstance(path, dd.DataFrame):
path = path.compute()
empty = path.copy()
empty.iloc[0, :] = (np.nan,) * empty.shape[1]
paths = [elem for p in paths for elem in (p, empty)][:-1]
if all(isinstance(path, dd.DataFrame) for path in paths):
if dd and all(isinstance(path, dd.DataFrame) for path in paths):
df = dd.concat(paths)
else:
paths = [p.compute() if isinstance(p, dd.DataFrame) else p for p in paths]
Expand All @@ -318,7 +326,7 @@ def get_agg_data(cls, obj, category=None):
if category and df[category].dtype.name != 'category':
df[category] = df[category].astype('category')

is_custom = isinstance(df, dd.DataFrame) or cuDFInterface.applies(df)
is_custom = (dd and isinstance(df, dd.DataFrame)) or cuDFInterface.applies(df)
if any((not is_custom and len(df[d.name]) and isinstance(df[d.name].values[0], cftime_types)) or
df[d.name].dtype.kind in ["M", "u"] for d in (x, y)):
df = df.copy()
Expand Down
Loading