Skip to content

Commit

Permalink
[FEA] Report all unsupported operations for a query in cudf.polars
Browse files Browse the repository at this point in the history
  • Loading branch information
Matt711 committed Oct 1, 2024
1 parent 04baa22 commit bb51789
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 1 deletion.
6 changes: 6 additions & 0 deletions python/cudf_polars/cudf_polars/dsl/ir.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,12 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
) # pragma: no cover


@dataclasses.dataclass
class ErrorNode(IR):
    """
    IR node standing in for a plan node that could not be translated.

    The translation layer builds this node from a schema when translation
    raises ``NotImplementedError`` and ``CUDF_POLARS_DEBUG_MODE`` is enabled.
    """

    def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
        """
        Return a freshly constructed, empty ``pl.DataFrame``.

        ``cache`` is accepted for interface compatibility and is not used.
        """
        # NOTE(review): the annotation promises ``DataFrame`` but the value is
        # built with ``pl.DataFrame()``; confirm these are the same type.
        placeholder = pl.DataFrame()
        return placeholder


@dataclasses.dataclass
class PythonScan(IR):
"""Representation of input from a python function."""
Expand Down
35 changes: 34 additions & 1 deletion python/cudf_polars/cudf_polars/dsl/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,24 @@

from cudf_polars.dsl import expr, ir
from cudf_polars.typing import NodeTraverser
from cudf_polars.utils import dtypes
from cudf_polars.utils import dtypes, other

__all__ = ["translate_ir", "translate_named_expr"]


def debug(func):
    """
    Wrap ``func`` so unsupported translations can degrade to an error node.

    The wrapper forwards all arguments to ``func`` unchanged. If ``func``
    raises ``NotImplementedError`` and the ``CUDF_POLARS_DEBUG_MODE``
    environment variable is enabled (as parsed by ``other._env_get_bool``),
    an ``ir.ErrorNode`` built from ``args[0].get_schema()`` is returned
    instead. In every other case the exception propagates to the caller.

    Parameters
    ----------
    func
        Callable to wrap.

    Returns
    -------
    Wrapper that preserves the name, docstring and annotations of ``func``.
    """
    # Function-scope import keeps this block self-contained.
    import functools

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except NotImplementedError:
            if other._env_get_bool("CUDF_POLARS_DEBUG_MODE", default=False):
                # NOTE(review): assumes the first positional argument exposes
                # ``get_schema()``; if it does not, this lookup raises rather
                # than returning an ErrorNode. Confirm against each call site.
                return ir.ErrorNode(args[0].get_schema())
            raise

    return wrapper


class set_node(AbstractContextManager[None]):
"""
Run a block with current node set in the visitor.
Expand Down Expand Up @@ -66,11 +79,14 @@ def __exit__(self, *args: Any) -> None:
def _translate_ir(
    node: Any, visitor: NodeTraverser, schema: dict[str, plc.DataType]
) -> ir.IR:
    """
    Handle a node type that has no dedicated translation.

    With ``CUDF_POLARS_DEBUG_MODE`` enabled an ``ir.ErrorNode`` carrying
    ``schema`` is returned; otherwise ``NotImplementedError`` is raised
    naming the type of ``node``. ``visitor`` is not used here.
    """
    debug_mode = other._env_get_bool("CUDF_POLARS_DEBUG_MODE", default=False)
    if not debug_mode:
        raise NotImplementedError(
            f"Translation for {type(node).__name__}"
        )  # pragma: no cover
    return ir.ErrorNode(schema)


@debug
@_translate_ir.register
def _(
node: pl_ir.PythonScan, visitor: NodeTraverser, schema: dict[str, plc.DataType]
Expand All @@ -83,6 +99,7 @@ def _(
return ir.PythonScan(schema, options, predicate)


@debug
@_translate_ir.register
def _(
node: pl_ir.Scan, visitor: NodeTraverser, schema: dict[str, plc.DataType]
Expand Down Expand Up @@ -120,13 +137,15 @@ def _(
)


# NOTE(review): if ``_translate_ir`` is a ``functools.singledispatch``
# function, ``.register`` registers and returns the undecorated function, so
# the ``@debug`` wrapper stacked above it is never invoked through dispatch.
@debug
@_translate_ir.register
def _(
    node: pl_ir.Cache, visitor: NodeTraverser, schema: dict[str, plc.DataType]
) -> ir.IR:
    """Translate a ``Cache`` node, translating its single input first."""
    cache_id = node.id_
    child = translate_ir(visitor, n=node.input)
    return ir.Cache(schema, cache_id, child)


@debug
@_translate_ir.register
def _(
node: pl_ir.DataFrameScan, visitor: NodeTraverser, schema: dict[str, plc.DataType]
Expand All @@ -141,6 +160,7 @@ def _(
)


@debug
@_translate_ir.register
def _(
node: pl_ir.Select, visitor: NodeTraverser, schema: dict[str, plc.DataType]
Expand All @@ -151,6 +171,7 @@ def _(
return ir.Select(schema, inp, exprs, node.should_broadcast)


@debug
@_translate_ir.register
def _(
node: pl_ir.GroupBy, visitor: NodeTraverser, schema: dict[str, plc.DataType]
Expand All @@ -169,6 +190,7 @@ def _(
)


@debug
@_translate_ir.register
def _(
node: pl_ir.Join, visitor: NodeTraverser, schema: dict[str, plc.DataType]
Expand All @@ -185,6 +207,7 @@ def _(
return ir.Join(schema, inp_left, inp_right, left_on, right_on, node.options)


@debug
@_translate_ir.register
def _(
node: pl_ir.HStack, visitor: NodeTraverser, schema: dict[str, plc.DataType]
Expand All @@ -195,6 +218,7 @@ def _(
return ir.HStack(schema, inp, exprs, node.should_broadcast)


@debug
@_translate_ir.register
def _(
node: pl_ir.Reduce, visitor: NodeTraverser, schema: dict[str, plc.DataType]
Expand All @@ -205,6 +229,7 @@ def _(
return ir.Reduce(schema, inp, exprs)


@debug
@_translate_ir.register
def _(
node: pl_ir.Distinct, visitor: NodeTraverser, schema: dict[str, plc.DataType]
Expand All @@ -216,6 +241,7 @@ def _(
)


@debug
@_translate_ir.register
def _(
node: pl_ir.Sort, visitor: NodeTraverser, schema: dict[str, plc.DataType]
Expand All @@ -226,13 +252,15 @@ def _(
return ir.Sort(schema, inp, by, node.sort_options, node.slice)


# NOTE(review): if ``_translate_ir`` is a ``functools.singledispatch``
# function, ``.register`` registers and returns the undecorated function, so
# the ``@debug`` wrapper stacked above it is never invoked through dispatch.
@debug
@_translate_ir.register
def _(
    node: pl_ir.Slice, visitor: NodeTraverser, schema: dict[str, plc.DataType]
) -> ir.IR:
    """Translate a ``Slice`` node from its input, offset and length."""
    child = translate_ir(visitor, n=node.input)
    return ir.Slice(schema, child, node.offset, node.len)


@debug
@_translate_ir.register
def _(
node: pl_ir.Filter, visitor: NodeTraverser, schema: dict[str, plc.DataType]
Expand All @@ -243,6 +271,7 @@ def _(
return ir.Filter(schema, inp, mask)


@debug
@_translate_ir.register
def _(
node: pl_ir.SimpleProjection,
Expand All @@ -252,6 +281,7 @@ def _(
return ir.Projection(schema, translate_ir(visitor, n=node.input))


@debug
@_translate_ir.register
def _(
node: pl_ir.MapFunction, visitor: NodeTraverser, schema: dict[str, plc.DataType]
Expand All @@ -266,6 +296,7 @@ def _(
)


@debug
@_translate_ir.register
def _(
node: pl_ir.Union, visitor: NodeTraverser, schema: dict[str, plc.DataType]
Expand All @@ -275,13 +306,15 @@ def _(
)


# NOTE(review): if ``_translate_ir`` is a ``functools.singledispatch``
# function, ``.register`` registers and returns the undecorated function, so
# the ``@debug`` wrapper stacked above it is never invoked through dispatch.
@debug
@_translate_ir.register
def _(
    node: pl_ir.HConcat, visitor: NodeTraverser, schema: dict[str, plc.DataType]
) -> ir.IR:
    """Translate an ``HConcat`` node by translating each input in order."""
    children = [translate_ir(visitor, n=child) for child in node.inputs]
    return ir.HConcat(schema, children)


@debug
def translate_ir(visitor: NodeTraverser, *, n: int | None = None) -> ir.IR:
"""
Translate a polars-internal IR node to our representation.
Expand Down
30 changes: 30 additions & 0 deletions python/cudf_polars/cudf_polars/utils/other.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: Apache-2.0

"""Other utilities."""

from __future__ import annotations

import os


def _env_get_int(name, default):
"""Get the integer value of the environment variable."""
try:
return int(os.getenv(name, default))
except (ValueError, TypeError):
return default


def _env_get_bool(name, default):
"""Get the the boolean value of the environment variable."""
env = os.getenv(name)
if env is None:
return default
as_a_int = _env_get_int(name, None)
env = env.lower().strip()
if env == "true" or env == "on" or as_a_int:
return True
if env == "false" or env == "off" or as_a_int == 0:
return False
return default

0 comments on commit bb51789

Please sign in to comment.