Skip to content

Commit

Permalink
Migrate binary operations to pylibcudf (#14821)
Browse files Browse the repository at this point in the history
This PR migrates the binary operations in cuDF Python to pylibcudf.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Matthew Roeschke (https://github.com/mroeschke)

URL: #14821
  • Loading branch information
vyasr committed Jan 23, 2024
1 parent bc706af commit 48367a9
Show file tree
Hide file tree
Showing 13 changed files with 185 additions and 334 deletions.
6 changes: 6 additions & 0 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/binaryop.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
========
binaryop
========

.. automodule:: cudf._lib.pylibcudf.binaryop
    :members:
1 change: 1 addition & 0 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ This page provides API documentation for pylibcudf.
:maxdepth: 1
:caption: API Documentation

binaryop
column
copying
gpumemoryview
Expand Down
5 changes: 0 additions & 5 deletions python/cudf/cudf/_lib/binaryop.pxd

This file was deleted.

261 changes: 41 additions & 220 deletions python/cudf/cudf/_lib/binaryop.pyx
Original file line number Diff line number Diff line change
@@ -1,160 +1,30 @@
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.

from enum import IntEnum

from libcpp.memory cimport unique_ptr
from libcpp.string cimport string
from libcpp.utility cimport move

from cudf._lib.binaryop cimport underlying_type_t_binary_operator
from cudf._lib.column cimport Column

from cudf._lib.scalar import as_device_scalar

from cudf._lib.scalar cimport DeviceScalar
from cudf._lib.types cimport dtype_to_pylibcudf_type

from cudf._lib.types import SUPPORTED_NUMPY_TO_LIBCUDF_TYPES

from cudf._lib.cpp.column.column cimport column
from cudf._lib.cpp.column.column_view cimport column_view
from cudf._lib.cpp.scalar.scalar cimport scalar
from cudf._lib.cpp.types cimport data_type, type_id
from cudf._lib.types cimport dtype_to_data_type, underlying_type_t_type_id

from cudf.api.types import is_scalar
from cudf._lib import pylibcudf
from cudf._lib.scalar import as_device_scalar
from cudf.core.buffer import acquire_spill_lock

cimport cudf._lib.cpp.binaryop as cpp_binaryop
from cudf._lib.cpp.binaryop cimport binary_operator
import cudf


class BinaryOperation(IntEnum):
    """Integer enum exposing the C++ ``binary_operator`` enumerators.

    Every member is a ``binary_operator`` enumerator cast to
    ``underlying_type_t_binary_operator``. Some members carry a shorter
    name than the enumerator they wrap (for example ``MOD`` wraps
    ``PYMOD`` and ``EQ`` wraps ``EQUAL``).
    """

    # Arithmetic operators.
    ADD = <underlying_type_t_binary_operator> binary_operator.ADD
    SUB = <underlying_type_t_binary_operator> binary_operator.SUB
    MUL = <underlying_type_t_binary_operator> binary_operator.MUL
    DIV = <underlying_type_t_binary_operator> binary_operator.DIV
    TRUEDIV = <underlying_type_t_binary_operator> binary_operator.TRUE_DIV
    FLOORDIV = <underlying_type_t_binary_operator> binary_operator.FLOOR_DIV
    MOD = <underlying_type_t_binary_operator> binary_operator.PYMOD
    POW = <underlying_type_t_binary_operator> binary_operator.POW
    INT_POW = <underlying_type_t_binary_operator> binary_operator.INT_POW

    # Comparison operators.
    EQ = <underlying_type_t_binary_operator> binary_operator.EQUAL
    NE = <underlying_type_t_binary_operator> binary_operator.NOT_EQUAL
    LT = <underlying_type_t_binary_operator> binary_operator.LESS
    GT = <underlying_type_t_binary_operator> binary_operator.GREATER
    LE = <underlying_type_t_binary_operator> binary_operator.LESS_EQUAL
    GE = <underlying_type_t_binary_operator> binary_operator.GREATER_EQUAL

    # Bitwise operators.
    AND = <underlying_type_t_binary_operator> binary_operator.BITWISE_AND
    OR = <underlying_type_t_binary_operator> binary_operator.BITWISE_OR
    XOR = <underlying_type_t_binary_operator> binary_operator.BITWISE_XOR

    # Logical operators.
    L_AND = <underlying_type_t_binary_operator> binary_operator.LOGICAL_AND
    L_OR = <underlying_type_t_binary_operator> binary_operator.LOGICAL_OR

    # Remaining enumerators; these two keep the C++ enumerator's own name.
    GENERIC_BINARY = (
        <underlying_type_t_binary_operator> binary_operator.GENERIC_BINARY
    )
    NULL_EQUALS = (
        <underlying_type_t_binary_operator> binary_operator.NULL_EQUALS
    )


cdef binaryop_v_v(Column lhs, Column rhs,
                  binary_operator c_op, data_type c_dtype):
    # Column-with-column variant: apply `c_op` to the views of `lhs` and
    # `rhs`, requesting `c_dtype` as the output type, and return the result
    # as a Column.
    cdef column_view lhs_view = lhs.view()
    cdef column_view rhs_view = rhs.view()
    cdef unique_ptr[column] owned_result

    # The C++ call only receives C-level values, so it runs with the GIL
    # released.
    with nogil:
        owned_result = move(
            cpp_binaryop.binary_operation(lhs_view, rhs_view, c_op, c_dtype)
        )

    # Pass ownership of the C++ column on to Column.from_unique_ptr.
    return Column.from_unique_ptr(move(owned_result))


cdef binaryop_v_s(Column lhs, DeviceScalar rhs,
                  binary_operator c_op, data_type c_dtype):
    # Column-with-scalar variant: the column is the left operand and the
    # scalar the right operand of `c_op`; `c_dtype` is the requested output
    # type. Returns the result as a Column.
    cdef column_view lhs_view = lhs.view()
    cdef const scalar* rhs_ptr = rhs.get_raw_ptr()
    cdef unique_ptr[column] owned_result

    # The C++ call only receives C-level values, so it runs with the GIL
    # released. `rhs_ptr[0]` dereferences the raw scalar pointer.
    with nogil:
        owned_result = move(
            cpp_binaryop.binary_operation(lhs_view, rhs_ptr[0], c_op, c_dtype)
        )

    # Pass ownership of the C++ column on to Column.from_unique_ptr.
    return Column.from_unique_ptr(move(owned_result))

cdef binaryop_s_v(DeviceScalar lhs, Column rhs,
                  binary_operator c_op, data_type c_dtype):
    # Scalar-with-column variant: the scalar is the left operand and the
    # column the right operand of `c_op`; `c_dtype` is the requested output
    # type. Returns the result as a Column.
    cdef const scalar* lhs_ptr = lhs.get_raw_ptr()
    cdef column_view rhs_view = rhs.view()
    cdef unique_ptr[column] owned_result

    # The C++ call only receives C-level values, so it runs with the GIL
    # released. `lhs_ptr[0]` dereferences the raw scalar pointer.
    with nogil:
        owned_result = move(
            cpp_binaryop.binary_operation(lhs_ptr[0], rhs_view, c_op, c_dtype)
        )

    # Pass ownership of the C++ column on to Column.from_unique_ptr.
    return Column.from_unique_ptr(move(owned_result))
# Translation table from the upper-cased operation names used on the Python
# side to the corresponding pylibcudf operation names. Only names that differ
# are listed here.
_op_map = {
    # Arithmetic
    "TRUEDIV": "TRUE_DIV",
    "FLOORDIV": "FLOOR_DIV",
    "MOD": "PYMOD",
    # Comparison
    "EQ": "EQUAL",
    "NE": "NOT_EQUAL",
    "LT": "LESS",
    "GT": "GREATER",
    "LE": "LESS_EQUAL",
    "GE": "GREATER_EQUAL",
    # Bitwise
    "AND": "BITWISE_AND",
    "OR": "BITWISE_OR",
    "XOR": "BITWISE_XOR",
    # Logical
    "L_AND": "LOGICAL_AND",
    "L_OR": "LOGICAL_OR",
}


@acquire_spill_lock()
Expand All @@ -166,74 +36,25 @@ def binaryop(lhs, rhs, op, dtype):
# pipeline for libcudf binops that don't map to Python binops.
if op not in {"INT_POW", "NULL_EQUALS"}:
op = op[2:-2]

op = BinaryOperation[op.upper()]
cdef binary_operator c_op = <binary_operator> (
<underlying_type_t_binary_operator> op
)

cdef data_type c_dtype = dtype_to_data_type(dtype)

if is_scalar(lhs) or lhs is None:
s_lhs = as_device_scalar(lhs, dtype=rhs.dtype if lhs is None else None)
result = binaryop_s_v(
s_lhs,
rhs,
c_op,
c_dtype
)

elif is_scalar(rhs) or rhs is None:
s_rhs = as_device_scalar(rhs, dtype=lhs.dtype if rhs is None else None)
result = binaryop_v_s(
lhs,
s_rhs,
c_op,
c_dtype
)

else:
result = binaryop_v_v(
lhs,
rhs,
c_op,
c_dtype
)
return result


@acquire_spill_lock()
def binaryop_udf(Column lhs, Column rhs, udf_ptx, dtype):
"""
Apply a user-defined binary operator (a UDF) defined in `udf_ptx` on
the two input columns `lhs` and `rhs`. The output type of the UDF
has to be specified in `dtype`, a numpy data type.
Currently ONLY int32, int64, float32 and float64 are supported.
"""
cdef column_view c_lhs = lhs.view()
cdef column_view c_rhs = rhs.view()

cdef type_id tid = (
<type_id> (
<underlying_type_t_type_id> (
SUPPORTED_NUMPY_TO_LIBCUDF_TYPES[cudf.dtype(dtype)]
)
op = op.upper()
op = _op_map.get(op, op)

return Column.from_pylibcudf(
# Check if the dtype args are desirable here.
pylibcudf.binaryop.binary_operation(
lhs.to_pylibcudf(mode="read") if isinstance(lhs, Column)
else (
<DeviceScalar> as_device_scalar(
lhs, dtype=rhs.dtype if lhs is None else None
)
).c_value,
rhs.to_pylibcudf(mode="read") if isinstance(rhs, Column)
else (
<DeviceScalar> as_device_scalar(
rhs, dtype=lhs.dtype if rhs is None else None
)
).c_value,
pylibcudf.binaryop.BinaryOperator[op],
dtype_to_pylibcudf_type(dtype),
)
)
cdef data_type c_dtype = data_type(tid)

cdef string cpp_str = udf_ptx.encode("UTF-8")

cdef unique_ptr[column] c_result

with nogil:
c_result = move(
cpp_binaryop.binary_operation(
c_lhs,
c_rhs,
cpp_str,
c_dtype
)
)

return Column.from_unique_ptr(move(c_result))
4 changes: 2 additions & 2 deletions python/cudf/cudf/_lib/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# =============================================================================
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
Expand All @@ -12,7 +12,7 @@
# the License.
# =============================================================================

set(cython_sources copying.pyx types.pyx)
set(cython_sources binaryop.pyx copying.pyx types.pyx)

set(linked_libraries cudf::cudf)

Expand Down
Loading

0 comments on commit 48367a9

Please sign in to comment.