Migrate binary operations to pylibcudf (#14821)

This PR migrates the binary operations in cuDF Python to pylibcudf.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Matthew Roeschke (https://github.com/mroeschke)

URL: #14821
rapidsai · Jan 23, 2024 · 48367a9 · 48367a9
1 parent bc706af
commit 48367a9
Show file tree

Hide file tree

Showing 13 changed files with 185 additions and 334 deletions.
diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/binaryop.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/binaryop.rst
@@ -0,0 +1,6 @@
+========
+binaryop
+========
+
+.. automodule:: cudf._lib.pylibcudf.binaryop
+   :members:
diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
@@ -8,6 +8,7 @@ This page provides API documentation for pylibcudf.
     :maxdepth: 1
     :caption: API Documentation
 
+    binaryop
     column
     copying
     gpumemoryview

diff --git a/python/cudf/cudf/_lib/binaryop.pxd b/python/cudf/cudf/_lib/binaryop.pxd
diff --git a/python/cudf/cudf/_lib/binaryop.pyx b/python/cudf/cudf/_lib/binaryop.pyx
@@ -1,160 +1,30 @@
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
-from enum import IntEnum
-
-from libcpp.memory cimport unique_ptr
-from libcpp.string cimport string
-from libcpp.utility cimport move
-
-from cudf._lib.binaryop cimport underlying_type_t_binary_operator
 from cudf._lib.column cimport Column
-
-from cudf._lib.scalar import as_device_scalar
-
 from cudf._lib.scalar cimport DeviceScalar
+from cudf._lib.types cimport dtype_to_pylibcudf_type
 
-from cudf._lib.types import SUPPORTED_NUMPY_TO_LIBCUDF_TYPES
-
-from cudf._lib.cpp.column.column cimport column
-from cudf._lib.cpp.column.column_view cimport column_view
-from cudf._lib.cpp.scalar.scalar cimport scalar
-from cudf._lib.cpp.types cimport data_type, type_id
-from cudf._lib.types cimport dtype_to_data_type, underlying_type_t_type_id
-
-from cudf.api.types import is_scalar
+from cudf._lib import pylibcudf
+from cudf._lib.scalar import as_device_scalar
 from cudf.core.buffer import acquire_spill_lock
 
-cimport cudf._lib.cpp.binaryop as cpp_binaryop
-from cudf._lib.cpp.binaryop cimport binary_operator
-import cudf
-
-
-class BinaryOperation(IntEnum):
-    ADD = (
-        <underlying_type_t_binary_operator> binary_operator.ADD
-    )
-    SUB = (
-        <underlying_type_t_binary_operator> binary_operator.SUB
-    )
-    MUL = (
-        <underlying_type_t_binary_operator> binary_operator.MUL
-    )
-    DIV = (
-        <underlying_type_t_binary_operator> binary_operator.DIV
-    )
-    TRUEDIV = (
-        <underlying_type_t_binary_operator> binary_operator.TRUE_DIV
-    )
-    FLOORDIV = (
-        <underlying_type_t_binary_operator> binary_operator.FLOOR_DIV
-    )
-    MOD = (
-        <underlying_type_t_binary_operator> binary_operator.PYMOD
-    )
-    POW = (
-        <underlying_type_t_binary_operator> binary_operator.POW
-    )
-    INT_POW = (
-        <underlying_type_t_binary_operator> binary_operator.INT_POW
-    )
-    EQ = (
-        <underlying_type_t_binary_operator> binary_operator.EQUAL
-    )
-    NE = (
-        <underlying_type_t_binary_operator> binary_operator.NOT_EQUAL
-    )
-    LT = (
-        <underlying_type_t_binary_operator> binary_operator.LESS
-    )
-    GT = (
-        <underlying_type_t_binary_operator> binary_operator.GREATER
-    )
-    LE = (
-        <underlying_type_t_binary_operator> binary_operator.LESS_EQUAL
-    )
-    GE = (
-        <underlying_type_t_binary_operator> binary_operator.GREATER_EQUAL
-    )
-    AND = (
-        <underlying_type_t_binary_operator> binary_operator.BITWISE_AND
-    )
-    OR = (
-        <underlying_type_t_binary_operator> binary_operator.BITWISE_OR
-    )
-    XOR = (
-        <underlying_type_t_binary_operator> binary_operator.BITWISE_XOR
-    )
-    L_AND = (
-        <underlying_type_t_binary_operator> binary_operator.LOGICAL_AND
-    )
-    L_OR = (
-        <underlying_type_t_binary_operator> binary_operator.LOGICAL_OR
-    )
-    GENERIC_BINARY = (
-        <underlying_type_t_binary_operator> binary_operator.GENERIC_BINARY
-    )
-    NULL_EQUALS = (
-        <underlying_type_t_binary_operator> binary_operator.NULL_EQUALS
-    )
-
-
-cdef binaryop_v_v(Column lhs, Column rhs,
-                  binary_operator c_op, data_type c_dtype):
-    cdef column_view c_lhs = lhs.view()
-    cdef column_view c_rhs = rhs.view()
-
-    cdef unique_ptr[column] c_result
-
-    with nogil:
-        c_result = move(
-            cpp_binaryop.binary_operation(
-                c_lhs,
-                c_rhs,
-                c_op,
-                c_dtype
-            )
-        )
-
-    return Column.from_unique_ptr(move(c_result))
-
-
-cdef binaryop_v_s(Column lhs, DeviceScalar rhs,
-                  binary_operator c_op, data_type c_dtype):
-    cdef column_view c_lhs = lhs.view()
-    cdef const scalar* c_rhs = rhs.get_raw_ptr()
-
-    cdef unique_ptr[column] c_result
-
-    with nogil:
-        c_result = move(
-            cpp_binaryop.binary_operation(
-                c_lhs,
-                c_rhs[0],
-                c_op,
-                c_dtype
-            )
-        )
-
-    return Column.from_unique_ptr(move(c_result))
-
-cdef binaryop_s_v(DeviceScalar lhs, Column rhs,
-                  binary_operator c_op, data_type c_dtype):
-    cdef const scalar* c_lhs = lhs.get_raw_ptr()
-    cdef column_view c_rhs = rhs.view()
-
-    cdef unique_ptr[column] c_result
-
-    with nogil:
-        c_result = move(
-            cpp_binaryop.binary_operation(
-                c_lhs[0],
-                c_rhs,
-                c_op,
-                c_dtype
-            )
-        )
-
-    return Column.from_unique_ptr(move(c_result))
+# Map pandas operation names to pylibcudf operation names.
+_op_map = {
+    "TRUEDIV": "TRUE_DIV",
+    "FLOORDIV": "FLOOR_DIV",
+    "MOD": "PYMOD",
+    "EQ": "EQUAL",
+    "NE": "NOT_EQUAL",
+    "LT": "LESS",
+    "GT": "GREATER",
+    "LE": "LESS_EQUAL",
+    "GE": "GREATER_EQUAL",
+    "AND": "BITWISE_AND",
+    "OR": "BITWISE_OR",
+    "XOR": "BITWISE_XOR",
+    "L_AND": "LOGICAL_AND",
+    "L_OR": "LOGICAL_OR",
+}
 
 
 @acquire_spill_lock()
@@ -166,74 +36,25 @@ def binaryop(lhs, rhs, op, dtype):
     # pipeline for libcudf binops that don't map to Python binops.
     if op not in {"INT_POW", "NULL_EQUALS"}:
         op = op[2:-2]
-
-    op = BinaryOperation[op.upper()]
-    cdef binary_operator c_op = <binary_operator> (
-        <underlying_type_t_binary_operator> op
-    )
-
-    cdef data_type c_dtype = dtype_to_data_type(dtype)
-
-    if is_scalar(lhs) or lhs is None:
-        s_lhs = as_device_scalar(lhs, dtype=rhs.dtype if lhs is None else None)
-        result = binaryop_s_v(
-            s_lhs,
-            rhs,
-            c_op,
-            c_dtype
-        )
-
-    elif is_scalar(rhs) or rhs is None:
-        s_rhs = as_device_scalar(rhs, dtype=lhs.dtype if rhs is None else None)
-        result = binaryop_v_s(
-            lhs,
-            s_rhs,
-            c_op,
-            c_dtype
-        )
-
-    else:
-        result = binaryop_v_v(
-            lhs,
-            rhs,
-            c_op,
-            c_dtype
-        )
-    return result
-
-
-@acquire_spill_lock()
-def binaryop_udf(Column lhs, Column rhs, udf_ptx, dtype):
-    """
-    Apply a user-defined binary operator (a UDF) defined in `udf_ptx` on
-    the two input columns `lhs` and `rhs`. The output type of the UDF
-    has to be specified in `dtype`, a numpy data type.
-    Currently ONLY int32, int64, float32 and float64 are supported.
-    """
-    cdef column_view c_lhs = lhs.view()
-    cdef column_view c_rhs = rhs.view()
-
-    cdef type_id tid = (
-        <type_id> (
-            <underlying_type_t_type_id> (
-                SUPPORTED_NUMPY_TO_LIBCUDF_TYPES[cudf.dtype(dtype)]
-            )
+    op = op.upper()
+    op = _op_map.get(op, op)
+
+    return Column.from_pylibcudf(
+        # Check if the dtype args are desirable here.
+        pylibcudf.binaryop.binary_operation(
+            lhs.to_pylibcudf(mode="read") if isinstance(lhs, Column)
+            else (
+                <DeviceScalar> as_device_scalar(
+                    lhs, dtype=rhs.dtype if lhs is None else None
+                )
+            ).c_value,
+            rhs.to_pylibcudf(mode="read") if isinstance(rhs, Column)
+            else (
+                <DeviceScalar> as_device_scalar(
+                    rhs, dtype=lhs.dtype if rhs is None else None
+                )
+            ).c_value,
+            pylibcudf.binaryop.BinaryOperator[op],
+            dtype_to_pylibcudf_type(dtype),
         )
     )
-    cdef data_type c_dtype = data_type(tid)
-
-    cdef string cpp_str = udf_ptx.encode("UTF-8")
-
-    cdef unique_ptr[column] c_result
-
-    with nogil:
-        c_result = move(
-            cpp_binaryop.binary_operation(
-                c_lhs,
-                c_rhs,
-                cpp_str,
-                c_dtype
-            )
-        )
-
-    return Column.from_unique_ptr(move(c_result))
diff --git a/python/cudf/cudf/_lib/cpp/CMakeLists.txt b/python/cudf/cudf/_lib/cpp/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -12,7 +12,7 @@
 # the License.
 # =============================================================================
 
-set(cython_sources copying.pyx types.pyx)
+set(cython_sources binaryop.pyx copying.pyx types.pyx)
 
 set(linked_libraries cudf::cudf)