Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate filling operations to pylibcudf #15225

Merged
merged 12 commits into from
Mar 7, 2024
6 changes: 6 additions & 0 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/filling.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
========
filling
========

.. automodule:: cudf._lib.pylibcudf.filling
:members:
1 change: 1 addition & 0 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ This page provides API documentation for pylibcudf.
column
concatenate
copying
filling
gpumemoryview
groupby
join
Expand Down
110 changes: 32 additions & 78 deletions python/cudf/cudf/_lib/filling.pyx
Original file line number Diff line number Diff line change
@@ -1,103 +1,57 @@
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.

from cudf.core.buffer import acquire_spill_lock

from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move

cimport cudf._lib.cpp.filling as cpp_filling
from cudf._lib.column cimport Column
from cudf._lib.cpp.column.column cimport column
from cudf._lib.cpp.column.column_view cimport column_view, mutable_column_view
from cudf._lib.cpp.scalar.scalar cimport scalar
from cudf._lib.cpp.table.table cimport table
from cudf._lib.cpp.table.table_view cimport table_view
from cudf._lib.cpp.types cimport size_type
from cudf._lib.scalar cimport DeviceScalar
from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
from cudf._lib.utils cimport columns_from_pylibcudf_table

from cudf._lib import pylibcudf
from cudf._lib.scalar import as_device_scalar


@acquire_spill_lock()
def fill_in_place(Column destination, int begin, int end, DeviceScalar value):
cdef mutable_column_view c_destination = destination.mutable_view()
cdef size_type c_begin = <size_type> begin
cdef size_type c_end = <size_type> end
cdef const scalar* c_value = value.get_raw_ptr()

cpp_filling.fill_in_place(
c_destination,
c_begin,
c_end,
c_value[0]
pylibcudf.filling.fill_in_place(
destination.to_pylibcudf(mode='write'),
begin,
end,
(<DeviceScalar> as_device_scalar(value, dtype=destination.dtype)).c_value
)


@acquire_spill_lock()
def fill(Column destination, int begin, int end, DeviceScalar value):
cdef column_view c_destination = destination.view()
cdef size_type c_begin = <size_type> begin
cdef size_type c_end = <size_type> end
cdef const scalar* c_value = value.get_raw_ptr()
cdef unique_ptr[column] c_result

with nogil:
c_result = move(cpp_filling.fill(
c_destination,
c_begin,
c_end,
c_value[0]
))

return Column.from_unique_ptr(move(c_result))
return Column.from_pylibcudf(
pylibcudf.filling.fill(
destination.to_pylibcudf(mode='read'),
begin,
end,
(<DeviceScalar> as_device_scalar(value)).c_value
)
)


@acquire_spill_lock()
def repeat(list inp, object count):
ctbl = pylibcudf.Table([col.to_pylibcudf(mode="read") for col in inp])
if isinstance(count, Column):
return _repeat_via_column(inp, count)
else:
return _repeat_via_size_type(inp, count)


def _repeat_via_column(list inp, Column count):
cdef table_view c_inp = table_view_from_columns(inp)
cdef column_view c_count = count.view()
cdef unique_ptr[table] c_result

with nogil:
c_result = move(cpp_filling.repeat(
c_inp,
c_count,
))

return columns_from_unique_ptr(move(c_result))


def _repeat_via_size_type(list inp, size_type count):
cdef table_view c_inp = table_view_from_columns(inp)
cdef unique_ptr[table] c_result

with nogil:
c_result = move(cpp_filling.repeat(
c_inp,
count = count.to_pylibcudf(mode="read")
return columns_from_pylibcudf_table(
pylibcudf.filling.repeat(
ctbl,
count
))

return columns_from_unique_ptr(move(c_result))
)
)


@acquire_spill_lock()
def sequence(int size, DeviceScalar init, DeviceScalar step):
cdef size_type c_size = size
cdef const scalar* c_init = init.get_raw_ptr()
cdef const scalar* c_step = step.get_raw_ptr()
cdef unique_ptr[column] c_result

with nogil:
c_result = move(cpp_filling.sequence(
c_size,
c_init[0],
c_step[0]
))

return Column.from_unique_ptr(move(c_result))
return Column.from_pylibcudf(
pylibcudf.filling.sequence(
size,
(<DeviceScalar> as_device_scalar(init)).c_value,
(<DeviceScalar> as_device_scalar(step)).c_value
)
)
1 change: 1 addition & 0 deletions python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ set(cython_sources
column.pyx
concatenate.pyx
copying.pyx
filling.pyx
gpumemoryview.pyx
groupby.pyx
interop.pyx
Expand Down
2 changes: 2 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/__init__.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ from . cimport (
binaryop,
concatenate,
copying,
filling,
groupby,
interop,
join,
Expand Down Expand Up @@ -37,6 +38,7 @@ __all__ = [
"binaryop",
"concatenate",
"copying",
"filling",
"gpumemoryview",
"groupby",
"interop",
Expand Down
2 changes: 2 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
binaryop,
concatenate,
copying,
filling,
groupby,
interop,
join,
Expand Down Expand Up @@ -35,6 +36,7 @@
"binaryop",
"concatenate",
"copying",
"filling",
"gpumemoryview",
"groupby",
"interop",
Expand Down
32 changes: 32 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/filling.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
from cudf._lib.cpp.types cimport size_type

from .column cimport Column
from .scalar cimport Scalar
from .table cimport Table

# Fused type: either a Column or a size_type. It is the type of the
# ``count`` argument in the ``repeat`` declaration in this file.
ctypedef fused ColumnOrSize:
Column
size_type

# Declarations of the cpdef functions implemented in filling.pyx.
# Arguments declared as ``object`` are cast to Column / Scalar inside the
# implementations.
# NOTE(review): the implementations name the range parameters ``begin`` and
# ``end`` rather than ``c_begin`` / ``c_end`` -- confirm whether the names
# should be aligned.
cpdef Column fill(
object destination,
size_type c_begin,
size_type c_end,
object value,
)
cpdef void fill_in_place(
object destination,
size_type c_begin,
size_type c_end,
object value,
)
cpdef Column sequence(
size_type size,
object init,
object step,
)
brandon-b-miller marked this conversation as resolved.
Show resolved Hide resolved
# ``count`` is declared with the fused ColumnOrSize type, so it may be either
# a Column or a size_type.
cpdef Table repeat(
Table input_table,
ColumnOrSize count
)
163 changes: 163 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/filling.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from cython.operator cimport dereference
from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move

from cudf._lib.cpp.column.column cimport column
from cudf._lib.cpp.filling cimport (
fill as cpp_fill,
fill_in_place as cpp_fill_in_place,
repeat as cpp_repeat,
sequence as cpp_sequence,
)
from cudf._lib.cpp.table.table cimport table
from cudf._lib.cpp.types cimport size_type

from .column cimport Column
from .scalar cimport Scalar
from .table cimport Table


cpdef Column fill(
    object destination,
    size_type begin,
    size_type end,
    object value,
):
    """Fill a range of a column with a scalar and return the result.

    ``destination`` must be a
    :py:class:`~cudf._lib.pylibcudf.column.Column` and ``value`` must be a
    :py:class:`~cudf._lib.pylibcudf.scalar.Scalar`.

    For details, see :cpp:func:`fill`.

    Parameters
    ----------
    destination : Column
        The column to be filled
    begin : size_type
        The index to begin filling from.
    end : size_type
        The index at which to stop filling.
    value : Scalar
        The value to fill with.

    Returns
    -------
    pylibcudf.Column
        The result of the filling operation
    """
    # The signature only declares ``object``; these casts are unchecked, so
    # callers are responsible for passing a Column and a Scalar.
    cdef Column c_destination = <Column> destination
    cdef Scalar c_value = <Scalar> value
    cdef unique_ptr[column] c_result

    with nogil:
        c_result = move(
            cpp_fill(
                c_destination.view(),
                begin,
                end,
                dereference(c_value.c_obj),
            )
        )
    return Column.from_libcudf(move(c_result))

cpdef void fill_in_place(
    object destination,
    size_type begin,
    size_type end,
    object value,
):
    """Fill a range of a column with a scalar, modifying it in place.

    ``destination`` must be a
    :py:class:`~cudf._lib.pylibcudf.column.Column` and ``value`` must be a
    :py:class:`~cudf._lib.pylibcudf.scalar.Scalar`.

    For details, see :cpp:func:`fill_in_place`.

    Parameters
    ----------
    destination : Column
        The column to be filled
    begin : size_type
        The index to begin filling from.
    end : size_type
        The index at which to stop filling.
    value : Scalar
        The value to fill with.
    """
    # The signature only declares ``object``; these casts are unchecked, so
    # callers are responsible for passing a Column and a Scalar.
    cdef Column c_destination = <Column> destination
    cdef Scalar c_value = <Scalar> value

    with nogil:
        # The mutable view lets libcudf write into the destination's data.
        cpp_fill_in_place(
            c_destination.mutable_view(),
            begin,
            end,
            dereference(c_value.c_obj),
        )

cpdef Column sequence(size_type size, object init, object step):
    """Create a sequence column of size ``size`` with initial value ``init``
    and step ``step``.

    ``init`` and ``step`` must each be a
    :py:class:`~cudf._lib.pylibcudf.scalar.Scalar`.

    Parameters
    ----------
    size : int
        The size of the sequence
    init : Scalar
        The initial value of the sequence
    step : Scalar
        The step of the sequence

    Returns
    -------
    pylibcudf.Column
        The result of the sequence operation
    """
    # The signature only declares ``object``; these casts are unchecked, so
    # callers are responsible for passing Scalars.
    cdef Scalar c_init = <Scalar> init
    cdef Scalar c_step = <Scalar> step
    cdef unique_ptr[column] c_result

    with nogil:
        c_result = move(
            cpp_sequence(
                size,
                dereference(c_init.c_obj),
                dereference(c_step.c_obj),
            )
        )
    return Column.from_libcudf(move(c_result))


cpdef Table repeat(
    Table input_table,
    ColumnOrSize count
):
    """Repeat the rows of a Table.

    Rows are repeated either ``count`` times or as specified by an
    integral column. If ``count`` is a column, the number of repetitions
    of each row is defined by the value at the corresponding index of
    ``count``.

    For details, see :cpp:func:`repeat`.

    Parameters
    ----------
    input_table : Table
        The table to be repeated
    count : Union[Column, size_type]
        Integer value to repeat each row by or
        Non-nullable column of an integral type

    Returns
    -------
    pylibcudf.Table
        The result of the repeat operation
    """
    cdef unique_ptr[table] c_result

    # ``ColumnOrSize`` is a fused type, so exactly one of these branches
    # applies to any given specialization of the function.
    if ColumnOrSize is Column:
        with nogil:
            c_result = move(
                cpp_repeat(input_table.view(), count.view())
            )
    elif ColumnOrSize is size_type:
        with nogil:
            c_result = move(
                cpp_repeat(input_table.view(), count)
            )
    return Table.from_libcudf(move(c_result))
Loading