Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement concatenate, lists.explode, merge, sorting, and stream compaction in pylibcudf #15011

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
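6 changes: 6 additions & 0 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/concatenate.rst
Original file line number Diff line number Diff line change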
@@ -0,0 +1,6 @@
===========
concatenate
===========

.. automodule:: cudf._lib.pylibcudf.concatenate
    :members:
5 changes: 5 additions & 0 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,18 @@ This page provides API documentation for pylibcudf.
aggregation
binaryop
column
concatenate
copying
gpumemoryview
groupby
join
lists
merge
reduce
rolling
scalar
stream_compaction
sorting
replace
table
types
Expand Down
6 changes: 6 additions & 0 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/lists.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
=====
lists
=====

.. automodule:: cudf._lib.pylibcudf.lists
    :members:
6 changes: 6 additions & 0 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/merge.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
=====
merge
=====

.. automodule:: cudf._lib.pylibcudf.merge
    :members:
6 changes: 6 additions & 0 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/sorting.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
=======
sorting
=======

.. automodule:: cudf._lib.pylibcudf.sorting
    :members:
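6 changes: 6 additions & 0 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/stream_compaction.rst
Original file line number Diff line number Diff line change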
@@ -0,0 +1,6 @@
=================
stream_compaction
=================

.. automodule:: cudf._lib.pylibcudf.stream_compaction
    :members:
66 changes: 19 additions & 47 deletions python/cudf/cudf/_lib/concat.pyx
Original file line number Diff line number Diff line change
@@ -1,62 +1,34 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.

from libcpp cimport bool
from libcpp.memory cimport make_unique, unique_ptr
from libcpp.utility cimport move
from libcpp.vector cimport vector

from cudf._lib.column cimport Column
from cudf._lib.cpp.column.column cimport column, column_view
from cudf._lib.cpp.concatenate cimport (
concatenate_columns as libcudf_concatenate_columns,
concatenate_masks as libcudf_concatenate_masks,
concatenate_tables as libcudf_concatenate_tables,
)
from cudf._lib.cpp.table.table cimport table, table_view
from cudf._lib.utils cimport (
data_from_unique_ptr,
make_column_views,
table_view_from_table,
)

from cudf.core.buffer import acquire_spill_lock, as_buffer

from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer


cpdef concat_masks(object columns):
cdef device_buffer c_result
cdef unique_ptr[device_buffer] c_unique_result
cdef vector[column_view] c_views = make_column_views(columns)
with nogil:
c_result = move(libcudf_concatenate_masks(c_views))
c_unique_result = move(make_unique[device_buffer](move(c_result)))
return as_buffer(
DeviceBuffer.c_from_unique_ptr(move(c_unique_result))
)
from cudf._lib.utils cimport data_from_pylibcudf_table

from cudf._lib import pylibcudf
from cudf.core.buffer import acquire_spill_lock


@acquire_spill_lock()
def concat_columns(object columns):
cdef unique_ptr[column] c_result
cdef vector[column_view] c_views = make_column_views(columns)
with nogil:
c_result = move(libcudf_concatenate_columns(c_views))
return Column.from_unique_ptr(move(c_result))
return Column.from_pylibcudf(
pylibcudf.concatenate.concatenate(
[col.to_pylibcudf(mode="read") for col in columns]
)
)


@acquire_spill_lock()
def concat_tables(object tables, bool ignore_index=False):
cdef unique_ptr[table] c_result
cdef vector[table_view] c_views
c_views.reserve(len(tables))
for tbl in tables:
c_views.push_back(table_view_from_table(tbl, ignore_index))
with nogil:
c_result = move(libcudf_concatenate_tables(c_views))

return data_from_unique_ptr(
move(c_result),
plc_tables = []
for table in tables:
cols = table._data.columns
if not ignore_index:
cols = table._index._data.columns + cols
plc_tables.append(pylibcudf.Table([c.to_pylibcudf(mode="read") for c in cols]))

return data_from_pylibcudf_table(
pylibcudf.concatenate.concatenate(plc_tables),
column_names=tables[0]._column_names,
index_names=None if ignore_index else tables[0]._index_names
)
4 changes: 2 additions & 2 deletions python/cudf/cudf/_lib/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
# the License.
# =============================================================================

set(cython_sources aggregation.pyx binaryop.pyx copying.pyx replace.pyx reduce.pxd types.pyx
unary.pyx
set(cython_sources aggregation.pyx binaryop.pyx copying.pyx replace.pyx reduce.pxd
stream_compaction.pyx types.pyx unary.pyx
)

set(linked_libraries cudf::cudf)
Expand Down
17 changes: 4 additions & 13 deletions python/cudf/cudf/_lib/cpp/concatenate.pxd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.

from libcpp.memory cimport unique_ptr
from libcpp.vector cimport vector
Expand All @@ -16,16 +16,7 @@ cdef extern from "cudf/concatenate.hpp" namespace "cudf" nogil:
# constructable from a vector. In case they are needed in the future,
# host_span versions can be added, e.g:
#
# cdef device_buffer concatenate_masks "cudf::concatenate_masks"(
# host_span[column_view] views
# ) except +
# cdef unique_ptr[column] concatenate(host_span[column_view] columns) except +

cdef device_buffer concatenate_masks "cudf::concatenate_masks"(
const vector[column_view] views
) except +
cdef unique_ptr[column] concatenate_columns "cudf::concatenate"(
const vector[column_view] columns
) except +
cdef unique_ptr[table] concatenate_tables "cudf::concatenate"(
const vector[table_view] tables
) except +
cdef unique_ptr[column] concatenate(const vector[column_view] columns) except +
cdef unique_ptr[table] concatenate(const vector[table_view] tables) except +
15 changes: 7 additions & 8 deletions python/cudf/cudf/_lib/cpp/stream_compaction.pxd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2023, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.

from libcpp cimport bool
from libcpp.memory cimport unique_ptr
Expand All @@ -19,13 +19,12 @@ from cudf._lib.cpp.types cimport (
)


cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" \
nogil:
ctypedef enum duplicate_keep_option:
KEEP_ANY 'cudf::duplicate_keep_option::KEEP_ANY'
KEEP_FIRST 'cudf::duplicate_keep_option::KEEP_FIRST'
KEEP_LAST 'cudf::duplicate_keep_option::KEEP_LAST'
KEEP_NONE 'cudf::duplicate_keep_option::KEEP_NONE'
cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil:
cpdef enum class duplicate_keep_option:
KEEP_ANY
KEEP_FIRST
KEEP_LAST
KEEP_NONE

cdef unique_ptr[table] drop_nulls(table_view source_table,
vector[size_type] keys,
Expand Down
Empty file.
28 changes: 13 additions & 15 deletions python/cudf/cudf/_lib/lists.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,11 @@ from cudf._lib.cpp.lists.contains cimport contains, index_of as cpp_index_of
from cudf._lib.cpp.lists.count_elements cimport (
count_elements as cpp_count_elements,
)
from cudf._lib.cpp.lists.explode cimport explode_outer as cpp_explode_outer
from cudf._lib.cpp.lists.extract cimport extract_list_element
from cudf._lib.cpp.lists.lists_column_view cimport lists_column_view
from cudf._lib.cpp.lists.sorting cimport sort_lists as cpp_sort_lists
from cudf._lib.cpp.lists.stream_compaction cimport distinct as cpp_distinct
from cudf._lib.cpp.scalar.scalar cimport scalar
from cudf._lib.cpp.table.table cimport table
from cudf._lib.cpp.table.table_view cimport table_view
from cudf._lib.cpp.types cimport (
nan_equality,
Expand All @@ -34,7 +32,12 @@ from cudf._lib.cpp.types cimport (
size_type,
)
from cudf._lib.scalar cimport DeviceScalar
from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
from cudf._lib.utils cimport (
columns_from_pylibcudf_table,
table_view_from_columns,
)

from cudf._lib import pylibcudf


@acquire_spill_lock()
Expand All @@ -55,18 +58,13 @@ def count_elements(Column col):


@acquire_spill_lock()
def explode_outer(
list source_columns, int explode_column_idx
):
cdef table_view c_table_view = table_view_from_columns(source_columns)
cdef size_type c_explode_column_idx = explode_column_idx

cdef unique_ptr[table] c_result

with nogil:
c_result = move(cpp_explode_outer(c_table_view, c_explode_column_idx))

return columns_from_unique_ptr(move(c_result))
def explode_outer(list source_columns, int explode_column_idx):
return columns_from_pylibcudf_table(
pylibcudf.lists.explode_outer(
pylibcudf.Table([c.to_pylibcudf(mode="read") for c in source_columns]),
explode_column_idx,
)
)


@acquire_spill_lock()
Expand Down
63 changes: 22 additions & 41 deletions python/cudf/cudf/_lib/merge.pyx
Original file line number Diff line number Diff line change
@@ -1,15 +1,10 @@
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.

from libcpp cimport bool
from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move
from libcpp.vector cimport vector

cimport cudf._lib.cpp.types as libcudf_types
from cudf._lib.cpp.merge cimport merge as cpp_merge
from cudf._lib.cpp.table.table cimport table
from cudf._lib.cpp.table.table_view cimport table_view
from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
from cudf._lib.utils cimport columns_from_pylibcudf_table

from cudf._lib import pylibcudf


def merge_sorted(
Expand All @@ -22,45 +17,31 @@ def merge_sorted(
of sorted columns. `input_columns` is a list of lists of columns to be
merged.
"""
cdef vector[libcudf_types.size_type] c_column_keys = key_columns_indices
cdef vector[table_view] c_input_tables
cdef vector[libcudf_types.order] c_column_order
cdef vector[libcudf_types.null_order] c_null_precedence

c_input_tables.reserve(len(input_columns))
for source_columns in input_columns:
c_input_tables.push_back(
table_view_from_columns(source_columns))
c_input_tables = [
pylibcudf.Table(
[c.to_pylibcudf(mode="read") for c in source_columns]
) for source_columns in input_columns
]

num_keys = len(key_columns_indices)

cdef libcudf_types.order column_order = (
libcudf_types.order.ASCENDING if ascending
else libcudf_types.order.DESCENDING
column_order = (
pylibcudf.types.Order.ASCENDING if ascending
else pylibcudf.types.Order.DESCENDING
)
c_column_order = vector[libcudf_types.order](num_keys, column_order)

if not ascending:
na_position = "last" if na_position == "first" else "first"
cdef libcudf_types.null_order null_precedence = (
libcudf_types.null_order.BEFORE if na_position == "first"
else libcudf_types.null_order.AFTER
)
c_null_precedence = vector[libcudf_types.null_order](
num_keys,
null_precedence
null_precedence = (
pylibcudf.types.NullOrder.BEFORE if na_position == "first"
else pylibcudf.types.NullOrder.AFTER
)

# Perform sorted merge operation
cdef unique_ptr[table] c_result
with nogil:
c_result = move(
cpp_merge(
c_input_tables,
c_column_keys,
c_column_order,
c_null_precedence,
)
return columns_from_pylibcudf_table(
pylibcudf.merge.merge(
c_input_tables,
key_columns_indices,
[column_order] * num_keys,
[null_precedence] * num_keys,
)

return columns_from_unique_ptr(move(c_result))
)
23 changes: 21 additions & 2 deletions python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,27 @@
# =============================================================================

set(cython_sources
aggregation.pyx binaryop.pyx column.pyx copying.pyx gpumemoryview.pyx groupby.pyx interop.pyx
join.pyx reduce.pyx replace.pyx rolling.pyx scalar.pyx table.pyx types.pyx unary.pyx utils.pyx
aggregation.pyx
binaryop.pyx
column.pyx
concatenate.pyx
copying.pyx
gpumemoryview.pyx
groupby.pyx
interop.pyx
join.pyx
lists.pyx
merge.pyx
reduce.pyx
replace.pyx
rolling.pyx
scalar.pyx
stream_compaction.pyx
sorting.pyx
table.pyx
types.pyx
unary.pyx
utils.pyx
)
set(linked_libraries cudf::cudf)
rapids_cython_create_modules(
Expand Down
Loading
Loading