Skip to content

Commit

Permalink
Migrate reshape.pxd to pylibcudf (#15827)
Browse files Browse the repository at this point in the history
xref #15162

Authors:
  - Thomas Li (https://github.com/lithomas1)

Approvers:
  - Matthew Roeschke (https://github.com/mroeschke)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: #15827
  • Loading branch information
lithomas1 committed May 24, 2024
1 parent d756c37 commit 8458306
Show file tree
Hide file tree
Showing 9 changed files with 147 additions and 24 deletions.
1 change: 1 addition & 0 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ This page provides API documentation for pylibcudf.
lists
merge
reduce
reshape
rolling
scalar
search
Expand Down
6 changes: 6 additions & 0 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/reshape.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
=======
reshape
=======

.. automodule:: cudf._lib.pylibcudf.reshape
    :members:
1 change: 1 addition & 0 deletions python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ set(cython_sources
merge.pyx
reduce.pyx
replace.pyx
reshape.pyx
rolling.pyx
scalar.pyx
search.pyx
Expand Down
1 change: 1 addition & 0 deletions python/cudf/cudf/_lib/pylibcudf/__init__.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ from . cimport (
merge,
reduce,
replace,
reshape,
rolling,
search,
sorting,
Expand Down
1 change: 1 addition & 0 deletions python/cudf/cudf/_lib/pylibcudf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
merge,
reduce,
replace,
reshape,
rolling,
search,
sorting,
Expand Down
11 changes: 11 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/reshape.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from cudf._lib.pylibcudf.libcudf.types cimport size_type

from .column cimport Column
from .scalar cimport Scalar
from .table cimport Table


# Public declarations for the reshape API; the bodies are in reshape.pyx.

# Interleave the columns of ``source_table`` into a single Column.
cpdef Column interleave_columns(Table source_table)
# Repeat the rows of ``source_table`` ``count`` times, returning a new Table.
cpdef Table tile(Table source_table, size_type count)
65 changes: 65 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/reshape.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move

from cudf._lib.pylibcudf.libcudf.column.column cimport column
from cudf._lib.pylibcudf.libcudf.reshape cimport (
interleave_columns as cpp_interleave_columns,
tile as cpp_tile,
)
from cudf._lib.pylibcudf.libcudf.table.table cimport table
from cudf._lib.pylibcudf.libcudf.types cimport size_type

from .column cimport Column
from .table cimport Table


cpdef Column interleave_columns(Table source_table):
    """Interleave the columns of a table into a single column.

    The column-major ``source_table`` is converted into one row-major
    column.

    Example::

        source_table = [[A1, A2, A3], [B1, B2, B3]]
        result       = [A1, B1, A2, B2, A3, B3]

    Parameters
    ----------
    source_table : Table
        The input table to interleave.

    Returns
    -------
    Column
        A new column which is the result of interleaving the input columns.
    """
    cdef unique_ptr[column] c_interleaved

    # Release the GIL while the libcudf call runs.
    with nogil:
        c_interleaved = move(cpp_interleave_columns(source_table.view()))

    # Hand ownership of the libcudf column over to the pylibcudf wrapper.
    return Column.from_libcudf(move(c_interleaved))


cpdef Table tile(Table source_table, size_type count):
    """Repeat the rows of a table ``count`` times to form a new table.

    Parameters
    ----------
    source_table : Table
        The input table containing the rows to be repeated.
    count : size_type
        The number of times to tile the rows. Must be non-negative.

    Returns
    -------
    Table
        The table containing the tiled rows.
    """
    cdef unique_ptr[table] c_tiled

    # Release the GIL while the libcudf call runs.
    with nogil:
        c_tiled = move(cpp_tile(source_table.view(), count))

    # Hand ownership of the libcudf table over to the pylibcudf wrapper.
    return Table.from_libcudf(move(c_tiled))
42 changes: 18 additions & 24 deletions python/cudf/cudf/_lib/reshape.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2,39 +2,33 @@

from cudf.core.buffer import acquire_spill_lock

from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move

from cudf._lib.column cimport Column
from cudf._lib.pylibcudf.libcudf.column.column cimport column
from cudf._lib.pylibcudf.libcudf.reshape cimport (
interleave_columns as cpp_interleave_columns,
tile as cpp_tile,
)
from cudf._lib.pylibcudf.libcudf.table.table cimport table
from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
from cudf._lib.pylibcudf.libcudf.types cimport size_type
from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
from cudf._lib.utils cimport columns_from_pylibcudf_table

import cudf._lib.pylibcudf as plc


@acquire_spill_lock()
def interleave_columns(list source_columns):
cdef table_view c_view = table_view_from_columns(source_columns)
cdef unique_ptr[column] c_result

with nogil:
c_result = move(cpp_interleave_columns(c_view))

return Column.from_unique_ptr(move(c_result))
return Column.from_pylibcudf(
plc.reshape.interleave_columns(
plc.Table([
c.to_pylibcudf(mode="read") for c in source_columns
])
)
)


@acquire_spill_lock()
def tile(list source_columns, size_type count):
cdef size_type c_count = count
cdef table_view c_view = table_view_from_columns(source_columns)
cdef unique_ptr[table] c_result

with nogil:
c_result = move(cpp_tile(c_view, c_count))

return columns_from_unique_ptr(move(c_result))
return columns_from_pylibcudf_table(
plc.reshape.tile(
plc.Table([
c.to_pylibcudf(mode="read") for c in source_columns
]),
c_count
)
)
43 changes: 43 additions & 0 deletions python/cudf/cudf/pylibcudf_tests/test_reshape.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

import pyarrow as pa
import pytest
from utils import assert_column_eq, assert_table_eq

from cudf._lib import pylibcudf as plc


@pytest.fixture(scope="module")
def reshape_data():
    """Return the two equal-length integer columns shared by the reshape tests."""
    return [[1, 2, 3], [4, 5, 6]]


@pytest.fixture(scope="module")
def reshape_plc_tbl(reshape_data):
    """Return ``reshape_data`` as a pylibcudf table with columns "a" and "b"."""
    return plc.interop.from_arrow(
        pa.Table.from_arrays(reshape_data, names=["a", "b"])
    )


def test_interleave_columns(reshape_data, reshape_plc_tbl):
    """Check interleave_columns against a row-by-row pyarrow concatenation."""
    # zip(*columns) yields one tuple per row; concatenating those rows in
    # order gives the expected interleaved values.
    row_arrays = [pa.array(row) for row in zip(*reshape_data)]
    expected = pa.concat_arrays(row_arrays)

    got = plc.reshape.interleave_columns(reshape_plc_tbl)

    assert_column_eq(got, expected)


@pytest.mark.parametrize("cnt", [0, 1, 3])
def test_tile(reshape_data, reshape_plc_tbl, cnt):
    """Check tile for several repeat counts, including zero."""
    # Repeating each Python list ``cnt`` times mirrors tiling the rows.
    expected_columns = [pa.array(values * cnt) for values in reshape_data]
    # Reuse the input table's schema for the expected table.
    schema = plc.interop.to_arrow(reshape_plc_tbl).schema
    expected = pa.Table.from_arrays(expected_columns, schema=schema)

    got = plc.reshape.tile(reshape_plc_tbl, cnt)

    assert_table_eq(got, expected)

0 comments on commit 8458306

Please sign in to comment.