Skip to content

Commit

Permalink
Migrate quantile.pxd to pylibcudf (#15874)
Browse files Browse the repository at this point in the history
xref #15162 

Migrate quantile.pxd to use pylibcudf APIs.

Authors:
  - Thomas Li (https://github.com/lithomas1)

Approvers:
  - Lawrence Mitchell (https://github.com/wence-)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: #15874
  • Loading branch information
lithomas1 committed Jun 6, 2024
1 parent 3468fa1 commit 5f45803
Show file tree
Hide file tree
Showing 12 changed files with 486 additions and 81 deletions.
4 changes: 3 additions & 1 deletion cpp/src/quantiles/quantiles.cu
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include <thrust/iterator/transform_iterator.h>

#include <memory>
#include <stdexcept>
#include <vector>

namespace cudf {
Expand Down Expand Up @@ -78,7 +79,8 @@ std::unique_ptr<table> quantiles(table_view const& input,

CUDF_EXPECTS(interp == interpolation::HIGHER || interp == interpolation::LOWER ||
interp == interpolation::NEAREST,
"multi-column quantiles require a non-arithmetic interpolation strategy.");
"multi-column quantiles require a non-arithmetic interpolation strategy.",
std::invalid_argument);

CUDF_EXPECTS(input.num_rows() > 0, "multi-column quantiles require at least one input row.");

Expand Down
9 changes: 6 additions & 3 deletions cpp/tests/quantiles/quantiles_test.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -25,6 +25,8 @@
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>

#include <stdexcept>

template <typename T>
struct QuantilesTest : public cudf::test::BaseFixture {};

Expand Down Expand Up @@ -104,9 +106,10 @@ TYPED_TEST(QuantilesTest, TestMultiColumnArithmeticInterpolation)
cudf::test::fixed_width_column_wrapper<T> input_b({});
auto input = cudf::table_view({input_a});

EXPECT_THROW(cudf::quantiles(input, {0.0f}, cudf::interpolation::LINEAR), cudf::logic_error);
EXPECT_THROW(cudf::quantiles(input, {0.0f}, cudf::interpolation::LINEAR), std::invalid_argument);

EXPECT_THROW(cudf::quantiles(input, {0.0f}, cudf::interpolation::MIDPOINT), cudf::logic_error);
EXPECT_THROW(cudf::quantiles(input, {0.0f}, cudf::interpolation::MIDPOINT),
std::invalid_argument);
}

TYPED_TEST(QuantilesTest, TestMultiColumnUnsorted)
Expand Down
1 change: 1 addition & 0 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ This page provides API documentation for pylibcudf.
join
lists
merge
quantiles
reduce
reshape
rolling
Expand Down
6 changes: 6 additions & 0 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/quantiles.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
=========
quantiles
=========

.. automodule:: cudf._lib.pylibcudf.quantiles
:members:
1 change: 1 addition & 0 deletions python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ set(cython_sources
join.pyx
lists.pyx
merge.pyx
quantiles.pyx
reduce.pyx
replace.pyx
reshape.pyx
Expand Down
2 changes: 2 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/__init__.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ from . cimport (
join,
lists,
merge,
quantiles,
reduce,
replace,
reshape,
Expand Down Expand Up @@ -48,6 +49,7 @@ __all__ = [
"join",
"lists",
"merge",
"quantiles",
"reduce",
"replace",
"rolling",
Expand Down
2 changes: 2 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
join,
lists,
merge,
quantiles,
reduce,
replace,
reshape,
Expand Down Expand Up @@ -48,6 +49,7 @@
"join",
"lists",
"merge",
"quantiles",
"reduce",
"replace",
"rolling",
Expand Down
25 changes: 25 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/quantiles.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
from libcpp.vector cimport vector

from cudf._lib.pylibcudf.libcudf.types cimport interpolation, sorted

from .column cimport Column
from .table cimport Table


# Declaration of the single-column quantile entry point (returns a Column).
# Arguments marked `= *` are optional; their default values are supplied by
# the matching definition in quantiles.pyx.
cpdef Column quantile(
Column input,
vector[double] q,
interpolation interp = *,
Column ordered_indices = *,
bint exact = *
)

# Declaration of the table (row-wise) quantiles entry point (returns a Table).
# Arguments marked `= *` are optional; their default values are supplied by
# the matching definition in quantiles.pyx. `column_order` and
# `null_precedence` are passed as Python lists.
cpdef Table quantiles(
Table input,
vector[double] q,
interpolation interp = *,
sorted is_input_sorted = *,
list column_order = *,
list null_precedence = *,
)
152 changes: 152 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/quantiles.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from libcpp cimport bool
from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move
from libcpp.vector cimport vector

from cudf._lib.pylibcudf.libcudf.column.column cimport column
from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
from cudf._lib.pylibcudf.libcudf.quantiles cimport (
quantile as cpp_quantile,
quantiles as cpp_quantiles,
)
from cudf._lib.pylibcudf.libcudf.table.table cimport table
from cudf._lib.pylibcudf.libcudf.types cimport null_order, order, sorted

from .column cimport Column
from .table cimport Table
from .types cimport interpolation


cpdef Column quantile(
    Column input,
    vector[double] q,
    interpolation interp = interpolation.LINEAR,
    Column ordered_indices = None,
    bint exact=True
):
    """Computes quantiles with interpolation.

    Computes the specified quantiles by interpolating values between which they lie,
    using the interpolation strategy specified in interp.

    For details, see :cpp:func:`quantile`.

    Parameters
    ----------
    input: Column
        The Column to calculate quantiles on.
    q: array-like
        The quantiles to calculate in range [0,1]
    interp: Interpolation, default Interpolation.LINEAR
        The strategy used to select between values adjacent to a specified quantile.
    ordered_indices: Column, default None
        The column containing the sorted order of input.
        If None, an empty column is passed to libcudf and
        all input values are used in existing order.
        Indices must be in range [0, input.size()), but are not required to be unique.
        Values not indexed by this column will be ignored.
    exact: bool, default True
        Returns doubles if True. Otherwise, returns same type as input

    Returns
    -------
    Column
        A Column containing specified quantiles, with nulls for indeterminable values
    """
    cdef:
        unique_ptr[column] c_result
        column_view ordered_indices_view

    # libcudf takes a column_view rather than an optional argument, so a
    # missing `ordered_indices` is represented by a default-constructed view.
    if ordered_indices is None:
        ordered_indices_view = column_view()
    else:
        ordered_indices_view = ordered_indices.view()

    # The libcudf call runs with the GIL released.
    with nogil:
        c_result = move(
            cpp_quantile(
                input.view(),
                q,
                interp,
                ordered_indices_view,
                exact,
            )
        )

    return Column.from_libcudf(move(c_result))


cpdef Table quantiles(
    Table input,
    vector[double] q,
    interpolation interp = interpolation.NEAREST,
    sorted is_input_sorted = sorted.NO,
    list column_order = None,
    list null_precedence = None,
):
    """Computes row quantiles with interpolation.

    Computes the specified quantiles by retrieving the row corresponding to the
    specified quantiles. In the event a quantile lies in between rows, the specified
    interpolation strategy is used to pick between the rows.

    For details, see :cpp:func:`quantiles`.

    Parameters
    ----------
    input: Table
        The Table to calculate row quantiles on.
    q: array-like
        The quantiles to calculate in range [0,1]
    interp: Interpolation, default Interpolation.NEAREST
        The strategy used to select between values adjacent to a specified quantile.

        Must be a non-arithmetic interpolation strategy
        (i.e. one of
        {`Interpolation.HIGHER`, `Interpolation.LOWER`, `Interpolation.NEAREST`})
    is_input_sorted: Sorted, default Sorted.NO
        Whether the input table has been pre-sorted or not.
    column_order: list, default None
        A list of `Order` enums,
        indicating the desired sort order for each column.
        By default, will sort all columns so that they are in ascending order.

        Ignored if `is_input_sorted` is `Sorted.YES`
    null_precedence: list, default None
        A list of `NullOrder` enums,
        indicating how nulls should be sorted.
        By default, will sort all columns so that nulls appear before
        all other elements.

        Ignored if `is_input_sorted` is `Sorted.YES`

    Returns
    -------
    Table
        A Table containing specified quantiles, with nulls for indeterminable values
    """
    cdef:
        unique_ptr[table] c_result
        vector[order] column_order_vec
        vector[null_order] null_precedence_vec

    # When an ordering list is omitted, the corresponding vector is left
    # empty and passed to libcudf as-is.
    if column_order is not None:
        column_order_vec = column_order
    if null_precedence is not None:
        null_precedence_vec = null_precedence

    # The libcudf call runs with the GIL released.
    with nogil:
        c_result = move(
            cpp_quantiles(
                input.view(),
                q,
                interp,
                is_input_sorted,
                column_order_vec,
                null_precedence_vec,
            )
        )

    return Table.from_libcudf(move(c_result))
Loading

0 comments on commit 5f45803

Please sign in to comment.