Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rename strings multiple target replace API #15898

Merged
merged 3 commits into from
Jun 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cpp/benchmarks/string/replace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ static void BM_replace(benchmark::State& state, replace_type rt)
case scalar: cudf::strings::replace(input, target, repl); break;
case slice: cudf::strings::replace_slice(input, repl, 1, 10); break;
case multi:
cudf::strings::replace(
cudf::strings::replace_multiple(
input, cudf::strings_column_view(targets), cudf::strings_column_view(repls));
break;
}
Expand Down
12 changes: 6 additions & 6 deletions cpp/include/cudf/strings/detail/replace.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,14 @@ std::unique_ptr<column> replace(strings_column_view const& strings,
rmm::device_async_resource_ref mr);

/**
* @copydoc cudf::strings::replace(strings_column_view const&, strings_column_view const&,
* @copydoc cudf::strings::replace_multiple(strings_column_view const&, strings_column_view const&,
* strings_column_view const&, rmm::cuda_stream_view, rmm::device_async_resource_ref)
*/
std::unique_ptr<column> replace(strings_column_view const& strings,
strings_column_view const& targets,
strings_column_view const& repls,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr);
// Detail-level declaration of the multi-target replace. The name is spelled
// `replace_multiple` so that it matches the definition in
// cpp/src/strings/replace/multi.cu and the `detail::replace_multiple(...)`
// calls made by the public wrappers there.
std::unique_ptr<column> replace_multiple(strings_column_view const& strings,
                                         strings_column_view const& targets,
                                         strings_column_view const& repls,
                                         rmm::cuda_stream_view stream,
                                         rmm::device_async_resource_ref mr);

/**
* @brief Replaces any null string entries with the given string.
Expand Down
14 changes: 13 additions & 1 deletion cpp/include/cudf/strings/replace.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,19 @@ std::unique_ptr<column> replace_slice(
* @param mr Device memory resource used to allocate the returned column's device memory
* @return New strings column
*/
std::unique_ptr<column> replace(
std::unique_ptr<column> replace_multiple(
strings_column_view const& input,
strings_column_view const& targets,
strings_column_view const& repls,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
vyasr marked this conversation as resolved.
Show resolved Hide resolved

/**
* @copydoc cudf::strings::replace_multiple
*
* @deprecated since 24.08
*/
[[deprecated]] std::unique_ptr<column> replace(
strings_column_view const& input,
strings_column_view const& targets,
strings_column_view const& repls,
Expand Down
23 changes: 17 additions & 6 deletions cpp/src/strings/replace/multi.cu
Original file line number Diff line number Diff line change
Expand Up @@ -499,11 +499,11 @@ std::unique_ptr<column> replace_string_parallel(strings_column_view const& input

} // namespace

std::unique_ptr<column> replace(strings_column_view const& input,
strings_column_view const& targets,
strings_column_view const& repls,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
std::unique_ptr<column> replace_multiple(strings_column_view const& input,
strings_column_view const& targets,
strings_column_view const& repls,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
if (input.is_empty()) { return make_empty_column(type_id::STRING); }
CUDF_EXPECTS(((targets.size() > 0) && (targets.null_count() == 0)),
Expand All @@ -524,14 +524,25 @@ std::unique_ptr<column> replace(strings_column_view const& input,

// external API

// Public entry point for multi-target replacement: passes every argument
// through unchanged to detail::replace_multiple and returns what it produces.
// NOTE(review): CUDF_FUNC_RANGE() is a project macro whose definition is not
// visible here; presumably it marks a profiling range for this call — confirm.
std::unique_ptr<column> replace_multiple(strings_column_view const& strings,
                                         strings_column_view const& targets,
                                         strings_column_view const& repls,
                                         rmm::cuda_stream_view stream,
                                         rmm::device_async_resource_ref mr)
{
  CUDF_FUNC_RANGE();
  auto output = detail::replace_multiple(strings, targets, repls, stream, mr);
  return output;
}

// deprecated in 24.08
std::unique_ptr<column> replace(strings_column_view const& strings,
strings_column_view const& targets,
strings_column_view const& repls,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
CUDF_FUNC_RANGE();
return detail::replace(strings, targets, repls, stream, mr);
return detail::replace_multiple(strings, targets, repls, stream, mr);
}

} // namespace strings
Expand Down
2 changes: 1 addition & 1 deletion cpp/tests/json/json_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ std::unique_ptr<cudf::column> drop_whitespace(cudf::column_view const& col)
cudf::strings_column_view strings(col);
cudf::strings_column_view targets(whitespace);
cudf::strings_column_view replacements(repl);
return cudf::strings::replace(strings, targets, replacements);
return cudf::strings::replace_multiple(strings, targets, replacements);
}

struct JsonPathTests : public cudf::test::BaseFixture {};
Expand Down
4 changes: 2 additions & 2 deletions cpp/tests/streams/strings/replace_test.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -34,7 +34,7 @@ TEST_F(StringsReplaceTest, Replace)
auto const target = cudf::string_scalar("é", true, cudf::test::get_default_stream());
auto const repl = cudf::string_scalar(" ", true, cudf::test::get_default_stream());
cudf::strings::replace(view, target, repl, -1, cudf::test::get_default_stream());
cudf::strings::replace(view, view, view, cudf::test::get_default_stream());
cudf::strings::replace_multiple(view, view, view, cudf::test::get_default_stream());
cudf::strings::replace_slice(view, repl, 1, 2, cudf::test::get_default_stream());

auto const pattern = std::string("[a-z]");
Expand Down
33 changes: 27 additions & 6 deletions cpp/tests/strings/replace_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,23 @@ TEST_F(StringsReplaceTest, ReplaceErrors)
EXPECT_THROW(cudf::strings::replace(sv, target, null_input), cudf::logic_error);
EXPECT_THROW(cudf::strings::replace(sv, null_input, replacement), cudf::logic_error);
EXPECT_THROW(cudf::strings::replace(sv, empty_input, replacement), cudf::logic_error);

auto const empty = cudf::test::strings_column_wrapper();
auto const ev = cudf::strings_column_view(empty);
auto const targets = cudf::test::strings_column_wrapper({"x"});
auto const tv = cudf::strings_column_view(targets);
auto const target_null = cudf::test::strings_column_wrapper({""}, {0});
auto const tv_null = cudf::strings_column_view(target_null);
auto const repls = cudf::test::strings_column_wrapper({"y", "z"});
auto const rv = cudf::strings_column_view(repls);
auto const repl_null = cudf::test::strings_column_wrapper({""}, {0});
auto const rv_null = cudf::strings_column_view(repl_null);

EXPECT_THROW(cudf::strings::replace_multiple(sv, ev, rv), cudf::logic_error);
EXPECT_THROW(cudf::strings::replace_multiple(sv, tv_null, rv), cudf::logic_error);
EXPECT_THROW(cudf::strings::replace_multiple(sv, tv, ev), cudf::logic_error);
EXPECT_THROW(cudf::strings::replace_multiple(sv, tv, rv_null), cudf::logic_error);
EXPECT_THROW(cudf::strings::replace_multiple(sv, tv, rv), cudf::logic_error);
}

TEST_F(StringsReplaceTest, ReplaceSlice)
Expand Down Expand Up @@ -341,7 +358,7 @@ TEST_F(StringsReplaceTest, ReplaceMulti)
cudf::test::strings_column_wrapper repls({"_ ", "A ", "2 "});
auto repls_view = cudf::strings_column_view(repls);

auto results = cudf::strings::replace(strings_view, targets_view, repls_view);
auto results = cudf::strings::replace_multiple(strings_view, targets_view, repls_view);

std::vector<char const*> h_expected{"_ quick brown fox jumps over _ lazy dog",
"_ fat cat lays next 2 _ other accénted cat",
Expand All @@ -361,7 +378,7 @@ TEST_F(StringsReplaceTest, ReplaceMulti)
cudf::test::strings_column_wrapper repls({"* "});
auto repls_view = cudf::strings_column_view(repls);

auto results = cudf::strings::replace(strings_view, targets_view, repls_view);
auto results = cudf::strings::replace_multiple(strings_view, targets_view, repls_view);

std::vector<char const*> h_expected{"* quick brown fox jumps over * lazy dog",
"* fat cat lays next * * other accénted cat",
Expand Down Expand Up @@ -422,7 +439,7 @@ TEST_F(StringsReplaceTest, ReplaceMultiLong)
cudf::test::strings_column_wrapper repls({"x", "PEAR", "avocado", "$$"});
auto repls_view = cudf::strings_column_view(repls);

auto results = cudf::strings::replace(strings_view, targets_view, repls_view);
auto results = cudf::strings::replace_multiple(strings_view, targets_view, repls_view);

cudf::test::strings_column_wrapper expected(
{"This string needs to be very long to trigger the long-replace internal functions. "
Expand Down Expand Up @@ -454,7 +471,7 @@ TEST_F(StringsReplaceTest, ReplaceMultiLong)
cudf::test::strings_column_wrapper repls({"*"});
auto repls_view = cudf::strings_column_view(repls);

auto results = cudf::strings::replace(strings_view, targets_view, repls_view);
auto results = cudf::strings::replace_multiple(strings_view, targets_view, repls_view);

cudf::test::strings_column_wrapper expected(
{"This string needs to be very long to trigger the long-replace internal functions. "
Expand Down Expand Up @@ -494,7 +511,7 @@ TEST_F(StringsReplaceTest, ReplaceMultiLong)
auto repls = cudf::test::strings_column_wrapper({""});
auto repls_view = cudf::strings_column_view(repls);

auto results = cudf::strings::replace(strings_view, targets_view, repls_view);
auto results = cudf::strings::replace_multiple(strings_view, targets_view, repls_view);

cudf::test::strings_column_wrapper expected(
{"This string needs to be very long to trigger the long-replace internal functions. "
Expand Down Expand Up @@ -522,6 +539,10 @@ TEST_F(StringsReplaceTest, EmptyStringsColumn)
auto strings_view = cudf::strings_column_view(zero_size_strings_column);
auto results = cudf::strings::replace(
strings_view, cudf::string_scalar("not"), cudf::string_scalar("pertinent"));
auto view = results->view();
cudf::test::expect_column_empty(results->view());

auto const target = cudf::test::strings_column_wrapper({"x"});
auto const target_view = cudf::strings_column_view(target);
results = cudf::strings::replace_multiple(strings_view, target_view, target_view);
cudf::test::expect_column_empty(results->view());
}
2 changes: 1 addition & 1 deletion java/src/main/native/src/ColumnViewJni.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1755,7 +1755,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_stringReplaceMulti(
cudf::strings_column_view scvtargets(*cvtargets);
cudf::column_view* cvrepls = reinterpret_cast<cudf::column_view*>(repls_cv);
cudf::strings_column_view scvrepls(*cvrepls);
return release_as_jlong(cudf::strings::replace(scv, scvtargets, scvrepls));
return release_as_jlong(cudf::strings::replace_multiple(scv, scvtargets, scvrepls));
}
CATCH_STD(env, 0);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ cdef extern from "cudf/strings/replace.hpp" namespace "cudf::strings" nogil:
string_scalar repl,
int32_t maxrepl) except +

cdef unique_ptr[column] replace(
cdef unique_ptr[column] replace_multiple(
column_view source_strings,
column_view target_strings,
column_view repl_strings) except +
3 changes: 2 additions & 1 deletion python/cudf/cudf/_lib/strings/replace.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
from cudf._lib.pylibcudf.libcudf.scalar.scalar cimport string_scalar
from cudf._lib.pylibcudf.libcudf.strings.replace cimport (
replace as cpp_replace,
replace_multiple as cpp_replace_multiple,
replace_slice as cpp_replace_slice,
)
from cudf._lib.pylibcudf.libcudf.types cimport size_type
Expand Down Expand Up @@ -126,7 +127,7 @@ def replace_multi(Column source_strings,
cdef column_view repl_view = repl_strings.view()

with nogil:
c_result = move(cpp_replace(
c_result = move(cpp_replace_multiple(
source_view,
target_view,
repl_view
Expand Down
Loading