Skip to content

Commit

Permalink
apacheGH-40968: [C++][Gandiva] add RE2::Options set_dot_nl(true) for …
Browse files Browse the repository at this point in the history
…Like function
  • Loading branch information
xxlaykxx committed Jul 9, 2024
1 parent dab815a commit b150aca
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 14 deletions.
13 changes: 8 additions & 5 deletions cpp/src/gandiva/regex_functions_holder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -99,13 +99,14 @@ Status LikeHolder::Make(const FunctionNode& node, std::shared_ptr<LikeHolder>* h
"'like' function requires a string literal as the second parameter"));

RE2::Options regex_op;
regex_op.set_dot_nl(true);
if (node.descriptor()->name() == "ilike") {
regex_op.set_case_sensitive(false); // set case-insensitive for ilike function.

return Make(std::get<std::string>(literal->holder()), holder, regex_op);
}
if (node.children().size() == 2) {
return Make(std::get<std::string>(literal->holder()), holder);
return Make(std::get<std::string>(literal->holder()), holder, regex_op);
} else {
auto escape_char = dynamic_cast<LiteralNode*>(node.children().at(2).get());
ARROW_RETURN_IF(
Expand All @@ -118,7 +119,7 @@ Status LikeHolder::Make(const FunctionNode& node, std::shared_ptr<LikeHolder>* h
Status::Invalid(
"'like' function requires a string literal as the third parameter"));
return Make(std::get<std::string>(literal->holder()),
std::get<std::string>(escape_char->holder()), holder);
std::get<std::string>(escape_char->holder()), holder, regex_op);
}
}

Expand All @@ -127,7 +128,9 @@ Status LikeHolder::Make(const std::string& sql_pattern,
std::string pcre_pattern;
ARROW_RETURN_NOT_OK(RegexUtil::SqlLikePatternToPcre(sql_pattern, pcre_pattern));

auto lholder = std::shared_ptr<LikeHolder>(new LikeHolder(pcre_pattern));
RE2::Options regex_op;
regex_op.set_dot_nl(true);
auto lholder = std::shared_ptr<LikeHolder>(new LikeHolder(pcre_pattern, regex_op));
ARROW_RETURN_IF(!lholder->regex_.ok(),
Status::Invalid("Building RE2 pattern '", pcre_pattern,
"' failed with: ", lholder->regex_.error()));
Expand All @@ -137,7 +140,7 @@ Status LikeHolder::Make(const std::string& sql_pattern,
}

Status LikeHolder::Make(const std::string& sql_pattern, const std::string& escape_char,
std::shared_ptr<LikeHolder>* holder) {
std::shared_ptr<LikeHolder>* holder, RE2::Options regex_op) {
ARROW_RETURN_IF(escape_char.length() > 1,
Status::Invalid("The length of escape char ", escape_char,
" in 'like' function is greater than 1"));
Expand All @@ -149,7 +152,7 @@ Status LikeHolder::Make(const std::string& sql_pattern, const std::string& escap
ARROW_RETURN_NOT_OK(RegexUtil::SqlLikePatternToPcre(sql_pattern, pcre_pattern));
}

auto lholder = std::shared_ptr<LikeHolder>(new LikeHolder(pcre_pattern));
auto lholder = std::shared_ptr<LikeHolder>(new LikeHolder(pcre_pattern, regex_op));
ARROW_RETURN_IF(!lholder->regex_.ok(),
Status::Invalid("Building RE2 pattern '", pcre_pattern,
"' failed with: ", lholder->regex_.error()));
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/gandiva/regex_functions_holder.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class GANDIVA_EXPORT LikeHolder : public FunctionHolder {
static Status Make(const std::string& sql_pattern, std::shared_ptr<LikeHolder>* holder);

static Status Make(const std::string& sql_pattern, const std::string& escape_char,
std::shared_ptr<LikeHolder>* holder);
std::shared_ptr<LikeHolder>* holder, RE2::Options regex_op);

static Status Make(const std::string& sql_pattern, std::shared_ptr<LikeHolder>* holder,
RE2::Options regex_op);
Expand Down
34 changes: 26 additions & 8 deletions cpp/src/gandiva/regex_functions_holder_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ namespace gandiva {
class TestLikeHolder : public ::testing::Test {
public:
RE2::Options regex_op;
void SetUp() { regex_op.set_dot_nl(true); }

FunctionNode BuildLike(std::string pattern) {
auto field = std::make_shared<FieldNode>(arrow::field("in", arrow::utf8()));
auto pattern_node =
Expand Down Expand Up @@ -75,6 +77,15 @@ TEST_F(TestLikeHolder, TestMatchOne) {
EXPECT_FALSE(like("dabc"));
}

TEST_F(TestLikeHolder, TestPcreSpecialWithNewLine) {
std::shared_ptr<LikeHolder> like_holder;
auto status = LikeHolder::Make("%Space1.%", &like_holder, regex_op);

auto& like = *like_holder;
EXPECT_TRUE(
like("[name: \"Space1.protect\"\nargs: \"count\"\ncolumn_name: \"pass_count\"]"));
}

TEST_F(TestLikeHolder, TestPcreSpecial) {
std::shared_ptr<LikeHolder> like_holder;

Expand Down Expand Up @@ -104,17 +115,25 @@ TEST_F(TestLikeHolder, TestDot) {
EXPECT_FALSE(like("abcd"));
}

TEST_F(TestLikeHolder, TestMatchWithNewLine) {
std::shared_ptr<LikeHolder> like_holder;
auto status = LikeHolder::Make("%abc%", "\\", &like_holder, regex_op);

auto& like = *like_holder;
EXPECT_TRUE(like("abc\nd"));
}

TEST_F(TestLikeHolder, TestMatchSubString) {
std::shared_ptr<LikeHolder> like_holder;

auto status = LikeHolder::Make("%abc%", "\\", &like_holder);
auto status = LikeHolder::Make("%abc%", "\\", &like_holder, regex_op);
EXPECT_EQ(status.ok(), true) << status.message();

auto& like = *like_holder;
EXPECT_TRUE(like("abc"));
EXPECT_FALSE(like("xxabdc"));

status = LikeHolder::Make("%ab-.^$*+?()[]{}|—/c\\%%", "\\", &like_holder);
status = LikeHolder::Make("%ab-.^$*+?()[]{}|—/c\\%%", "\\", &like_holder, regex_op);
EXPECT_EQ(status.ok(), true) << status.message();

auto& like_reserved_char = *like_holder;
Expand Down Expand Up @@ -192,7 +211,7 @@ TEST_F(TestLikeHolder, TestOptimise) {
TEST_F(TestLikeHolder, TestMatchOneEscape) {
std::shared_ptr<LikeHolder> like_holder;

auto status = LikeHolder::Make("ab\\_", "\\", &like_holder);
auto status = LikeHolder::Make("ab\\_", "\\", &like_holder, regex_op);
EXPECT_EQ(status.ok(), true) << status.message();

auto& like = *like_holder;
Expand All @@ -209,8 +228,7 @@ TEST_F(TestLikeHolder, TestMatchOneEscape) {
TEST_F(TestLikeHolder, TestMatchManyEscape) {
std::shared_ptr<LikeHolder> like_holder;

auto status = LikeHolder::Make("ab\\%", "\\", &like_holder);
EXPECT_EQ(status.ok(), true) << status.message();
auto status = LikeHolder::Make("ab\\%", "\\", &like_holder, regex_op);

auto& like = *like_holder;

Expand All @@ -226,7 +244,7 @@ TEST_F(TestLikeHolder, TestMatchManyEscape) {
TEST_F(TestLikeHolder, TestMatchEscape) {
std::shared_ptr<LikeHolder> like_holder;

auto status = LikeHolder::Make("ab\\\\", "\\", &like_holder);
auto status = LikeHolder::Make("ab\\\\", "\\", &like_holder, regex_op);
EXPECT_EQ(status.ok(), true) << status.message();

auto& like = *like_holder;
Expand All @@ -239,7 +257,7 @@ TEST_F(TestLikeHolder, TestMatchEscape) {
TEST_F(TestLikeHolder, TestEmptyEscapeChar) {
std::shared_ptr<LikeHolder> like_holder;

auto status = LikeHolder::Make("ab\\_", "", &like_holder);
auto status = LikeHolder::Make("ab\\_", "", &like_holder, regex_op);
EXPECT_EQ(status.ok(), true) << status.message();

auto& like = *like_holder;
Expand All @@ -254,7 +272,7 @@ TEST_F(TestLikeHolder, TestEmptyEscapeChar) {
TEST_F(TestLikeHolder, TestMultipleEscapeChar) {
std::shared_ptr<LikeHolder> like_holder;

auto status = LikeHolder::Make("ab\\_", "\\\\", &like_holder);
auto status = LikeHolder::Make("ab\\_", "\\\\", &like_holder, regex_op);
EXPECT_EQ(status.ok(), false) << status.message();
}

Expand Down

0 comments on commit b150aca

Please sign in to comment.