Skip to content

Commit

Permalink
Disable tokenizing the path by dot (oap-project#109)
Browse files Browse the repository at this point in the history
  • Loading branch information
rui-mo authored and zhejiangxiaomai committed Mar 29, 2023
1 parent 73a86ee commit 7b45b43
Showing 1 changed file with 5 additions and 2 deletions.
7 changes: 5 additions & 2 deletions velox/type/Tokenizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,13 @@ std::unique_ptr<Subfield::PathElement> Tokenizer::computeNext() {
return nullptr;
}

// No need to tokenize the path by dot because Spark treats dot as a regular character.
/*
if (tryMatch(DOT)) {
std::unique_ptr<Subfield::PathElement> token = matchPathSegment();
firstSegment = false;
return token;
}
}*/

if (tryMatch(OPEN_BRACKET)) {
std::unique_ptr<Subfield::PathElement> token = tryMatch(QUOTE)
Expand Down Expand Up @@ -144,8 +146,9 @@ std::unique_ptr<Subfield::PathElement> Tokenizer::matchUnquotedSubscript() {
}

/// Returns true if `c` may appear in an unquoted path segment.
///
/// Accepts the punctuation characters ':', '$', '-', '/', '@', '|', '#' and
/// '.', plus every character accepted by isUnquotedSubscriptCharacter().
/// The dot is accepted because Spark treats it as a regular character, so it
/// must be consumed as part of the segment.
bool Tokenizer::isUnquotedPathCharacter(char c) {
  // Single return expression: the dot check has to be part of the returned
  // condition, otherwise it is unreachable and '.' is never accepted.
  return c == ':' || c == '$' || c == '-' || c == '/' || c == '@' || c == '|' ||
      c == '#' || c == '.' || isUnquotedSubscriptCharacter(c);
}

bool Tokenizer::isUnquotedSubscriptCharacter(char c) {
Expand Down

0 comments on commit 7b45b43

Please sign in to comment.