Skip to content

Commit

Permalink
clean up code, fix more parsing issues, and add more tests
Browse files Browse the repository at this point in the history
  • Loading branch information
zhicwu committed Aug 28, 2023
1 parent 800e8ef commit fcd4347
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 48 deletions.
101 changes: 58 additions & 43 deletions clickhouse-data/src/main/java/com/clickhouse/data/ClickHouseUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -830,9 +830,7 @@ public static Object parseJson(String json) {
value = list.get(0);
}

if (hasValue) {
break;
}
break;
}

if (!hasValue) {
Expand Down Expand Up @@ -1121,24 +1119,33 @@ public static int skipSingleLineComment(String args, int startIndex, int len) {
* Skip nested multi-line comment.
*
* @param args non-null string to scan
* @param startIndex start index, optionally start of the multi-line comment
* @param startIndex start index, MUST AFTER the beginning of the multi-line
* comment
* @param len end index, usually length of the given string
* @return index next to end of the outter most multi-line comment
* @throws IllegalArgumentException when multi-line comment is unclosed
*/
public static int skipMultiLineComment(String args, int startIndex, int len) {
int openIndex = args.indexOf("/*", startIndex);
if (openIndex == startIndex) {
openIndex = args.indexOf("/*", startIndex + 2);
}
int closeIndex = args.indexOf("*/", startIndex);
int commentLevel = 1;

if (closeIndex < startIndex) {
throw new IllegalArgumentException("Unclosed multi-line comment");
for (int i = startIndex; i < len; i++) {
char ch = args.charAt(i);
boolean hasNext = i < len - 1;
if (ch == '/' && hasNext && args.charAt(i + 1) == '*') {
i++;
commentLevel++;
} else if (ch == '*' && hasNext && args.charAt(i + 1) == '/') {
i++;
if (--commentLevel == 0) {
return i + 1;
}
}
if (commentLevel <= 0) {
break;
}
}

return openIndex < startIndex || openIndex > closeIndex ? closeIndex + 2
: skipMultiLineComment(args, closeIndex + 2, len);
throw new IllegalArgumentException("Unclosed multi-line comment");
}

/**
Expand Down Expand Up @@ -1171,7 +1178,7 @@ public static int skipContentsUntil(String args, int startIndex, int len, char..
i = skipQuotedString(args, i, len, ch) - 1;
} else if (isOpenBracket(ch)) {
i = skipBrackets(args, i, len, ch) - 1;
} else if (i + 1 < len) {
} else if ((ch == '-' || ch == '/') && i + 1 < len) {
char nextCh = args.charAt(i + 1);
if (ch == '-' && nextCh == '-') {
i = skipSingleLineComment(args, i + 2, len) - 1;
Expand Down Expand Up @@ -1255,7 +1262,6 @@ public static int skipContentsUntil(String args, int startIndex, int len, String
String keyword = keywords[j];
if (keyword == null || keyword.isEmpty()) {
index++;
continue;
} else {
int klen = keyword.length();
if (index + klen >= len) {
Expand All @@ -1269,9 +1275,7 @@ public static int skipContentsUntil(String args, int startIndex, int len, String
break;
} else {
char ch = args.charAt(i);
if (Character.isWhitespace(ch)) {
continue;
} else if (i + 1 < len) {
if ((ch == '-' || ch == '/') && i + 1 < len) {
char nextCh = args.charAt(i + 1);
if (ch == '-' && nextCh == '-') {
i = skipSingleLineComment(args, i + 2, len) - 1;
Expand All @@ -1280,7 +1284,7 @@ public static int skipContentsUntil(String args, int startIndex, int len, String
} else {
return len;
}
} else {
} else if (!Character.isWhitespace(ch)) {
return len;
}
}
Expand All @@ -1299,7 +1303,6 @@ public static int readNameOrQuotedString(String args, int startIndex, int len, S
if (++i < len) {
builder.append(args.charAt(i));
}
continue;
} else if (isQuote(ch)) {
if (ch == quote) {
if (i + 1 < len && args.charAt(i + 1) == ch) {
Expand Down Expand Up @@ -1334,9 +1337,7 @@ public static int readEnumValues(String args, int startIndex, int len, Map<Strin
StringBuilder builder = new StringBuilder();
for (int i = startIndex; i < len; i++) {
char ch = args.charAt(i);
if (Character.isWhitespace(ch)) {
continue;
} else if (ch == '\'') {
if (ch == '\'') {
i = readNameOrQuotedString(args, i, len, builder);
name = builder.toString();
builder.setLength(0);
Expand All @@ -1345,28 +1346,23 @@ public static int readEnumValues(String args, int startIndex, int len, Map<Strin
if (index >= i) {
for (i = index + 1; i < len; i++) {
ch = args.charAt(i);
if (Character.isWhitespace(ch)) {
continue;
} else if (ch >= '0' && ch <= '9') {
if (ch >= '0' && ch <= '9') {
builder.append(ch);
} else if (ch == ',') {
values.put(name, Integer.parseInt(builder.toString()));
builder.setLength(0);
name = null;
break;
} else if (ch == ')') {
values.put(name, Integer.parseInt(builder.toString()));
return i + 1;
} else {
} else if (!Character.isWhitespace(ch)) {
throw new IllegalArgumentException("Invalid character when reading enum");
}
}

continue;
} else {
throw new IllegalArgumentException("Expect = after enum value but not found");
}
} else {
} else if (!Character.isWhitespace(ch)) {
throw new IllegalArgumentException("Invalid enum declaration");
}
}
Expand All @@ -1388,9 +1384,7 @@ public static int readValueArray(String args, int startIndex, int len, Consumer<
if (ch == '[') {
startIndex = i + 1;
break;
} else if (Character.isWhitespace(ch)) {
continue;
} else if (i + 1 < len) {
} else if ((ch == '-' || ch == '/') && i + 1 < len) {
char nextCh = args.charAt(i + 1);
if (ch == '-' && nextCh == '-') {
i = skipSingleLineComment(args, i + 2, len) - 1;
Expand All @@ -1400,7 +1394,7 @@ public static int readValueArray(String args, int startIndex, int len, Consumer<
startIndex = i;
break;
}
} else {
} else if (!Character.isWhitespace(ch)) {
startIndex = i;
break;
}
Expand Down Expand Up @@ -1437,7 +1431,7 @@ public static int readValueArray(String args, int startIndex, int len, Consumer<
String str = builder.toString();
func.accept(str.isEmpty() || ClickHouseValues.NULL_EXPR.equalsIgnoreCase(str) ? null : str);
builder.setLength(0);
} else if (i + 1 < len) {
} else if ((ch == '-' || ch == '/') && i + 1 < len) {
char nextCh = args.charAt(i + 1);
if (ch == '-' && nextCh == '-') {
i = skipSingleLineComment(args, i + 2, len) - 1;
Expand Down Expand Up @@ -1469,9 +1463,7 @@ public static int readParameters(String args, int startIndex, int len, List<Stri
if (ch == '(') {
startIndex = i + 1;
break;
} else if (Character.isWhitespace(ch)) {
continue;
} else if (i + 1 < len) {
} else if ((ch == '-' || ch == '/') && i + 1 < len) {
char nextCh = args.charAt(i + 1);
if (ch == '-' && nextCh == '-') {
i = skipSingleLineComment(args, i + 2, len) - 1;
Expand All @@ -1481,16 +1473,33 @@ public static int readParameters(String args, int startIndex, int len, List<Stri
startIndex = i;
break;
}
} else {
} else if (!Character.isWhitespace(ch)) {
startIndex = i;
break;
}
}

boolean expectWs = false;
for (int i = startIndex; i < len; i++) {
char ch = args.charAt(i);
if (Character.isWhitespace(ch) && stack.isEmpty()) {
continue;
if (Character.isWhitespace(ch)) {
if (builder.length() > 0) {
for (int j = i + 1; j < len; j++) {
ch = args.charAt(j);
if (ch == ',' || ch == '=' || ch == '-' || ch == '/' || isOpenBracket(ch)
|| isCloseBracket(ch)) {
i = j - 1;
break;
} else if (!Character.isWhitespace(ch)) {
i = j - 1;
if (expectWs) {
builder.append(' ');
}
break;
}
}
}
expectWs = false;
} else if (isQuote(ch)) {
builder.append(ch);
for (int j = i + 1; j < len; j++) {
Expand All @@ -1506,10 +1515,12 @@ public static int readParameters(String args, int startIndex, int len, List<Stri
}
}
}
expectWs = false;
} else if (isOpenBracket(ch)) {
builder.append(ch);
stack.push(closeBracket);
closeBracket = getCloseBracket(ch);
expectWs = false;
} else if (ch == closeBracket) {
if (stack.isEmpty()) {
len = i + 1;
Expand All @@ -1518,14 +1529,16 @@ public static int readParameters(String args, int startIndex, int len, List<Stri
builder.append(ch);
closeBracket = stack.pop();
}
expectWs = false;
} else if (ch == ',') {
if (!stack.isEmpty()) {
builder.append(ch);
} else {
params.add(builder.toString());
builder.setLength(0);
}
} else if (i + 1 < len) {
expectWs = false;
} else if ((ch == '-' || ch == '/') && i + 1 < len) {
char nextCh = args.charAt(i + 1);
if (ch == '-' && nextCh == '-') {
i = skipSingleLineComment(args, i + 2, len) - 1;
Expand All @@ -1534,8 +1547,10 @@ public static int readParameters(String args, int startIndex, int len, List<Stri
} else {
builder.append(ch);
}
expectWs = false;
} else {
builder.append(ch);
expectWs = ch != '=';
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ public void testAggregationFunction() {
Assert.assertTrue(column.isAggregateFunction());
Assert.assertEquals(column.getDataType(), ClickHouseDataType.AggregateFunction);
Assert.assertEquals(column.getAggregateFunction(), ClickHouseAggregateFunction.quantiles);
Assert.assertEquals(column.getFunction(), "quantiles(0.5, 0.9)");
Assert.assertEquals(column.getFunction(), "quantiles(0.5,0.9)");
Assert.assertEquals(column.getNestedColumns(),
Collections.singletonList(ClickHouseColumn.of("", "Nullable(UInt64)")));
Assert.assertFalse(column.isFixedLength(), "Should not have fixed length in byte");
Expand Down Expand Up @@ -276,6 +276,24 @@ public void testSimpleAggregationFunction() {
ClickHouseColumn c = ClickHouseColumn.of("a", "SimpleAggregateFunction(max, UInt64)");
Assert.assertEquals(c.getDataType(), ClickHouseDataType.SimpleAggregateFunction);
Assert.assertEquals(c.getNestedColumns().get(0).getDataType(), ClickHouseDataType.UInt64);

// https://github.com/ClickHouse/clickhouse-java/issues/1389
c = ClickHouseColumn.of("a", "SimpleAggregateFunction(anyLast, Nested(a String, b String))");
Assert.assertEquals(c.getDataType(), ClickHouseDataType.SimpleAggregateFunction);
Assert.assertEquals(c.getNestedColumns().get(0).getDataType(), ClickHouseDataType.Nested);
Assert.assertEquals(c.getNestedColumns().get(0).getNestedColumns(),
ClickHouseColumn.parse("a String, b String"));
Assert.assertEquals(
ClickHouseColumn.of("a", "SimpleAggregateFunction ( anyLast , Nested ( a String , b String ) )")
.getParameters(),
c.getParameters());
Assert.assertEquals(
ClickHouseColumn.of("a",
"SimpleAggregateFunction(anyLast,Nested(a String,b String,`c c` Nested(d Int32, e Tuple(UInt32, Nullable(String)))))")
.getParameters(),
ClickHouseColumn.of("a",
" SimpleAggregateFunction ( /** test **/anyLast -- test\n , Nested ( a String , b String,\n\t `c c` \t Nested(d Int32, e Tuple(UInt32, Nullable(String))) ) )")
.getParameters());
}

@Test(groups = { "unit" })
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,19 @@ public void testSkipSingleLineComment() {

@Test(groups = { "unit" })
public void testSkipMultipleLineComment() {
Assert.assertThrows(IllegalArgumentException.class, () -> ClickHouseUtils.skipMultiLineComment("", 0, 0));
Assert.assertThrows(IllegalArgumentException.class, () -> ClickHouseUtils.skipMultiLineComment("/", 0, 1));
Assert.assertThrows(IllegalArgumentException.class, () -> ClickHouseUtils.skipMultiLineComment("/*", 0, 2));
Assert.assertThrows(IllegalArgumentException.class, () -> ClickHouseUtils.skipMultiLineComment("/**", 0, 3));
Assert.assertThrows(IllegalArgumentException.class, () -> ClickHouseUtils.skipMultiLineComment("/*/*/", 0, 5));
Assert.assertThrows(IllegalArgumentException.class, () -> ClickHouseUtils.skipMultiLineComment("/*/**/", 0, 6));
Assert.assertThrows(IllegalArgumentException.class,
() -> ClickHouseUtils.skipMultiLineComment("/*/***/", 0, 7));

Assert.assertEquals(ClickHouseUtils.skipMultiLineComment("/**/", 1, 4), 4);
Assert.assertEquals(ClickHouseUtils.skipMultiLineComment("/*/**/*/", 2, 8), 8);
Assert.assertEquals(ClickHouseUtils.skipMultiLineComment("/*/*/**/*/*/", 2, 12), 12);

String args = "select 1 /* select 1/*one*/ -- a */, 2";
Assert.assertEquals(ClickHouseUtils.skipMultiLineComment(args, 11, args.length()),
args.lastIndexOf("*/") + 2);
Expand Down Expand Up @@ -459,23 +472,23 @@ public void testReadParameters() {
List<String> params = new LinkedList<>();
Assert.assertEquals(ClickHouseUtils.readParameters(args, args.indexOf('('), args.length(), params),
args.lastIndexOf(')') + 1);
Assert.assertEquals(params, Arrays.asList("quantiles(0.5, 'c \\'''([1],2) d',0.9)", "UInt64"));
Assert.assertEquals(params, Arrays.asList("quantiles(0.5,'c \\'''([1],2) d',0.9)", "UInt64"));

params.clear();
args = " ('a'/* a*/, 1-- test\n, b)";
Assert.assertEquals(ClickHouseUtils.readParameters(args, 0, args.length(), params), args.length());
Assert.assertEquals(params, Arrays.asList("'a'", "1", "b"));

params.clear();
args = " a, b c";
args = " a = 2 -- enum value\n, /** type declaration **/ b c , `d` /*e*/ --f";
Assert.assertEquals(ClickHouseUtils.readParameters(args, 0, args.length(), params), args.length());
Assert.assertEquals(params, Arrays.asList("a", "bc"));
Assert.assertEquals(params, Arrays.asList("a=2", "b c", "`d`"));

params.clear();
args = "column1 SimpleAggregateFunction(anyLast, Nested(a string, b string))";
Assert.assertEquals(ClickHouseUtils.readParameters(args, args.indexOf('('), args.length(), params),
args.lastIndexOf(')') + 1);
Assert.assertEquals(params, Arrays.asList("anyLast", "Nested(a string, b string)"));
Assert.assertEquals(params, Arrays.asList("anyLast", "Nested(a string,b string)"));
}

@Test(groups = { "unit" })
Expand Down

0 comments on commit fcd4347

Please sign in to comment.