From fcd43473e61ffabf8c7e9836f0bb747611a0c501 Mon Sep 17 00:00:00 2001 From: Zhichun Wu Date: Mon, 28 Aug 2023 17:50:18 +0800 Subject: [PATCH] clean up code, fix more parsing issues, and add more tests --- .../com/clickhouse/data/ClickHouseUtils.java | 101 ++++++++++-------- .../clickhouse/data/ClickHouseColumnTest.java | 20 +++- .../clickhouse/data/ClickHouseUtilsTest.java | 21 +++- 3 files changed, 94 insertions(+), 48 deletions(-) diff --git a/clickhouse-data/src/main/java/com/clickhouse/data/ClickHouseUtils.java b/clickhouse-data/src/main/java/com/clickhouse/data/ClickHouseUtils.java index 7d533329f..817a232c4 100644 --- a/clickhouse-data/src/main/java/com/clickhouse/data/ClickHouseUtils.java +++ b/clickhouse-data/src/main/java/com/clickhouse/data/ClickHouseUtils.java @@ -830,9 +830,7 @@ public static Object parseJson(String json) { value = list.get(0); } - if (hasValue) { - break; - } + break; } if (!hasValue) { @@ -1121,24 +1119,33 @@ public static int skipSingleLineComment(String args, int startIndex, int len) { * Skip nested multi-line comment. * * @param args non-null string to scan - * @param startIndex start index, optionally start of the multi-line comment + * @param startIndex start index, MUST AFTER the beginning of the multi-line + * comment * @param len end index, usually length of the given string * @return index next to end of the outter most multi-line comment * @throws IllegalArgumentException when multi-line comment is unclosed */ public static int skipMultiLineComment(String args, int startIndex, int len) { - int openIndex = args.indexOf("/*", startIndex); - if (openIndex == startIndex) { - openIndex = args.indexOf("/*", startIndex + 2); - } - int closeIndex = args.indexOf("*/", startIndex); + int commentLevel = 1; - if (closeIndex < startIndex) { - throw new IllegalArgumentException("Unclosed multi-line comment"); + for (int i = startIndex; i < len; i++) { + char ch = args.charAt(i); + boolean hasNext = i < len - 1; + if (ch == '/' && hasNext && args.charAt(i + 1) == '*') { + i++; + commentLevel++; + } else if (ch == '*' && hasNext && args.charAt(i + 1) == '/') { + i++; + if (--commentLevel == 0) { + return i + 1; + } + } + if (commentLevel <= 0) { + break; + } } - return openIndex < startIndex || openIndex > closeIndex ? closeIndex + 2 - : skipMultiLineComment(args, closeIndex + 2, len); + throw new IllegalArgumentException("Unclosed multi-line comment"); } /** @@ -1171,7 +1178,7 @@ public static int skipContentsUntil(String args, int startIndex, int len, char.. i = skipQuotedString(args, i, len, ch) - 1; } else if (isOpenBracket(ch)) { i = skipBrackets(args, i, len, ch) - 1; - } else if (i + 1 < len) { + } else if ((ch == '-' || ch == '/') && i + 1 < len) { char nextCh = args.charAt(i + 1); if (ch == '-' && nextCh == '-') { i = skipSingleLineComment(args, i + 2, len) - 1; @@ -1255,7 +1262,6 @@ public static int skipContentsUntil(String args, int startIndex, int len, String String keyword = keywords[j]; if (keyword == null || keyword.isEmpty()) { index++; - continue; } else { int klen = keyword.length(); if (index + klen >= len) { @@ -1269,9 +1275,7 @@ public static int skipContentsUntil(String args, int startIndex, int len, String break; } else { char ch = args.charAt(i); - if (Character.isWhitespace(ch)) { - continue; - } else if (i + 1 < len) { + if ((ch == '-' || ch == '/') && i + 1 < len) { char nextCh = args.charAt(i + 1); if (ch == '-' && nextCh == '-') { i = skipSingleLineComment(args, i + 2, len) - 1; @@ -1280,7 +1284,7 @@ public static int skipContentsUntil(String args, int startIndex, int len, String } else { return len; } - } else { + } else if (!Character.isWhitespace(ch)) { return len; } } @@ -1299,7 +1303,6 @@ public static int readNameOrQuotedString(String args, int startIndex, int len, S if (++i < len) { builder.append(args.charAt(i)); } - continue; } else if (isQuote(ch)) { if (ch == quote) { if (i + 1 < len && args.charAt(i + 1) == ch) { @@ -1334,9 +1337,7 @@ public static int readEnumValues(String args, int startIndex, int len, Map= i) { for (i = index + 1; i < len; i++) { ch = args.charAt(i); - if (Character.isWhitespace(ch)) { - continue; - } else if (ch >= '0' && ch <= '9') { + if (ch >= '0' && ch <= '9') { builder.append(ch); } else if (ch == ',') { values.put(name, Integer.parseInt(builder.toString())); builder.setLength(0); - name = null; break; } else if (ch == ')') { values.put(name, Integer.parseInt(builder.toString())); return i + 1; - } else { + } else if (!Character.isWhitespace(ch)) { throw new IllegalArgumentException("Invalid character when reading enum"); } } - - continue; } else { throw new IllegalArgumentException("Expect = after enum value but not found"); } - } else { + } else if (!Character.isWhitespace(ch)) { throw new IllegalArgumentException("Invalid enum declaration"); } } @@ -1388,9 +1384,7 @@ public static int readValueArray(String args, int startIndex, int len, Consumer< if (ch == '[') { startIndex = i + 1; break; - } else if (Character.isWhitespace(ch)) { - continue; - } else if (i + 1 < len) { + } else if ((ch == '-' || ch == '/') && i + 1 < len) { char nextCh = args.charAt(i + 1); if (ch == '-' && nextCh == '-') { i = skipSingleLineComment(args, i + 2, len) - 1; @@ -1400,7 +1394,7 @@ public static int readValueArray(String args, int startIndex, int len, Consumer< startIndex = i; break; } - } else { + } else if (!Character.isWhitespace(ch)) { startIndex = i; break; } @@ -1437,7 +1431,7 @@ public static int readValueArray(String args, int startIndex, int len, Consumer< String str = builder.toString(); func.accept(str.isEmpty() || ClickHouseValues.NULL_EXPR.equalsIgnoreCase(str) ? null : str); builder.setLength(0); - } else if (i + 1 < len) { + } else if ((ch == '-' || ch == '/') && i + 1 < len) { char nextCh = args.charAt(i + 1); if (ch == '-' && nextCh == '-') { i = skipSingleLineComment(args, i + 2, len) - 1; @@ -1469,9 +1463,7 @@ public static int readParameters(String args, int startIndex, int len, List 0) { + for (int j = i + 1; j < len; j++) { + ch = args.charAt(j); + if (ch == ',' || ch == '=' || ch == '-' || ch == '/' || isOpenBracket(ch) + || isCloseBracket(ch)) { + i = j - 1; + break; + } else if (!Character.isWhitespace(ch)) { + i = j - 1; + if (expectWs) { + builder.append(' '); + } + break; + } + } + } + expectWs = false; } else if (isQuote(ch)) { builder.append(ch); for (int j = i + 1; j < len; j++) { @@ -1506,10 +1515,12 @@ public static int readParameters(String args, int startIndex, int len, List ClickHouseUtils.skipMultiLineComment("", 0, 0)); + Assert.assertThrows(IllegalArgumentException.class, () -> ClickHouseUtils.skipMultiLineComment("/", 0, 1)); + Assert.assertThrows(IllegalArgumentException.class, () -> ClickHouseUtils.skipMultiLineComment("/*", 0, 2)); + Assert.assertThrows(IllegalArgumentException.class, () -> ClickHouseUtils.skipMultiLineComment("/**", 0, 3)); + Assert.assertThrows(IllegalArgumentException.class, () -> ClickHouseUtils.skipMultiLineComment("/*/*/", 0, 5)); + Assert.assertThrows(IllegalArgumentException.class, () -> ClickHouseUtils.skipMultiLineComment("/*/**/", 0, 6)); + Assert.assertThrows(IllegalArgumentException.class, + () -> ClickHouseUtils.skipMultiLineComment("/*/***/", 0, 7)); + + Assert.assertEquals(ClickHouseUtils.skipMultiLineComment("/**/", 1, 4), 4); + Assert.assertEquals(ClickHouseUtils.skipMultiLineComment("/*/**/*/", 2, 8), 8); + Assert.assertEquals(ClickHouseUtils.skipMultiLineComment("/*/*/**/*/*/", 2, 12), 12); + String args = "select 1 /* select 1/*one*/ -- a */, 2"; Assert.assertEquals(ClickHouseUtils.skipMultiLineComment(args, 11, args.length()), args.lastIndexOf("*/") + 2); @@ -459,7 +472,7 @@ public void testReadParameters() { List params = new LinkedList<>(); Assert.assertEquals(ClickHouseUtils.readParameters(args, args.indexOf('('), args.length(), params), args.lastIndexOf(')') + 1); - Assert.assertEquals(params, Arrays.asList("quantiles(0.5, 'c \\'''([1],2) d',0.9)", "UInt64")); + Assert.assertEquals(params, Arrays.asList("quantiles(0.5,'c \\'''([1],2) d',0.9)", "UInt64")); params.clear(); args = " ('a'/* a*/, 1-- test\n, b)"; @@ -467,15 +480,15 @@ public void testReadParameters() { Assert.assertEquals(params, Arrays.asList("'a'", "1", "b")); params.clear(); - args = " a, b c"; + args = " a = 2 -- enum value\n, /** type declaration **/ b c , `d` /*e*/ --f"; Assert.assertEquals(ClickHouseUtils.readParameters(args, 0, args.length(), params), args.length()); - Assert.assertEquals(params, Arrays.asList("a", "bc")); + Assert.assertEquals(params, Arrays.asList("a=2", "b c", "`d`")); params.clear(); args = "column1 SimpleAggregateFunction(anyLast, Nested(a string, b string))"; Assert.assertEquals(ClickHouseUtils.readParameters(args, args.indexOf('('), args.length(), params), args.lastIndexOf(')') + 1); - Assert.assertEquals(params, Arrays.asList("anyLast", "Nested(a string, b string)")); + Assert.assertEquals(params, Arrays.asList("anyLast", "Nested(a string,b string)")); } @Test(groups = { "unit" })