Skip to content

Commit

Permalink
fix: allow reserved keywords on variables names (#6572)
Browse files Browse the repository at this point in the history
  • Loading branch information
spena authored Nov 16, 2020
1 parent ee31fde commit 2da360a
Show file tree
Hide file tree
Showing 5 changed files with 289 additions and 45 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,6 @@ identifier

variableName
: IDENTIFIER
| nonReserved
;

variableValue
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,14 @@
import io.confluent.ksql.parser.SqlBaseParser.SingleStatementContext;
import io.confluent.ksql.parser.exception.ParseFailedException;
import io.confluent.ksql.parser.tree.Statement;
import io.confluent.ksql.util.ParserUtil;
import java.util.List;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.stream.Collectors;
import org.antlr.v4.runtime.BaseErrorListener;
import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.CharStreams;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.RecognitionException;
import org.antlr.v4.runtime.Recognizer;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.atn.PredictionMode;
import org.antlr.v4.runtime.misc.Interval;
import org.antlr.v4.runtime.misc.ParseCancellationException;
Expand All @@ -40,29 +35,7 @@
public class DefaultKsqlParser implements KsqlParser {
// CHECKSTYLE_RULES.ON: ClassDataAbstractionCoupling

private static final BaseErrorListener ERROR_LISTENER = new BaseErrorListener() {
@Override
public void syntaxError(
final Recognizer<?, ?> recognizer,
final Object offendingSymbol,
final int line,
final int charPositionInLine,
final String message,
final RecognitionException e
) {
if (offendingSymbol instanceof Token && isKeywordError(
message, ((Token) offendingSymbol).getText())) {
//Checks if the error is a reserved keyword error
final String tokenName = ((Token) offendingSymbol).getText();
final String newMessage =
"\"" + tokenName + "\" is a reserved keyword and it can't be used as an identifier."
+ " You can use it as an identifier by escaping it as \'" + tokenName + "\' ";
throw new ParsingException(newMessage, e, line, charPositionInLine);
} else {
throw new ParsingException(message, e, line, charPositionInLine);
}
}
};
private static final BaseErrorListener ERROR_VALIDATOR = new SyntaxErrorValidator();

@Override
public List<ParsedStatement> parse(final String sql) {
Expand Down Expand Up @@ -115,10 +88,10 @@ private static SqlBaseParser.StatementsContext getParseTree(final String sql) {
final SqlBaseParser sqlBaseParser = new SqlBaseParser(tokenStream);

sqlBaseLexer.removeErrorListeners();
sqlBaseLexer.addErrorListener(ERROR_LISTENER);
sqlBaseLexer.addErrorListener(ERROR_VALIDATOR);

sqlBaseParser.removeErrorListeners();
sqlBaseParser.addErrorListener(ERROR_LISTENER);
sqlBaseParser.addErrorListener(ERROR_VALIDATOR);

final Function<SqlBaseParser, ParserRuleContext> parseFunction = SqlBaseParser::statements;

Expand All @@ -143,16 +116,4 @@ private static String getStatementString(final SingleStatementContext singleStat
singleStatementContext.stop.getStopIndex()
));
}

/**
* checks if the error is a reserved keyword error by checking the message and offendingSymbol
* @param message the error message
* @param offendingSymbol the symbol that caused the error
* @return
*/
private static boolean isKeywordError(final String message, final String offendingSymbol) {
final Matcher m = ParserUtil.EXTRANEOUS_INPUT_PATTERN.matcher(message);

return m.find() && ParserUtil.isReserved(offendingSymbol);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
/*
* Copyright 2020 Confluent Inc.
*
* Licensed under the Confluent Community License (the "License"); you may not use
* this file except in compliance with the License. You may obtain a copy of the
* License at
*
* http://www.confluent.io/confluent-community-license
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*/

package io.confluent.ksql.parser;

import com.google.common.collect.ImmutableMap;
import io.confluent.ksql.parser.SqlBaseParser.VariableNameContext;
import io.confluent.ksql.util.ParserKeywordValidatorUtil;
import io.confluent.ksql.util.ParserUtil;
import java.util.Map;
import java.util.function.BiFunction;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.antlr.v4.runtime.BaseErrorListener;
import org.antlr.v4.runtime.RecognitionException;
import org.antlr.v4.runtime.Recognizer;
import org.antlr.v4.runtime.RuleContext;
import org.antlr.v4.runtime.Token;

/**
* This class is a syntax error validator used in the {@link DefaultKsqlParser} class, which
* validates errors during SQL parsing. It implements {@link BaseErrorListener} so it can be
* added as an error listener to the {@code SqlBaseLexer}.
* </p>
* This class is called for every error caused by ANTlr parsing. Some errors, like reserved keywords
* in identifiers for some statements, are valid and should be allowed in the syntax, and others
* errors should return a better error message.
*/
public class SyntaxErrorValidator extends BaseErrorListener {
private static final Pattern EXTRANEOUS_INPUT_PATTERN = Pattern.compile(
"extraneous input.*expecting.*");
private static final Pattern MISMATCHED_INPUT_PATTERN = Pattern.compile(
"mismatched input.*expecting.*");

// List of validators per ANTLr context.
private static final Map<Class<?>, BiFunction<String, RecognitionException, Boolean>>
ERROR_VALIDATORS = ImmutableMap.of(
VariableNameContext.class, SyntaxErrorValidator::isValidVariableName
);

/**
* Check if a variable name uses a SQL reserved keyword that should be valid as a variable.
* </p>
* Valid reserved keywords in identifiers are not easy to configure in ANTLr {@code SqlBase.g4}.
* ANTLr rules must specify what a reserved keyword is so they are allowed (see
* nonReservedKeyword in {@code SqlBase.g4}), but it is not scalable. There are too many keywords
* used in SqlBase.g4, and not all statements allow the same reserved keywords. This method
* checks against the whole list of symbolic names (or keywords) from ANTLr, and verifies the
* variable name matches those.
*
* @param errorMessage The error message thrown by ANTLr parsers.
* @param exception The exception that contains the offending token.
* @return True if a reserved keyword is a valid variable name; false otherwise.
*/
private static boolean isValidVariableName(
final String errorMessage,
final RecognitionException exception
) {
if (MISMATCHED_INPUT_PATTERN.matcher(errorMessage).matches()) {
if (exception.getOffendingToken() != null) {
return ParserKeywordValidatorUtil.getKsqlReservedWords()
.contains(exception.getOffendingToken().getText());
}
}

return false;
}

private static boolean validateErrorContext(
final RuleContext context,
final String errorMessage,
final RecognitionException exception
) {
final BiFunction<String, RecognitionException, Boolean> validator =
ERROR_VALIDATORS.get(context.getClass());
if (validator == null) {
return false;
}

return validator.apply(errorMessage, exception);
}

private static boolean shouldIgnoreSyntaxError(
final String errorMessage,
final RecognitionException exception
) {
if (exception.getCtx() != null) {
return validateErrorContext(exception.getCtx(), errorMessage, exception);
}

return false;
}

/**
* checks if the error is a reserved keyword error by checking the message and offendingSymbol
* @param message the error message
* @param offendingSymbol the symbol that caused the error
* @return
*/
private static boolean isKeywordError(final String message, final String offendingSymbol) {
final Matcher m = EXTRANEOUS_INPUT_PATTERN.matcher(message);
return m.find() && ParserUtil.isReserved(offendingSymbol);
}

@Override
public void syntaxError(
final Recognizer<?, ?> recognizer,
final Object offendingSymbol,
final int line,
final int charPositionInLine,
final String message,
final RecognitionException e
) {
if (e != null && shouldIgnoreSyntaxError(message, e)) {
return;
}

if (offendingSymbol instanceof Token && isKeywordError(
message, ((Token) offendingSymbol).getText())) {
//Checks if the error is a reserved keyword error
final String tokenName = ((Token) offendingSymbol).getText();
final String newMessage =
"\"" + tokenName + "\" is a reserved keyword and it can't be used as an identifier."
+ " You can use it as an identifier by escaping it as \'" + tokenName + "\' ";
throw new ParsingException(newMessage, e, line, charPositionInLine);
} else {
throw new ParsingException(message, e, line, charPositionInLine);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,6 @@ public final class ParserUtil {
* @see org.apache.kafka.streams.state.StoreBuilder#name
*/
private static final Pattern VALID_SOURCE_NAMES = Pattern.compile("[a-zA-Z0-9_-]*");
public static final Pattern EXTRANEOUS_INPUT_PATTERN = Pattern.compile(
"extraneous input.*expecting.*");

private static final LookupTranslator ESCAPE_SYMBOLS = new LookupTranslator(
new String[][]{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
/*
* Copyright 2020 Confluent Inc.
*
* Licensed under the Confluent Community License (the "License"); you may not use
* this file except in compliance with the License. You may obtain a copy of the
* License at
*
* http://www.confluent.io/confluent-community-license
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*/

package io.confluent.ksql.parser;

import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.containsString;
import static org.junit.Assert.assertThrows;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import io.confluent.ksql.parser.SqlBaseParser.CreateStreamContext;
import io.confluent.ksql.parser.SqlBaseParser.VariableNameContext;
import org.antlr.v4.runtime.RecognitionException;
import org.antlr.v4.runtime.RuleContext;
import org.antlr.v4.runtime.Token;
import org.easymock.EasyMockRunner;
import org.junit.Test;
import org.junit.runner.RunWith;

@RunWith(EasyMockRunner.class)
public class SyntaxErrorValidatorTest {
private SyntaxErrorValidator syntaxErrorValidator = new SyntaxErrorValidator();

@Test
public void shouldAllowReservedKeywordsOnVariableName() {
// Given:
final String errorMessage = "mismatched input 'topic' expecting IDENTIFIER";
final RecognitionException exception =
givenException(mock(VariableNameContext.class), getToken("topic"));

// When/Then:
callSyntaxError(errorMessage, exception);
}

@Test
public void shouldThrowDefaultParsingExceptionIfExceptionIsNull() {
// Given:
final String errorMessage = "mismatched input 'topic' expecting IDENTIFIER";
final RecognitionException exception = null;

// When
final Exception e = assertThrows(
ParsingException.class,
() -> callSyntaxError(errorMessage, exception)
);

// Then:
assertThat(e.getMessage(),
containsString("mismatched input 'topic' expecting IDENTIFIER"));
}

@Test
public void shouldThrowDefaultParsingExceptionIfContextIsNull() {
// Given:
final String errorMessage = "mismatched input 'topic' expecting IDENTIFIER";
final RecognitionException exception = givenException(null, getToken("topic"));

// When
final Exception e = assertThrows(
ParsingException.class,
() -> callSyntaxError(errorMessage, exception)
);

// Then:
assertThat(e.getMessage(),
containsString("mismatched input 'topic' expecting IDENTIFIER"));
}

@Test
public void shouldThrowOnInvalidNonReservedKeywordVariableName() {
// Given:
final String errorMessage = "mismatched input '1' expecting IDENTIFIER";
final RecognitionException exception =
givenException(mock(VariableNameContext.class), getToken("1"));

// When
final Exception e = assertThrows(
ParsingException.class,
() -> callSyntaxError(errorMessage, exception)
);

// Then:
assertThat(e.getMessage(),
containsString("mismatched input '1' expecting IDENTIFIER"));
}

@Test
public void shouldThrowWhenReservedKeywordUsedAsIdentifierOnNoVariablesNames() {
// Given:
final String errorMessage = "extraneous input 'size' expecting IDENTIFIER";
final RecognitionException exception =
givenException(mock(CreateStreamContext.class), getToken("size"));

// When:
final Exception e = assertThrows(
ParsingException.class,
() -> callSyntaxError(errorMessage, exception)
);

// Then:
assertThat(e.getMessage(),
containsString("\"size\" is a reserved keyword and it can't be used as an identifier"));
}

private void callSyntaxError(final String errorMessage, final RecognitionException exception) {
syntaxErrorValidator.syntaxError(
null,
(exception != null) ? exception.getOffendingToken() : null,
0,
0,
errorMessage,
exception
);
}

private RecognitionException givenException(
final RuleContext context,
final Token offendingToken
) {
final RecognitionException exception = mock(RecognitionException.class);
when(exception.getCtx()).thenReturn(context);
when(exception.getOffendingToken()).thenReturn(offendingToken);
return exception;
}

private Token getToken(final String text) {
final Token token = mock(Token.class);
when(token.getText()).thenReturn(text);
return token;
}
}

0 comments on commit 2da360a

Please sign in to comment.