missing lexer rules if tokenVocab defined #157

Closed
wants to merge 4 commits
10 changes: 5 additions & 5 deletions README.txt
@@ -18,7 +18,7 @@ version of ANTLR for 9 years.

You should use v3 in conjunction with ANTLRWorks:

http://www.antlr.org/works/index.html
http://www.antlr3.org/works/

and gUnit (grammar unit testing tool included in distribution):

@@ -98,7 +98,7 @@ How is ANTLR v3 different than ANTLR v2?

See "What is the difference between ANTLR v2 and v3?"

http://www.antlr.org/wiki/pages/viewpage.action?pageId=719
http://www.antlr.org/wiki/pages/viewpage.action?pageId=2687279

See migration guide:

@@ -110,8 +110,8 @@ How do I install this damn thing?

You will have grabbed either of these:

http://antlr.org/download/antlr-3.5-complete-no-st3.jar
http://antlr.org/download/antlr-3.5-complete.jar
http://www.antlr3.org/download/antlr-3.5.2-complete-no-st3.jar
http://www.antlr3.org/download/antlr-3.5.2-complete.jar

It has all of the jars you need combined into one. Then you need to
add antlr-3.5-complete.jar to your CLASSPATH or add to arg list; e.g., on unix:
@@ -139,4 +139,4 @@ Please see the FAQ

How can I contribute to ANTLR v3?

http://www.antlr.org/wiki/pages/viewpage.action?pageId=33947666
http://www.antlr.org/wiki/pages/viewpage.action?pageId=2687297
1 change: 1 addition & 0 deletions contributors.txt
@@ -48,6 +48,7 @@ the end of the following contributors list.
CONTRIBUTORS:

YYYY/MM/DD, github id, Full name, email
2014/05/15, robstoll, Robert Stoll, [email protected]
2013/04/17, ibre5041, Ivan Brezina, [email protected]
2013/02/19, murrayju, Justin Murray, [email protected]
2012/07/12, parrt, Terence Parr, [email protected]
16 changes: 13 additions & 3 deletions tool/src/main/java/org/antlr/tool/AssignTokenTypesBehavior.java
@@ -90,10 +90,20 @@ protected void trackString(GrammarAST t) {
return;
}
// otherwise add literal to token types if referenced from parser rule
// or in the tokens{} section
// or in the tokens{} section - a combined grammar can specify a tokenVocab as well
// (in order to predefine the token type order, which can reduce branching);
// in that case the literals already have valid token types, so
// grammar.getTokenType(t.getText()) will not return Label.INVALID and the
// check below would skip them. We therefore ensure that literals are still
// recorded for combined grammars which have defined a tokenVocab.
String tokenVocab = (String) grammar.getOption("tokenVocab");
boolean hasTokenVocabAndIsCombinedParser = tokenVocab != null &&
grammar.type == Grammar.COMBINED;

if ( (currentRuleName==null ||
Character.isLowerCase(currentRuleName.charAt(0))) &&
grammar.getTokenType(t.getText())==Label.INVALID )
Character.isLowerCase(currentRuleName.charAt(0))) &&
(hasTokenVocabAndIsCombinedParser ||
grammar.getTokenType(t.getText())==Label.INVALID ))
Member commented:

I'm not convinced this is completely correct.

  1. Why do you change the behavior for parser grammars? A parser grammar would use a tokenVocab = LexerGrammarName, and that lexer grammar could itself have a tokenVocab = CustomTokensFile. It appears that only combined grammars are impacted by the scenario you are describing.
  2. Does the condition assume that the imported tokenVocab defines tokens which are referenced in the parser? What happens if the referenced tokenVocab file is empty, or otherwise does not define a literal which is referenced in a parser rule?

Contributor Author replied:

Thanks for your feedback.

  1. I will change the condition to combined grammars only; my mistake.
  2. I guess you are right that the relaxed condition might be too permissive. I assumed the use cases you described are covered by existing tests (and they all passed). I have only written a test for the case where the tokens defined in the tokenVocab are referenced in the parser. (As a side note, should I open an issue for this bug?)

{
stringLiterals.put(t.getText(), UNASSIGNED_IN_PARSER_RULE);
}
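
For readers following the review thread above, a minimal sketch of the scenario under discussion, reusing the setup of the new tests further down (Foo, Foo.tokens, and TokenFromTokenVocab are the names those tests use; nothing here is additional PR content):

    Foo.tokens (imported via tokenVocab, predefining the token type order):
        TokenFromTokenVocab=4
        'token'=4

    Foo.g (a combined grammar that also sets tokenVocab):
        grammar Foo;
        options {tokenVocab=Foo;}
        tokens{TokenFromTokenVocab='token';}
        a : TokenFromTokenVocab;
        WS : (' '|'\n') {$channel=HIDDEN;} ;

Because Foo.tokens already assigns a type to 'token', grammar.getTokenType() no longer returns Label.INVALID for that literal, so the old condition in trackString() skipped it and no implicit lexer rule was generated for it, which is the "missing lexer rules" symptom in the issue title. The relaxed condition records the literal again for combined grammars that define a tokenVocab.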
24 changes: 16 additions & 8 deletions tool/src/main/java/org/antlr/tool/Grammar.java
@@ -2392,26 +2392,23 @@ else if ( token == '\'' ) {
ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
vocabName+CodeGenerator.VOCAB_FILE_EXTENSION,
Utils.integer(lineNum));
while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {}
token = tokenizer.nextToken();
token = recoverToNextLine(tokenizer);
continue;
}
token = tokenizer.nextToken();
if ( token != '=' ) {
ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
vocabName+CodeGenerator.VOCAB_FILE_EXTENSION,
Utils.integer(lineNum));
while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {}
token = tokenizer.nextToken();
token = recoverToNextLine(tokenizer);
continue;
}
token = tokenizer.nextToken(); // skip '='
if ( token != StreamTokenizer.TT_NUMBER ) {
ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
vocabName+CodeGenerator.VOCAB_FILE_EXTENSION,
Utils.integer(lineNum));
while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {}
token = tokenizer.nextToken();
token = recoverToNextLine(tokenizer);
continue;
}
int tokenType = (int)tokenizer.nval;
@@ -2424,11 +2421,13 @@ else if ( token == '\'' ) {
ErrorManager.error(ErrorManager.MSG_TOKENS_FILE_SYNTAX_ERROR,
vocabName+CodeGenerator.VOCAB_FILE_EXTENSION,
Utils.integer(lineNum));
while ( tokenizer.nextToken() != StreamTokenizer.TT_EOL ) {}
token = tokenizer.nextToken();
token = recoverToNextLine(tokenizer);
continue;
}
token = tokenizer.nextToken(); // skip newline
while(token == StreamTokenizer.TT_EOL ){
token = tokenizer.nextToken();
}
}
br.close();
}
@@ -2449,6 +2448,15 @@ else if ( token == '\'' ) {
return composite.maxTokenType;
}

private int recoverToNextLine(StreamTokenizer tokenizer) throws IOException {
int token = tokenizer.nextToken();
while (token != StreamTokenizer.TT_EOL && token != StreamTokenizer.TT_EOF) {
token = tokenizer.nextToken();
}
token = tokenizer.nextToken();
return token;
}

/** Given a token type, get a meaningful name for it such as the ID
* or string literal. If this is a lexer and the ttype is in the
* char vocabulary, compute an ANTLR-valid (possibly escaped) char literal.
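
For context on the Grammar.java change: importTokenVocab() reads a .tokens file as lines of the form TOKENNAME=int or 'literal'=int. The inline recovery loops it used before only stopped at TT_EOL, so a malformed entry at the very end of a vocabulary file (with no trailing newline) could spin forever; the extracted recoverToNextLine() also stops at TT_EOF, and the new TT_EOL-skipping loop tolerates blank lines and comment-only lines. A sketch of a vocabulary file exercising these paths (hypothetical content, modeled on the new tests below; "broken" is an invented entry):

    TokenFromTokenVocab=4
    // a comment-only line

    'token'=4
    broken

Everything up to the last line imports normally; "broken" has no '=' and is reported as MSG_TOKENS_FILE_SYNTAX_ERROR, after which recovery now reaches end of file instead of looping.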
158 changes: 158 additions & 0 deletions tool/src/test/java/org/antlr/test/TestCompositeGrammars.java
@@ -443,6 +443,164 @@ public class TestCompositeGrammars extends BaseTest {

assertEquals("unexpected errors: "+equeue, 0, equeue.errors.size());
}

@Test public void testTokenVocabEmptyNoWarningsLikeNoLexerRuleCorrespondingToToken()
throws Exception {
ErrorQueue equeue = new ErrorQueue();
ErrorManager.setErrorListener(equeue);

mkdir(tmpdir);
writeFile(tmpdir, "Foo.tokens", "");

String grammar = "grammar Foo;\n"
+ "options {tokenVocab=Foo;}\n"
+ "tokens{TokenFromTokenVocab='token';}\n"
+ "a : TokenFromTokenVocab;\n"
+ "WS : (' '|'\\n') {$channel=HIDDEN;} ;\n";

writeFile(tmpdir, "Foo.g", grammar);
Tool antlr = newTool(new String[] {"-lib", tmpdir, tmpdir + "/Foo.g"});
antlr.process();

assertEquals("unexpected errors: "+equeue, 0, equeue.errors.size());
assertEquals("unexpected warnings: "+equeue, 0, equeue.warnings.size());
}

@Test public void testTokenVocabWrongIdentifierAtEndOfFileRecoversCorrectly()
throws Exception {
ErrorQueue equeue = new ErrorQueue();
ErrorManager.setErrorListener(equeue);

mkdir(tmpdir);
writeFile(tmpdir, "Foo.tokens", "#");

String grammar = "grammar Foo;\n"
+ "options {tokenVocab=Foo;}\n"
+ "tokens{TokenFromTokenVocab='token';}\n"
+ "a : TokenFromTokenVocab;\n"
+ "WS : (' '|'\\n') {$channel=HIDDEN;} ;\n";

writeFile(tmpdir, "Foo.g", grammar);
Tool antlr = newTool(new String[] {"-lib", tmpdir, tmpdir + "/Foo.g"});
antlr.process();

//two errors are expected: one in the parser composite and one in the lexer composite
assertEquals("unexpected errors: "+equeue, 2, equeue.errors.size());
assertEquals("unexpected warnings: "+equeue, 0, equeue.warnings.size());
}

@Test public void testTokenVocabMissingEqualAtEndOfFileRecoversCorrectly()
throws Exception {
ErrorQueue equeue = new ErrorQueue();
ErrorManager.setErrorListener(equeue);

mkdir(tmpdir);
writeFile(tmpdir, "Foo.tokens", "tokenFromTokenVocab");

String grammar = "grammar Foo;\n"
+ "options {tokenVocab=Foo;}\n"
+ "tokens{TokenFromTokenVocab='token';}\n"
+ "a : TokenFromTokenVocab;\n"
+ "WS : (' '|'\\n') {$channel=HIDDEN;} ;\n";

writeFile(tmpdir, "Foo.g", grammar);
Tool antlr = newTool(new String[] {"-lib", tmpdir, tmpdir + "/Foo.g"});
antlr.process();

//two errors are expected: one in the parser composite and one in the lexer composite
assertEquals("unexpected errors: "+equeue, 2, equeue.errors.size());
assertEquals("unexpected warnings: "+equeue, 0, equeue.warnings.size());
}

@Test public void testTokenVocabMissingNumberAtEndOfFileRecoversCorrectly()
throws Exception {
ErrorQueue equeue = new ErrorQueue();
ErrorManager.setErrorListener(equeue);

mkdir(tmpdir);
writeFile(tmpdir, "Foo.tokens", "tokenFromTokenVocab=");

String grammar = "grammar Foo;\n"
+ "options {tokenVocab=Foo;}\n"
+ "tokens{TokenFromTokenVocab='token';}\n"
+ "a : TokenFromTokenVocab;\n"
+ "WS : (' '|'\\n') {$channel=HIDDEN;} ;\n";

writeFile(tmpdir, "Foo.g", grammar);
Tool antlr = newTool(new String[] {"-lib", tmpdir, tmpdir + "/Foo.g"});
antlr.process();

//two errors are expected: one in the parser composite and one in the lexer composite
assertEquals("unexpected errors: "+equeue, 2, equeue.errors.size());
assertEquals("unexpected warnings: "+equeue, 0, equeue.warnings.size());
}

@Test public void testTokenVocabCommentsOnOwnLineNoErrorNoWarnings()
throws Exception {
ErrorQueue equeue = new ErrorQueue();
ErrorManager.setErrorListener(equeue);

mkdir(tmpdir);
writeFile(tmpdir, "Foo.tokens", "TokenFromTokenVocab=4\n"
+ "//some comments on a new line\n'token'=4\n");

String grammar = "grammar Foo;\n"
+ "options {tokenVocab=Foo;}\n"
+ "tokens{TokenFromTokenVocab='token';}\n"
+ "a : TokenFromTokenVocab;\n"
+ "WS : (' '|'\\n') {$channel=HIDDEN;} ;\n";

writeFile(tmpdir, "Foo.g", grammar);
Tool antlr = newTool(new String[] {"-lib", tmpdir, tmpdir + "/Foo.g"});
antlr.process();

assertEquals("unexpected errors: "+equeue, 0, equeue.errors.size());
assertEquals("unexpected warnings: "+equeue, 0, equeue.warnings.size());
}

@Test public void testTokenVocabWithEmptyLineNoErrorNoWarnings()
throws Exception {
ErrorQueue equeue = new ErrorQueue();
ErrorManager.setErrorListener(equeue);

mkdir(tmpdir);
writeFile(tmpdir, "Foo.tokens", "TokenFromTokenVocab=4\n\n'token'=4\n");

String grammar = "grammar Foo;\n"
+ "options {tokenVocab=Foo;}\n"
+ "tokens{TokenFromTokenVocab='token';}\n"
+ "a : TokenFromTokenVocab;\n"
+ "WS : (' '|'\\n') {$channel=HIDDEN;} ;\n";

writeFile(tmpdir, "Foo.g", grammar);
Tool antlr = newTool(new String[] {"-lib", tmpdir, tmpdir + "/Foo.g"});
antlr.process();

assertEquals("unexpected errors: "+equeue, 0, equeue.errors.size());
assertEquals("unexpected warnings: "+equeue, 0, equeue.warnings.size());
}

@Test public void testTokenVocabNonReferencedTokensNoWarningsLikeNoLexerRuleCorrespondingToToken()
throws Exception {
ErrorQueue equeue = new ErrorQueue();
ErrorManager.setErrorListener(equeue);

mkdir(tmpdir);
writeFile(tmpdir, "Foo.tokens", "ReservedTokenNotYetUsedInParserRule=4\n'reserved'=4\n");

String grammar = "grammar Foo;\n"
+ "options {tokenVocab=Foo;}\n"
+ "tokens{TokenFromTokenVocab='token';}\n"
+ "a : TokenFromTokenVocab;\n"
+ "WS : (' '|'\\n') {$channel=HIDDEN;} ;\n";

writeFile(tmpdir, "Foo.g", grammar);
Tool antlr = newTool(new String[] {"-lib", tmpdir, tmpdir + "/Foo.g"});
antlr.process();

assertEquals("unexpected errors: "+equeue, 0, equeue.errors.size());
assertEquals("unexpected warnings: "+equeue, 0, equeue.warnings.size());
}

@Test public void testSyntaxErrorsInImportsNotThrownOut() throws Exception {
ErrorQueue equeue = new ErrorQueue();