Skip to content

Commit

Permalink
gccrs: Fix token lexed as a float literal
Browse files Browse the repository at this point in the history
The lexer cannot distinguish between a float literal and a tuple
index in some cases. This means we should fix this while parsing,
depending on the context.

gcc/rust/ChangeLog:

	* expand/rust-macro-invoc-lexer.cc (MacroInvocLexer::split_current_token):
	Add implementation for multiple token split.
	* expand/rust-macro-invoc-lexer.h: Add function prototype.
	* expand/rust-proc-macro-invoc-lexer.cc (ProcMacroInvocLexer::split_current_token):
	Add implementation for 2+ token split for procedural macros.
	* expand/rust-proc-macro-invoc-lexer.h: Add function prototype.
	* lex/rust-lex.cc (Lexer::split_current_token): Add function to split a
	token into multiple other tokens.
	* lex/rust-lex.h: Add function prototype for split_current_token.
	* parse/rust-parse-impl.h (Parser::left_denotation): Handle float tuple
	index identified as a float literal.

Signed-off-by: Pierre-Emmanuel Patry <[email protected]>
  • Loading branch information
P-E-P authored and CohenArthur committed Jan 12, 2024
1 parent 364de75 commit 11d33ad
Show file tree
Hide file tree
Showing 7 changed files with 71 additions and 0 deletions.
16 changes: 16 additions & 0 deletions gcc/rust/expand/rust-macro-invoc-lexer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,22 @@ MacroInvocLexer::split_current_token (TokenId new_left, TokenId new_right)
std::unique_ptr<AST::Token> (new AST::Token (l_tok)));
}

void
MacroInvocLexer::split_current_token (std::vector<TokenPtr> new_tokens)
{
rust_assert (new_tokens.size () > 0);

auto current_pos = token_stream.begin () + offs;

token_stream.erase (current_pos);

for (size_t i = 1; i < new_tokens.size (); i++)
{
token_stream.insert (current_pos + i, std::unique_ptr<AST::Token> (
new AST::Token (new_tokens[i])));
}
}

std::vector<std::unique_ptr<AST::Token>>
MacroInvocLexer::get_token_slice (size_t start_idx, size_t end_idx) const
{
Expand Down
2 changes: 2 additions & 0 deletions gcc/rust/expand/rust-macro-invoc-lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ class MacroInvocLexer : public MacroInvocLexerBase<std::unique_ptr<AST::Token>>
// this will only work with "simple" tokens like punctuation.
void split_current_token (TokenId new_left, TokenId new_right);

void split_current_token (std::vector<TokenPtr> new_tokens);

std::vector<std::unique_ptr<AST::Token>>
get_token_slice (size_t start_idx, size_t end_idx) const;
};
Expand Down
15 changes: 15 additions & 0 deletions gcc/rust/expand/rust-proc-macro-invoc-lexer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,19 @@ ProcMacroInvocLexer::split_current_token (TokenId new_left, TokenId new_right)
token_stream.insert (current_pos, r_tok);
}

void
ProcMacroInvocLexer::split_current_token (std::vector<TokenPtr> new_tokens)
{
rust_assert (new_tokens.size () > 0);

auto current_pos = token_stream.begin () + offs;

token_stream.erase (current_pos);

for (size_t i = 1; i < new_tokens.size (); i++)
{
token_stream.insert (current_pos + i, new_tokens[i]);
}
}

} // namespace Rust
2 changes: 2 additions & 0 deletions gcc/rust/expand/rust-proc-macro-invoc-lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ class ProcMacroInvocLexer : public MacroInvocLexerBase<const_TokenPtr>
// closes (i.e. T<U<X>> where >> is wrongly lexed as one token). Note that
// this will only work with "simple" tokens like punctuation.
void split_current_token (TokenId new_left, TokenId new_right);

void split_current_token (std::vector<TokenPtr> new_tokens);
};
} // namespace Rust

Expand Down
12 changes: 12 additions & 0 deletions gcc/rust/lex/rust-lex.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2529,6 +2529,18 @@ Lexer::split_current_token (TokenId new_left, TokenId new_right)
token_queue.insert (1, std::move (new_right_tok));
}

void
Lexer::split_current_token (std::vector<TokenPtr> new_tokens)
{
rust_assert (new_tokens.size () > 0);
token_queue.replace_current_value (new_tokens[0]);

for (size_t i = 1; i < new_tokens.size (); i++)
{
token_queue.insert (i, new_tokens[i]);
}
}

void
Lexer::start_line (int current_line, int current_column)
{
Expand Down
2 changes: 2 additions & 0 deletions gcc/rust/lex/rust-lex.h
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,8 @@ class Lexer
* this will only work with "simple" tokens like punctuation. */
void split_current_token (TokenId new_left, TokenId new_right);

void split_current_token (std::vector<TokenPtr> new_tokens);

Linemap *get_line_map () { return line_map; }
std::string get_filename () { return std::string (input.get_filename ()); }

Expand Down
22 changes: 22 additions & 0 deletions gcc/rust/parse/rust-parse-impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -12895,6 +12895,28 @@ Parser<ManagedTokenSource>::left_denotation (const_TokenPtr tok,
std::move (outer_attrs),
restrictions);
}
else if (next_tok->get_id () == FLOAT_LITERAL)
{
// Lexer has misidentified a tuple index as a float literal
// eg: `(x, (y, z)).1.0` -> 1.0 has been identified as a float
// literal. This means we should split it into three new separate
// tokens, the first tuple index, the dot and the second tuple
// index.
auto current_loc = next_tok->get_locus ();
auto str = next_tok->get_str ();
auto dot_pos = str.find (".");
auto prefix = str.substr (0, dot_pos);
auto suffix = str.substr (dot_pos + 1);
lexer.split_current_token (
{Token::make_int (current_loc, std::move (prefix),
CORETYPE_PURE_DECIMAL),
Token::make (DOT, current_loc + 1),
Token::make_int (current_loc + 2, std::move (suffix),
CORETYPE_PURE_DECIMAL)});
return parse_tuple_index_expr (tok, std::move (left),
std::move (outer_attrs),
restrictions);
}
else if (next_tok->get_id () == IDENTIFIER
&& lexer.peek_token (1)->get_id () != LEFT_PAREN
&& lexer.peek_token (1)->get_id () != SCOPE_RESOLUTION)
Expand Down

0 comments on commit 11d33ad

Please sign in to comment.