Skip to content

Commit

Permalink
Move to 'parsing' token streams in the lexer
Browse files Browse the repository at this point in the history
  • Loading branch information
jfecher committed Jun 24, 2024
1 parent c75749e commit 8ecae64
Show file tree
Hide file tree
Showing 9 changed files with 122 additions and 75 deletions.
4 changes: 2 additions & 2 deletions compiler/noirc_frontend/src/ast/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ pub enum ExpressionKind {
Tuple(Vec<Expression>),
Lambda(Box<Lambda>),
Parenthesized(Box<Expression>),
Quote(Tokens, Span),
Quote(Tokens),
Unquote(Box<Expression>),
Comptime(BlockExpression, Span),

Expand Down Expand Up @@ -553,7 +553,7 @@ impl Display for ExpressionKind {
Error => write!(f, "Error"),
Resolved(_) => write!(f, "?Resolved"),
Unquote(expr) => write!(f, "$({expr})"),
Quote(tokens, _) => {
Quote(tokens) => {
let tokens = vecmap(&tokens.0, ToString::to_string);
write!(f, "quote {{ {} }}", tokens.join(" "))
}
Expand Down
2 changes: 1 addition & 1 deletion compiler/noirc_frontend/src/elaborator/expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ impl<'context> Elaborator<'context> {
ExpressionKind::Tuple(tuple) => self.elaborate_tuple(tuple),
ExpressionKind::Lambda(lambda) => self.elaborate_lambda(*lambda),
ExpressionKind::Parenthesized(expr) => return self.elaborate_expression(*expr),
ExpressionKind::Quote(quote, _) => self.elaborate_quote(quote),
ExpressionKind::Quote(quote) => self.elaborate_quote(quote),
ExpressionKind::Comptime(comptime, _) => {
return self.elaborate_comptime_block(comptime, expr.span)
}
Expand Down
2 changes: 1 addition & 1 deletion compiler/noirc_frontend/src/hir/resolution/resolver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1634,7 +1634,7 @@ impl<'a> Resolver<'a> {
ExpressionKind::Parenthesized(sub_expr) => return self.resolve_expression(*sub_expr),

// The quoted expression isn't resolved since we don't want errors if variables aren't defined
ExpressionKind::Quote(block, _) => HirExpression::Quote(block),
ExpressionKind::Quote(block) => HirExpression::Quote(block),
ExpressionKind::Comptime(block, _) => {
HirExpression::Comptime(self.resolve_block(block))
}
Expand Down
6 changes: 6 additions & 0 deletions compiler/noirc_frontend/src/lexer/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ pub enum LexerErrorKind {
"'\\{escaped}' is not a valid escape sequence. Use '\\' for a literal backslash character."
)]
InvalidEscape { escaped: char, span: Span },
#[error("Invalid quote delimiter `{delimiter}`, valid delimiters are `{{`, `[`, and `(`")]
InvalidQuoteDelimiter { delimiter: SpannedToken },
}

impl From<LexerErrorKind> for ParserError {
Expand All @@ -47,6 +49,7 @@ impl LexerErrorKind {
LexerErrorKind::UnterminatedBlockComment { span } => *span,
LexerErrorKind::UnterminatedStringLiteral { span } => *span,
LexerErrorKind::InvalidEscape { span, .. } => *span,
LexerErrorKind::InvalidQuoteDelimiter { delimiter } => delimiter.to_span(),
}
}

Expand Down Expand Up @@ -92,6 +95,9 @@ impl LexerErrorKind {
("Unterminated string literal".to_string(), "Unterminated string literal".to_string(), *span),
LexerErrorKind::InvalidEscape { escaped, span } =>
(format!("'\\{escaped}' is not a valid escape sequence. Use '\\' for a literal backslash character."), "Invalid escape sequence".to_string(), *span),
LexerErrorKind::InvalidQuoteDelimiter { delimiter } => {
("Invalid quote delimiter `{delimiter}`".to_string(), "Valid delimiters are `{`, `[`, and `(`".to_string(), delimiter.to_span())
},
}
}
}
Expand Down
86 changes: 83 additions & 3 deletions compiler/noirc_frontend/src/lexer/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ impl<'a> Lexer<'a> {
Some('"') => self.eat_string_literal(),
Some('f') => self.eat_format_string_or_alpha_numeric(),
Some('r') => self.eat_raw_string_or_alpha_numeric(),
Some('q') => self.eat_quote_or_alpha_numeric(),
Some('#') => self.eat_attribute(),
Some(ch) if ch.is_ascii_alphanumeric() || ch == '_' => self.eat_alpha_numeric(ch),
Some(ch) => {
Expand Down Expand Up @@ -310,14 +311,25 @@ impl<'a> Lexer<'a> {
//XXX(low): Can increase performance if we use iterator semantic and utilize some of the methods on String. See below
// https://doc.rust-lang.org/stable/std/primitive.str.html#method.rsplit
/// Lex an identifier or keyword beginning with `initial_char`.
///
/// The start/end positions are captured inside `lex_word`, so there is no
/// need to read `self.position` here first — the previous
/// `let start = self.position;` was a dead leftover from before the
/// `lex_word` extraction and was immediately shadowed by the destructuring.
fn eat_word(&mut self, initial_char: char) -> SpannedTokenResult {
    let (start, word, end) = self.lex_word(initial_char);
    self.lookup_word_token(word, start, end)
}

/// Consume the next word from the input stream.
/// Returns a triple of (start position, word, end position).
fn lex_word(&mut self, initial_char: char) -> (Position, String, Position) {
    let start = self.position;
    // A word character is an underscore, an ASCII letter, or a digit.
    let is_word_char = |ch: char| ch == '_' || ch.is_ascii_alphabetic() || ch.is_numeric();
    let word = self.eat_while(Some(initial_char), is_word_char);
    let end = self.position;
    (start, word, end)
}

let end = self.position;

fn lookup_word_token(
&self,
word: String,
start: Position,
end: Position,
) -> SpannedTokenResult {
// Check if word either an identifier or a keyword
if let Some(keyword_token) = Keyword::lookup_keyword(&word) {
return Ok(keyword_token.into_span(start, end));
Expand Down Expand Up @@ -509,6 +521,44 @@ impl<'a> Lexer<'a> {
}
}

/// Lex either a `quote { ... }` expression (collecting its entire delimited
/// token stream into a single `Token::Quote`), or an ordinary
/// identifier/keyword that merely starts with 'q'.
fn eat_quote_or_alpha_numeric(&mut self) -> SpannedTokenResult {
    let (start, word, end) = self.lex_word('q');
    if word != "quote" {
        // Not a quote expression: fall back to normal identifier/keyword lookup.
        return self.lookup_word_token(word, start, end);
    }

    // `quote` must be followed by one of the accepted opening delimiters.
    let delimiter = self.next_token()?;
    let (start_delim, end_delim) = match delimiter.token() {
        Token::LeftBrace => (Token::LeftBrace, Token::RightBrace),
        Token::LeftBracket => (Token::LeftBracket, Token::RightBracket),
        Token::LeftParen => (Token::LeftParen, Token::RightParen),
        _ => return Err(LexerErrorKind::InvalidQuoteDelimiter { delimiter }),
    };

    let mut tokens = Vec::new();

    // Keep lexing until the opening delimiter is matched. Only the chosen
    // delimiter kind nests; the other bracket kinds pass through unbalanced.
    let mut nested_delimiters = 1;

    while nested_delimiters != 0 {
        let token = self.next_token()?;

        if *token.token() == Token::EOF {
            // Input ended before the quote was closed. Without this check the
            // loop would spin forever, pushing EOF tokens unboundedly, since
            // `next_token` keeps returning EOF at end of input.
            // NOTE(review): reusing InvalidQuoteDelimiter because no dedicated
            // "unterminated quote" error variant exists yet — consider adding one.
            return Err(LexerErrorKind::InvalidQuoteDelimiter { delimiter: token });
        }

        if *token.token() == start_delim {
            nested_delimiters += 1;
        } else if *token.token() == end_delim {
            nested_delimiters -= 1;
        }

        tokens.push(token);
    }

    // The loop above also pushed the closing delimiter; drop it so the stream
    // contains only the tokens strictly between the delimiters. The loop runs
    // at least once (nested_delimiters starts at 1), so `tokens` is non-empty.
    tokens.pop();

    let end = self.position;
    Ok(Token::Quote(Tokens(tokens)).into_span(start, end))
}

fn parse_comment(&mut self, start: u32) -> SpannedTokenResult {
let doc_style = match self.peek_char() {
Some('!') => {
Expand Down Expand Up @@ -604,6 +654,8 @@ impl<'a> Iterator for Lexer<'a> {

#[cfg(test)]
mod tests {
use iter_extended::vecmap;

use super::*;
use crate::token::{FunctionAttribute, SecondaryAttribute, TestScope};

Expand Down Expand Up @@ -1232,4 +1284,32 @@ mod tests {
}
}
}

#[test]
fn test_quote() {
    // Checks that `source` lexes to a single Quote token (plus EOF) whose
    // inner token stream holds exactly `expected_stream_length` tokens.
    let check = |source: &str, expected_stream_length: usize| {
        let mut tokens = vecmap(Lexer::new(source), |result| result.unwrap().into_token());

        // Every input should produce exactly one Quote token followed by EOF.
        assert_eq!(tokens.len(), 2, "Unexpected token count: {tokens:?}");

        tokens.pop(); // discard the trailing EOF
        match tokens.pop().unwrap() {
            Token::Quote(stream) => assert_eq!(stream.0.len(), expected_stream_length),
            other => panic!("test_quote test failure! Expected a single TokenStream token, got {other} for input `{source}`")
        }
    };

    check("quote {}", 0);
    check("quote { a.b }", 3);
    check("quote { ) ( }", 2); // invalid syntax is fine inside a quote
    check("quote { { } }", 2); // nested `{`/`}` don't close the quote while they stay matched
    check("quote { 1 { 2 { 3 { 4 { 5 } 4 4 } 3 3 } 2 2 } 1 1 }", 21);
    check("quote [ } } ]", 2); // `[]` and `()` work as delimiters in addition to `{}`
    check("quote [ } foo[] } ]", 5);
    check("quote ( } () } )", 4);
}
}
21 changes: 16 additions & 5 deletions compiler/noirc_frontend/src/lexer/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ pub enum BorrowedToken<'input> {
Attribute(Attribute),
LineComment(&'input str, Option<DocStyle>),
BlockComment(&'input str, Option<DocStyle>),
Quote(&'input Tokens),
/// <
Less,
/// <=
Expand Down Expand Up @@ -122,6 +123,8 @@ pub enum Token {
Attribute(Attribute),
LineComment(String, Option<DocStyle>),
BlockComment(String, Option<DocStyle>),
// A `quote { ... }` along with the tokens in its token stream.
Quote(Tokens),
/// <
Less,
/// <=
Expand Down Expand Up @@ -219,6 +222,7 @@ pub fn token_to_borrowed_token(token: &Token) -> BorrowedToken<'_> {
Token::Attribute(ref a) => BorrowedToken::Attribute(a.clone()),
Token::LineComment(ref s, _style) => BorrowedToken::LineComment(s, *_style),
Token::BlockComment(ref s, _style) => BorrowedToken::BlockComment(s, *_style),
Token::Quote(stream) => BorrowedToken::Quote(stream),
Token::IntType(ref i) => BorrowedToken::IntType(i.clone()),
Token::Less => BorrowedToken::Less,
Token::LessEqual => BorrowedToken::LessEqual,
Expand Down Expand Up @@ -266,7 +270,7 @@ pub enum DocStyle {
Inner,
}

#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct SpannedToken(Spanned<Token>);

impl PartialEq<SpannedToken> for Token {
Expand Down Expand Up @@ -332,6 +336,13 @@ impl fmt::Display for Token {
Token::Attribute(ref a) => write!(f, "{a}"),
Token::LineComment(ref s, _style) => write!(f, "//{s}"),
Token::BlockComment(ref s, _style) => write!(f, "/*{s}*/"),
Token::Quote(ref stream) => {
write!(f, "quote {{")?;
for token in stream.0.iter() {
write!(f, " {token}")?;
}
write!(f, "}}")
}
Token::IntType(ref i) => write!(f, "{i}"),
Token::Less => write!(f, "<"),
Token::LessEqual => write!(f, "<="),
Expand Down Expand Up @@ -382,6 +393,7 @@ pub enum TokenKind {
Literal,
Keyword,
Attribute,
Quote,
UnquoteMarker,
}

Expand All @@ -393,6 +405,7 @@ impl fmt::Display for TokenKind {
TokenKind::Literal => write!(f, "literal"),
TokenKind::Keyword => write!(f, "keyword"),
TokenKind::Attribute => write!(f, "attribute"),
TokenKind::Quote => write!(f, "quote"),
TokenKind::UnquoteMarker => write!(f, "macro result"),
}
}
Expand All @@ -410,6 +423,7 @@ impl Token {
Token::Keyword(_) => TokenKind::Keyword,
Token::Attribute(_) => TokenKind::Attribute,
Token::UnquoteMarker(_) => TokenKind::UnquoteMarker,
Token::Quote(_) => TokenKind::Quote,
tok => TokenKind::Token(tok.clone()),
}
}
Expand Down Expand Up @@ -874,7 +888,6 @@ pub enum Keyword {
Mod,
Mut,
Pub,
Quote,
Quoted,
Return,
ReturnData,
Expand Down Expand Up @@ -923,7 +936,6 @@ impl fmt::Display for Keyword {
Keyword::Mod => write!(f, "mod"),
Keyword::Mut => write!(f, "mut"),
Keyword::Pub => write!(f, "pub"),
Keyword::Quote => write!(f, "quote"),
Keyword::Quoted => write!(f, "Quoted"),
Keyword::Return => write!(f, "return"),
Keyword::ReturnData => write!(f, "return_data"),
Expand Down Expand Up @@ -975,7 +987,6 @@ impl Keyword {
"mod" => Keyword::Mod,
"mut" => Keyword::Mut,
"pub" => Keyword::Pub,
"quote" => Keyword::Quote,
"Quoted" => Keyword::Quoted,
"return" => Keyword::Return,
"return_data" => Keyword::ReturnData,
Expand All @@ -1002,7 +1013,7 @@ impl Keyword {
}
}

#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Tokens(pub Vec<SpannedToken>);

type TokenMapIter = Map<IntoIter<SpannedToken>, fn(SpannedToken) -> (Token, Span)>;
Expand Down
1 change: 0 additions & 1 deletion compiler/noirc_frontend/src/noir_parser.lalrpop
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ extern {
"mod" => BorrowedToken::Keyword(noir_token::Keyword::Mod),
"mut" => BorrowedToken::Keyword(noir_token::Keyword::Mut),
"pub" => BorrowedToken::Keyword(noir_token::Keyword::Pub),
"quote" => BorrowedToken::Keyword(noir_token::Keyword::Quote),
"return" => BorrowedToken::Keyword(noir_token::Keyword::Return),
"return_data" => BorrowedToken::Keyword(noir_token::Keyword::ReturnData),
"str" => BorrowedToken::Keyword(noir_token::Keyword::String),
Expand Down
73 changes: 12 additions & 61 deletions compiler/noirc_frontend/src/parser/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ use crate::ast::{
};
use crate::lexer::{lexer::from_spanned_token_result, Lexer};
use crate::parser::{force, ignore_then_commit, statement_recovery};
use crate::token::{Keyword, SpannedToken, Token, TokenKind, Tokens};
use crate::token::{Keyword, Token, TokenKind};

use chumsky::prelude::*;
use iter_extended::vecmap;
Expand Down Expand Up @@ -1103,66 +1103,17 @@ where
}

fn quote() -> impl NoirParser<ExpressionKind> {
keyword(Keyword::Quote).ignore_then(spanned(token_stream_block(false, true))).validate(
|(tokens, block_span), span, emit| {
emit(ParserError::with_reason(
ParserErrorReason::ExperimentalFeature("quoted expressions"),
span,
));
ExpressionKind::Quote(Tokens(tokens), block_span)
},
)
}

/// Parses a stream of tokens terminated by '{' or '}'.
/// - parse_braces: if true, parses '{' and '}' surrounding the token stream.
/// - include_braces: if true, include the surrounding braces in the returned tokens vec
fn token_stream_block(
include_braces: bool,
parse_braces: bool,
) -> impl NoirParser<Vec<SpannedToken>> {
let append_vecs = |(mut vec1, mut vec2): (Vec<_>, _)| {
vec1.append(&mut vec2);
vec1
};

// Parse a stream of tokens ending in '{' or '}'.
// - If we ended with a '}': end
// - If we ended with a '{': recursively parse another token stream block
let inner_stream = spanned(none_of([Token::LeftBrace, Token::RightBrace]))
.map(|(token, span)| SpannedToken::new(token, span))
.repeated()
.then(one_of([Token::LeftBrace, Token::RightBrace]).rewind().then_with(move |end| {
match end {
Token::LeftBrace => token_stream_block(true, true)
.then(token_stream_block(false, false))
.map(append_vecs)
.boxed(),
_ => empty().map(|_| Vec::new()).boxed(),
}
}))
.map(append_vecs);

if parse_braces {
spanned(just(Token::LeftBrace))
.then(inner_stream)
.then(spanned(just(Token::RightBrace)))
.map(move |((left_brace, mut stream), right_brace)| {
let mut ret = if include_braces {
vec![SpannedToken::new(Token::LeftBrace, left_brace.1)]
} else {
vec![]
};
ret.append(&mut stream);
if include_braces {
ret.push(SpannedToken::new(Token::RightBrace, right_brace.1));
}
ret
})
.boxed()
} else {
inner_stream.boxed()
}
token_kind(TokenKind::Quote).validate(|token, span, emit| {
let tokens = match token {
Token::Quote(tokens) => tokens,
_ => unreachable!("token_kind(Quote) should guarantee parsing only a quote token"),
};
emit(ParserError::with_reason(
ParserErrorReason::ExperimentalFeature("quoted expressions"),
span,
));
ExpressionKind::Quote(tokens)
})
}

/// unquote: '$' variable
Expand Down
2 changes: 1 addition & 1 deletion tooling/nargo_fmt/src/rewrite/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ pub(crate) fn rewrite(
format!("{path_string}{turbofish}")
}
ExpressionKind::Lambda(_) => visitor.slice(span).to_string(),
ExpressionKind::Quote(_, block_span) => format!("quote {}", visitor.slice(block_span)),
ExpressionKind::Quote(_) => visitor.slice(span).to_string(),
ExpressionKind::Comptime(block, block_span) => {
format!("comptime {}", rewrite_block(visitor, block, block_span))
}
Expand Down

0 comments on commit 8ecae64

Please sign in to comment.