Skip to content

Commit

Permalink
refactor(parser): implement NthToken for T (biomejs#2727)
Browse files Browse the repository at this point in the history
  • Loading branch information
denbezrukov authored May 7, 2024
1 parent c269016 commit 01c440d
Show file tree
Hide file tree
Showing 24 changed files with 305 additions and 506 deletions.
1 change: 1 addition & 0 deletions crates/biome_css_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use biome_parser::diagnostic::ParseDiagnostic;
use biome_parser::lexer::{
LexContext, Lexer, LexerCheckpoint, LexerWithCheckpoint, ReLexer, TokenFlags,
};
use biome_rowan::SyntaxKind;
use biome_unicode_table::{
is_css_id_continue, is_css_id_start, lookup_byte, Dispatch, Dispatch::*,
};
Expand Down
123 changes: 9 additions & 114 deletions crates/biome_css_parser/src/token_source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,48 +3,27 @@ use crate::CssParserOptions;
use biome_css_syntax::CssSyntaxKind::EOF;
use biome_css_syntax::{CssSyntaxKind, TextRange};
use biome_parser::diagnostic::ParseDiagnostic;
use biome_parser::lexer::{BufferedLexer, LexContext};
use biome_parser::prelude::{BumpWithContext, NthToken, TokenSource};
use biome_parser::token_source::{TokenSourceCheckpoint, Trivia};
use biome_parser::lexer::BufferedLexer;
use biome_parser::prelude::{BumpWithContext, TokenSource};
use biome_parser::token_source::{TokenSourceCheckpoint, TokenSourceWithBufferedLexer, Trivia};
use biome_rowan::TriviaPieceKind;
use std::collections::VecDeque;

pub(crate) struct CssTokenSource<'src> {
lexer: BufferedLexer<'src, CssLexer<'src>>,
lexer: BufferedLexer<CssSyntaxKind, CssLexer<'src>>,

/// List of the skipped trivia. Needed to construct the CST and compute the non-trivia token offsets.
pub(super) trivia_list: Vec<Trivia>,
/// Cache for the non-trivia token lookahead. For example for the source `.class {};` if the
/// [TokenSource]'s currently positioned at the start of the file (`.`). The `nth(2)` non-trivia token,
/// as returned by the [TokenSource], is the `{` token but retrieving it requires skipping over the
/// one whitespace trivia token (between `class` and `{`).
/// The [TokenSource] state then is:
///
/// * `non_trivia_lookahead`: [IDENT: 'class', L_CURLY]
/// * `lookahead_offset`: 3 (the `{` is the 3rd token after the `.` token)
non_trivia_lookahead: VecDeque<Lookahead>,

/// Offset of the last cached lookahead token from the current [BufferedLexer] token.
lookahead_offset: usize,
}

/// A cached non-trivia lookahead token: the token's kind plus whether a
/// line break precedes it. Stored in [CssTokenSource::non_trivia_lookahead]
/// so repeated `nth` queries don't re-scan the trivia between tokens.
#[derive(Debug, Copy, Clone)]
struct Lookahead {
/// Syntax kind of the cached non-trivia token.
kind: CssSyntaxKind,
/// True if a newline occurs before this token (taken from the lexer's
/// preceding-line-break flag when the cache entry is built).
after_newline: bool,
}

/// A saved position in the CSS token source that the parser can rewind to;
/// alias of the generic [TokenSourceCheckpoint] specialized to [CssSyntaxKind].
#[allow(dead_code)]
pub(crate) type CssTokenSourceCheckpoint = TokenSourceCheckpoint<CssSyntaxKind>;

impl<'src> CssTokenSource<'src> {
/// Creates a new token source.
pub(crate) fn new(lexer: BufferedLexer<'src, CssLexer<'src>>) -> CssTokenSource<'src> {
pub(crate) fn new(lexer: BufferedLexer<CssSyntaxKind, CssLexer<'src>>) -> CssTokenSource<'src> {
CssTokenSource {
lexer,
trivia_list: vec![],
lookahead_offset: 0,
non_trivia_lookahead: VecDeque::new(),
}
}

Expand All @@ -60,15 +39,10 @@ impl<'src> CssTokenSource<'src> {
}

fn next_non_trivia_token(&mut self, context: CssLexContext, first_token: bool) {
let mut processed_tokens = 0;
let mut trailing = !first_token;

// Drop the last cached lookahead, we're now moving past it
self.non_trivia_lookahead.pop_front();

loop {
let kind = self.lexer.next_token(context);
processed_tokens += 1;

let trivia_kind = TriviaPieceKind::try_from(kind);

Expand All @@ -87,60 +61,10 @@ impl<'src> CssTokenSource<'src> {
}
}
}

if self.lookahead_offset != 0 {
debug_assert!(self.lookahead_offset >= processed_tokens);
self.lookahead_offset -= processed_tokens;
}
}

/// Re-lexes the current token under the given re-lex context and returns the
/// (possibly new) kind of the current token.
///
/// The cached non-trivia lookahead describes tokens positioned relative to
/// the current one, so it only becomes stale when re-lexing actually changed
/// the current token's kind; in that case the cache is dropped.
pub fn re_lex(&mut self, mode: CssReLexContext) -> CssSyntaxKind {
    let previous = self.current();
    let relexed = self.lexer.re_lex(mode);

    if relexed != previous {
        // Token changed under our feet: every cached lookahead entry and
        // the recorded offset are now meaningless.
        self.lookahead_offset = 0;
        self.non_trivia_lookahead.clear();
    }

    relexed
}

#[inline(always)]
fn lookahead(&mut self, n: usize) -> Option<Lookahead> {
assert_ne!(n, 0);

// Return the cached token if any
if let Some(lookahead) = self.non_trivia_lookahead.get(n - 1) {
return Some(*lookahead);
}

// Jump right to where we've left of last time rather than going through all tokens again.
let iter = self.lexer.lookahead().skip(self.lookahead_offset);
let mut remaining = n - self.non_trivia_lookahead.len();

for item in iter {
self.lookahead_offset += 1;

if !item.kind().is_trivia() {
remaining -= 1;

let lookahead = Lookahead {
after_newline: item.has_preceding_line_break(),
kind: item.kind(),
};

self.non_trivia_lookahead.push_back(lookahead);

if remaining == 0 {
return Some(lookahead);
}
}
}

None
self.lexer.re_lex(mode)
}

/// Creates a checkpoint to which it can later return using [Self::rewind].
Expand All @@ -157,8 +81,6 @@ impl<'src> CssTokenSource<'src> {
assert!(self.trivia_list.len() >= checkpoint.trivia_len as usize);
self.trivia_list.truncate(checkpoint.trivia_len as usize);
self.lexer.rewind(checkpoint.lexer_checkpoint);
self.non_trivia_lookahead.clear();
self.lookahead_offset = 0;
}
}

Expand Down Expand Up @@ -199,22 +121,12 @@ impl<'source> BumpWithContext for CssTokenSource<'source> {

fn bump_with_context(&mut self, context: Self::Context) {
if self.current() != EOF {
if !context.is_regular() {
self.lookahead_offset = 0;
self.non_trivia_lookahead.clear();
}

self.next_non_trivia_token(context, false);
}
}

fn skip_as_trivia_with_context(&mut self, context: Self::Context) {
if self.current() != EOF {
if !context.is_regular() {
self.lookahead_offset = 0;
self.non_trivia_lookahead.clear();
}

self.trivia_list.push(Trivia::new(
TriviaPieceKind::Skipped,
self.current_range(),
Expand All @@ -226,25 +138,8 @@ impl<'source> BumpWithContext for CssTokenSource<'source> {
}
}

impl<'source> NthToken for CssTokenSource<'source> {
/// Gets the kind of the nth non-trivia token
#[inline(always)]
fn nth(&mut self, n: usize) -> CssSyntaxKind {
if n == 0 {
self.current()
} else {
self.lookahead(n).map_or(EOF, |lookahead| lookahead.kind)
}
}

/// Returns true if the nth non-trivia token is preceded by a line break
#[inline(always)]
fn has_nth_preceding_line_break(&mut self, n: usize) -> bool {
if n == 0 {
self.has_preceding_line_break()
} else {
self.lookahead(n)
.map_or(false, |lookahead| lookahead.after_newline)
}
impl<'source> TokenSourceWithBufferedLexer<CssLexer<'source>> for CssTokenSource<'source> {
/// Exposes the underlying [BufferedLexer] so shared parser infrastructure
/// can drive lexing/lookahead directly (presumably the generic `NthToken`
/// machinery this commit introduces — confirm in `biome_parser`).
fn lexer(&mut self) -> &mut BufferedLexer<CssSyntaxKind, CssLexer<'source>> {
&mut self.lexer
}
}
6 changes: 4 additions & 2 deletions crates/biome_css_parser/tests/spec_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,10 @@ pub fn run(test_case: &str, _snapshot_name: &str, test_directory: &str, outcome_
#[test]
pub fn quick_test() {
let code = r#"
@color-profile DEVICE-CMYK
@color-profile
div {
background: src(var(--foo));
}
"#;

Expand Down
22 changes: 11 additions & 11 deletions crates/biome_css_syntax/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ pub use file_source::CssFileSource;
pub use syntax_node::*;

use crate::CssSyntaxKind::*;
use biome_rowan::{AstNode, RawSyntaxKind};
use biome_rowan::{AstNode, RawSyntaxKind, SyntaxKind};

impl From<u16> for CssSyntaxKind {
fn from(d: u16) -> CssSyntaxKind {
Expand All @@ -28,16 +28,6 @@ impl From<CssSyntaxKind> for u16 {
}

impl CssSyntaxKind {
pub fn is_trivia(self) -> bool {
matches!(
self,
CssSyntaxKind::NEWLINE
| CssSyntaxKind::WHITESPACE
| CssSyntaxKind::COMMENT
| CssSyntaxKind::MULTILINE_COMMENT
)
}

/// Returns `true` for any contextual or non-contextual keyword
#[inline]
pub const fn is_keyword(self) -> bool {
Expand Down Expand Up @@ -148,6 +138,16 @@ impl biome_rowan::SyntaxKind for CssSyntaxKind {
CssSyntaxKind::is_list(*self)
}

fn is_trivia(self) -> bool {
matches!(
self,
CssSyntaxKind::NEWLINE
| CssSyntaxKind::WHITESPACE
| CssSyntaxKind::COMMENT
| CssSyntaxKind::MULTILINE_COMMENT
)
}

fn to_string(&self) -> Option<&'static str> {
CssSyntaxKind::to_string(self)
}
Expand Down
1 change: 1 addition & 0 deletions crates/biome_graphql_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ mod tests;
use biome_graphql_syntax::{GraphqlSyntaxKind, GraphqlSyntaxKind::*, TextLen, TextSize, T};
use biome_parser::diagnostic::ParseDiagnostic;
use biome_parser::lexer::{Lexer, LexerCheckpoint, LexerWithCheckpoint, TokenFlags};
use biome_rowan::SyntaxKind;
use std::ops::Add;

#[derive(Debug)]
Expand Down
Loading

0 comments on commit 01c440d

Please sign in to comment.