Skip to content

Commit

Permalink
refactor(parser): implement NthToken for T (biomejs#2727)
Browse files Browse the repository at this point in the history
  • Loading branch information
denbezrukov authored May 7, 2024
1 parent c269016 commit 01c440d
Show file tree
Hide file tree
Showing 24 changed files with 305 additions and 506 deletions.
1 change: 1 addition & 0 deletions crates/biome_css_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use biome_parser::diagnostic::ParseDiagnostic;
use biome_parser::lexer::{
LexContext, Lexer, LexerCheckpoint, LexerWithCheckpoint, ReLexer, TokenFlags,
};
use biome_rowan::SyntaxKind;
use biome_unicode_table::{
is_css_id_continue, is_css_id_start, lookup_byte, Dispatch, Dispatch::*,
};
Expand Down
123 changes: 9 additions & 114 deletions crates/biome_css_parser/src/token_source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,48 +3,27 @@ use crate::CssParserOptions;
use biome_css_syntax::CssSyntaxKind::EOF;
use biome_css_syntax::{CssSyntaxKind, TextRange};
use biome_parser::diagnostic::ParseDiagnostic;
use biome_parser::lexer::{BufferedLexer, LexContext};
use biome_parser::prelude::{BumpWithContext, NthToken, TokenSource};
use biome_parser::token_source::{TokenSourceCheckpoint, Trivia};
use biome_parser::lexer::BufferedLexer;
use biome_parser::prelude::{BumpWithContext, TokenSource};
use biome_parser::token_source::{TokenSourceCheckpoint, TokenSourceWithBufferedLexer, Trivia};
use biome_rowan::TriviaPieceKind;
use std::collections::VecDeque;

pub(crate) struct CssTokenSource<'src> {
lexer: BufferedLexer<'src, CssLexer<'src>>,
lexer: BufferedLexer<CssSyntaxKind, CssLexer<'src>>,

/// List of the skipped trivia. Needed to construct the CST and compute the non-trivia token offsets.
pub(super) trivia_list: Vec<Trivia>,
/// Cache for the non-trivia token lookahead. For example for the source `.class {};` if the
/// [TokenSource]'s currently positioned at the start of the file (`.`). The `nth(2)` non-trivia token,
/// as returned by the [TokenSource], is the `{` token but retrieving it requires skipping over the
/// one whitespace trivia token (between `class` and `{`).
/// The [TokenSource] state then is:
///
/// * `non_trivia_lookahead`: [IDENT: 'class', L_CURLY]
/// * `lookahead_offset`: 3 (the `{` is the 3rd token after the `.` token)
non_trivia_lookahead: VecDeque<Lookahead>,

/// Offset of the last cached lookahead token from the current [BufferedLexer] token.
lookahead_offset: usize,
}

/// A cached non-trivia lookahead token: the token's kind plus whether a
/// line break precedes it. Stored in [CssTokenSource::non_trivia_lookahead]
/// so repeated `nth` queries don't re-scan the trivia between tokens.
#[derive(Debug, Copy, Clone)]
struct Lookahead {
/// Syntax kind of the cached non-trivia token.
kind: CssSyntaxKind,
/// True if a newline occurs before this token (taken from the lexer's
/// preceding-line-break flag when the cache entry is built).
after_newline: bool,
}

/// A saved position in the CSS token source that the parser can rewind to;
/// alias of the generic [TokenSourceCheckpoint] specialized to [CssSyntaxKind].
#[allow(dead_code)]
pub(crate) type CssTokenSourceCheckpoint = TokenSourceCheckpoint<CssSyntaxKind>;

impl<'src> CssTokenSource<'src> {
/// Creates a new token source.
pub(crate) fn new(lexer: BufferedLexer<'src, CssLexer<'src>>) -> CssTokenSource<'src> {
pub(crate) fn new(lexer: BufferedLexer<CssSyntaxKind, CssLexer<'src>>) -> CssTokenSource<'src> {
CssTokenSource {
lexer,
trivia_list: vec![],
lookahead_offset: 0,
non_trivia_lookahead: VecDeque::new(),
}
}

Expand All @@ -60,15 +39,10 @@ impl<'src> CssTokenSource<'src> {
}

fn next_non_trivia_token(&mut self, context: CssLexContext, first_token: bool) {
let mut processed_tokens = 0;
let mut trailing = !first_token;

// Drop the last cached lookahead, we're now moving past it
self.non_trivia_lookahead.pop_front();

loop {
let kind = self.lexer.next_token(context);
processed_tokens += 1;

let trivia_kind = TriviaPieceKind::try_from(kind);

Expand All @@ -87,60 +61,10 @@ impl<'src> CssTokenSource<'src> {
}
}
}

if self.lookahead_offset != 0 {
debug_assert!(self.lookahead_offset >= processed_tokens);
self.lookahead_offset -= processed_tokens;
}
}

/// Re-lexes the current token under the given re-lex context and returns the
/// (possibly new) kind of the current token.
///
/// The cached non-trivia lookahead describes tokens positioned relative to
/// the current one, so it only becomes stale when re-lexing actually changed
/// the current token's kind; in that case the cache is dropped.
pub fn re_lex(&mut self, mode: CssReLexContext) -> CssSyntaxKind {
    let previous = self.current();
    let relexed = self.lexer.re_lex(mode);

    if relexed != previous {
        // Token changed under our feet: every cached lookahead entry and
        // the recorded offset are now meaningless.
        self.lookahead_offset = 0;
        self.non_trivia_lookahead.clear();
    }

    relexed
}

#[inline(always)]
fn lookahead(&mut self, n: usize) -> Option<Lookahead> {
assert_ne!(n, 0);

// Return the cached token if any
if let Some(lookahead) = self.non_trivia_lookahead.get(n - 1) {
return Some(*lookahead);
}

// Jump right to where we've left of last time rather than going through all tokens again.
let iter = self.lexer.lookahead().skip(self.lookahead_offset);
let mut remaining = n - self.non_trivia_lookahead.len();

for item in iter {
self.lookahead_offset += 1;

if !item.kind().is_trivia() {
remaining -= 1;

let lookahead = Lookahead {
after_newline: item.has_preceding_line_break(),
kind: item.kind(),
};

self.non_trivia_lookahead.push_back(lookahead);

if remaining == 0 {
return Some(lookahead);
}
}
}

None
self.lexer.re_lex(mode)
}

/// Creates a checkpoint to which it can later return using [Self::rewind].
Expand All @@ -157,8 +81,6 @@ impl<'src> CssTokenSource<'src> {
assert!(self.trivia_list.len() >= checkpoint.trivia_len as usize);
self.trivia_list.truncate(checkpoint.trivia_len as usize);
self.lexer.rewind(checkpoint.lexer_checkpoint);
self.non_trivia_lookahead.clear();
self.lookahead_offset = 0;
}
}

Expand Down Expand Up @@ -199,22 +121,12 @@ impl<'source> BumpWithContext for CssTokenSource<'source> {

fn bump_with_context(&mut self, context: Self::Context) {
if self.current() != EOF {
if !context.is_regular() {
self.lookahead_offset = 0;
self.non_trivia_lookahead.clear();
}

self.next_non_trivia_token(context, false);
}
}

fn skip_as_trivia_with_context(&mut self, context: Self::Context) {
if self.current() != EOF {
if !context.is_regular() {
self.lookahead_offset = 0;
self.non_trivia_lookahead.clear();
}

self.trivia_list.push(Trivia::new(
TriviaPieceKind::Skipped,
self.current_range(),
Expand All @@ -226,25 +138,8 @@ impl<'source> BumpWithContext for CssTokenSource<'source> {
}
}

impl<'source> NthToken for CssTokenSource<'source> {
/// Gets the kind of the nth non-trivia token
#[inline(always)]
fn nth(&mut self, n: usize) -> CssSyntaxKind {
if n == 0 {
self.current()
} else {
self.lookahead(n).map_or(EOF, |lookahead| lookahead.kind)
}
}

/// Returns true if the nth non-trivia token is preceded by a line break
#[inline(always)]
fn has_nth_preceding_line_break(&mut self, n: usize) -> bool {
if n == 0 {
self.has_preceding_line_break()
} else {
self.lookahead(n)
.map_or(false, |lookahead| lookahead.after_newline)
}
impl<'source> TokenSourceWithBufferedLexer<CssLexer<'source>> for CssTokenSource<'source> {
/// Exposes the underlying [BufferedLexer] so shared parser infrastructure
/// can drive lexing/lookahead directly (presumably the generic `NthToken`
/// machinery this commit introduces — confirm in `biome_parser`).
fn lexer(&mut self) -> &mut BufferedLexer<CssSyntaxKind, CssLexer<'source>> {
&mut self.lexer
}
}
6 changes: 4 additions & 2 deletions crates/biome_css_parser/tests/spec_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,10 @@ pub fn run(test_case: &str, _snapshot_name: &str, test_directory: &str, outcome_
#[test]
pub fn quick_test() {
let code = r#"
@color-profile DEVICE-CMYK
@color-profile
div {
background: src(var(--foo));
}
"#;

Expand Down
22 changes: 11 additions & 11 deletions crates/biome_css_syntax/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ pub use file_source::CssFileSource;
pub use syntax_node::*;

use crate::CssSyntaxKind::*;
use biome_rowan::{AstNode, RawSyntaxKind};
use biome_rowan::{AstNode, RawSyntaxKind, SyntaxKind};

impl From<u16> for CssSyntaxKind {
fn from(d: u16) -> CssSyntaxKind {
Expand All @@ -28,16 +28,6 @@ impl From<CssSyntaxKind> for u16 {
}

impl CssSyntaxKind {
pub fn is_trivia(self) -> bool {
matches!(
self,
CssSyntaxKind::NEWLINE
| CssSyntaxKind::WHITESPACE
| CssSyntaxKind::COMMENT
| CssSyntaxKind::MULTILINE_COMMENT
)
}

/// Returns `true` for any contextual or non-contextual keyword
#[inline]
pub const fn is_keyword(self) -> bool {
Expand Down Expand Up @@ -148,6 +138,16 @@ impl biome_rowan::SyntaxKind for CssSyntaxKind {
CssSyntaxKind::is_list(*self)
}

fn is_trivia(self) -> bool {
matches!(
self,
CssSyntaxKind::NEWLINE
| CssSyntaxKind::WHITESPACE
| CssSyntaxKind::COMMENT
| CssSyntaxKind::MULTILINE_COMMENT
)
}

fn to_string(&self) -> Option<&'static str> {
CssSyntaxKind::to_string(self)
}
Expand Down
1 change: 1 addition & 0 deletions crates/biome_graphql_parser/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ mod tests;
use biome_graphql_syntax::{GraphqlSyntaxKind, GraphqlSyntaxKind::*, TextLen, TextSize, T};
use biome_parser::diagnostic::ParseDiagnostic;
use biome_parser::lexer::{Lexer, LexerCheckpoint, LexerWithCheckpoint, TokenFlags};
use biome_rowan::SyntaxKind;
use std::ops::Add;

#[derive(Debug)]
Expand Down
Loading

0 comments on commit 01c440d

Please sign in to comment.