From 1c18408e5223f34857c514c78a3885e102ea39b1 Mon Sep 17 00:00:00 2001
From: Dhruv Manilawala
Date: Fri, 10 May 2024 16:24:44 +0530
Subject: [PATCH] Add `Tokens` newtype wrapper

---
 crates/ruff_linter/src/importer/insertion.rs |  5 ++-
 crates/ruff_linter/src/linter.rs             | 24 +++++++++----
 crates/ruff_linter/src/rules/pyflakes/mod.rs |  3 +-
 crates/ruff_linter/src/test.rs               |  6 ++--
 crates/ruff_python_parser/src/lib.rs         | 36 +++++++++++++++++--
 crates/ruff_python_parser/src/token.rs       |  5 +++
 .../tests/block_comments.rs                  |  3 +-
 crates/ruff_server/src/lint.rs               |  3 +-
 crates/ruff_wasm/src/lib.rs                  |  3 +-
 9 files changed, 64 insertions(+), 24 deletions(-)

diff --git a/crates/ruff_linter/src/importer/insertion.rs b/crates/ruff_linter/src/importer/insertion.rs
index 5cd6ae200c21c..274147a756e84 100644
--- a/crates/ruff_linter/src/importer/insertion.rs
+++ b/crates/ruff_linter/src/importer/insertion.rs
@@ -321,7 +321,6 @@ mod tests {
     use ruff_python_ast::PySourceType;
     use ruff_python_codegen::Stylist;
-    use ruff_python_parser::lexer::LexResult;
     use ruff_python_parser::{parse_suite, Mode};
     use ruff_source_file::{LineEnding, Locator};
     use ruff_text_size::TextSize;
@@ -332,7 +331,7 @@
     fn start_of_file() -> Result<()> {
         fn insert(contents: &str) -> Result<Insertion> {
             let program = parse_suite(contents)?;
-            let tokens: Vec<LexResult> = ruff_python_parser::tokenize(contents, Mode::Module);
+            let tokens = ruff_python_parser::tokenize(contents, Mode::Module);
             let locator = Locator::new(contents);
             let stylist = Stylist::from_tokens(&tokens, &locator);
             Ok(Insertion::start_of_file(&program, &locator, &stylist))
@@ -443,7 +442,7 @@ x = 1
     #[test]
     fn start_of_block() {
         fn insert(contents: &str, offset: TextSize) -> Insertion {
-            let tokens: Vec<LexResult> = ruff_python_parser::tokenize(contents, Mode::Module);
+            let tokens = ruff_python_parser::tokenize(contents, Mode::Module);
             let locator = Locator::new(contents);
             let stylist = Stylist::from_tokens(&tokens, &locator);
             Insertion::start_of_block(offset, &locator, &stylist, PySourceType::default())
diff --git a/crates/ruff_linter/src/linter.rs b/crates/ruff_linter/src/linter.rs
index cb3ac3318b784..ab94efe3fd09a 100644
--- a/crates/ruff_linter/src/linter.rs
+++ b/crates/ruff_linter/src/linter.rs
@@ -4,7 +4,7 @@ use std::path::Path;
 
 use anyhow::{anyhow, Result};
 use colored::Colorize;
-use itertools::Itertools;
+use itertools::{Either, Itertools};
 use log::error;
 use rustc_hash::FxHashMap;
 
@@ -14,7 +14,7 @@
 use ruff_python_ast::{PySourceType, Suite};
 use ruff_python_codegen::Stylist;
 use ruff_python_index::Indexer;
 use ruff_python_parser::lexer::LexResult;
-use ruff_python_parser::{AsMode, ParseError};
+use ruff_python_parser::{AsMode, ParseError, TokenKind, Tokens};
 use ruff_source_file::{Locator, SourceFileBuilder};
 use ruff_text_size::Ranged;
@@ -353,7 +353,7 @@ pub fn add_noqa_to_path(
     let contents = source_kind.source_code();
 
     // Tokenize once.
-    let tokens: Vec<LexResult> = ruff_python_parser::tokenize(contents, source_type.as_mode());
+    let tokens = ruff_python_parser::tokenize(contents, source_type.as_mode());
 
     // Map row and column locations to byte slices (lazily).
     let locator = Locator::new(contents);
@@ -518,8 +518,7 @@ pub fn lint_fix<'a>(
     // Continuously fix until the source code stabilizes.
     loop {
         // Tokenize once.
-        let tokens: Vec<LexResult> =
-            ruff_python_parser::tokenize(transformed.source_code(), source_type.as_mode());
+        let tokens = ruff_python_parser::tokenize(transformed.source_code(), source_type.as_mode());
 
         // Map row and column locations to byte slices (lazily).
         let locator = Locator::new(transformed.source_code());
@@ -715,7 +714,7 @@ impl<'a> ParseSource<'a> {
 #[derive(Debug, Clone)]
 pub enum TokenSource<'a> {
     /// Use the precomputed tokens to generate the AST.
-    Tokens(Vec<LexResult>),
+    Tokens(Tokens),
     /// Use the precomputed tokens and AST.
     Precomputed {
         tokens: &'a [LexResult],
@@ -723,6 +722,17 @@ pub enum TokenSource<'a> {
     },
 }
 
+impl TokenSource<'_> {
+    pub fn kinds(&self) -> impl Iterator<Item = TokenKind> + '_ {
+        match self {
+            TokenSource::Tokens(tokens) => Either::Left(tokens.kinds()),
+            TokenSource::Precomputed { tokens, .. } => {
+                Either::Right(tokens.iter().flatten().map(|(tok, _)| tok.kind()))
+            }
+        }
+    }
+}
+
 impl Deref for TokenSource<'_> {
     type Target = [LexResult];
 
@@ -743,7 +753,7 @@
     ) -> Result<AstSource<'a>, ParseError> {
         match self {
             Self::Tokens(tokens) => Ok(AstSource::Ast(ruff_python_parser::parse_program_tokens(
-                tokens,
+                tokens.into_inner(),
                 source_kind.source_code(),
                 source_type.is_ipynb(),
             )?)),
diff --git a/crates/ruff_linter/src/rules/pyflakes/mod.rs b/crates/ruff_linter/src/rules/pyflakes/mod.rs
index 8ef30efbdc264..b4f361809878b 100644
--- a/crates/ruff_linter/src/rules/pyflakes/mod.rs
+++ b/crates/ruff_linter/src/rules/pyflakes/mod.rs
@@ -11,7 +11,6 @@ mod tests {
 
     use anyhow::Result;
     use regex::Regex;
-    use ruff_python_parser::lexer::LexResult;
     use test_case::test_case;
 
@@ -591,7 +590,7 @@ mod tests {
         let source_type = PySourceType::default();
         let source_kind = SourceKind::Python(contents.to_string());
         let settings = LinterSettings::for_rules(Linter::Pyflakes.rules());
-        let tokens: Vec<LexResult> = ruff_python_parser::tokenize(&contents, source_type.as_mode());
+        let tokens = ruff_python_parser::tokenize(&contents, source_type.as_mode());
         let locator = Locator::new(&contents);
         let stylist = Stylist::from_tokens(&tokens, &locator);
         let indexer = Indexer::from_tokens(&tokens, &locator);
diff --git a/crates/ruff_linter/src/test.rs b/crates/ruff_linter/src/test.rs
index d83c5f54030f7..63a69f3857a90 100644
--- a/crates/ruff_linter/src/test.rs
+++ b/crates/ruff_linter/src/test.rs
@@ -16,7 +16,6 @@ use ruff_notebook::NotebookError;
 use ruff_python_ast::PySourceType;
 use ruff_python_codegen::Stylist;
 use ruff_python_index::Indexer;
-use ruff_python_parser::lexer::LexResult;
 use ruff_python_parser::AsMode;
 use ruff_python_trivia::textwrap::dedent;
 use ruff_source_file::{Locator, SourceFileBuilder};
@@ -111,8 +110,7 @@ pub(crate) fn test_contents<'a>(
     settings: &LinterSettings,
 ) -> (Vec<Message>, Cow<'a, SourceKind>) {
     let source_type = PySourceType::from(path);
-    let tokens: Vec<LexResult> =
-        ruff_python_parser::tokenize(source_kind.source_code(), source_type.as_mode());
+    let tokens = ruff_python_parser::tokenize(source_kind.source_code(), source_type.as_mode());
     let locator = Locator::new(source_kind.source_code());
     let stylist = Stylist::from_tokens(&tokens, &locator);
     let indexer = Indexer::from_tokens(&tokens, &locator);
@@ -177,7 +175,7 @@ pub(crate) fn test_contents<'a>(
         transformed = Cow::Owned(transformed.updated(fixed_contents, &source_map));
 
-        let tokens: Vec<LexResult> =
+        let tokens =
             ruff_python_parser::tokenize(transformed.source_code(), source_type.as_mode());
         let locator = Locator::new(transformed.source_code());
         let stylist = Stylist::from_tokens(&tokens, &locator);
diff --git a/crates/ruff_python_parser/src/lib.rs b/crates/ruff_python_parser/src/lib.rs
index ee7a7399fdd67..75a11b6b30e7d 100644
--- a/crates/ruff_python_parser/src/lib.rs
+++ b/crates/ruff_python_parser/src/lib.rs
@@ -110,6 +110,8 @@
 //! [parsing]: https://en.wikipedia.org/wiki/Parsing
 //! [lexer]: crate::lexer
 
+use std::ops::Deref;
+
 use crate::lexer::{lex, lex_starts_at, LexResult};
 
 pub use crate::error::{FStringErrorType, ParseError, ParseErrorType};
@@ -339,8 +341,38 @@ pub fn parse_tokens(tokens: Vec<LexResult>, source: &str, mode: Mode) -> Result<
     }
 }
 
+/// Tokens represents a vector of [`LexResult`].
+#[derive(Debug, Clone)]
+pub struct Tokens(Vec<LexResult>);
+
+impl Tokens {
+    /// Returns an iterator over the [`TokenKind`] corresponding to the tokens.
+    pub fn kinds(&self) -> impl Iterator<Item = TokenKind> + '_ {
+        self.iter().flatten().map(|(tok, _)| tok.kind())
+    }
+
+    /// Consumes the [`Tokens`], returning the underlying vector of [`LexResult`].
+    pub fn into_inner(self) -> Vec<LexResult> {
+        self.0
+    }
+}
+
+impl From<Vec<LexResult>> for Tokens {
+    fn from(value: Vec<LexResult>) -> Self {
+        Self(value)
+    }
+}
+
+impl Deref for Tokens {
+    type Target = Vec<LexResult>;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
 /// Collect tokens up to and including the first error.
-pub fn tokenize(contents: &str, mode: Mode) -> Vec<LexResult> {
+pub fn tokenize(contents: &str, mode: Mode) -> Tokens {
     let mut tokens: Vec<LexResult> = allocate_tokens_vec(contents);
     for tok in lexer::lex(contents, mode) {
         let is_err = tok.is_err();
@@ -350,7 +382,7 @@ pub fn tokenize(contents: &str, mode: Mode) -> Vec<LexResult> {
         }
     }
 
-    tokens
+    Tokens(tokens)
 }
 
 /// Tokenizes all tokens.
diff --git a/crates/ruff_python_parser/src/token.rs b/crates/ruff_python_parser/src/token.rs
index 0e1ecfaf0cbfd..e177af90de532 100644
--- a/crates/ruff_python_parser/src/token.rs
+++ b/crates/ruff_python_parser/src/token.rs
@@ -228,6 +228,11 @@ pub enum Tok {
 }
 
 impl Tok {
+    #[inline]
+    pub fn kind(&self) -> TokenKind {
+        TokenKind::from_token(self)
+    }
+
     pub fn start_marker(mode: Mode) -> Self {
         match mode {
             Mode::Module | Mode::Ipython => Tok::StartModule,
diff --git a/crates/ruff_python_trivia_integration_tests/tests/block_comments.rs b/crates/ruff_python_trivia_integration_tests/tests/block_comments.rs
index df0142b3c1798..fe6cc47ac9d7d 100644
--- a/crates/ruff_python_trivia_integration_tests/tests/block_comments.rs
+++ b/crates/ruff_python_trivia_integration_tests/tests/block_comments.rs
@@ -1,5 +1,4 @@
 use ruff_python_index::Indexer;
-use ruff_python_parser::lexer::LexResult;
 use ruff_python_parser::{tokenize, Mode};
 use ruff_source_file::Locator;
 use ruff_text_size::TextSize;
@@ -38,7 +37,7 @@ fn block_comments_indented_block() {
 fn block_comments_single_line_is_not_a_block() {
     // arrange
     let source = "\n";
-    let tokens: Vec<LexResult> = tokenize(source, Mode::Module);
+    let tokens = tokenize(source, Mode::Module);
     let locator = Locator::new(source);
     let indexer = Indexer::from_tokens(&tokens, &locator);
 
diff --git a/crates/ruff_server/src/lint.rs b/crates/ruff_server/src/lint.rs
index fdf4c54a77302..9e735302771bd 100644
--- a/crates/ruff_server/src/lint.rs
+++ b/crates/ruff_server/src/lint.rs
@@ -12,7 +12,6 @@ use ruff_linter::{
 use ruff_python_ast::PySourceType;
 use ruff_python_codegen::Stylist;
 use ruff_python_index::Indexer;
-use ruff_python_parser::lexer::LexResult;
 use ruff_python_parser::AsMode;
 use ruff_source_file::Locator;
 use ruff_text_size::Ranged;
@@ -63,7 +62,7 @@ pub(crate) fn check(
     let source_kind = SourceKind::Python(contents.to_string());
 
     // Tokenize once.
-    let tokens: Vec<LexResult> = ruff_python_parser::tokenize(contents, source_type.as_mode());
+    let tokens = ruff_python_parser::tokenize(contents, source_type.as_mode());
 
     // Map row and column locations to byte slices (lazily).
     let locator = Locator::with_index(contents, index);
diff --git a/crates/ruff_wasm/src/lib.rs b/crates/ruff_wasm/src/lib.rs
index c9dd3603e9132..56843a82e0c5a 100644
--- a/crates/ruff_wasm/src/lib.rs
+++ b/crates/ruff_wasm/src/lib.rs
@@ -17,7 +17,6 @@ use ruff_python_ast::{Mod, PySourceType};
 use ruff_python_codegen::Stylist;
 use ruff_python_formatter::{format_module_ast, pretty_comments, PyFormatContext, QuoteStyle};
 use ruff_python_index::{CommentRangesBuilder, Indexer};
-use ruff_python_parser::lexer::LexResult;
 use ruff_python_parser::{parse_tokens, tokenize_all, AsMode, Mode, Program};
 use ruff_python_trivia::CommentRanges;
 use ruff_source_file::{Locator, SourceLocation};
@@ -162,7 +161,7 @@ impl Workspace {
         let source_kind = SourceKind::Python(contents.to_string());
 
         // Tokenize once.
-        let tokens: Vec<LexResult> = ruff_python_parser::tokenize(contents, source_type.as_mode());
+        let tokens = ruff_python_parser::tokenize(contents, source_type.as_mode());
 
         // Map row and column locations to byte slices (lazily).
         let locator = Locator::new(contents);
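-- 
A minimal usage sketch of the new API, not part of the patch: it exercises only
the items introduced above (`tokenize` returning `Tokens`, `kinds()`,
`into_inner()`, and the `Deref` impl). The `demo` function and the `source`
literal are hypothetical.

    use ruff_python_parser::{tokenize, Mode, TokenKind};

    fn demo() {
        // Hypothetical input; any Python source works here.
        let source = "x = 1\nprint(x)\n";

        // `tokenize` now returns the `Tokens` newtype instead of a bare
        // `Vec<LexResult>`, which is why the call sites above simply drop
        // their type annotation.
        let tokens = tokenize(source, Mode::Module);

        // The `Deref` impl keeps slice- and Vec-based call sites working
        // unchanged, e.g. `Stylist::from_tokens(&tokens, &locator)`.
        assert!(!tokens.is_empty());

        // `kinds()` iterates over `TokenKind`s without consuming the tokens;
        // this is the iterator that `TokenSource::kinds` forwards to.
        assert!(tokens.kinds().any(|kind| kind == TokenKind::Name));

        // `into_inner()` recovers the underlying `Vec<LexResult>` for APIs
        // that still take the vector by value, like `parse_program_tokens`.
        let raw_tokens = tokens.into_inner();
        assert!(!raw_tokens.is_empty());
    }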