Maintain synchronicity between the lexer and the parser #11457

Merged · Jun 3, 2024 · 33 commits
Changes from all commits (33):
bab4747  Make `Lexer` lazy (#11244) (dhruvmanila, May 17, 2024)
b005ede  Add checkpoint logic for the parser (#11441) (dhruvmanila, May 17, 2024)
72c49c0  Bump soft keyword as name token (#11459) (dhruvmanila, May 17, 2024)
0a237a4  Update prev token end before token bump (#11473) (dhruvmanila, May 20, 2024)
f32718c  Use lookahead to resolve `type` soft keyword (#11442) (dhruvmanila, May 21, 2024)
a4604f6  Use speculative parsing for `match` statement (#11443) (dhruvmanila, May 21, 2024)
29a5200  Consider soft keyword for various parsing logic (#11474) (dhruvmanila, May 27, 2024)
c04692d  Use struct fields in `TokenValue` enum (#11475) (dhruvmanila, May 27, 2024)
3a9aff8  Update lexer and parser test snapshots (#11476) (dhruvmanila, May 27, 2024)
061a39c  Update parser API to merge lexing and parsing (#11494) (dhruvmanila, May 27, 2024)
6a30a2f  Update parser API references, expose `Program` to linter (#11505) (dhruvmanila, May 28, 2024)
0aa2100  Replace most usages of `lex_starts_at` with `Tokens` (#11511) (dhruvmanila, May 28, 2024)
0d86ac4  Replace `lex_starts_at` with `Tokens` in the formatter (#11515) (dhruvmanila, May 28, 2024)
4a5d360  Update token-based rules to use `Tokens` (#11529) (dhruvmanila, May 29, 2024)
001e350  Replace `lex` usages (#11562) (dhruvmanila, May 29, 2024)
cdc606c  Update lexer to only emit the `TokenKind` (#11563) (dhruvmanila, May 29, 2024)
6590667  Implement `TokenFlags` stored on each `Token` (#11578) (dhruvmanila, May 29, 2024)
65a34a0  Use `CommentRanges` from the parsed output (#11591) (dhruvmanila, May 29, 2024)
2e5145a  Update `Stylist`, `Indexer` to use tokens from parsed output (#11592) (dhruvmanila, May 29, 2024)
d26d9d2  Consider "gap" between tokens for range query (#11610) (dhruvmanila, May 30, 2024)
017f646  Fix various bugs found via running the test suite (#11611) (dhruvmanila, May 30, 2024)
fda48b5  Fix all compilation errors (#11612) (dhruvmanila, May 30, 2024)
cffcdab  Update lexer snapshots with token flags (#11614) (dhruvmanila, May 30, 2024)
fef9a3f  Fix CI (dhruvmanila, May 30, 2024)
bc63bf9  Classify `match not foo` as keyword (#11626) (dhruvmanila, May 31, 2024)
83e61d4  Rename `Program` to `Parsed`, shorten `tokens_in_range` (#11627) (dhruvmanila, May 31, 2024)
f98019b  Remove `Tok` enum (#11628) (dhruvmanila, May 31, 2024)
6473c27  Use enum values instead of struct with single field (#11629) (dhruvmanila, May 31, 2024)
ce79f14  fixup! Use enum values instead of struct with single field (#11629) (dhruvmanila, May 31, 2024)
c403063  Reset cursor at dedent after whitespace (#11630) (dhruvmanila, May 31, 2024)
8728d38  fixup! Rename `Program` to `Parsed`, shorten `tokens_in_range` (#11627) (dhruvmanila, Jun 3, 2024)
ce32024  Use unchecked parser API in test case with syntax error (dhruvmanila, Jun 3, 2024)
e3d09f3  Use struct destructuring for rewinding checkpoint (dhruvmanila, Jun 3, 2024)
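
Taken together, these commits replace the standalone lexing entry points (`lex`, `lex_starts_at`, `parse_tokens`, `parse_suite`, and the `Tok` enum) with a single parse step that returns a `Parsed` value owning the token stream alongside the AST, so the two can no longer drift apart. A minimal sketch of the resulting API shape, using only names that appear in the diffs below (exact signatures are assumptions, not a reference):

    use ruff_python_parser::{parse, parse_module, Mode};

    fn parser_api_sketch(source: &str) {
        // One call lexes and parses; the result owns tokens, AST, and errors.
        let parsed = parse(source, Mode::Module).expect("valid Python source");

        // Tokens and syntax tree come from the same `Parsed` value.
        let token_count = parsed.tokens().len();
        let module = parsed.into_syntax();
        println!("{token_count} tokens, AST: {module:#?}");

        // Module-mode convenience wrapper used by the benchmarks below.
        let suite = parse_module(source)
            .expect("valid Python source")
            .into_suite();
        println!("{} top-level statements", suite.len());
    }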
7 changes: 3 additions & 4 deletions Cargo.lock

Some generated files are not rendered by default.

22 changes: 12 additions & 10 deletions crates/red_knot/src/parse.rs
@@ -32,17 +32,19 @@ impl Parsed {
         let result = ruff_python_parser::parse(text, Mode::Module);

         let (module, errors) = match result {
-            Ok(ast::Mod::Module(module)) => (module, vec![]),
-            Ok(ast::Mod::Expression(expression)) => (
-                ast::ModModule {
-                    range: expression.range(),
-                    body: vec![ast::Stmt::Expr(ast::StmtExpr {
+            Ok(parsed) => match parsed.into_syntax() {
+                ast::Mod::Module(module) => (module, vec![]),
+                ast::Mod::Expression(expression) => (
+                    ast::ModModule {
                         range: expression.range(),
-                        value: expression.body,
-                    })],
-                },
-                vec![],
-            ),
+                        body: vec![ast::Stmt::Expr(ast::StmtExpr {
+                            range: expression.range(),
+                            value: expression.body,
+                        })],
+                    },
+                    vec![],
+                ),
+            },
             Err(errors) => (
                 ast::ModModule {
                     range: TextRange::default(),
1 change: 0 additions & 1 deletion crates/ruff_benchmark/Cargo.toml
@@ -44,7 +44,6 @@ codspeed-criterion-compat = { workspace = true, default-features = false, option
 ruff_linter = { workspace = true }
 ruff_python_ast = { workspace = true }
 ruff_python_formatter = { workspace = true }
-ruff_python_index = { workspace = true }
 ruff_python_parser = { workspace = true }

 [lints]
27 changes: 6 additions & 21 deletions crates/ruff_benchmark/benches/formatter.rs
@@ -5,9 +5,7 @@ use ruff_benchmark::criterion::{
 };
 use ruff_benchmark::{TestCase, TestFile, TestFileDownloadError};
 use ruff_python_formatter::{format_module_ast, PreviewMode, PyFormatOptions};
-use ruff_python_index::CommentRangesBuilder;
-use ruff_python_parser::lexer::lex;
-use ruff_python_parser::{allocate_tokens_vec, parse_tokens, Mode};
+use ruff_python_parser::{parse, Mode};

 #[cfg(target_os = "windows")]
 #[global_allocator]
@@ -52,28 +50,15 @@ fn benchmark_formatter(criterion: &mut Criterion) {
             BenchmarkId::from_parameter(case.name()),
             &case,
             |b, case| {
-                let mut tokens = allocate_tokens_vec(case.code());
-                let mut comment_ranges = CommentRangesBuilder::default();
-
-                for result in lex(case.code(), Mode::Module) {
-                    let (token, range) = result.expect("Input to be a valid python program.");
-
-                    comment_ranges.visit_token(&token, range);
-                    tokens.push(Ok((token, range)));
-                }
-
-                let comment_ranges = comment_ranges.finish();
-
-                // Parse the AST.
-                let module = parse_tokens(tokens, case.code(), Mode::Module)
-                    .expect("Input to be a valid python program");
+                // Parse the source.
+                let parsed =
+                    parse(case.code(), Mode::Module).expect("Input should be a valid Python code");

                 b.iter(|| {
                     let options = PyFormatOptions::from_extension(Path::new(case.name()))
                         .with_preview(PreviewMode::Enabled);
-                    let formatted =
-                        format_module_ast(&module, &comment_ranges, case.code(), options)
-                            .expect("Formatting to succeed");
+                    let formatted = format_module_ast(&parsed, case.code(), options)
+                        .expect("Formatting to succeed");

                     formatted.print().expect("Printing to succeed")
                 });
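
Note: the hunk above drops the hand-rolled `CommentRangesBuilder` pass because `format_module_ast` now accepts the `Parsed` output, which already carries the comment ranges. A sketch of the new call shape, assuming the signatures shown in this hunk:

    use std::path::Path;

    use ruff_python_formatter::{format_module_ast, PyFormatOptions};
    use ruff_python_parser::{parse, Mode};

    fn format_once(source: &str, file_name: &str) {
        // Comment ranges travel inside `parsed`; no separate builder pass needed.
        let parsed = parse(source, Mode::Module).expect("valid Python source");
        let options = PyFormatOptions::from_extension(Path::new(file_name));
        let formatted =
            format_module_ast(&parsed, source, options).expect("formatting to succeed");
        let _printed = formatted.print().expect("printing to succeed");
    }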
14 changes: 10 additions & 4 deletions crates/ruff_benchmark/benches/lexer.rs
@@ -2,7 +2,7 @@ use ruff_benchmark::criterion::{
     criterion_group, criterion_main, measurement::WallTime, BenchmarkId, Criterion, Throughput,
 };
 use ruff_benchmark::{TestCase, TestFile, TestFileDownloadError};
-use ruff_python_parser::{lexer, Mode};
+use ruff_python_parser::{lexer, Mode, TokenKind};

 #[cfg(target_os = "windows")]
 #[global_allocator]
@@ -47,9 +47,15 @@ fn benchmark_lexer(criterion: &mut Criterion<WallTime>) {
             &case,
             |b, case| {
                 b.iter(|| {
-                    let result =
-                        lexer::lex(case.code(), Mode::Module).find(std::result::Result::is_err);
-                    assert_eq!(result, None, "Input to be a valid Python program");
+                    let mut lexer = lexer::lex(case.code(), Mode::Module);
+                    loop {
+                        let token = lexer.next_token();
+                        match token {
+                            TokenKind::EndOfFile => break,
+                            TokenKind::Unknown => panic!("Input to be a valid Python source code"),
+                            _ => {}
+                        }
+                    }
                 });
             },
         );
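
Note: with the lazy lexer from bab4747, `lexer::lex` no longer yields an iterator of `Result`s; callers pump it one token at a time and lexical errors surface as `TokenKind::Unknown`. A self-contained sketch of that loop, under the same assumptions as the hunk above:

    use ruff_python_parser::{lexer, Mode, TokenKind};

    // Counts tokens by pumping the lazy lexer until end-of-file.
    fn count_tokens(source: &str) -> usize {
        let mut lexer = lexer::lex(source, Mode::Module);
        let mut count = 0;
        loop {
            match lexer.next_token() {
                TokenKind::EndOfFile => break,
                TokenKind::Unknown => panic!("lexical error in source"),
                _ => count += 1,
            }
        }
        count
    }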
14 changes: 6 additions & 8 deletions crates/ruff_benchmark/benches/linter.rs
@@ -10,7 +10,7 @@ use ruff_linter::settings::{flags, LinterSettings};
 use ruff_linter::source_kind::SourceKind;
 use ruff_linter::{registry::Rule, RuleSelector};
 use ruff_python_ast::PySourceType;
-use ruff_python_parser::{parse_program_tokens, tokenize, Mode};
+use ruff_python_parser::parse_module;

 #[cfg(target_os = "windows")]
 #[global_allocator]
@@ -54,15 +54,13 @@ fn benchmark_linter(mut group: BenchmarkGroup, settings: &LinterSettings) {
             BenchmarkId::from_parameter(case.name()),
             &case,
             |b, case| {
-                // Tokenize the source.
-                let tokens = tokenize(case.code(), Mode::Module);
-
-                // Parse the source.
-                let ast = parse_program_tokens(tokens.clone(), case.code(), false).unwrap();
+                // Parse the source.
+                let parsed =
+                    parse_module(case.code()).expect("Input should be a valid Python code");

                 b.iter_batched(
-                    || (ast.clone(), tokens.clone()),
-                    |(ast, tokens)| {
+                    || parsed.clone(),
+                    |parsed| {
                         let path = case.path();
                         let result = lint_only(
                             &path,
@@ -71,7 +69,7 @@ fn benchmark_linter(mut group: BenchmarkGroup, settings: &LinterSettings) {
                             flags::Noqa::Enabled,
                             &SourceKind::Python(case.code().to_string()),
                             PySourceType::from(path.as_path()),
-                            ParseSource::Precomputed { tokens, ast },
+                            ParseSource::Precomputed(parsed),
                         );

                         // Assert that file contains no parse errors
6 changes: 4 additions & 2 deletions crates/ruff_benchmark/benches/parser.rs
@@ -4,7 +4,7 @@ use ruff_benchmark::criterion::{
 use ruff_benchmark::{TestCase, TestFile, TestFileDownloadError};
 use ruff_python_ast::statement_visitor::{walk_stmt, StatementVisitor};
 use ruff_python_ast::Stmt;
-use ruff_python_parser::parse_suite;
+use ruff_python_parser::parse_module;

 #[cfg(target_os = "windows")]
 #[global_allocator]
@@ -60,7 +60,9 @@ fn benchmark_parser(criterion: &mut Criterion<WallTime>) {
             &case,
             |b, case| {
                 b.iter(|| {
-                    let parsed = parse_suite(case.code()).unwrap();
+                    let parsed = parse_module(case.code())
+                        .expect("Input should be a valid Python code")
+                        .into_suite();

                     let mut visitor = CountVisitor { count: 0 };
                     visitor.visit_body(&parsed);
1 change: 1 addition & 0 deletions crates/ruff_dev/Cargo.toml
@@ -22,6 +22,7 @@ ruff_python_formatter = { workspace = true }
 ruff_python_parser = { workspace = true }
 ruff_python_stdlib = { workspace = true }
 ruff_python_trivia = { workspace = true }
+ruff_text_size = { workspace = true }
 ruff_workspace = { workspace = true, features = ["schemars"] }

 anyhow = { workspace = true }
2 changes: 1 addition & 1 deletion crates/ruff_dev/src/print_ast.rs
@@ -24,7 +24,7 @@ pub(crate) fn main(args: &Args) -> Result<()> {
             args.file.display()
         )
     })?;
-    let python_ast = parse(source_kind.source_code(), source_type.as_mode())?;
+    let python_ast = parse(source_kind.source_code(), source_type.as_mode())?.into_syntax();
     println!("{python_ast:#?}");
     Ok(())
 }
13 changes: 8 additions & 5 deletions crates/ruff_dev/src/print_tokens.rs
@@ -7,7 +7,8 @@ use anyhow::Result;

 use ruff_linter::source_kind::SourceKind;
 use ruff_python_ast::PySourceType;
-use ruff_python_parser::{lexer, AsMode};
+use ruff_python_parser::parse_unchecked_source;
+use ruff_text_size::Ranged;

 #[derive(clap::Args)]
 pub(crate) struct Args {
@@ -24,11 +25,13 @@ pub(crate) fn main(args: &Args) -> Result<()> {
             args.file.display()
         )
     })?;
-    for (tok, range) in lexer::lex(source_kind.source_code(), source_type.as_mode()).flatten() {
+    let parsed = parse_unchecked_source(source_kind.source_code(), source_type);
+    for token in parsed.tokens() {
         println!(
-            "{start:#?} {tok:#?} {end:#?}",
-            start = range.start(),
-            end = range.end()
+            "{start:#?} {kind:#?} {end:#?}",
+            start = token.start(),
+            end = token.end(),
+            kind = token.kind(),
         );
     }
     Ok(())
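
Note: this is the new consumption pattern for tokens: they come from the parsed output instead of a second lexer run, and each `Token` exposes its kind plus its range via `Ranged`. A standalone sketch, assuming `parse` exposes the same `tokens()` accessor as `parse_unchecked_source`:

    use ruff_python_parser::{parse, Mode};
    use ruff_text_size::Ranged;

    // Dumps every token with its byte offsets, mirroring print_tokens.rs.
    fn dump_tokens(source: &str) {
        let parsed = parse(source, Mode::Module).expect("valid Python source");
        for token in parsed.tokens() {
            println!("{:?} {:?} {:?}", token.start(), token.kind(), token.end());
        }
    }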
2 changes: 1 addition & 1 deletion crates/ruff_linter/src/checkers/ast/analyze/expression.rs
@@ -1160,7 +1160,7 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) {
         }
     }
     if checker.enabled(Rule::PrintfStringFormatting) {
-        pyupgrade::rules::printf_string_formatting(checker, expr, right);
+        pyupgrade::rules::printf_string_formatting(checker, bin_op, format_string);
     }
     if checker.enabled(Rule::BadStringFormatCharacter) {
         pylint::rules::bad_string_format_character::percent(
2 changes: 1 addition & 1 deletion crates/ruff_linter/src/checkers/ast/analyze/statement.rs
@@ -765,7 +765,7 @@ pub(crate) fn statement(stmt: &Stmt, checker: &mut Checker) {
             pyupgrade::rules::deprecated_c_element_tree(checker, stmt);
         }
         if checker.enabled(Rule::DeprecatedImport) {
-            pyupgrade::rules::deprecated_import(checker, stmt, names, module, level);
+            pyupgrade::rules::deprecated_import(checker, import_from);
         }
         if checker.enabled(Rule::UnnecessaryBuiltinImport) {
             if let Some(module) = module {
26 changes: 18 additions & 8 deletions crates/ruff_linter/src/checkers/ast/mod.rs
@@ -32,8 +32,10 @@ use itertools::Itertools;
 use log::debug;
 use ruff_python_ast::{
     self as ast, AnyParameterRef, Comprehension, ElifElseClause, ExceptHandler, Expr, ExprContext,
-    FStringElement, Keyword, MatchCase, Parameter, Parameters, Pattern, Stmt, Suite, UnaryOp,
+    FStringElement, Keyword, MatchCase, ModModule, Parameter, Parameters, Pattern, Stmt, Suite,
+    UnaryOp,
 };
+use ruff_python_parser::Parsed;
 use ruff_text_size::{Ranged, TextRange, TextSize};

 use ruff_diagnostics::{Diagnostic, IsolationLevel};
@@ -174,6 +176,8 @@ impl ExpectedDocstringKind {
 }

 pub(crate) struct Checker<'a> {
+    /// The parsed [`Parsed`].
+    parsed: &'a Parsed<ModModule>,
     /// The [`Path`] to the file under analysis.
     path: &'a Path,
     /// The [`Path`] to the package containing the current file.
@@ -223,6 +227,7 @@ impl<'a> Checker<'a> {
 impl<'a> Checker<'a> {
     #[allow(clippy::too_many_arguments)]
     pub(crate) fn new(
+        parsed: &'a Parsed<ModModule>,
         settings: &'a LinterSettings,
         noqa_line_for: &'a NoqaMapping,
         noqa: flags::Noqa,
@@ -232,12 +237,12 @@ impl<'a> Checker<'a> {
         locator: &'a Locator,
         stylist: &'a Stylist,
         indexer: &'a Indexer,
-        importer: Importer<'a>,
         source_type: PySourceType,
         cell_offsets: Option<&'a CellOffsets>,
         notebook_index: Option<&'a NotebookIndex>,
     ) -> Checker<'a> {
         Checker {
+            parsed,
             settings,
             noqa_line_for,
             noqa,
@@ -248,7 +253,7 @@ impl<'a> Checker<'a> {
             locator,
             stylist,
             indexer,
-            importer,
+            importer: Importer::new(parsed, locator, stylist),
             semantic: SemanticModel::new(&settings.typing_modules, path, module),
             visit: deferred::Visit::default(),
             analyze: deferred::Analyze::default(),
@@ -318,6 +323,11 @@ impl<'a> Checker<'a> {
         }
     }

+    /// The [`Parsed`] output for the current file, which contains the tokens, AST, and more.
+    pub(crate) const fn parsed(&self) -> &'a Parsed<ModModule> {
+        self.parsed
+    }
+
     /// The [`Locator`] for the current file, which enables extraction of source code from byte
     /// offsets.
     pub(crate) const fn locator(&self) -> &'a Locator<'a> {
@@ -2326,7 +2336,7 @@ impl<'a> Checker<'a> {

 #[allow(clippy::too_many_arguments)]
 pub(crate) fn check_ast(
-    python_ast: &Suite,
+    parsed: &Parsed<ModModule>,
     locator: &Locator,
     stylist: &Stylist,
     indexer: &Indexer,
@@ -2356,10 +2366,11 @@ pub(crate) fn check_ast(
         } else {
             ModuleSource::File(path)
         },
-        python_ast,
+        python_ast: parsed.suite(),
     };

     let mut checker = Checker::new(
+        parsed,
         settings,
         noqa_line_for,
         noqa,
@@ -2369,16 +2380,15 @@ pub(crate) fn check_ast(
         locator,
         stylist,
         indexer,
-        Importer::new(python_ast, locator, stylist),
         source_type,
         cell_offsets,
         notebook_index,
     );
     checker.bind_builtins();

     // Iterate over the AST.
-    checker.visit_module(python_ast);
-    checker.visit_body(python_ast);
+    checker.visit_module(parsed.suite());
+    checker.visit_body(parsed.suite());

     // Visit any deferred syntax nodes. Take care to visit in order, such that we avoid adding
     // new deferred nodes after visiting nodes of that kind. For example, visiting a deferred
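
Note: with `Parsed` threaded through `Checker`, rules can reach the token stream without re-lexing. A hypothetical helper (illustrative only, not part of this PR; it assumes a scope where `Checker` is visible, such as this module):

    use ruff_python_parser::TokenKind;

    // Hypothetical: count comment tokens via the checker's parsed output.
    fn comment_token_count(checker: &Checker) -> usize {
        checker
            .parsed()
            .tokens()
            .iter()
            .filter(|token| token.kind() == TokenKind::Comment)
            .count()
    }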