Use Jupyter mode for the parser with Notebook files
dhruvmanila committed Jul 6, 2023
1 parent d810bc9 commit 9e7eab2
Showing 11 changed files with 76 additions and 20 deletions.
6 changes: 3 additions & 3 deletions crates/ruff/src/importer/insertion.rs
@@ -302,7 +302,7 @@ mod tests {
use ruff_text_size::TextSize;
use rustpython_parser::ast::Suite;
use rustpython_parser::lexer::LexResult;
use rustpython_parser::Parse;
use rustpython_parser::{Mode, Parse};

use ruff_python_ast::source_code::{Locator, Stylist};
use ruff_python_whitespace::LineEnding;
@@ -313,7 +313,7 @@
fn start_of_file() -> Result<()> {
fn insert(contents: &str) -> Result<Insertion> {
let program = Suite::parse(contents, "<filename>")?;
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(contents);
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(contents, Mode::Module);
let locator = Locator::new(contents);
let stylist = Stylist::from_tokens(&tokens, &locator);
Ok(Insertion::start_of_file(&program, &locator, &stylist))
@@ -424,7 +424,7 @@ x = 1
#[test]
fn start_of_block() {
fn insert(contents: &str, offset: TextSize) -> Insertion {
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(contents);
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(contents, Mode::Module);
let locator = Locator::new(contents);
let stylist = Stylist::from_tokens(&tokens, &locator);
Insertion::start_of_block(offset, &locator, &stylist)
3 changes: 2 additions & 1 deletion crates/ruff/src/jupyter/notebook.rs
@@ -6,6 +6,7 @@ use std::path::Path;

use itertools::Itertools;
use once_cell::sync::OnceCell;
use rustpython_parser::Mode;
use serde::Serialize;
use serde_json::error::Category;

@@ -151,7 +152,7 @@ impl Notebook {
)
})?;
// Check if tokenizing was successful and the file is non-empty
if (ruff_rustpython::tokenize(&contents))
if (ruff_rustpython::tokenize(&contents, Mode::Module))
.last()
.map_or(true, Result::is_err)
{
30 changes: 24 additions & 6 deletions crates/ruff/src/linter.rs
@@ -8,7 +8,7 @@ use itertools::Itertools;
use log::error;
use rustc_hash::FxHashMap;
use rustpython_parser::lexer::LexResult;
use rustpython_parser::ParseError;
use rustpython_parser::{Mode, ParseError};

use ruff_diagnostics::Diagnostic;
use ruff_python_ast::imports::ImportMap;
@@ -134,7 +134,12 @@ pub fn check_path(
.iter_enabled()
.any(|rule_code| rule_code.lint_source().is_imports());
if use_ast || use_imports || use_doc_lines {
match ruff_rustpython::parse_program_tokens(tokens, &path.to_string_lossy()) {
let mode = if source_kind.map_or(false, |kind| kind.is_jupyter()) {
Mode::Jupyter
} else {
Mode::Module
};
match ruff_rustpython::parse_program_tokens(tokens, mode, &path.to_string_lossy()) {
Ok(python_ast) => {
if use_ast {
diagnostics.extend(check_ast(
@@ -255,7 +260,7 @@ pub fn add_noqa_to_path(path: &Path, package: Option<&Path>, settings: &Settings
let contents = std::fs::read_to_string(path)?;

// Tokenize once.
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(&contents);
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(&contents, Mode::Module);

// Map row and column locations to byte slices (lazily).
let locator = Locator::new(&contents);
@@ -320,9 +325,16 @@ pub fn lint_only(
package: Option<&Path>,
settings: &Settings,
noqa: flags::Noqa,
source_kind: Option<&SourceKind>,
) -> LinterResult<(Vec<Message>, Option<ImportMap>)> {
let mode = if source_kind.map_or(false, |source_kind| source_kind.is_jupyter()) {
Mode::Jupyter
} else {
Mode::Module
};

// Tokenize once.
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(contents);
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(contents, mode);

// Map row and column locations to byte slices (lazily).
let locator = Locator::new(contents);
@@ -352,7 +364,7 @@ pub fn lint_only(
&directives,
settings,
noqa,
None,
source_kind,
);

result.map(|(diagnostics, imports)| {
@@ -411,10 +423,16 @@ pub fn lint_fix<'a>(
// Track whether the _initial_ source code was parseable.
let mut parseable = false;

let mode = if source_kind.is_jupyter() {
Mode::Jupyter
} else {
Mode::Module
};

// Continuously autofix until the source code stabilizes.
loop {
// Tokenize once.
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(&transformed);
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(&transformed, mode);

// Map row and column locations to byte slices (lazily).
let locator = Locator::new(&transformed);
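
The mode-selection branch above is repeated inline in check_path, lint_only, and lint_fix. A minimal sketch of the shared idea as a free-standing helper (illustrative only: parser_mode is not part of this commit, and it is written as if it lived in crates/ruff/src/linter.rs, where Mode and SourceKind are already in scope):

// Illustrative helper, not part of the commit.
fn parser_mode(source_kind: Option<&SourceKind>) -> Mode {
    // Notebook cells are lexed and parsed in Jupyter mode so that line magics,
    // shell escapes, and help syntax become tokens instead of parse errors.
    if source_kind.map_or(false, |kind| kind.is_jupyter()) {
        Mode::Jupyter
    } else {
        Mode::Module
    }
}

lint_fix holds a &SourceKind directly, so it skips the Option and branches on source_kind.is_jupyter(), as shown in the hunk above.
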
3 changes: 2 additions & 1 deletion crates/ruff/src/rules/pyflakes/mod.rs
@@ -12,6 +12,7 @@ mod tests {
use anyhow::Result;
use regex::Regex;
use rustpython_parser::lexer::LexResult;
use rustpython_parser::Mode;
use test_case::test_case;

use ruff_diagnostics::Diagnostic;
@@ -499,7 +500,7 @@ mod tests {
fn flakes(contents: &str, expected: &[Rule]) {
let contents = dedent(contents);
let settings = Settings::for_rules(Linter::Pyflakes.rules());
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(&contents);
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(&contents, Mode::Module);
let locator = Locator::new(&contents);
let stylist = Stylist::from_tokens(&tokens, &locator);
let indexer = Indexer::from_tokens(&tokens, &locator);
10 changes: 8 additions & 2 deletions crates/ruff/src/test.rs
@@ -9,6 +9,7 @@ use itertools::Itertools;
use ruff_textwrap::dedent;
use rustc_hash::FxHashMap;
use rustpython_parser::lexer::LexResult;
use rustpython_parser::Mode;

use ruff_diagnostics::{AutofixKind, Diagnostic};
use ruff_python_ast::source_code::{Indexer, Locator, SourceFileBuilder, Stylist};
@@ -97,8 +98,13 @@ pub(crate) fn max_iterations() -> usize {
/// A convenient wrapper around [`check_path`], that additionally
/// asserts that autofixes converge after a fixed number of iterations.
fn test_contents(source_kind: &mut SourceKind, path: &Path, settings: &Settings) -> Vec<Message> {
let mode = if source_kind.is_jupyter() {
Mode::Jupyter
} else {
Mode::Module
};
let contents = source_kind.content().to_string();
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(&contents);
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(&contents, mode);
let locator = Locator::new(&contents);
let stylist = Stylist::from_tokens(&tokens, &locator);
let indexer = Indexer::from_tokens(&tokens, &locator);
@@ -160,7 +166,7 @@ fn test_contents(source_kind: &mut SourceKind, path: &Path, settings: &Settings)
notebook.update(&source_map, &fixed_contents);
};

let tokens: Vec<LexResult> = ruff_rustpython::tokenize(&fixed_contents);
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(&fixed_contents, mode);
let locator = Locator::new(&fixed_contents);
let stylist = Stylist::from_tokens(&tokens, &locator);
let indexer = Indexer::from_tokens(&tokens, &locator);
1 change: 1 addition & 0 deletions crates/ruff_benchmark/benches/linter.rs
@@ -63,6 +63,7 @@ fn benchmark_linter(mut group: BenchmarkGroup<WallTime>, settings: &Settings) {
None,
settings,
flags::Noqa::Enabled,
None,
);

// Assert that file contains no parse errors
20 changes: 18 additions & 2 deletions crates/ruff_cli/src/diagnostics.rs
@@ -195,12 +195,26 @@ pub(crate) fn lint_path(
(result, fixed)
} else {
// If we fail to autofix, lint the original source code.
let result = lint_only(&contents, path, package, &settings.lib, noqa);
let result = lint_only(
&contents,
path,
package,
&settings.lib,
noqa,
Some(&source_kind),
);
let fixed = FxHashMap::default();
(result, fixed)
}
} else {
let result = lint_only(&contents, path, package, &settings.lib, noqa);
let result = lint_only(
&contents,
path,
package,
&settings.lib,
noqa,
Some(&source_kind),
);
let fixed = FxHashMap::default();
(result, fixed)
};
@@ -307,6 +321,7 @@ pub(crate) fn lint_stdin(
package,
settings,
noqa,
Some(&source_kind),
);
let fixed = FxHashMap::default();

@@ -324,6 +339,7 @@
package,
settings,
noqa,
Some(&source_kind),
);
let fixed = FxHashMap::default();
(result, fixed)
10 changes: 9 additions & 1 deletion crates/ruff_dev/src/print_tokens.rs
@@ -12,11 +12,19 @@ pub(crate) struct Args {
/// Python file for which to generate the AST.
#[arg(required = true)]
file: PathBuf,
/// Run in Jupyter mode i.e., allow line magics (%), shell commands (!), and help (?).
#[arg(long)]
jupyter: bool,
}

pub(crate) fn main(args: &Args) -> Result<()> {
let contents = fs::read_to_string(&args.file)?;
for (tok, range) in lexer::lex(&contents, Mode::Module).flatten() {
let mode = if args.jupyter {
Mode::Jupyter
} else {
Mode::Module
};
for (tok, range) in lexer::lex(&contents, mode).flatten() {
println!(
"{start:#?} {tok:#?} {end:#?}",
start = range.start(),
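
As a rough illustration of what the new --jupyter flag enables (a sketch, not part of the commit; the exact tokens emitted depend on the rustpython_parser version), lexing in Mode::Jupyter lets a line such as %matplotlib inline come through the lexer as a magic-command token rather than ordinary operator and name tokens:

// Standalone sketch mirroring print_tokens.rs.
use rustpython_parser::{lexer, Mode};

fn main() {
    let contents = "%matplotlib inline\nx = 1\n";
    for (tok, range) in lexer::lex(contents, Mode::Jupyter).flatten() {
        // Prints each token with its byte range, e.g. a magic-command token
        // for the first line followed by the usual Python tokens.
        println!("{start:?} {tok:?} {end:?}", start = range.start(), end = range.end());
    }
}
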
3 changes: 3 additions & 0 deletions crates/ruff_python_ast/src/token_kind.rs
@@ -19,6 +19,8 @@ pub enum TokenKind {
/// Token value for a newline that is not a logical line break. These are filtered out of
/// the token stream prior to parsing.
NonLogicalNewline,
/// Token value for a Jupyter magic command.
MagicCommand,
/// Token value for an indent.
Indent,
/// Token value for a dedent.
@@ -341,6 +343,7 @@ impl TokenKind {
Tok::Comment(_) => TokenKind::Comment,
Tok::Newline => TokenKind::Newline,
Tok::NonLogicalNewline => TokenKind::NonLogicalNewline,
Tok::MagicCommand(_) => TokenKind::MagicCommand,
Tok::Indent => TokenKind::Indent,
Tok::Dedent => TokenKind::Dedent,
Tok::EndOfFile => TokenKind::EndOfFile,
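
Downstream token-based logic can match the new variant like any other (a sketch, not from the commit; the import path is an assumption based on the file location):

use ruff_python_ast::token_kind::TokenKind;

/// Whether a token is a Jupyter magic command (line magic, shell escape, or help).
fn is_magic_command(kind: TokenKind) -> bool {
    matches!(kind, TokenKind::MagicCommand)
}
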
7 changes: 4 additions & 3 deletions crates/ruff_rustpython/src/lib.rs
@@ -4,9 +4,9 @@ use rustpython_parser::lexer::LexResult;
use rustpython_parser::{lexer, Mode, ParseError};

/// Collect tokens up to and including the first error.
pub fn tokenize(contents: &str) -> Vec<LexResult> {
pub fn tokenize(contents: &str, mode: Mode) -> Vec<LexResult> {
let mut tokens: Vec<LexResult> = vec![];
for tok in lexer::lex(contents, Mode::Module) {
for tok in lexer::lex(contents, mode) {
let is_err = tok.is_err();
tokens.push(tok);
if is_err {
@@ -19,9 +19,10 @@ pub fn tokenize(contents: &str) -> Vec<LexResult> {
/// Parse a full Python program from its tokens.
pub fn parse_program_tokens(
lxr: Vec<LexResult>,
mode: Mode,
source_path: &str,
) -> anyhow::Result<Suite, ParseError> {
parser::parse_tokens(lxr, Mode::Module, source_path).map(|top| match top {
parser::parse_tokens(lxr, mode, source_path).map(|top| match top {
Mod::Module(ModModule { body, .. }) => body,
_ => unreachable!(),
})
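
A minimal caller-side sketch of the widened ruff_rustpython API (illustrative only; the parse_source name and the "<embedded>" source path are placeholders):

use rustpython_parser::ast::Suite;
use rustpython_parser::lexer::LexResult;
use rustpython_parser::{Mode, ParseError};

fn parse_source(contents: &str, is_jupyter: bool) -> Result<Suite, ParseError> {
    // The same mode is threaded through tokenizing and parsing, since the
    // Jupyter lexer can emit tokens (such as magic commands) that need the
    // matching parser mode.
    let mode = if is_jupyter { Mode::Jupyter } else { Mode::Module };
    let tokens: Vec<LexResult> = ruff_rustpython::tokenize(contents, mode);
    ruff_rustpython::parse_program_tokens(tokens, mode, "<embedded>")
}
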
3 changes: 2 additions & 1 deletion crates/ruff_wasm/src/lib.rs
@@ -1,6 +1,7 @@
use std::path::Path;

use rustpython_parser::lexer::LexResult;
use rustpython_parser::Mode;
use serde::{Deserialize, Serialize};
use wasm_bindgen::prelude::*;

@@ -180,7 +181,7 @@ pub fn check(contents: &str, options: JsValue) -> Result<JsValue, JsValue> {
Settings::from_configuration(configuration, Path::new(".")).map_err(|e| e.to_string())?;

// Tokenize once.
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(contents);
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(contents, Mode::Module);

// Map row and column locations to byte slices (lazily).
let locator = Locator::new(contents);
