diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 3e3f05f555f40..93dd398730034 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -18,9 +18,11 @@ cargo-fuzz = true [dependencies] ruff_linter = { path = "../crates/ruff_linter" } +ruff_python_ast = { path = "../crates/ruff_python_ast" } ruff_python_codegen = { path = "../crates/ruff_python_codegen" } ruff_python_parser = { path = "../crates/ruff_python_parser" } ruff_source_file = { path = "../crates/ruff_source_file" } +ruff_python_formatter = { path = "../crates/ruff_python_formatter"} arbitrary = { version = "1.3.0", features = ["derive"] } libfuzzer-sys = { git = "https://github.com/rust-fuzz/libfuzzer", default-features = false } @@ -38,10 +40,18 @@ path = "fuzz_targets/ruff_parse_simple.rs" name = "ruff_fix_validity" path = "fuzz_targets/ruff_fix_validity.rs" +[[bin]] +name = "ruff_formatter_validity" +path = "fuzz_targets/ruff_formatter_validity.rs" + [[bin]] name = "ruff_parse_idempotency" path = "fuzz_targets/ruff_parse_idempotency.rs" +[[bin]] +name = "ruff_formatter_idempotency" +path = "fuzz_targets/ruff_formatter_idempotency.rs" + [profile.release] opt-level = 3 debug = true diff --git a/fuzz/README.md b/fuzz/README.md index 2c3a8b76f67c8..1b91e57d05c1f 100644 --- a/fuzz/README.md +++ b/fuzz/README.md @@ -101,3 +101,16 @@ This fuzz harness checks that fixes applied by Ruff do not introduce new errors [`ruff_linter::test::test_snippet`](../crates/ruff_linter/src/test.rs) testing utility. It currently is only configured to use default settings, but may be extended in future versions to test non-default linter settings. + +### `ruff_formatter_idempotency` + +This fuzz harness checks that the formatter is [idempotent](https://en.wikipedia.org/wiki/Idempotence), +which detects possible unsteady states of Ruff's formatter. 
+ +### `ruff_formatter_validity` + +This fuzz harness checks that Ruff's formatter does not introduce new linter errors/warnings by +linting once, counting the number of each error type, then formatting, then linting again and +ensuring that the number of each error type does not increase across formats. This has the +beneficial side effect of discovering cases where the linter does not discover a lint error when +it should have due to a formatting inconsistency. diff --git a/fuzz/corpus/ruff_formatter_idempotency b/fuzz/corpus/ruff_formatter_idempotency new file mode 120000 index 0000000000000..38dc5bc1ea310 --- /dev/null +++ b/fuzz/corpus/ruff_formatter_idempotency @@ -0,0 +1 @@ +ruff_fix_validity \ No newline at end of file diff --git a/fuzz/corpus/ruff_formatter_validity b/fuzz/corpus/ruff_formatter_validity new file mode 120000 index 0000000000000..38dc5bc1ea310 --- /dev/null +++ b/fuzz/corpus/ruff_formatter_validity @@ -0,0 +1 @@ +ruff_fix_validity \ No newline at end of file diff --git a/fuzz/fuzz_targets/ruff_formatter_idempotency.rs b/fuzz/fuzz_targets/ruff_formatter_idempotency.rs new file mode 100644 index 0000000000000..f9f0e37632f16 --- /dev/null +++ b/fuzz/fuzz_targets/ruff_formatter_idempotency.rs @@ -0,0 +1,47 @@ +//! Fuzzer harness which double formats the input and assesses the idempotency or unsteady state of +//! Ruff's formatter. 
+ +#![no_main] + +use libfuzzer_sys::{fuzz_target, Corpus}; +use similar::TextDiff; + +use ruff_python_formatter::{format_module_source, PyFormatOptions}; + +fn do_fuzz(case: &[u8]) -> Corpus { + // Throw away inputs which aren't utf-8 + let Ok(code) = std::str::from_utf8(case) else { + return Corpus::Reject; + }; + + let options = PyFormatOptions::default(); + // format the code once + if let Ok(formatted) = format_module_source(code, options.clone()) { + let formatted = formatted.as_code(); + + // reformat the code second time + if let Ok(reformatted) = format_module_source(formatted, options.clone()) { + let reformatted = reformatted.as_code(); + + if formatted != reformatted { + let diff = TextDiff::from_lines(formatted, reformatted) + .unified_diff() + .header("Formatted Once", "Formatted Twice") + .to_string(); + panic!( + "\nReformatting the code a second time resulted in formatting changes.\nInput: {:?}\ndiff:\n{}", + code, diff + ); + } + } else { + panic!( + "Unable to format the code second time:\nInput:{:?}\nformatted:\n{:?}", + code, formatted + ); + } + } + + Corpus::Keep +} + +fuzz_target!(|case: &[u8]| -> Corpus { do_fuzz(case) }); diff --git a/fuzz/fuzz_targets/ruff_formatter_validity.rs b/fuzz/fuzz_targets/ruff_formatter_validity.rs new file mode 100644 index 0000000000000..286509992f9e4 --- /dev/null +++ b/fuzz/fuzz_targets/ruff_formatter_validity.rs @@ -0,0 +1,98 @@ +//! Fuzzer harness which actively tries to find testcases that cause Ruff to introduce errors into +//! the resulting file. 
+ +#![no_main] + +use std::collections::HashMap; +use std::sync::OnceLock; + +use libfuzzer_sys::{fuzz_target, Corpus}; +use ruff_linter::linter::ParseSource; +use ruff_linter::settings::flags::Noqa; +use ruff_linter::settings::LinterSettings; +use ruff_linter::source_kind::SourceKind; +use ruff_python_ast::PySourceType; +use ruff_python_formatter::{format_module_source, PyFormatOptions}; +use similar::TextDiff; + +static SETTINGS: OnceLock = OnceLock::new(); + +fn do_fuzz(case: &[u8]) -> Corpus { + // throw away inputs which aren't utf-8 + let Ok(code) = std::str::from_utf8(case) else { + return Corpus::Reject; + }; + + // the settings are immutable to test_snippet, so we avoid re-initialising here + let linter_settings = SETTINGS.get_or_init(LinterSettings::default); + let format_options = PyFormatOptions::default(); + + let linter_results = ruff_linter::linter::lint_only( + "fuzzed-source.py".as_ref(), + None, + &linter_settings, + Noqa::Enabled, + &SourceKind::Python(code.to_string()), + PySourceType::Python, + ParseSource::None, + ); + + if linter_results.error.is_some() { + return Corpus::Keep; // keep, but don't continue + } + + let mut warnings = HashMap::new(); + + for msg in linter_results.data.0 { + let count: &mut usize = warnings.entry(msg.kind.name).or_default(); + *count += 1; + } + + // format the code once + if let Ok(formatted) = format_module_source(code, format_options.clone()) { + let formatted = formatted.as_code().to_string(); + + let linter_results = ruff_linter::linter::lint_only( + "fuzzed-source.py".as_ref(), + None, + &linter_settings, + Noqa::Enabled, + &SourceKind::Python(formatted.clone()), + PySourceType::Python, + ParseSource::None, + ); + + assert!( + linter_results.error.is_none(), + "formatter introduced a parse error" + ); + + for msg in linter_results.data.0 { + if let Some(count) = warnings.get_mut(&msg.kind.name) { + if let Some(decremented) = count.checked_sub(1) { + *count = decremented; + } else { + panic!( + "formatter 
introduced additional linter warning: {msg:?}\ndiff: {}", + TextDiff::from_lines(code, &formatted) + .unified_diff() + .header("Unformatted", "Formatted") + .to_string() + ); + } + } else { + panic!( + "formatter introduced new linter warning that was not previously present: {msg:?}\ndiff: {}", + TextDiff::from_lines(code, &formatted) + .unified_diff() + .header("Unformatted", "Formatted") + .to_string() + ); + } + } + } + + Corpus::Keep +} + +fuzz_target!(|case: &[u8]| -> Corpus { do_fuzz(case) });