Skip to content

Commit

Permalink
Refactor shebang parsing to remove regex dependency (#5690)
Browse files Browse the repository at this point in the history
## Summary

Similar to #5567, we can remove the use of regex, plus simplify the
representation (use `Option`), add snapshot tests, etc.

This is about 100x faster than using a regex for cases that match (2.5ns
vs. 250ns). It's obviously not a hot path, but I prefer the consistency
with other similar comment-parsing. I may DRY these up into some common
functionality later on.
  • Loading branch information
charliermarsh authored Jul 11, 2023
1 parent 30bec3f commit 511ec0d
Show file tree
Hide file tree
Showing 10 changed files with 161 additions and 128 deletions.
50 changes: 26 additions & 24 deletions crates/ruff/src/checkers/physical_lines.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use ruff_python_whitespace::UniversalNewlines;

use crate::registry::Rule;
use crate::rules::flake8_copyright::rules::missing_copyright_notice;
use crate::rules::flake8_executable::helpers::{extract_shebang, ShebangDirective};
use crate::rules::flake8_executable::helpers::ShebangDirective;
use crate::rules::flake8_executable::rules::{
shebang_missing, shebang_newline, shebang_not_executable, shebang_python, shebang_whitespace,
};
Expand Down Expand Up @@ -87,33 +87,35 @@ pub(crate) fn check_physical_lines(
|| enforce_shebang_newline
|| enforce_shebang_python
{
let shebang = extract_shebang(&line);
if enforce_shebang_not_executable {
if let Some(diagnostic) = shebang_not_executable(path, line.range(), &shebang) {
diagnostics.push(diagnostic);
if let Some(shebang) = ShebangDirective::try_extract(&line) {
has_any_shebang = true;
if enforce_shebang_not_executable {
if let Some(diagnostic) =
shebang_not_executable(path, line.range(), &shebang)
{
diagnostics.push(diagnostic);
}
}
}
if enforce_shebang_missing {
if !has_any_shebang && matches!(shebang, ShebangDirective::Match(..)) {
has_any_shebang = true;
if enforce_shebang_whitespace {
if let Some(diagnostic) =
shebang_whitespace(line.range(), &shebang, fix_shebang_whitespace)
{
diagnostics.push(diagnostic);
}
}
}
if enforce_shebang_whitespace {
if let Some(diagnostic) =
shebang_whitespace(line.range(), &shebang, fix_shebang_whitespace)
{
diagnostics.push(diagnostic);
if enforce_shebang_newline {
if let Some(diagnostic) =
shebang_newline(line.range(), &shebang, index == 0)
{
diagnostics.push(diagnostic);
}
}
}
if enforce_shebang_newline {
if let Some(diagnostic) = shebang_newline(line.range(), &shebang, index == 0) {
diagnostics.push(diagnostic);
}
}
if enforce_shebang_python {
if let Some(diagnostic) = shebang_python(line.range(), &shebang) {
diagnostics.push(diagnostic);
if enforce_shebang_python {
if let Some(diagnostic) = shebang_python(line.range(), &shebang) {
diagnostics.push(diagnostic);
}
}
} else {
}
}
}
Expand Down
120 changes: 62 additions & 58 deletions crates/ruff/src/rules/flake8_executable/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,84 +5,88 @@ use std::path::Path;

#[cfg(target_family = "unix")]
use anyhow::Result;
use once_cell::sync::Lazy;
use regex::Regex;
use ruff_text_size::{TextLen, TextSize};

static SHEBANG_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"^(?P<spaces>\s*)#!(?P<directive>.*)").unwrap());

/// A shebang directive (e.g., `#!/usr/bin/env python3`).
#[derive(Debug, PartialEq, Eq)]
pub(crate) enum ShebangDirective<'a> {
None,
// whitespace length, start of the shebang, contents
Match(TextSize, TextSize, &'a str),
pub(crate) struct ShebangDirective<'a> {
/// The offset of the directive contents (e.g., `/usr/bin/env python3`) from the start of the
/// line.
pub(crate) offset: TextSize,
/// The contents of the directive (e.g., `"/usr/bin/env python3"`).
pub(crate) contents: &'a str,
}

pub(crate) fn extract_shebang(line: &str) -> ShebangDirective {
// Minor optimization to avoid matches in the common case.
if !line.contains('!') {
return ShebangDirective::None;
impl<'a> ShebangDirective<'a> {
/// Parse a shebang directive from a line, or return `None` if the line does not contain a
/// shebang directive.
pub(crate) fn try_extract(line: &'a str) -> Option<Self> {
// Trim whitespace.
let directive = Self::lex_whitespace(line);

// Trim the `#!` prefix.
let directive = Self::lex_char(directive, '#')?;
let directive = Self::lex_char(directive, '!')?;

Some(Self {
offset: line.text_len() - directive.text_len(),
contents: directive,
})
}
match SHEBANG_REGEX.captures(line) {
Some(caps) => match caps.name("spaces") {
Some(spaces) => match caps.name("directive") {
Some(matches) => ShebangDirective::Match(
spaces.as_str().text_len(),
TextSize::try_from(matches.start()).unwrap(),
matches.as_str(),
),
None => ShebangDirective::None,
},
None => ShebangDirective::None,
},
None => ShebangDirective::None,

/// Lex optional leading whitespace.
#[inline]
fn lex_whitespace(line: &str) -> &str {
line.trim_start()
}

/// Lex a specific character, or return `None` if the character is not the first character in
/// the line.
#[inline]
fn lex_char(line: &str, c: char) -> Option<&str> {
let mut chars = line.chars();
if chars.next() == Some(c) {
Some(chars.as_str())
} else {
None
}
}
}

#[cfg(target_family = "unix")]
pub(crate) fn is_executable(filepath: &Path) -> Result<bool> {
{
let metadata = filepath.metadata()?;
let permissions = metadata.permissions();
Ok(permissions.mode() & 0o111 != 0)
}
pub(super) fn is_executable(filepath: &Path) -> Result<bool> {
let metadata = filepath.metadata()?;
let permissions = metadata.permissions();
Ok(permissions.mode() & 0o111 != 0)
}

#[cfg(test)]
mod tests {
use ruff_text_size::TextSize;
use insta::assert_debug_snapshot;

use crate::rules::flake8_executable::helpers::ShebangDirective;

use crate::rules::flake8_executable::helpers::{
extract_shebang, ShebangDirective, SHEBANG_REGEX,
};
#[test]
fn shebang_non_match() {
let source = "not a match";
assert_debug_snapshot!(ShebangDirective::try_extract(source));
}

#[test]
fn shebang_regex() {
// Positive cases
assert!(SHEBANG_REGEX.is_match("#!/usr/bin/python"));
assert!(SHEBANG_REGEX.is_match("#!/usr/bin/env python"));
assert!(SHEBANG_REGEX.is_match(" #!/usr/bin/env python"));
assert!(SHEBANG_REGEX.is_match(" #!/usr/bin/env python"));
fn shebang_end_of_line() {
let source = "print('test') #!/usr/bin/python";
assert_debug_snapshot!(ShebangDirective::try_extract(source));
}

// Negative cases
assert!(!SHEBANG_REGEX.is_match("hello world"));
#[test]
fn shebang_match() {
let source = "#!/usr/bin/env python";
assert_debug_snapshot!(ShebangDirective::try_extract(source));
}

#[test]
fn shebang_extract_match() {
assert_eq!(extract_shebang("not a match"), ShebangDirective::None);
assert_eq!(
extract_shebang("#!/usr/bin/env python"),
ShebangDirective::Match(TextSize::from(0), TextSize::from(2), "/usr/bin/env python")
);
assert_eq!(
extract_shebang(" #!/usr/bin/env python"),
ShebangDirective::Match(TextSize::from(2), TextSize::from(4), "/usr/bin/env python")
);
assert_eq!(
extract_shebang("print('test') #!/usr/bin/python"),
ShebangDirective::None
);
fn shebang_leading_space() {
let source = " #!/usr/bin/env python";
assert_debug_snapshot!(ShebangDirective::try_extract(source));
}
}
19 changes: 8 additions & 11 deletions crates/ruff/src/rules/flake8_executable/rules/shebang_newline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,17 +47,14 @@ pub(crate) fn shebang_newline(
shebang: &ShebangDirective,
first_line: bool,
) -> Option<Diagnostic> {
if let ShebangDirective::Match(_, start, content) = shebang {
if first_line {
None
} else {
let diagnostic = Diagnostic::new(
ShebangNotFirstLine,
TextRange::at(range.start() + start, content.text_len()),
);
Some(diagnostic)
}
} else {
let ShebangDirective { offset, contents } = shebang;

if first_line {
None
} else {
Some(Diagnostic::new(
ShebangNotFirstLine,
TextRange::at(range.start() + offset, contents.text_len()),
))
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,16 @@ pub(crate) fn shebang_not_executable(
range: TextRange,
shebang: &ShebangDirective,
) -> Option<Diagnostic> {
if let ShebangDirective::Match(_, start, content) = shebang {
if let Ok(false) = is_executable(filepath) {
let diagnostic = Diagnostic::new(
ShebangNotExecutable,
TextRange::at(range.start() + start, content.text_len()),
);
return Some(diagnostic);
}
let ShebangDirective { offset, contents } = shebang;

if let Ok(false) = is_executable(filepath) {
let diagnostic = Diagnostic::new(
ShebangNotExecutable,
TextRange::at(range.start() + offset, contents.text_len()),
);
return Some(diagnostic);
}

None
}

Expand Down
19 changes: 7 additions & 12 deletions crates/ruff/src/rules/flake8_executable/rules/shebang_python.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,19 +42,14 @@ impl Violation for ShebangMissingPython {

/// EXE003
pub(crate) fn shebang_python(range: TextRange, shebang: &ShebangDirective) -> Option<Diagnostic> {
if let ShebangDirective::Match(_, start, content) = shebang {
if content.contains("python") || content.contains("pytest") {
None
} else {
let diagnostic = Diagnostic::new(
ShebangMissingPython,
TextRange::at(range.start() + start, content.text_len())
.sub_start(TextSize::from(2)),
);
let ShebangDirective { offset, contents } = shebang;

Some(diagnostic)
}
} else {
if contents.contains("python") || contents.contains("pytest") {
None
} else {
Some(Diagnostic::new(
ShebangMissingPython,
TextRange::at(range.start() + offset, contents.text_len()).sub_start(TextSize::from(2)),
))
}
}
34 changes: 19 additions & 15 deletions crates/ruff/src/rules/flake8_executable/rules/shebang_whitespace.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use ruff_text_size::{TextRange, TextSize};
use std::ops::Sub;

use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
Expand Down Expand Up @@ -49,22 +50,25 @@ pub(crate) fn shebang_whitespace(
shebang: &ShebangDirective,
autofix: bool,
) -> Option<Diagnostic> {
if let ShebangDirective::Match(n_spaces, start, ..) = shebang {
if *n_spaces > TextSize::from(0) && *start == n_spaces + TextSize::from(2) {
let mut diagnostic = Diagnostic::new(
ShebangLeadingWhitespace,
TextRange::at(range.start(), *n_spaces),
);
if autofix {
diagnostic.set_fix(Fix::automatic(Edit::range_deletion(TextRange::at(
range.start(),
*n_spaces,
))));
}
Some(diagnostic)
} else {
None
let ShebangDirective {
offset,
contents: _,
} = shebang;

if *offset > TextSize::from(2) {
let leading_space_start = range.start();
let leading_space_len = offset.sub(TextSize::new(2));
let mut diagnostic = Diagnostic::new(
ShebangLeadingWhitespace,
TextRange::at(leading_space_start, leading_space_len),
);
if autofix {
diagnostic.set_fix(Fix::automatic(Edit::range_deletion(TextRange::at(
leading_space_start,
leading_space_len,
))));
}
Some(diagnostic)
} else {
None
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: crates/ruff/src/rules/flake8_executable/helpers.rs
expression: "ShebangDirective::try_extract(source)"
---
None
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
source: crates/ruff/src/rules/flake8_executable/helpers.rs
expression: "ShebangDirective::try_extract(source)"
---
Some(
ShebangDirective {
offset: 4,
contents: "/usr/bin/env python",
},
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
source: crates/ruff/src/rules/flake8_executable/helpers.rs
expression: "ShebangDirective::try_extract(source)"
---
Some(
ShebangDirective {
offset: 2,
contents: "/usr/bin/env python",
},
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
source: crates/ruff/src/rules/flake8_executable/helpers.rs
expression: "ShebangDirective::try_extract(source)"
---
None

0 comments on commit 511ec0d

Please sign in to comment.