diff --git a/Cargo.lock b/Cargo.lock index bf3a5c1c61233..18699b6ddeabf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2381,6 +2381,7 @@ dependencies = [ "itertools 0.11.0", "lalrpop", "lalrpop-util", + "memchr", "ruff_python_ast", "ruff_text_size", "rustc-hash", diff --git a/crates/ruff_python_parser/Cargo.toml b/crates/ruff_python_parser/Cargo.toml index a5d4208623392..46a97eabd43c5 100644 --- a/crates/ruff_python_parser/Cargo.toml +++ b/crates/ruff_python_parser/Cargo.toml @@ -22,6 +22,7 @@ bitflags = { workspace = true } is-macro = { workspace = true } itertools = { workspace = true } lalrpop-util = { version = "0.20.0", default-features = false } +memchr = { workspace = true } unicode-ident = { workspace = true } unicode_names2 = { workspace = true } rustc-hash = { workspace = true } diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs index 448a3e7b34681..b4f3436d5aeab 100644 --- a/crates/ruff_python_parser/src/lexer.rs +++ b/crates/ruff_python_parser/src/lexer.rs @@ -407,7 +407,9 @@ impl<'source> Lexer<'source> { #[cfg(debug_assertions)] debug_assert_eq!(self.cursor.previous(), '#'); - self.cursor.eat_while(|c| !matches!(c, '\n' | '\r')); + let bytes = self.cursor.rest().as_bytes(); + let offset = memchr::memchr2(b'\n', b'\r', bytes).unwrap_or(bytes.len()); + self.cursor.skip_bytes(offset); Tok::Comment(self.token_text().to_string()) } diff --git a/crates/ruff_python_parser/src/lexer/cursor.rs b/crates/ruff_python_parser/src/lexer/cursor.rs index c026c88e9b7fb..91c7d30c53b05 100644 --- a/crates/ruff_python_parser/src/lexer/cursor.rs +++ b/crates/ruff_python_parser/src/lexer/cursor.rs @@ -127,4 +127,21 @@ impl<'a> Cursor<'a> { self.bump(); } } + + /// Skips the next `count` bytes. + /// + /// ## Panics + /// - If `count` is larger than the remaining bytes in the input stream. + /// - If `count` indexes into a multi-byte character. 
+    pub(super) fn skip_bytes(&mut self, count: usize) {
+        #[cfg(debug_assertions)]
+        {
+            // `prev_char` only exists to back debug assertions. Update it solely when at
+            // least one character is skipped: for `count == 0` nothing is consumed, so the
+            // previously consumed character must remain the "previous" one instead of being
+            // reset to `'\0'`.
+            //
+            // The slice panics on an out-of-range or non-char-boundary `count`, exactly like
+            // the slice below does in every build profile.
+            if let Some(last_skipped) = self.chars.as_str()[..count].chars().next_back() {
+                self.prev_char = last_skipped;
+            }
+        }
+
+        // Re-create the character iterator on the remaining input, `count` bytes further in.
+        self.chars = self.chars.as_str()[count..].chars();
+    }