From 7217c367782f733f4621198da794a6448e2db5fc Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Mon, 10 Jun 2024 13:14:21 +0200 Subject: [PATCH] Add parsed_module query --- Cargo.lock | 2 + crates/ruff_db/Cargo.toml | 2 + crates/ruff_db/src/file_system.rs | 44 ++++++++++--- crates/ruff_db/src/lib.rs | 4 +- crates/ruff_db/src/parsed.rs | 99 ++++++++++++++++++++++++++++ crates/ruff_db/src/source.rs | 15 +++-- crates/ruff_python_ast/src/lib.rs | 25 +++++-- crates/ruff_python_parser/src/lib.rs | 4 +- 8 files changed, 172 insertions(+), 23 deletions(-) create mode 100644 crates/ruff_db/src/parsed.rs diff --git a/Cargo.lock b/Cargo.lock index 043b3d548bc4ec..2d7cb32f9391ad 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2029,6 +2029,8 @@ dependencies = [ "countme", "dashmap", "filetime", + "ruff_python_ast", + "ruff_python_parser", "ruff_source_file", "ruff_text_size", "rustc-hash", diff --git a/crates/ruff_db/Cargo.toml b/crates/ruff_db/Cargo.toml index 122e536c2b3c10..1e4da4b5dff1c5 100644 --- a/crates/ruff_db/Cargo.toml +++ b/crates/ruff_db/Cargo.toml @@ -11,6 +11,8 @@ repository = { workspace = true } license = { workspace = true } [dependencies] +ruff_python_ast = { workspace = true } +ruff_python_parser = { workspace = true } ruff_source_file = { workspace = true } ruff_text_size = { workspace = true } diff --git a/crates/ruff_db/src/file_system.rs b/crates/ruff_db/src/file_system.rs index adcfb41378ecd5..b66d2260f7a10b 100644 --- a/crates/ruff_db/src/file_system.rs +++ b/crates/ruff_db/src/file_system.rs @@ -46,6 +46,31 @@ impl FileSystemPath { unsafe { &*(path as *const Utf8Path as *const FileSystemPath) } } + /// Extracts the file extension, if possible. 
+ /// + /// The extension is: + /// + /// * [`None`], if there is no file name; + /// * [`None`], if there is no embedded `.`; + /// * [`None`], if the file name begins with `.` and has no other `.`s within; + /// * Otherwise, the portion of the file name after the final `.` + /// + /// # Examples + /// + /// ``` + /// use ruff_db::file_system::FileSystemPath; + /// + /// assert_eq!("rs", FileSystemPath::new("foo.rs").extension().unwrap()); + /// assert_eq!("gz", FileSystemPath::new("foo.tar.gz").extension().unwrap()); + /// ``` + /// + /// See [`Path::extension`] for more details. + #[inline] + #[must_use] + pub fn extension(&self) -> Option<&str> { + self.0.extension() + } + /// Converts the path to an owned [`FileSystemPathBuf`]. pub fn to_path_buf(&self) -> FileSystemPathBuf { FileSystemPathBuf(self.0.to_path_buf()) @@ -93,6 +118,14 @@ impl AsRef for FileSystemPathBuf { } } +impl Deref for FileSystemPathBuf { + type Target = FileSystemPath; + + fn deref(&self) -> &Self::Target { + self.as_path() + } +} + impl AsRef for FileSystemPath { #[inline] fn as_ref(&self) -> &FileSystemPath { @@ -121,14 +154,6 @@ impl AsRef for FileSystemPath { } } -impl Deref for FileSystemPathBuf { - type Target = FileSystemPath; - - fn deref(&self) -> &Self::Target { - self.as_path() - } -} - impl std::fmt::Debug for FileSystemPath { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { self.0.fmt(f) @@ -246,9 +271,10 @@ impl FileType { #[cfg(test)] mod tests { - use crate::file_system::FileRevision; use filetime::FileTime; + use crate::file_system::FileRevision; + #[test] fn revision_from_file_time() { let file_time = FileTime::now(); diff --git a/crates/ruff_db/src/lib.rs b/crates/ruff_db/src/lib.rs index 8698d5b4551110..19594b441db36a 100644 --- a/crates/ruff_db/src/lib.rs +++ b/crates/ruff_db/src/lib.rs @@ -4,17 +4,19 @@ use rustc_hash::FxHasher; use salsa::DbWithJar; use crate::file_system::{FileSystem, FileSystemPath}; +use crate::parsed::parsed_module; use 
crate::source::{line_index, source_text}; use crate::vfs::{VendoredPath, Vfs, VfsFile}; pub mod file_system; +pub mod parsed; pub mod source; pub mod vfs; pub(crate) type FxDashMap<K, V> = dashmap::DashMap<K, V, BuildHasherDefault<FxHasher>>; #[salsa::jar(db=Db)] -pub struct Jar(VfsFile, source_text, line_index); +pub struct Jar(VfsFile, source_text, line_index, parsed_module); /// Database (or cupboard) that gives access to the virtual filesystem, source code, and parsed AST. pub trait Db: DbWithJar<Jar> { diff --git a/crates/ruff_db/src/parsed.rs b/crates/ruff_db/src/parsed.rs new file mode 100644 index 00000000000000..25e89dd47ba458 --- /dev/null +++ b/crates/ruff_db/src/parsed.rs @@ -0,0 +1,99 @@ +use std::fmt::Formatter; +use std::ops::Deref; +use std::sync::Arc; + +use ruff_python_ast::{ModModule, PySourceType}; +use ruff_python_parser::{parse_unchecked_source, Parsed}; + +use crate::source::source_text; +use crate::vfs::{VfsFile, VfsPath}; +use crate::Db; + +/// Returns the parsed AST of `file`, including its token stream. +/// +/// The query uses Ruff's error-resilient parser. That means that the parser always succeeds in producing an +/// AST, even if the file contains syntax errors. The parse errors +/// are then accessible through [`Parsed::errors`]. +/// +/// The parse tree is cached between invocations, but the query doesn't make use of Salsa's optimization +/// that skips dependent queries if the AST hasn't changed. Comparing two ASTs is a non-trivial operation +/// and every offset change is directly reflected in the changed AST offsets. Ruff's AST also doesn't implement `Eq`, +/// which is required to use the optimization.
+#[salsa::tracked(return_ref, no_eq)] +pub fn parsed_module(db: &dyn Db, file: VfsFile) -> Parsed<ModModule> { + let source = source_text(db, file); + let path = file.path(db); + + let ty = match path { + VfsPath::FileSystem(path) => path + .extension() + .map_or(PySourceType::Python, PySourceType::from_extension), + VfsPath::Vendored(_) => PySourceType::Stub, + }; + + parse_unchecked_source(&source, ty) +} + +/// Cheap cloneable wrapper around the parsed module. +#[derive(Clone, PartialEq)] +pub struct ParsedModule { + inner: Arc<Parsed<ModModule>>, +} + +impl ParsedModule { + /// Consumes `self` and returns the Arc storing the parsed module. + pub fn into_arc(self) -> Arc<Parsed<ModModule>> { + self.inner + } +} + +impl Deref for ParsedModule { + type Target = Parsed<ModModule>; + + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +impl std::fmt::Debug for ParsedModule { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_tuple("ParsedModule").field(&self.inner).finish() + } +} + +#[cfg(test)] +mod tests { + use crate::file_system::FileSystemPath; + use crate::parsed::parsed_module; + use crate::tests::TestDb; + use crate::Db; + + #[test] + fn python_file() { + let mut db = TestDb::new(); + let path = FileSystemPath::new("test.py"); + + db.file_system_mut().write_file(path, "x = 10".to_string()); + + let file = db.file(path); + + let parsed = parsed_module(&db, file); + + assert!(parsed.is_valid()); + } + + #[test] + fn python_ipynb_file() { + let mut db = TestDb::new(); + let path = FileSystemPath::new("test.ipynb"); + + db.file_system_mut() + .write_file(path, "%timeit a = b".to_string()); + + let file = db.file(path); + + let parsed = parsed_module(&db, file); + + assert!(parsed.is_valid()); + } +} diff --git a/crates/ruff_db/src/source.rs b/crates/ruff_db/src/source.rs index 220f353da1d1d7..8c7f10a3f91d34 100644 --- a/crates/ruff_db/src/source.rs +++ b/crates/ruff_db/src/source.rs @@ -1,7 +1,8 @@ -use ruff_source_file::LineIndex; use std::ops::Deref; use std::sync::Arc; +use
ruff_source_file::LineIndex; + use crate::vfs::VfsFile; use crate::Db; @@ -24,6 +25,8 @@ pub fn line_index(db: &dyn Db, file: VfsFile) -> LineIndex { } /// The source text of a [`VfsFile`](crate::File) +/// +/// Cheap cloneable in `O(1)`. #[derive(Clone, Eq, PartialEq)] pub struct SourceText { inner: Arc<str>, @@ -51,14 +54,16 @@ impl std::fmt::Debug for SourceText { #[cfg(test)] mod tests { + use filetime::FileTime; + use salsa::EventKind; + + use ruff_source_file::OneIndexed; + use ruff_text_size::TextSize; + use crate::file_system::FileSystemPath; use crate::source::{line_index, source_text}; use crate::tests::TestDb; use crate::Db; - use filetime::FileTime; - use ruff_source_file::OneIndexed; - use ruff_text_size::TextSize; - use salsa::EventKind; #[test] fn re_runs_query_when_file_revision_changes() { diff --git a/crates/ruff_python_ast/src/lib.rs b/crates/ruff_python_ast/src/lib.rs index 1805be77b94276..47491ebd3bec35 100644 --- a/crates/ruff_python_ast/src/lib.rs +++ b/crates/ruff_python_ast/src/lib.rs @@ -1,3 +1,4 @@ +use std::ffi::OsStr; use std::path::Path; pub use expression::*; @@ -80,13 +81,25 @@ pub enum PySourceType { Ipynb, } +impl PySourceType { + /// Infers the source type from the file extension. + /// + /// Falls back to `Python` if the extension is not recognized. + pub fn from_extension(extension: &str) -> Self { + match extension { + "py" => Self::Python, + "pyi" => Self::Stub, + "ipynb" => Self::Ipynb, + _ => Self::Python, + } + } +} + impl<P: AsRef<Path>> From<P> for PySourceType { fn from(path: P) -> Self { - match path.as_ref().extension() { - Some(ext) if ext == "py" => PySourceType::Python, - Some(ext) if ext == "pyi" => PySourceType::Stub, - Some(ext) if ext == "ipynb" => PySourceType::Ipynb, - _ => PySourceType::Python, - } + path.as_ref() + .extension() + .and_then(OsStr::to_str) + .map_or(Self::Python, Self::from_extension) } } diff --git a/crates/ruff_python_parser/src/lib.rs b/crates/ruff_python_parser/src/lib.rs index 5ee12ab6ae4352..17af39f96a031e 100644 --- a/crates/ruff_python_parser/src/lib.rs +++ b/crates/ruff_python_parser/src/lib.rs @@ -234,7 +234,7 @@ pub fn parse_unchecked_source(source: &str, source_type: PySourceType) -> Parsed } /// Represents the parsed source code. -#[derive(Debug, Clone)] +#[derive(Debug, PartialEq, Clone)] pub struct Parsed<T> { syntax: T, tokens: Tokens, @@ -361,7 +361,7 @@ impl Parsed { } /// Tokens represents a vector of lexed [`Token`]. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct Tokens { raw: Vec<Token>,