From d4dd96d1f4a78d75e598c9cdfb778c88c21742d7 Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Thu, 13 Jun 2024 08:37:02 +0100 Subject: [PATCH] red-knot: `source_text`, `line_index`, and `parsed_module` queries (#11822) --- Cargo.lock | 4 + crates/ruff_db/Cargo.toml | 5 + crates/ruff_db/src/file_system.rs | 28 ++++- crates/ruff_db/src/lib.rs | 6 +- crates/ruff_db/src/parsed.rs | 126 ++++++++++++++++++++++ crates/ruff_db/src/source.rs | 5 +- crates/ruff_python_ast/src/lib.rs | 25 +++-- crates/ruff_python_parser/src/lib.rs | 4 +- crates/ruff_source_file/src/line_index.rs | 5 +- 9 files changed, 193 insertions(+), 15 deletions(-) create mode 100644 crates/ruff_db/src/parsed.rs diff --git a/Cargo.lock b/Cargo.lock index dda94078adc5e..4a26e9f54bd2b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2029,6 +2029,10 @@ dependencies = [ "countme", "dashmap", "filetime", + "ruff_python_ast", + "ruff_python_parser", + "ruff_source_file", + "ruff_text_size", "rustc-hash", "salsa-2022", "tracing", diff --git a/crates/ruff_db/Cargo.toml b/crates/ruff_db/Cargo.toml index 06c4de96251d1..1e4da4b5dff1c 100644 --- a/crates/ruff_db/Cargo.toml +++ b/crates/ruff_db/Cargo.toml @@ -11,6 +11,11 @@ repository = { workspace = true } license = { workspace = true } [dependencies] +ruff_python_ast = { workspace = true } +ruff_python_parser = { workspace = true } +ruff_source_file = { workspace = true } +ruff_text_size = { workspace = true } + camino = { workspace = true } countme = { workspace = true } dashmap = { workspace = true } diff --git a/crates/ruff_db/src/file_system.rs b/crates/ruff_db/src/file_system.rs index 44ab9218916d1..06d7dd2a74717 100644 --- a/crates/ruff_db/src/file_system.rs +++ b/crates/ruff_db/src/file_system.rs @@ -48,6 +48,31 @@ impl FileSystemPath { unsafe { &*(path as *const Utf8Path as *const FileSystemPath) } } + /// Extracts the file extension, if possible. 
+ /// + /// The extension is: + /// + /// * [`None`], if there is no file name; + /// * [`None`], if there is no embedded `.`; + /// * [`None`], if the file name begins with `.` and has no other `.`s within; + /// * Otherwise, the portion of the file name after the final `.` + /// + /// # Examples + /// + /// ``` + /// use ruff_db::file_system::FileSystemPath; + /// + /// assert_eq!("rs", FileSystemPath::new("foo.rs").extension().unwrap()); + /// assert_eq!("gz", FileSystemPath::new("foo.tar.gz").extension().unwrap()); + /// ``` + /// + /// See [`Path::extension`] for more details. + #[inline] + #[must_use] + pub fn extension(&self) -> Option<&str> { + self.0.extension() + } + /// Converts the path to an owned [`FileSystemPathBuf`]. pub fn to_path_buf(&self) -> FileSystemPathBuf { FileSystemPathBuf(self.0.to_path_buf()) @@ -251,9 +276,10 @@ impl FileType { #[cfg(test)] mod tests { - use crate::file_system::FileRevision; use filetime::FileTime; + use crate::file_system::FileRevision; + #[test] fn revision_from_file_time() { let file_time = FileTime::now(); diff --git a/crates/ruff_db/src/lib.rs b/crates/ruff_db/src/lib.rs index 503fd50c1befd..22ab4aeb9bdb7 100644 --- a/crates/ruff_db/src/lib.rs +++ b/crates/ruff_db/src/lib.rs @@ -4,15 +4,19 @@ use rustc_hash::FxHasher; use salsa::DbWithJar; use crate::file_system::{FileSystem, FileSystemPath}; +use crate::parsed::parsed_module; +use crate::source::{line_index, source_text}; use crate::vfs::{VendoredPath, Vfs, VfsFile}; pub mod file_system; +pub mod parsed; +pub mod source; pub mod vfs; pub(crate) type FxDashMap = dashmap::DashMap>; #[salsa::jar(db=Db)] -pub struct Jar(VfsFile); +pub struct Jar(VfsFile, source_text, line_index, parsed_module); /// Database that gives access to the virtual filesystem, source code, and parsed AST. 
pub trait Db: DbWithJar { diff --git a/crates/ruff_db/src/parsed.rs b/crates/ruff_db/src/parsed.rs new file mode 100644 index 0000000000000..c4b3294c371d8 --- /dev/null +++ b/crates/ruff_db/src/parsed.rs @@ -0,0 +1,126 @@ +use std::fmt::Formatter; +use std::ops::Deref; +use std::sync::Arc; + +use ruff_python_ast::{ModModule, PySourceType}; +use ruff_python_parser::{parse_unchecked_source, Parsed}; + +use crate::source::source_text; +use crate::vfs::{VfsFile, VfsPath}; +use crate::Db; + +/// Returns the parsed AST of `file`, including its token stream. +/// +/// The query uses Ruff's error-resilient parser. That means that the parser always succeeds in producing an +/// AST even if the file contains syntax errors. The parse errors +/// are then accessible through [`Parsed::errors`]. +/// +/// The query is only cached when the [`source_text()`] hasn't changed. This is because +/// comparing two ASTs is a non-trivial operation and every offset change is directly +/// reflected in the changed AST offsets. +/// The other reason is that Ruff's AST doesn't implement `Eq`, which Salsa requires +/// for determining if a query result is unchanged. +#[salsa::tracked(return_ref, no_eq)] +pub fn parsed_module(db: &dyn Db, file: VfsFile) -> ParsedModule { + let source = source_text(db, file); + let path = file.path(db); + + let ty = match path { + VfsPath::FileSystem(path) => path + .extension() + .map_or(PySourceType::Python, PySourceType::from_extension), + VfsPath::Vendored(_) => PySourceType::Stub, + }; + + ParsedModule { + inner: Arc::new(parse_unchecked_source(&source, ty)), + } +} + +/// Cheap cloneable wrapper around the parsed module. +#[derive(Clone, PartialEq)] +pub struct ParsedModule { + inner: Arc>, +} + +impl ParsedModule { + /// Consumes `self` and returns the Arc storing the parsed module. 
+ pub fn into_arc(self) -> Arc> { + self.inner + } +} + +impl Deref for ParsedModule { + type Target = Parsed; + + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +impl std::fmt::Debug for ParsedModule { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_tuple("ParsedModule").field(&self.inner).finish() + } +} + +#[cfg(test)] +mod tests { + use crate::file_system::FileSystemPath; + use crate::parsed::parsed_module; + use crate::tests::TestDb; + use crate::vfs::VendoredPath; + use crate::Db; + + #[test] + fn python_file() { + let mut db = TestDb::new(); + let path = FileSystemPath::new("test.py"); + + db.file_system_mut().write_file(path, "x = 10".to_string()); + + let file = db.file(path); + + let parsed = parsed_module(&db, file); + + assert!(parsed.is_valid()); + } + + #[test] + fn python_ipynb_file() { + let mut db = TestDb::new(); + let path = FileSystemPath::new("test.ipynb"); + + db.file_system_mut() + .write_file(path, "%timeit a = b".to_string()); + + let file = db.file(path); + + let parsed = parsed_module(&db, file); + + assert!(parsed.is_valid()); + } + + #[test] + fn vendored_file() { + let mut db = TestDb::new(); + db.vfs_mut().stub_vendored([( + "path.pyi", + r#" +import sys + +if sys.platform == "win32": + from ntpath import * + from ntpath import __all__ as __all__ +else: + from posixpath import * + from posixpath import __all__ as __all__"#, + )]); + + let file = db.vendored_file(VendoredPath::new("path.pyi")).unwrap(); + + let parsed = parsed_module(&db, file); + + assert!(parsed.is_valid()); + } +} diff --git a/crates/ruff_db/src/source.rs b/crates/ruff_db/src/source.rs index e7253c5bb7a60..af76d3c716f6a 100644 --- a/crates/ruff_db/src/source.rs +++ b/crates/ruff_db/src/source.rs @@ -96,11 +96,10 @@ mod tests { // Change the file permission only file.set_permissions(&mut db).to(Some(0o777)); - db.events().lock().unwrap().clear(); + db.clear_salsa_events(); assert_eq!(&*source_text(&db, file), "x = 10"); - let events 
= db.events(); - let events = events.lock().unwrap(); + let events = db.take_salsa_events(); assert!(!events .iter() diff --git a/crates/ruff_python_ast/src/lib.rs b/crates/ruff_python_ast/src/lib.rs index 1805be77b9427..47491ebd3bec3 100644 --- a/crates/ruff_python_ast/src/lib.rs +++ b/crates/ruff_python_ast/src/lib.rs @@ -1,3 +1,4 @@ +use std::ffi::OsStr; use std::path::Path; pub use expression::*; @@ -80,13 +81,25 @@ pub enum PySourceType { Ipynb, } +impl PySourceType { + /// Infers the source type from the file extension. + /// + /// Falls back to `Python` if the extension is not recognized. + pub fn from_extension(extension: &str) -> Self { + match extension { + "py" => Self::Python, + "pyi" => Self::Stub, + "ipynb" => Self::Ipynb, + _ => Self::Python, + } + } +} + impl> From

for PySourceType { fn from(path: P) -> Self { - match path.as_ref().extension() { - Some(ext) if ext == "py" => PySourceType::Python, - Some(ext) if ext == "pyi" => PySourceType::Stub, - Some(ext) if ext == "ipynb" => PySourceType::Ipynb, - _ => PySourceType::Python, - } + path.as_ref() + .extension() + .and_then(OsStr::to_str) + .map_or(Self::Python, Self::from_extension) } } diff --git a/crates/ruff_python_parser/src/lib.rs b/crates/ruff_python_parser/src/lib.rs index 5ee12ab6ae435..17af39f96a031 100644 --- a/crates/ruff_python_parser/src/lib.rs +++ b/crates/ruff_python_parser/src/lib.rs @@ -234,7 +234,7 @@ pub fn parse_unchecked_source(source: &str, source_type: PySourceType) -> Parsed } /// Represents the parsed source code. -#[derive(Debug, Clone)] +#[derive(Debug, PartialEq, Clone)] pub struct Parsed { syntax: T, tokens: Tokens, @@ -361,7 +361,7 @@ impl Parsed { } /// Tokens represents a vector of lexed [`Token`]. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct Tokens { raw: Vec, diff --git a/crates/ruff_source_file/src/line_index.rs b/crates/ruff_source_file/src/line_index.rs index 7f9022fff4148..db4fc44211400 100644 --- a/crates/ruff_source_file/src/line_index.rs +++ b/crates/ruff_source_file/src/line_index.rs @@ -14,11 +14,12 @@ use crate::SourceLocation; /// Index for fast [byte offset](TextSize) to [`SourceLocation`] conversions. /// /// Cloning a [`LineIndex`] is cheap because it only requires bumping a reference count. -#[derive(Clone)] +#[derive(Clone, Eq, PartialEq)] pub struct LineIndex { inner: Arc, } +#[derive(Eq, PartialEq)] struct LineIndexInner { line_starts: Vec, kind: IndexKind, @@ -268,7 +269,7 @@ impl Debug for LineIndex { } } -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, Eq, PartialEq)] enum IndexKind { /// Optimized index for an ASCII only document Ascii,