Skip to content

Commit

Permalink
Add parsed_module query
Browse files Browse the repository at this point in the history
  • Loading branch information
MichaReiser committed Jun 10, 2024
1 parent 5ae7f09 commit 7217c36
Show file tree
Hide file tree
Showing 8 changed files with 172 additions and 23 deletions.
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions crates/ruff_db/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ repository = { workspace = true }
license = { workspace = true }

[dependencies]
ruff_python_ast = { workspace = true }
ruff_python_parser = { workspace = true }
ruff_source_file = { workspace = true }
ruff_text_size = { workspace = true }

Expand Down
44 changes: 35 additions & 9 deletions crates/ruff_db/src/file_system.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,31 @@ impl FileSystemPath {
unsafe { &*(path as *const Utf8Path as *const FileSystemPath) }
}

/// Extracts the file extension, if possible.
///
/// This simply forwards to the `extension` method of the wrapped path and returns its result unchanged.
///
/// The extension is:
///
/// * [`None`], if there is no file name;
/// * [`None`], if there is no embedded `.`;
/// * [`None`], if the file name begins with `.` and has no other `.`s within;
/// * Otherwise, the portion of the file name after the final `.`
///
/// # Examples
///
/// ```
/// use ruff_db::file_system::FileSystemPath;
///
/// assert_eq!("rs", FileSystemPath::new("foo.rs").extension().unwrap());
/// assert_eq!("gz", FileSystemPath::new("foo.tar.gz").extension().unwrap());
/// assert_eq!(None, FileSystemPath::new("foo").extension());
/// ```
///
/// See [`Path::extension`] for more details.
#[inline]
#[must_use]
pub fn extension(&self) -> Option<&str> {
self.0.extension()
}

/// Converts the path to an owned [`FileSystemPathBuf`].
pub fn to_path_buf(&self) -> FileSystemPathBuf {
FileSystemPathBuf(self.0.to_path_buf())
Expand Down Expand Up @@ -93,6 +118,14 @@ impl AsRef<FileSystemPath> for FileSystemPathBuf {
}
}

/// Allows a [`FileSystemPathBuf`] to be dereferenced to a borrowed [`FileSystemPath`].
impl Deref for FileSystemPathBuf {
type Target = FileSystemPath;

/// Borrows the owned path as a [`FileSystemPath`] by calling `as_path`.
fn deref(&self) -> &Self::Target {
self.as_path()
}
}

impl AsRef<FileSystemPath> for FileSystemPath {
#[inline]
fn as_ref(&self) -> &FileSystemPath {
Expand Down Expand Up @@ -121,14 +154,6 @@ impl AsRef<Path> for FileSystemPath {
}
}

/// Allows a [`FileSystemPathBuf`] to be dereferenced to a borrowed [`FileSystemPath`].
///
/// NOTE(review): an identical `impl Deref for FileSystemPathBuf` appears earlier in this listing
/// (this looks like the moved-from side of the change); a compiled file can only contain one of them.
impl Deref for FileSystemPathBuf {
type Target = FileSystemPath;

/// Borrows the owned path as a [`FileSystemPath`] by calling `as_path`.
fn deref(&self) -> &Self::Target {
self.as_path()
}
}

impl std::fmt::Debug for FileSystemPath {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.0.fmt(f)
Expand Down Expand Up @@ -246,9 +271,10 @@ impl FileType {

#[cfg(test)]
mod tests {
use crate::file_system::FileRevision;
use filetime::FileTime;

use crate::file_system::FileRevision;

#[test]
fn revision_from_file_time() {
let file_time = FileTime::now();
Expand Down
4 changes: 3 additions & 1 deletion crates/ruff_db/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,19 @@ use rustc_hash::FxHasher;
use salsa::DbWithJar;

use crate::file_system::{FileSystem, FileSystemPath};
use crate::parsed::parsed_module;
use crate::source::{line_index, source_text};
use crate::vfs::{VendoredPath, Vfs, VfsFile};

pub mod file_system;
pub mod parsed;
pub mod source;
pub mod vfs;

/// Crate-internal alias for a [`dashmap::DashMap`] whose hasher is built from [`FxHasher`].
pub(crate) type FxDashMap<K, V> = dashmap::DashMap<K, V, BuildHasherDefault<FxHasher>>;

/// Salsa jar of this crate: registers `VfsFile` together with the `source_text`, `line_index`,
/// and `parsed_module` functions.
///
/// The jar is declared exactly once; `parsed_module` has to be listed here because it is a
/// `#[salsa::tracked]` function of this crate.
#[salsa::jar(db=Db)]
pub struct Jar(VfsFile, source_text, line_index, parsed_module);

/// Database (or cupboard) that gives access to the virtual filesystem, source code, and parsed AST.
pub trait Db: DbWithJar<Jar> {
Expand Down
99 changes: 99 additions & 0 deletions crates/ruff_db/src/parsed.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
use std::fmt::Formatter;
use std::ops::Deref;
use std::sync::Arc;

use ruff_python_ast::{ModModule, PySourceType};
use ruff_python_parser::{parse_unchecked_source, Parsed};

use crate::source::source_text;
use crate::vfs::{VfsFile, VfsPath};
use crate::Db;

/// Returns the parsed AST of `file`, including its token stream.
///
/// The query uses Ruff's error-resilient parser, so it always produces an AST, even when the file
/// contains syntax errors. Those errors are accessible through [`Parsed::errors`].
///
/// The parse tree is cached between invocations, but the query opts out (`no_eq`) of Salsa's optimization
/// that skips dependent queries if the result hasn't changed. Comparing two ASTs is a non-trivial operation,
/// every offset change is directly reflected in the AST's offsets, and Ruff's AST doesn't implement `Eq`,
/// which is required to use the optimization.
///
/// The source type is derived from the file extension for file system paths (plain Python when the path
/// has no extension); vendored files are always parsed as stubs.
#[salsa::tracked(return_ref, no_eq)]
pub fn parsed_module(db: &dyn Db, file: VfsFile) -> Parsed<ModModule> {
    let source = source_text(db, file);
    let path = file.path(db);

    let source_type = match path {
        VfsPath::Vendored(_) => PySourceType::Stub,
        VfsPath::FileSystem(fs_path) => match fs_path.extension() {
            Some(extension) => PySourceType::from_extension(extension),
            None => PySourceType::Python,
        },
    };

    parse_unchecked_source(&source, source_type)
}

/// Cheaply cloneable wrapper around the parsed module.
///
/// Cloning only clones the inner [`Arc`]; the wrapped [`Parsed`] value is shared between the clones.
#[derive(Clone, PartialEq)]
pub struct ParsedModule {
/// The shared, reference-counted parse result.
inner: Arc<Parsed<ModModule>>,
}

impl ParsedModule {
    /// Wraps `parsed` in a new, reference-counted [`ParsedModule`].
    ///
    /// `inner` is private, so this constructor is the way to create a `ParsedModule`
    /// from outside this module.
    pub fn new(parsed: Parsed<ModModule>) -> Self {
        Self {
            inner: Arc::new(parsed),
        }
    }

    /// Consumes `self` and returns the Arc storing the parsed module.
    pub fn into_arc(self) -> Arc<Parsed<ModModule>> {
        self.inner
    }
}

/// Gives direct access to the wrapped [`Parsed`] value through a `ParsedModule`.
impl Deref for ParsedModule {
    type Target = Parsed<ModModule>;

    /// Borrows the parse result stored inside the `Arc`.
    fn deref(&self) -> &Self::Target {
        self.inner.as_ref()
    }
}

impl std::fmt::Debug for ParsedModule {
    /// Formats the value as a tuple struct named `ParsedModule` with the inner value as its only field.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let mut tuple = f.debug_tuple("ParsedModule");
        tuple.field(&self.inner);
        tuple.finish()
    }
}

#[cfg(test)]
mod tests {
    use crate::file_system::FileSystemPath;
    use crate::parsed::parsed_module;
    use crate::tests::TestDb;
    use crate::Db;

    /// Writes `content` to `file_name` in a fresh [`TestDb`], runs `parsed_module` on the file,
    /// and returns what `is_valid` reports for the result.
    fn is_valid_parse(file_name: &str, content: &str) -> bool {
        let mut db = TestDb::new();
        let path = FileSystemPath::new(file_name);

        db.file_system_mut().write_file(path, content.to_string());

        let file = db.file(path);

        parsed_module(&db, file).is_valid()
    }

    #[test]
    fn python_file() {
        assert!(is_valid_parse("test.py", "x = 10"));
    }

    #[test]
    fn python_ipynb_file() {
        // NOTE(review): the `%timeit` line presumably only parses cleanly because the `.ipynb`
        // extension selects the notebook source type; confirm against the parser.
        assert!(is_valid_parse("test.ipynb", "%timeit a = b"));
    }
}
15 changes: 10 additions & 5 deletions crates/ruff_db/src/source.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
use ruff_source_file::LineIndex;
use std::ops::Deref;
use std::sync::Arc;

use ruff_source_file::LineIndex;

use crate::vfs::VfsFile;
use crate::Db;

Expand All @@ -24,6 +25,8 @@ pub fn line_index(db: &dyn Db, file: VfsFile) -> LineIndex {
}

/// The source text of a [`VfsFile`].
///
/// Cheap cloneable in `O(1)`.
#[derive(Clone, Eq, PartialEq)]
pub struct SourceText {
inner: Arc<str>,
Expand Down Expand Up @@ -51,14 +54,16 @@ impl std::fmt::Debug for SourceText {

#[cfg(test)]
mod tests {
use filetime::FileTime;
use salsa::EventKind;

use ruff_source_file::OneIndexed;
use ruff_text_size::TextSize;

use crate::file_system::FileSystemPath;
use crate::source::{line_index, source_text};
use crate::tests::TestDb;
use crate::Db;
use filetime::FileTime;
use ruff_source_file::OneIndexed;
use ruff_text_size::TextSize;
use salsa::EventKind;

#[test]
fn re_runs_query_when_file_revision_changes() {
Expand Down
25 changes: 19 additions & 6 deletions crates/ruff_python_ast/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use std::ffi::OsStr;
use std::path::Path;

pub use expression::*;
Expand Down Expand Up @@ -80,13 +81,25 @@ pub enum PySourceType {
Ipynb,
}

impl PySourceType {
    /// Maps a file extension (given without the leading `.`) to its source type.
    ///
    /// `"pyi"` yields [`PySourceType::Stub`] and `"ipynb"` yields [`PySourceType::Ipynb`].
    /// `"py"` and every unrecognized extension yield [`PySourceType::Python`].
    /// The comparison is case-sensitive.
    pub fn from_extension(extension: &str) -> Self {
        match extension {
            "ipynb" => Self::Ipynb,
            "pyi" => Self::Stub,
            // `"py"` and anything unrecognized is treated as a regular Python file.
            _ => Self::Python,
        }
    }
}

impl<P: AsRef<Path>> From<P> for PySourceType {
fn from(path: P) -> Self {
match path.as_ref().extension() {
Some(ext) if ext == "py" => PySourceType::Python,
Some(ext) if ext == "pyi" => PySourceType::Stub,
Some(ext) if ext == "ipynb" => PySourceType::Ipynb,
_ => PySourceType::Python,
}
path.as_ref()
.extension()
.and_then(OsStr::to_str)
.map_or(Self::Python, Self::from_extension)
}
}
4 changes: 2 additions & 2 deletions crates/ruff_python_parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ pub fn parse_unchecked_source(source: &str, source_type: PySourceType) -> Parsed
}

/// Represents the parsed source code.
#[derive(Debug, Clone)]
#[derive(Debug, PartialEq, Clone)]
pub struct Parsed<T> {
syntax: T,
tokens: Tokens,
Expand Down Expand Up @@ -361,7 +361,7 @@ impl Parsed<ModExpression> {
}

/// Tokens represents a vector of lexed [`Token`].
#[derive(Debug, Clone)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Tokens {
raw: Vec<Token>,

Expand Down

0 comments on commit 7217c36

Please sign in to comment.