Skip to content

Commit

Permalink
store::Snapshot holds a DigestTree rather than PathStats.
Browse files Browse the repository at this point in the history
[ci skip-build-wheels]
  • Loading branch information
stuhood committed Mar 1, 2022
1 parent a48f59d commit 4ddf601
Show file tree
Hide file tree
Showing 15 changed files with 442 additions and 359 deletions.
2 changes: 1 addition & 1 deletion src/python/pants/engine/fs_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1129,7 +1129,7 @@ def test_snapshot_properties() -> None:
digest = Digest("a" * 64, 1000)
snapshot = Snapshot._unsafe_create(digest, ["f.ext", "dir/f.ext"], ["dir"])
assert snapshot.digest == digest
assert snapshot.files == ("f.ext", "dir/f.ext")
assert snapshot.files == ("dir/f.ext", "f.ext")
assert snapshot.dirs == ("dir",)


Expand Down
2 changes: 1 addition & 1 deletion src/rust/engine/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion src/rust/engine/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,6 @@ crossbeam-channel = "0.5"
deepsize = { git = "https://github.com/stuhood/deepsize.git", rev = "67c6cfc2afa1303c06b19c1b96ebe11fd3217d34", features=["smallvec"] }
derivative = "2.2"
async-oncecell = "0.2"
either = "1.6"
fnv = "1.0.5"
fs = { path = "fs" }
futures = "0.3"
Expand Down
219 changes: 149 additions & 70 deletions src/rust/engine/fs/src/directory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use std::sync::Arc;
use deepsize::{known_deep_size, DeepSizeOf};
use internment::Intern;
use itertools::Itertools;
use lazy_static::lazy_static;

// TODO: Extract protobuf-specific pieces to a new crate.
use grpc_util::prost::MessageExt;
Expand All @@ -18,10 +19,13 @@ use protos::gen::build::bazel::remote::execution::v2 as remexec;

use crate::PathStat;

pub const EMPTY_DIRECTORY_DIGEST: DirectoryDigest = DirectoryDigest {
digest: EMPTY_DIGEST,
tree: None,
};
// These are lazily-initialized statics rather than `const`s because their initializers build a
// `DigestTrie` at runtime (`vec![].into()` / `.clone()`).
lazy_static! {
/// A `DigestTrie` containing no entries.
pub static ref EMPTY_DIGEST_TREE: DigestTrie = DigestTrie(vec![].into());
/// The `DirectoryDigest` for an empty directory: `EMPTY_DIGEST` paired with a clone of
/// `EMPTY_DIGEST_TREE`, so that the tree is always present for the empty case.
pub static ref EMPTY_DIRECTORY_DIGEST: DirectoryDigest = DirectoryDigest {
digest: EMPTY_DIGEST,
tree: Some(EMPTY_DIGEST_TREE.clone()),
};
}

/// A Digest for a directory, optionally with its content stored as a DigestTrie.
///
Expand Down Expand Up @@ -62,53 +66,22 @@ impl DirectoryDigest {
Self { digest, tree: None }
}

pub fn from_path_stats(
path_stats: Vec<PathStat>,
file_digests: &HashMap<PathBuf, Digest>,
) -> Result<Self, String> {
let path_stats = PathStat::normalize_path_stats(path_stats)?;
let digest_tree = DigestTrie::from_sorted_paths(
PathBuf::new(),
path_stats.iter().map(|p| p.into()).collect(),
file_digests,
);
Ok(Self {
digest: digest_tree.compute_root_digest(),
tree: Some(digest_tree),
})
}

/// Returns the digests reachable from this DirectoryDigest.
///
/// If this DirectoryDigest has been persisted to disk (i.e., does not have a DigestTrie) then
/// this will only include the root.
pub fn digests(&self) -> Vec<Digest> {
let tree = if let Some(tree) = &self.tree {
tree
if let Some(tree) = &self.tree {
let mut digests = tree.digests();
digests.push(self.digest);
digests
} else {
return vec![self.digest];
};

// Walk the tree and collect Digests.
let mut digests = Vec::new();
digests.push(self.digest);
let mut stack = tree.0.iter().collect::<Vec<_>>();
while let Some(entry) = stack.pop() {
match entry {
Entry::Directory(d) => {
digests.push(d.digest);
stack.extend(d.tree.0.iter());
}
Entry::File(f) => {
digests.push(f.digest);
}
}
vec![self.digest]
}
digests
}
}

#[derive(Copy, Clone, Eq, PartialEq)]
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
struct Name(Intern<String>);
known_deep_size!(0; Name);

Expand All @@ -120,14 +93,23 @@ impl Deref for Name {
}
}

#[derive(Clone, DeepSizeOf)]
enum Entry {
#[derive(DeepSizeOf)]
pub enum Entry {
Directory(Directory),
File(File),
}

#[derive(Clone, DeepSizeOf)]
struct Directory {
impl Entry {
fn name(&self) -> Name {
match self {
Entry::Directory(d) => d.name,
Entry::File(f) => f.name,
}
}
}

#[derive(DeepSizeOf)]
pub struct Directory {
name: Name,
digest: Digest,
tree: DigestTrie,
Expand All @@ -145,15 +127,37 @@ impl Directory {
tree,
}
}

/// Renders the contents of this directory as a `remexec::Directory` by delegating to the
/// `DigestTrie` that it holds.
pub fn as_directory(&self) -> remexec::Directory {
  let contents = &self.tree;
  contents.as_directory()
}

/// Renders this directory as a `remexec::DirectoryNode`: an owned copy of its name, plus its
/// digest converted to the protobuf digest type.
pub fn as_directory_node(&self) -> remexec::DirectoryNode {
  let name: String = self.name.as_ref().to_owned();
  remexec::DirectoryNode {
    digest: Some((&self.digest).into()),
    name,
  }
}
}

#[derive(Clone, DeepSizeOf)]
struct File {
#[derive(DeepSizeOf)]
pub struct File {
name: Name,
digest: Digest,
is_executable: bool,
}

impl File {
  /// Renders this file as a `remexec::FileNode`, carrying over its name, digest, and executable
  /// bit. Any remaining fields of the message take their default values.
  pub fn as_file_node(&self) -> remexec::FileNode {
    let name: String = self.name.as_ref().to_owned();
    remexec::FileNode {
      is_executable: self.is_executable,
      digest: Some(self.digest.into()),
      name,
      ..remexec::FileNode::default()
    }
  }
}

// TODO: `PathStat` owns its path, which means it can't be used via recursive slicing. See
// whether these types can be merged.
enum TypedPath<'a> {
Expand Down Expand Up @@ -187,7 +191,22 @@ impl<'a> From<&'a PathStat> for TypedPath<'a> {
#[derive(Clone, DeepSizeOf)]
pub struct DigestTrie(Arc<[Entry]>);

// TODO: This avoids a `rustc` crasher (repro on 7f319ee84ad41bc0aea3cb01fb2f32dcd51be704).
// SAFETY(review): this asserts `Sync` by hand instead of letting the compiler derive it. It is
// only sound if the `Entry` values behind the inner `Arc<[Entry]>` are themselves `Send + Sync`
// and are never mutated through a shared reference -- TODO confirm for `Name`, `Digest`, and the
// other field types, and remove this impl once the compiler issue is resolved.
unsafe impl Sync for DigestTrie {}

impl DigestTrie {
/// Builds a `DigestTrie` from the given `PathStat`s.
///
/// The `PathStat`s are first normalized via `PathStat::normalize_path_stats` (any error from
/// that step is returned as-is), and the trie is then built from the normalized paths, rooted
/// at an empty prefix. `file_digests` is handed to `from_sorted_paths` to resolve the digest of
/// each file.
pub fn from_path_stats(
  path_stats: Vec<PathStat>,
  file_digests: &HashMap<PathBuf, Digest>,
) -> Result<Self, String> {
  let normalized = PathStat::normalize_path_stats(path_stats)?;
  let typed_paths = normalized.iter().map(|stat| stat.into()).collect();
  let trie = Self::from_sorted_paths(PathBuf::new(), typed_paths, file_digests);
  Ok(trie)
}

fn from_sorted_paths(
prefix: PathBuf,
paths: Vec<TypedPath>,
Expand All @@ -211,10 +230,7 @@ impl DigestTrie {
path,
is_executable,
} => {
let digest = file_digests
.get(prefix.join(path).as_path())
.unwrap()
.clone();
let digest = *file_digests.get(prefix.join(path).as_path()).unwrap();

entries.push(Entry::File(File {
name,
Expand Down Expand Up @@ -243,35 +259,98 @@ impl DigestTrie {
Self(entries.into())
}

fn compute_root_digest(&self) -> Digest {
/// Renders the top level of this trie as a `remexec::Directory`.
///
/// Only the direct entries are visited: files become `FileNode`s and directories become
/// `DirectoryNode`s, each list keeping the relative order of the entries in the trie.
pub fn as_directory(&self) -> remexec::Directory {
  let files = self
    .0
    .iter()
    .filter_map(|entry| match entry {
      Entry::File(file) => Some(file.as_file_node()),
      Entry::Directory(_) => None,
    })
    .collect::<Vec<_>>();
  let directories = self
    .0
    .iter()
    .filter_map(|entry| match entry {
      Entry::Directory(dir) => Some(dir.as_directory_node()),
      Entry::File(_) => None,
    })
    .collect::<Vec<_>>();

  remexec::Directory {
    files,
    directories,
    ..remexec::Directory::default()
  }
}

/// Computes the `Digest` of the root of this trie.
///
/// An empty trie maps directly to `EMPTY_DIGEST`; otherwise the digest is taken over the
/// serialized bytes of the `remexec::Directory` produced by `as_directory`.
pub fn compute_root_digest(&self) -> Digest {
  if self.0.is_empty() {
    EMPTY_DIGEST
  } else {
    let serialized = self.as_directory().to_bytes();
    Digest::of_bytes(&serialized)
  }
}

/// Returns the direct (top-level) entries of this trie as a slice.
pub fn entries(&self) -> &[Entry] {
  &self.0
}

/// Returns the digests reachable from this DigestTrie.
pub fn digests(&self) -> Vec<Digest> {
// Walk the tree and collect Digests.
let mut digests = Vec::new();
let mut stack = self.0.iter().collect::<Vec<_>>();
while let Some(entry) = stack.pop() {
match entry {
Entry::Directory(d) => {
digests.push(d.digest);
stack.extend(d.tree.0.iter());
}
Entry::File(f) => {
digests.push(f.digest);
}
}
}
digests
}

/// Return a pair of Vecs of the file paths and directory paths in this DigestTrie, each in
/// sorted order.
///
/// TODO: This should probably be implemented directly by consumers via `walk`, since they
/// can directly allocate the collections that they need.
pub fn files_and_directories(&self) -> (Vec<PathBuf>, Vec<PathBuf>) {
let mut files = Vec::new();
let mut directories = Vec::new();
for entry in &*self.0 {
self.walk(&mut |path, entry| {
match entry {
Entry::File(f) => files.push(remexec::FileNode {
name: f.name.as_ref().to_owned(),
digest: Some(f.digest.into()),
is_executable: f.is_executable,
..remexec::FileNode::default()
}),
Entry::Directory(d) => directories.push(remexec::DirectoryNode {
name: d.name.as_ref().to_owned(),
digest: Some((&d.digest).into()),
}),
Entry::File(_) => files.push(path.to_owned()),
Entry::Directory(d) if d.name.is_empty() => {
// Is the root directory, which is not emitted here.
}
Entry::Directory(_) => directories.push(path.to_owned()),
}
});
(files, directories)
}

/// Visit every node in the tree, calling the given function with the path to each node and the
/// node's `Entry`. The root is visited first, as a `Directory` entry with an empty name and an
/// empty path.
pub fn walk(&self, f: &mut impl FnMut(&Path, &Entry)) {
{
// TODO: It's likely that a DigestTrie should hold its own Digest, to avoid re-computing it
// here.
let root = Entry::Directory(Directory::from_digest_tree(
Name(Intern::from("")),
self.clone(),
));
f(&PathBuf::new(), &root);
}
Digest::of_bytes(
&remexec::Directory {
directories,
files,
..remexec::Directory::default()
self.walk_helper(PathBuf::new(), f)
}

fn walk_helper(&self, path_so_far: PathBuf, f: &mut impl FnMut(&Path, &Entry)) {
for entry in &*self.0 {
let path = path_so_far.join(entry.name().as_ref());
f(&path, entry);
if let Entry::Directory(d) = entry {
d.tree.walk_helper(path, f);
}
.to_bytes(),
)
}
}
}

Expand Down
6 changes: 4 additions & 2 deletions src/rust/engine/fs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,16 @@
// Arc<Mutex> can be more clear than needing to grok Orderings:
#![allow(clippy::mutex_atomic)]

mod directory;
pub mod directory;
mod glob_matching;
#[cfg(test)]
mod glob_matching_tests;
#[cfg(test)]
mod posixfs_tests;

pub use crate::directory::{DigestTrie, DirectoryDigest, EMPTY_DIRECTORY_DIGEST};
pub use crate::directory::{
DigestTrie, DirectoryDigest, EMPTY_DIGEST_TREE, EMPTY_DIRECTORY_DIGEST,
};
pub use crate::glob_matching::{
ExpandablePathGlobs, GlobMatching, PathGlob, PreparedPathGlobs, DOUBLE_STAR_GLOB,
SINGLE_STAR_GLOB,
Expand Down
4 changes: 3 additions & 1 deletion src/rust/engine/fs/store/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@ protos = { path = "../../protos" }
bytes = "1.0"
concrete_time = { path = "../../concrete_time" }
async-oncecell = "0.2"
grpc_util = { path = "../../grpc_util" }
# TODO: Waiting on https://github.com/Aeledfyr/deepsize/pull/30 and https://github.com/Aeledfyr/deepsize/pull/31.
deepsize = { git = "https://github.com/stuhood/deepsize.git", rev = "67c6cfc2afa1303c06b19c1b96ebe11fd3217d34" }
fs = { path = ".." }
futures = "0.3"
glob = "0.3.0"
grpc_util = { path = "../../grpc_util" }
hashing = { path = "../../hashing" }
http = "0.2"
http-body = "0.4"
Expand Down
Loading

0 comments on commit 4ddf601

Please sign in to comment.