ref: Clean up deprecated Cache features (#1043)
Now that caches have been refreshed using the new stable cache keys, we can remove several pieces of legacy code (see the sketch after this list):
- Code that deals with legacy cache markers.
- Code related to legacy cache paths.
- Fallback to older cache versions that were still using legacy keys.
- The `should_load` machinery.
- SymCache markers, as they are not needed anymore.
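
For context, here is a rough, self-contained sketch of what is being swapped: the removed "legacy" key was a sanitized `{scope}/{file key}` path, while the stable key hashes the human-readable metadata string. This is illustrative code, not part of the commit; the helper names and the exact `v{version}/xx/xxxxxx/…` path split are assumptions inferred from the tests further down, and the `sha2` crate is used here for hashing.

```rust
// Illustrative sketch only -- not code from this commit.
use sha2::{Digest, Sha256};

/// Removed: the legacy key was a sanitized "{scope}/{file key}" path,
/// optionally prefixed with a bare "{version}/" directory.
fn legacy_cache_path(version: u32, scope: &str, file_key: &str) -> String {
    let safe = |s: &str| s.replace(['.', '/', '\\', ':'], "_");
    let prefix = if version != 0 { format!("{version}/") } else { String::new() };
    format!("{prefix}{}/{}", safe(scope), safe(file_key))
}

/// Kept: the stable key hashes the human-readable metadata and nests the hex digest.
/// The exact split is an assumption based on the test expectations below.
fn stable_cache_path(version: u32, metadata: &str) -> String {
    let hash = Sha256::digest(metadata.as_bytes());
    let hex: String = hash.iter().map(|b| format!("{b:02x}")).collect();
    format!("v{version}/{}/{}/{}", &hex[..2], &hex[2..8], &hex[8..])
}

fn main() {
    let metadata = "scope: global\n\nsource: foo\nlocation: file:///bar.baz\n";
    println!("{}", legacy_cache_path(0, "global", "foo/bar.baz")); // "global/foo_bar_baz"
    println!("{}", stable_cache_path(0, metadata)); // expected per the tests: "v0/6f/200788/..."
}
```
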
Swatinem authored Feb 17, 2023
1 parent 7a9a6b5 commit 9c2135e
Showing 13 changed files with 112 additions and 481 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -8,6 +8,7 @@
- Use new `CacheKey` for writes and shared-cache. ([#1038](https://github.com/getsentry/symbolicator/pull/1038))
- Consolidate `CacheVersions` and bump to refresh `CacheKey` usage. ([#1041](https://github.com/getsentry/symbolicator/pull/1041), [#1042](https://github.com/getsentry/symbolicator/pull/1042))
- Automatically block downloads from unreliable hosts. ([#1039](https://github.com/getsentry/symbolicator/pull/1039))
- Fully migrate `CacheKey` usage and remove legacy markers. ([#1043](https://github.com/getsentry/symbolicator/pull/1043))

## 0.7.0

17 changes: 0 additions & 17 deletions crates/symbolicator-service/src/caching/cache_error.rs
@@ -64,12 +64,6 @@ impl CacheError {
pub(super) const TIMEOUT_MARKER: &[u8] = b"timeout";
pub(super) const DOWNLOAD_ERROR_MARKER: &[u8] = b"downloaderror";

// These markers were used in the older `CacheStatus` implementation:
pub(super) const LEGACY_PERMISSION_DENIED_MARKER: &[u8] =
b"cachespecificerrormissing permissions for file";
pub(super) const LEGACY_TIMEOUT_MARKER: &[u8] = b"cachespecificerrordownload was cancelled";
pub(super) const LEGACY_DOWNLOAD_ERROR_MARKER: &[u8] = b"cachespecificerror";

/// Writes error markers and details to a file.
///
/// * If `self` is [`InternalError`](Self::InternalError), it does nothing.
@@ -124,14 +118,6 @@ impl CacheError {
if let Some(raw_message) = bytes.strip_prefix(Self::PERMISSION_DENIED_MARKER) {
let err_msg = String::from_utf8_lossy(raw_message);
Some(Self::PermissionDenied(err_msg.into_owned()))
} else if let Some(raw_message) = bytes.strip_prefix(Self::LEGACY_PERMISSION_DENIED_MARKER)
{
// NOTE: `raw_message` is empty
let err_msg = String::from_utf8_lossy(raw_message);
Some(Self::PermissionDenied(err_msg.into_owned()))
} else if let Some(_raw_duration) = bytes.strip_prefix(Self::LEGACY_TIMEOUT_MARKER) {
// NOTE: `raw_duration` is empty
Some(Self::Timeout(Duration::from_secs(0)))
} else if let Some(raw_duration) = bytes.strip_prefix(Self::TIMEOUT_MARKER) {
let raw_duration = String::from_utf8_lossy(raw_duration);
match parse_duration(&raw_duration) {
@@ -141,9 +127,6 @@
Some(Self::InternalError)
}
}
} else if let Some(raw_message) = bytes.strip_prefix(Self::LEGACY_DOWNLOAD_ERROR_MARKER) {
let err_msg = String::from_utf8_lossy(raw_message);
Some(Self::DownloadError(err_msg.into_owned()))
} else if let Some(raw_message) = bytes.strip_prefix(Self::DOWNLOAD_ERROR_MARKER) {
let err_msg = String::from_utf8_lossy(raw_message);
Some(Self::DownloadError(err_msg.into_owned()))
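
The markers that survive this cleanup all follow the same "prefix bytes + payload" convention that the parsing above strips. Below is a minimal, self-contained sketch of that convention; `SketchError` and `parse_marker` are stand-ins rather than the crate's API, and the timeout payload is left unparsed because the exact `parse_duration` format is not visible in this diff.

```rust
// Minimal sketch of the retained "marker prefix + payload" convention.
// Marker byte strings are copied from the diff above; the enum and function
// are simplified stand-ins for the real `CacheError` parsing.
const TIMEOUT_MARKER: &[u8] = b"timeout";
const DOWNLOAD_ERROR_MARKER: &[u8] = b"downloaderror";

#[derive(Debug)]
enum SketchError {
    Timeout(String),       // raw duration payload, deliberately left unparsed here
    DownloadError(String), // human-readable error message payload
}

fn parse_marker(bytes: &[u8]) -> Option<SketchError> {
    if let Some(raw) = bytes.strip_prefix(TIMEOUT_MARKER) {
        Some(SketchError::Timeout(String::from_utf8_lossy(raw).into_owned()))
    } else if let Some(raw) = bytes.strip_prefix(DOWNLOAD_ERROR_MARKER) {
        Some(SketchError::DownloadError(String::from_utf8_lossy(raw).into_owned()))
    } else {
        None
    }
}

fn main() {
    // e.g. a cache file whose contents are b"downloaderror404 not found"
    println!("{:?}", parse_marker(b"downloaderror404 not found"));
}
```
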
66 changes: 10 additions & 56 deletions crates/symbolicator-service/src/caching/cache_key.rs
@@ -8,7 +8,6 @@ use crate::types::Scope;

#[derive(Debug, Clone, Eq)]
pub struct CacheKey {
legacy_cache_key: Arc<str>,
metadata: Arc<str>,
hash: [u8; 32],
}
@@ -34,7 +33,9 @@ impl std::hash::Hash for CacheKey {
impl CacheKey {
/// Creates a [`CacheKey`] for the given [`RemoteFile`] tied to [`Scope`].
pub fn from_scoped_file(scope: &Scope, file: &RemoteFile) -> Self {
Self::legacy_builder(scope, file).build()
let mut builder = Self::scoped_builder(scope);
builder.write_file_meta(file).unwrap();
builder.build()
}

/// Returns the human-readable metadata that forms the basis of the [`CacheKey`].
@@ -58,51 +59,18 @@ impl CacheKey {
path
}

/// Creates a stable relative path for the given [`RemoteFile`] tied to [`Scope`].
///
/// This is being used as the "legacy" [`CacheKey`], and also forms the basis for the more
/// precise modern cache key.
pub fn relative_scoped_file(scope: &Scope, file: &RemoteFile) -> String {
let scope = safe_path_segment(scope.as_ref());
let cache_key = safe_path_segment(&file.cache_key());
format!("{scope}/{cache_key}")
}

/// Create a [`CacheKeyBuilder`] that can be used to build a cache key consisting of all its
/// contributing sources.
pub fn legacy_builder(scope: &Scope, file: &RemoteFile) -> CacheKeyBuilder {
let legacy_cache_key = Self::relative_scoped_file(scope, file);
pub fn scoped_builder(scope: &Scope) -> CacheKeyBuilder {
let metadata = format!("scope: {scope}\n\n");

let mut builder = CacheKeyBuilder {
legacy_cache_key,
metadata,
};
builder.write_file_meta(file).unwrap();
builder
}

/// Returns the full cache path for this key inside the provided cache directory.
pub fn legacy_cache_path(&self, version: u32) -> String {
let mut path = if version != 0 {
format!("{version}/")
} else {
String::new()
};
path.push_str(&self.legacy_cache_key);
path
CacheKeyBuilder { metadata }
}

#[cfg(test)]
pub fn for_testing(key: impl Into<String>) -> Self {
let legacy_cache_key = key.into();
let metadata = legacy_cache_key.clone();
let metadata = key.into();

CacheKeyBuilder {
legacy_cache_key,
metadata,
}
.build()
CacheKeyBuilder { metadata }.build()
}
}

@@ -113,7 +81,6 @@ impl CacheKey {
/// This input is then hashed to form the [`CacheKey`], and can also be serialized alongside
/// the cache files to help debugging.
pub struct CacheKeyBuilder {
legacy_cache_key: String,
metadata: String,
}

@@ -134,7 +101,6 @@ impl CacheKeyBuilder {
let hash = <[u8; 32]>::try_from(hash).expect("sha256 outputs 32 bytes");

CacheKey {
legacy_cache_key: self.legacy_cache_key.into(),
metadata: self.metadata.into(),
hash,
}
@@ -147,14 +113,6 @@ impl fmt::Write for CacheKeyBuilder {
}
}

/// Protect against:
/// * ".."
/// * absolute paths
/// * ":" (not a threat on POSIX filesystems, but confuses OS X Finder)
fn safe_path_segment(s: &str) -> String {
s.replace(['.', '/', '\\', ':'], "_")
}

#[cfg(test)]
mod tests {
use std::path::PathBuf;
@@ -179,8 +137,6 @@

let key = CacheKey::from_scoped_file(&scope, &file);

assert_eq!(&key.legacy_cache_path(0), "global/foo_bar_baz");
assert_eq!(&key.legacy_cache_path(1), "1/global/foo_bar_baz");
assert_eq!(
&key.cache_path(0),
"v0/6f/200788/bd4e6760d55bf6bd50c6d6e98b52379e194f9989fb788b4d37796427"
@@ -190,21 +146,19 @@
"scope: global\n\nsource: foo\nlocation: file:///bar.baz\n"
);

let builder = CacheKey::legacy_builder(&scope, &file);
let built_key = builder.build();
let built_key = CacheKey::from_scoped_file(&scope, &file);

assert_eq!(built_key.legacy_cache_path(0), key.legacy_cache_path(0));
assert_eq!(built_key.cache_path(0), key.cache_path(0));

let mut builder = CacheKey::legacy_builder(&scope, &file);
let mut builder = CacheKey::scoped_builder(&scope);
builder.write_file_meta(&file).unwrap();

let location = SourceLocation::new("bar.quux");
let file = FilesystemRemoteFile::new(source, location).into();
builder.write_str("\nsecond_source:\n").unwrap();
builder.write_file_meta(&file).unwrap();
let key = builder.build();

assert_eq!(&key.legacy_cache_path(0), "global/foo_bar_baz");
assert_eq!(
&key.cache_path(0),
"v0/07/e89036/d56878a462eb7949a744afa0a4deb5ed1b7a8154be16f7dd3b220518"
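
For orientation, here is a condensed, self-contained sketch of the builder design that survives this cleanup: with `legacy_cache_key` gone, the builder is just a growable metadata string plus a `fmt::Write` impl, and `build()` hashes that string. `SketchKeyBuilder` is an illustrative stand-in, not the crate's `CacheKeyBuilder`, and the `sha2` crate is assumed for hashing.

```rust
// Condensed sketch of the surviving builder design (illustrative names only).
use sha2::{Digest, Sha256};
use std::fmt::{self, Write};

struct SketchKeyBuilder {
    metadata: String, // the only state left now that `legacy_cache_key` is gone
}

impl Write for SketchKeyBuilder {
    fn write_str(&mut self, s: &str) -> fmt::Result {
        self.metadata.write_str(s)
    }
}

impl SketchKeyBuilder {
    /// Mirrors `CacheKey::scoped_builder`: seed the metadata with the scope line.
    fn scoped(scope: &str) -> Self {
        Self { metadata: format!("scope: {scope}\n\n") }
    }

    /// Mirrors `build()`: hash the accumulated metadata into a 32-byte key.
    fn build(self) -> ([u8; 32], String) {
        let hash = Sha256::digest(self.metadata.as_bytes());
        let hash = <[u8; 32]>::try_from(hash.as_slice()).expect("sha256 outputs 32 bytes");
        (hash, self.metadata) // the real `CacheKey` keeps both hash and metadata
    }
}

fn main() {
    let mut builder = SketchKeyBuilder::scoped("global");
    builder
        .write_str("source: foo\nlocation: file:///bar.baz\n")
        .unwrap();
    let (hash, metadata) = builder.build();
    println!("{:?} -> {:02x?}", metadata, hash);
}
```
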
89 changes: 24 additions & 65 deletions crates/symbolicator-service/src/caching/memory.rs
@@ -17,12 +17,7 @@ use super::{Cache, CacheEntry, CacheError, CacheKey};

type InMemoryCache<T> = moka::future::Cache<CacheKey, CacheEntry<T>>;

/// Manages a filesystem cache of any kind of data that can be serialized into bytes and read from
/// it:
///
/// - Object files
/// - Symcaches
/// - CFI caches
/// Manages a filesystem cache of any kind of data that can be de/serialized from/to bytes.
///
/// Transparently performs cache lookups, downloads and cache stores via the [`CacheItemRequest`]
/// trait and associated types.
@@ -105,14 +100,6 @@ pub trait CacheItemRequest: 'static + Send + Sync + Clone {
/// system. This is used to populate the cache for a previously missing element.
fn compute<'a>(&'a self, temp_file: &'a mut NamedTempFile) -> BoxFuture<'a, CacheEntry>;

/// Determines whether this item should be loaded.
///
/// If this returns `false` the cache will be re-computed and overwritten with the new
/// result.
fn should_load(&self, _data: &[u8]) -> bool {
true
}

/// Loads an existing element from the cache.
fn load(&self, data: ByteView<'static>) -> CacheEntry<Self::Item>;
}
@@ -128,14 +115,9 @@ impl<T: CacheItemRequest> Cacher<T> {
cache_dir: &Path,
key: &CacheKey,
version: u32,
use_legacy_key: bool,
) -> CacheEntry<CacheEntry<T::Item>> {
let name = self.config.name();
let cache_key = if use_legacy_key {
key.legacy_cache_path(version)
} else {
key.cache_path(version)
};
let cache_key = key.cache_path(version);

let item_path = cache_dir.join(&cache_key);
tracing::trace!("Trying {} cache at path {}", name, item_path.display());
@@ -151,19 +133,14 @@ impl<T: CacheItemRequest> Cacher<T> {
.open_cachefile(&item_path)?
.ok_or(CacheError::NotFound)?;

if let Ok(byteview) = &entry {
if !request.should_load(byteview) {
tracing::trace!("Discarding {} at path {}", name, item_path.display());
metric!(counter("caches.file.discarded") += 1, "cache" => name.as_ref());
return Err(CacheError::NotFound);
}

// store things into the shared cache when:
// - we have a positive cache
// - that has the latest version (we don’t want to upload old versions)
// - we refreshed the local cache time, so we also refresh the shared cache time.
let needs_reupload = expiration.was_touched();
if version == T::VERSIONS.current && needs_reupload {
// store things into the shared cache when:
// - we have a positive cache
// - that has the latest version (we don’t want to upload old versions)
// - we refreshed the local cache time, so we also refresh the shared cache time.
let needs_reupload = expiration.was_touched();
// FIXME: let-chains would be nice here :-)
if version == T::VERSIONS.current && needs_reupload {
if let Ok(byteview) = &entry {
if let Some(shared_cache) = self.shared_cache.get() {
shared_cache.store(
name,
@@ -177,11 +154,7 @@

// This is also reported for "negative cache hits": When we cached
// the 404 response from a server as an empty file.
metric!(
counter("caches.file.hit") += 1,
"cache" => name.as_ref(),
"key" => if use_legacy_key { "legacy" } else { "new" }
);
metric!(counter("caches.file.hit") += 1, "cache" => name.as_ref());
if let Ok(byteview) = &entry {
metric!(
time_raw("caches.file.size") = byteview.len() as u64,
@@ -214,17 +187,12 @@ impl<T: CacheItemRequest> Cacher<T> {
false
};

let mut entry = Err(CacheError::NotFound);
if shared_cache_hit {
// Waste an mmap call on a cold path, oh well.
let bv = ByteView::map_file_ref(temp_file.as_file())?;
if request.should_load(&bv) {
entry = Ok(bv);
} else {
tracing::trace!("Discarding item from shared cache {}", key);
metric!(counter("shared_cache.file.discarded") += 1);
}
}
let mut entry = if shared_cache_hit {
let byte_view = ByteView::map_file_ref(temp_file.as_file())?;
Ok(byte_view)
} else {
Err(CacheError::NotFound)
};

if entry.is_err() {
metric!(counter("caches.computation") += 1, "cache" => name.as_ref());
@@ -327,21 +295,12 @@ impl<T: CacheItemRequest> Cacher<T> {

for version in versions {
// try the new cache key first, then fall back to the old cache key
let item = match self
.lookup_local_cache(&request, cache_dir, &cache_key, version, false)
{
Err(CacheError::NotFound) => {
match self
.lookup_local_cache(&request, cache_dir, &cache_key, version, true)
{
Err(CacheError::NotFound) => continue,
Err(err) => return Err(err),
Ok(item) => item,
}
}
Err(err) => return Err(err),
Ok(item) => item,
};
let item =
match self.lookup_local_cache(&request, cache_dir, &cache_key, version) {
Err(CacheError::NotFound) => continue,
Err(err) => return Err(err),
Ok(item) => item,
};

if version != T::VERSIONS.current {
// we have found an outdated cache that we will use right away,
@@ -416,7 +375,7 @@ impl<T: CacheItemRequest> Cacher<T> {
tracing::trace!(
"Spawning deduplicated {} computation for path {:?}",
name,
cache_key.legacy_cache_path(T::VERSIONS.current)
cache_key.cache_path(T::VERSIONS.current)
);

let this = self.clone();
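
To make the simplified control flow easier to see in one place, here is a rough, runnable sketch of the lookup loop after this change: each cache version is tried exactly once with the new key (no second legacy-key attempt, no `should_load` re-check), and a miss falls through to recomputation. All types and the lookup function below are illustrative stand-ins, not the actual `Cacher` API.

```rust
// Simplified control-flow sketch of the version fallback that remains after
// this commit. Types and the lookup function are stand-ins only.

#[derive(Debug)]
#[allow(dead_code)]
enum CacheError {
    NotFound,
    Other(String),
}

type CacheEntry<T> = Result<T, CacheError>;

fn lookup_local_cache(version: u32) -> CacheEntry<String> {
    // Stand-in for `Cacher::lookup_local_cache`: pretend only version 1 exists on disk.
    if version == 1 {
        Ok(format!("cached item for v{version}"))
    } else {
        Err(CacheError::NotFound)
    }
}

fn compute_memoized(current_version: u32, fallback_versions: &[u32]) -> CacheEntry<String> {
    for &version in std::iter::once(&current_version).chain(fallback_versions) {
        let item = match lookup_local_cache(version) {
            Err(CacheError::NotFound) => continue, // try the next (older) version
            Err(err) => return Err(err),
            Ok(item) => item,
        };
        if version != current_version {
            // An outdated cache was found: use it right away, while a background
            // task would normally recompute and write the current version.
        }
        return Ok(item);
    }
    // Nothing cached at any version: fall through to a fresh computation.
    Ok("freshly computed item".to_owned())
}

fn main() {
    println!("{:?}", compute_memoized(2, &[1, 0]));
}
```
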
5 changes: 0 additions & 5 deletions crates/symbolicator-service/src/caching/mod.rs
@@ -43,8 +43,6 @@
//!
//! Various other metrics are being collected as well, including:
//! - `caches.file.size`: A histogram for the size (in bytes) of the successfully loaded / written cache files.
//! - FIXME: `caches.file.discarded` and `shared_cache.file.discarded` is still being used in the
//! edge-case of mutable cache files. This will go away once cache files are truly immutable.
//! - `caches.file.write`: The number of caches being written to disk.
//! This should match `caches.computation` if the file-system layer is enabled.
//! - TODO: list all the other metrics that are missing here :-)
@@ -75,9 +73,6 @@
//!
//! A "successful" entry is considered immutable and it will be reused indefinitely as long as it
//! is being actively used.
//! FIXME: One current exception to this is the `should_load` functionality that is used in combination
//! with SymCache entries that are "mutable" depending on availability of secondary mapping files.
//! This is subject to change: <https://github.com/getsentry/symbolicator/issues/983>
//!
//! The [`SharedCacheConfig`] is optional, and no shared cache will be used when it is absent. The
//! configuration is done by providing a GCS bucket and `service_account_path`. A file-system based