Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Refresh symcache when bcsymbolmap status changes #493

Merged
merged 4 commits into from
Jul 15, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
203 changes: 184 additions & 19 deletions crates/symbolicator/src/services/symcaches.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use std::fs::File;
use std::io::{self, BufWriter};
use std::io::{self, BufWriter, Seek, SeekFrom, Write};
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Duration;
Expand Down Expand Up @@ -27,6 +27,8 @@ use crate::types::{
use crate::utils::futures::{BoxedFuture, ThreadPool};
use crate::utils::sentry::ConfigureScope;

const SYMBOLMAP_MARKER: &[u8] = b"WITH_SYMBOLMAP";
Swatinem marked this conversation as resolved.
Show resolved Hide resolved

/// Errors happening while generating a symcache.
#[derive(Debug, Error)]
pub enum SymCacheError {
Expand Down Expand Up @@ -129,9 +131,8 @@ struct FetchSymCacheInternal {
/// The objects actor, used to fetch original DIF objects from.
objects_actor: ObjectsActor,

/// The bitcode service, use to fetch
/// [`BcSymbolMap`](symbolic::debuginfo::macho::BcSymbolMap).
bitcode_svc: BitcodeService,
/// The result of fetching the BcSymbolMap
bcsymbolmap_handle: Option<BcSymbolMapHandle>,

/// ObjectMeta handle of the original DIF object to fetch.
object_meta: Arc<ObjectMetaHandle>,
Expand All @@ -158,9 +159,8 @@ struct FetchSymCacheInternal {
async fn fetch_difs_and_compute_symcache(
path: PathBuf,
object_meta: Arc<ObjectMetaHandle>,
sources: Arc<[SourceConfig]>,
objects_actor: ObjectsActor,
bitcode_svc: BitcodeService,
bcsymbolmap_handle: Option<BcSymbolMapHandle>,
threadpool: ThreadPool,
) -> Result<CacheStatus, SymCacheError> {
let object_handle = objects_actor
Expand All @@ -172,14 +172,6 @@ async fn fetch_difs_and_compute_symcache(
return Ok(object_handle.status());
}

let bcsymbolmap_handle = match object_meta.object_id().debug_id {
Some(debug_id) => bitcode_svc
.fetch_bcsymbolmap(debug_id, object_meta.scope().clone(), sources.clone())
.await
.map_err(SymCacheError::BcSymbolMapError)?,
None => None,
};

let compute_future = async move {
let status = match write_symcache(&path, &*object_handle, bcsymbolmap_handle) {
Ok(_) => CacheStatus::Positive,
Expand Down Expand Up @@ -210,9 +202,8 @@ impl CacheItemRequest for FetchSymCacheInternal {
let future = fetch_difs_and_compute_symcache(
path.to_owned(),
self.object_meta.clone(),
self.request.sources.clone(),
self.objects_actor.clone(),
self.bitcode_svc.clone(),
self.bcsymbolmap_handle.clone(),
self.threadpool.clone(),
);

Expand All @@ -230,8 +221,11 @@ impl CacheItemRequest for FetchSymCacheInternal {
}

fn should_load(&self, data: &[u8]) -> bool {
let had_symbolmap = data.ends_with(SYMBOLMAP_MARKER);
SymCache::parse(data)
.map(|symcache| symcache.is_latest())
.map(|symcache| {
symcache.is_latest() && had_symbolmap == self.bcsymbolmap_handle.is_some()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will also recompute if previously there was a symbolmap but now we didn't fetch one. this can happen because the derived cache lives longer than the download cache. Not sure how serious an issue it is.

})
.unwrap_or(false)
}

Expand Down Expand Up @@ -295,11 +289,26 @@ impl SymCacheActor {

match meta {
Some(handle) => {
// TODO: while there is some caching *internally* in the bitcode_svc, the *complete*
// fetch request is not cached
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if this can be further reduced by checking https://docs.rs/symbolic/8.3.0/symbolic/debuginfo/macho/struct.MachObject.html#method.requires_symbolmap first, obviously only for macho symbol files too. that's probably sufficient rather than introduce an entire new cache which costs a lot of duplicate disk space.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don’t think we have a proper MachObject at this point in the code?

let bcsymbolmap_handle = match handle.object_id().debug_id {
Some(debug_id) => self
.bitcode_svc
.fetch_bcsymbolmap(
debug_id,
handle.scope().clone(),
request.sources.clone(),
)
.await
.map_err(SymCacheError::BcSymbolMapError)?,
None => None,
};

self.symcaches
.compute_memoized(FetchSymCacheInternal {
request,
objects_actor: self.objects.clone(),
bitcode_svc: self.bitcode_svc.clone(),
bcsymbolmap_handle,
object_meta: handle,
threadpool: self.threadpool.clone(),
candidates,
Expand Down Expand Up @@ -359,8 +368,164 @@ fn write_symcache(

SymCacheWriter::write_object(&symbolic_object, &mut writer).map_err(SymCacheError::Writing)?;

let file = writer.into_inner().map_err(io::Error::from)?;
let mut file = writer.into_inner().map_err(io::Error::from)?;

if bcsymbolmap_handle.is_some() {
file.flush()?;
file.seek(SeekFrom::End(0))?;
file.write_all(SYMBOLMAP_MARKER)?;
}
file.sync_all()?;

Ok(())
}

#[cfg(test)]
mod tests {
Swatinem marked this conversation as resolved.
Show resolved Hide resolved
use std::fs;
use std::sync::Arc;

use symbolic::common::DebugId;
use uuid::Uuid;

use super::*;
use crate::cache::Caches;
use crate::config::{CacheConfigs, Config};
use crate::services::bitcode::BitcodeService;
use crate::services::DownloadService;
use crate::sources::{
CommonSourceConfig, DirectoryLayoutType, FilesystemSourceConfig, SourceConfig, SourceId,
};
use crate::test::{self, fixture};
use crate::utils::futures::ThreadPool;

/// Creates a `SymCacheActor` with the given cache directory
/// and timeout for download cache misses.
fn symcache_actor(cache_dir: PathBuf, timeout: Duration) -> SymCacheActor {
let mut cache_config = CacheConfigs::default();
cache_config.downloaded.retry_misses_after = Some(timeout);

let config = Arc::new(Config {
cache_dir: Some(cache_dir),
connect_to_reserved_ips: true,
caches: cache_config,
..Default::default()
});

let cpu_pool = ThreadPool::new();
let caches = Caches::from_config(&config).unwrap();
caches.clear_tmp(&config).unwrap();
let downloader = DownloadService::new(config);
let objects = ObjectsActor::new(caches.object_meta, caches.objects, downloader.clone());
let bitcode = BitcodeService::new(caches.auxdifs, downloader);

SymCacheActor::new(caches.symcaches, objects, bitcode, cpu_pool)
}

/// Tests that a symcache is regenerated when it was created without a BcSymbolMap
/// and a BcSymbolMap has since become available.
///
/// We construct a symcache 3 times under varying conditions:
/// 1. No symbol map is not there
/// 2. The symbol map is there, but its absence is still cached, so it is
/// not downloaded
/// 3. The download cache has expired, so the symbol map is now
/// actually available.
///
/// Lookups in the symcache should return obfuscated names in
/// 1 and 2 and unobfuscated names in 3.
///
/// 2 is specifically intended to make sure that the SymCacheActor
/// doesn't constantly try to download the symbol map.
///
/// TODO: This test currently does not work because step 2
/// results in unobfuscated names. For some reason the
/// SymCacheActor downloads the symbol map immediately,
/// even though the negative cache should still be valid.
Swatinem marked this conversation as resolved.
Show resolved Hide resolved
#[tokio::test]
async fn test_symcache_refresh() {
test::setup();

const TIMEOUT: Duration = Duration::from_millis(500);
Swatinem marked this conversation as resolved.
Show resolved Hide resolved

let cache_dir = test::tempdir();
let symbol_dir = test::tempdir();

// Create directories for the symbol file and the bcsymbolmap
let macho_dir = symbol_dir.path().join("2d/10c42f591d3265b14778ba0868073f/");
let symbol_map_dir = symbol_dir.path().join("c8/374b6d6e9634d8ae38efaa5fec424f/");

fs::create_dir_all(&symbol_map_dir).unwrap();
fs::create_dir_all(&macho_dir).unwrap();

// Copy the symbol file to the temporary symbol directory
fs::copy(
fixture("symbols/2d10c42f-591d-3265-b147-78ba0868073f.dwarf-hidden"),
macho_dir.join("debuginfo"),
)
.unwrap();

let source = SourceConfig::Filesystem(Arc::new(FilesystemSourceConfig {
id: SourceId::new("local"),
path: symbol_dir.path().to_owned(),
files: CommonSourceConfig::with_layout(DirectoryLayoutType::Unified),
}));

let identifier = ObjectId::from(DebugId::from_uuid(
Uuid::parse_str("2d10c42f-591d-3265-b147-78ba0868073f").unwrap(),
));

let fetch_symcache = FetchSymCache {
object_type: ObjectType::Macho,
identifier,
sources: Arc::new([source]),
scope: Scope::Global,
};

let symcache_actor = symcache_actor(cache_dir.path().to_owned(), TIMEOUT);

test::spawn_compat(move || async move {
// Create the symcache for the first time. Since the bcsymbolmap is not available, names in the
// symcache will be obfuscated.
let symcache_file = symcache_actor.fetch(fetch_symcache.clone()).await.unwrap();
let symcache = symcache_file.parse().unwrap().unwrap();
let line_info = symcache.lookup(0x5a75).unwrap().next().unwrap().unwrap();
assert_eq!(line_info.filename(), "__hidden#42_");
assert_eq!(line_info.symbol(), "__hidden#0_");

// Copy the plist and bcsymbolmap to the temporary symbol directory so that the SymCacheActor can find them.
fs::copy(
fixture("symbols/2d10c42f-591d-3265-b147-78ba0868073f.plist"),
macho_dir.join("uuidmap"),
)
.unwrap();

fs::copy(
fixture("symbols/c8374b6d-6e96-34d8-ae38-efaa5fec424f.bcsymbolmap"),
symbol_map_dir.join("bcsymbolmap"),
)
.unwrap();

// Create the symcache for the second time. Even though the bcsymbolmap is now available, its absence should
// still be cached and the SymcacheActor should make no attempt to download it. Therefore, the names should
// be obfuscated like before.
let symcache_file = symcache_actor.fetch(fetch_symcache.clone()).await.unwrap();
Swatinem marked this conversation as resolved.
Show resolved Hide resolved
let symcache = symcache_file.parse().unwrap().unwrap();
let line_info = symcache.lookup(0x5a75).unwrap().next().unwrap().unwrap();
assert_eq!(line_info.filename(), "__hidden#42_");
assert_eq!(line_info.symbol(), "__hidden#0_");

// Sleep long enough for the negative cache entry to become invalid.
std::thread::sleep(TIMEOUT);

// Create the symcache for the third time. This time, the bcsymbolmap is downloaded and the names in the
// symcache are unobfuscated.
let symcache_file = symcache_actor.fetch(fetch_symcache.clone()).await.unwrap();
let symcache = symcache_file.parse().unwrap().unwrap();
let line_info = symcache.lookup(0x5a75).unwrap().next().unwrap().unwrap();
assert_eq!(line_info.filename(), "Sources/Sentry/SentryMessage.m");
assert_eq!(line_info.symbol(), "-[SentryMessage initWithFormatted:]");
})
.await;
}
}
Binary file not shown.
Loading