From 5cd3bb6b1af5672c9042d3fbe185a3caad4c81ca Mon Sep 17 00:00:00 2001 From: Arpad Borsos Date: Fri, 7 Apr 2023 13:00:07 +0200 Subject: [PATCH 1/2] Do not fail processing minidumps on invalid code_file. Invalid base-names will be excluded from symbol server lookups, which will certainly not find anything useful. We still output the full (invalid) code file into the symbolicated event, as it might still be useful to customers. As a drive-by change, this reverts the `capture_anyhow` change that captures real stack traces in S4S, as the stack traces are not that useful, and the grouping is worse. --- CHANGELOG.md | 2 +- Cargo.lock | 1 - .../symbolication/process_minidump.rs | 110 ++++++------------ crates/symbolicator-sources/src/paths.rs | 10 +- crates/symbolicator-sources/src/types.rs | 43 +++++++ crates/symbolicator/Cargo.toml | 1 - crates/symbolicator/src/service.rs | 12 +- 7 files changed, 87 insertions(+), 92 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8ae860a5b..7edcd2df3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,7 +16,7 @@ ### Fixes -- Reject minidumps containing invalid file names. ([#1047](https://github.com/getsentry/symbolicator/pull/1047)) +- Mask invalid file names in minidumps. ([#1047](https://github.com/getsentry/symbolicator/pull/1047), [#1133](https://github.com/getsentry/symbolicator/pull/1133)) - Update `minidump-processor` so minidumps with a 0-sized module are being processed. ([#1131](https://github.com/getsentry/symbolicator/pull/1131)) ### Dependencies diff --git a/Cargo.lock b/Cargo.lock index 097f6fec6..9cff7605b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4372,7 +4372,6 @@ dependencies = [ "jemallocator", "reqwest", "sentry", - "sentry-anyhow", "serde", "serde_json", "structopt", diff --git a/crates/symbolicator-service/src/services/symbolication/process_minidump.rs b/crates/symbolicator-service/src/services/symbolication/process_minidump.rs index c8a784649..6e6543b59 100644 --- a/crates/symbolicator-service/src/services/symbolication/process_minidump.rs +++ b/crates/symbolicator-service/src/services/symbolication/process_minidump.rs @@ -3,11 +3,11 @@ use std::path::{Path, PathBuf}; use std::sync::Arc; use std::time::Instant; -use anyhow::{bail, Context, Result}; +use anyhow::Result; use async_trait::async_trait; use chrono::{DateTime, Utc}; use minidump::system_info::Os; -use minidump::{MinidumpContext, MinidumpModuleList, MinidumpSystemInfo}; +use minidump::{MinidumpContext, MinidumpSystemInfo}; use minidump::{MinidumpModule, Module}; use minidump_processor::{ FileError, FileKind, FillSymbolError, FrameSymbolizer, FrameWalker, ProcessState, @@ -178,13 +178,14 @@ impl SymbolicatorSymbolProvider { let sources = self.sources.clone(); let scope = self.scope.clone(); + let code_file = non_empty_file_name(&module.code_file()); + let debug_file = module.debug_file().as_deref().and_then(non_empty_file_name); + let identifier = ObjectId { code_id: key.code_id.clone(), - code_file: Some(module.code_file().into_owned()), + code_file, debug_id: key.debug_id, - debug_file: module - .debug_file() - .map(|debug_file| debug_file.into_owned()), + debug_file, debug_checksum: None, object_type: self.object_type, }; @@ -249,18 +250,15 @@ fn object_info_from_minidump_module(ty: ObjectType, module: &MinidumpModule) -> .code_identifier() .filter(|code_id| !code_id.is_nil()) .map(|code_id| code_id.to_string().to_lowercase()); - let code_file = module.code_file(); - let code_file = match code_file.is_empty() { - true => None, - false => Some(code_file.into_owned()), - }; + let code_file = non_empty_file_name(&module.code_file()); + let debug_file = module.debug_file().as_deref().and_then(non_empty_file_name); CompleteObjectInfo::from(RawObjectInfo { ty, code_id, code_file, debug_id: module.debug_identifier().map(|c| c.breakpad().to_string()), - debug_file: module.debug_file().map(|c| c.into_owned()), + debug_file, debug_checksum: None, image_addr: HexValue(module.base_address()), image_size: match module.size() { @@ -304,20 +302,24 @@ async fn stackwalk( None => Registers::new(), }; - // Trim infinite recursions explicitly because those do not - // correlate to minidump size. Every other kind of bloated - // input data we know is already trimmed/rejected by raw - // byte size alone. - let frame_count = thread.frames.len().min(20000); - let mut frames = Vec::with_capacity(frame_count); - for frame in thread.frames.iter().take(frame_count) { - frames.push(RawFrame { - instruction_addr: HexValue(frame.resume_address), - package: frame.module.as_ref().map(|m| m.code_file().into_owned()), - trust: frame.trust.into(), - ..RawFrame::default() - }); - } + // We trim stack traces to 256 frames from the top. A similar limit is also in place in + // relay / store normalization, so any excess frames will be thrown away by Sentry anyway. + let frames = thread + .frames + .into_iter() + .take(256) + .map(|frame| { + let package = frame + .module + .and_then(|module| non_empty_file_name(&module.code_file())); + RawFrame { + instruction_addr: HexValue(frame.resume_address), + package, + trust: frame.trust.into(), + ..RawFrame::default() + } + }) + .collect(); stacktraces.push(RawStacktrace { is_requesting: requesting_thread_index.map(|r| r == index), @@ -432,25 +434,6 @@ impl SymbolicationActor { } }; - let modules = minidump - .get_stream::() - .context("Failed to read minidump module list")?; - - for module in modules.iter() { - let code_file = module.code_file(); - if file_name_is_invalid(&code_file) { - tracing::error!(%code_file, "Minidump module has an invalid code file name"); - bail!("Minidump rejected due to invalid code file name") - } - - if let Some(debug_file) = module.debug_file() { - if file_name_is_invalid(&debug_file) { - tracing::error!(%debug_file, "Minidump module has an invalid debug file name"); - bail!("Minidump rejected due to invalid debug file name") - } - } - } - let stackwalk_future = stackwalk( self.cficaches.clone(), &minidump, @@ -554,39 +537,12 @@ fn read_minidump(path: &Path) -> Result { Ok(md) } -/// Checks whether a file name contains a control character or its last segment -/// is longer than 255 bytes. -fn file_name_is_invalid(file_name: &str) -> bool { - if file_name.contains(|c: char| c.is_control()) { - return true; - } - - let last_segment = file_name - .rsplit_once(&['/', '\\']) - .map(|(_init, last)| last) - .unwrap_or(file_name); - - last_segment.len() > 255 -} - -#[cfg(test)] -mod tests { - use super::file_name_is_invalid; - - #[test] - fn invalid_file_names() { - let too_long = "foobar".repeat(50); - - assert!(file_name_is_invalid(&too_long)); - - let still_too_long = format!("a/few\\segments/{too_long}"); - - assert!(file_name_is_invalid(&still_too_long)); - - let not_too_long = format!("{too_long}/last"); - - assert!(!file_name_is_invalid(¬_too_long)); +/// Replaces invalid file names by a `` marker, otherwise returns a `String` for non-empty file names. +fn non_empty_file_name(file_name: &str) -> Option { + if file_name.is_empty() { + return None; } + Some(file_name.to_owned()) } #[cfg(skip)] diff --git a/crates/symbolicator-sources/src/paths.rs b/crates/symbolicator-sources/src/paths.rs index 0df3d84c9..d2a7be871 100644 --- a/crates/symbolicator-sources/src/paths.rs +++ b/crates/symbolicator-sources/src/paths.rs @@ -47,7 +47,7 @@ fn get_lldb_path(identifier: &ObjectId) -> Option { } fn get_pdb_symstore_path(identifier: &ObjectId, ssqp_casing: bool) -> Option { - let debug_file = identifier.debug_file_basename()?; + let debug_file = identifier.validated_debug_file_basename()?; let debug_id = identifier.debug_id.as_ref()?; let debug_file = if ssqp_casing { @@ -74,7 +74,7 @@ fn get_pdb_symstore_path(identifier: &ObjectId, ssqp_casing: bool) -> Option Option { - let code_file = identifier.code_file_basename()?; + let code_file = identifier.validated_code_file_basename()?; let code_id = identifier.code_id.as_ref()?.as_str(); let code_file = if ssqp_casing { @@ -103,7 +103,7 @@ fn get_breakpad_path(identifier: &ObjectId) -> Option { return None; } - let debug_file = identifier.debug_file_basename()?; + let debug_file = identifier.validated_debug_file_basename()?; let debug_id = identifier.debug_id.as_ref()?; let new_debug_file = debug_file .strip_suffix(".exe") @@ -223,7 +223,7 @@ fn get_symstore_path( match filetype { FileType::ElfCode => { let code_id = identifier.code_id.as_ref()?; - let code_file = identifier.code_file_basename()?; + let code_file = identifier.validated_code_file_basename()?; let code_file = if ssqp_casing { Cow::Owned(code_file.to_lowercase()) } else { @@ -237,7 +237,7 @@ fn get_symstore_path( } FileType::MachCode => { - let code_file = identifier.code_file_basename()?; + let code_file = identifier.validated_code_file_basename()?; let code_file = if ssqp_casing { Cow::Owned(code_file.to_lowercase()) } else { diff --git a/crates/symbolicator-sources/src/types.rs b/crates/symbolicator-sources/src/types.rs index 069a60545..6f0c89e5d 100644 --- a/crates/symbolicator-sources/src/types.rs +++ b/crates/symbolicator-sources/src/types.rs @@ -143,4 +143,47 @@ impl ObjectId { pub fn debug_file_basename(&self) -> Option<&str> { Some(split_path(self.debug_file.as_ref()?).1) } + + /// Validated basename of the `code_file` field. + pub fn validated_code_file_basename(&self) -> Option<&str> { + valid_file_name(self.code_file.as_ref()?) + } + + /// Validated basename of the `debug_file` field. + pub fn validated_debug_file_basename(&self) -> Option<&str> { + valid_file_name(self.debug_file.as_ref()?) + } +} + +/// Returns the "valid" basename of the file. +/// +/// The basename is valid if: +/// - it is not longer than 255 bytes and +/// - does not contain a control character +fn valid_file_name(file_name: &str) -> Option<&str> { + let basename = split_path(file_name).1; + if basename.len() < 256 && !basename.contains(|c: char| c.is_control()) { + return Some(basename); + } + None +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn valid_file_names() { + let too_long = "foobar".repeat(50); + assert_eq!(valid_file_name(&too_long), None); + + let still_too_long = format!("a/few\\segments/{too_long}"); + assert_eq!(valid_file_name(&still_too_long), None); + + let not_too_long = format!("{too_long}/last"); + assert_eq!(valid_file_name(¬_too_long), Some("last")); + + let corrupt_prefix = "some\ninvalid\0stuff\r/correct"; + assert_eq!(valid_file_name(corrupt_prefix), Some("correct")); + } } diff --git a/crates/symbolicator/Cargo.toml b/crates/symbolicator/Cargo.toml index 68c37f104..168c41d22 100644 --- a/crates/symbolicator/Cargo.toml +++ b/crates/symbolicator/Cargo.toml @@ -16,7 +16,6 @@ axum-server = "0.4.0" console = "0.15.0" futures = "0.3.12" hostname = "0.3.1" -sentry-anyhow = { version = "0.30.0", features = ["backtrace"] } sentry = { version = "0.30.0", features = ["anyhow", "debug-images", "tracing", "tower", "tower-http"] } serde = { version = "1.0.137", features = ["derive", "rc"] } serde_json = "1.0.81" diff --git a/crates/symbolicator/src/service.rs b/crates/symbolicator/src/service.rs index 74f31cb51..41cfba673 100644 --- a/crates/symbolicator/src/service.rs +++ b/crates/symbolicator/src/service.rs @@ -415,15 +415,13 @@ impl RequestService { SymbolicationResponse::Completed(Box::new(response)) } Ok(Err(err)) => { - // NOTE: This will double-report this error to Sentry. - // We want the `tracing::error` here for local command line output, but we also - // want the `capture_anyhow` which will give a full error with a symbolicated backtrace. - // So until we can actually get a `Backtrace` out of an `Error`, and `anyhow` - // implements that functionality through its `Error` impl, we are stuck with this for now. - // See + // NOTE: We could use `capture_anyhow` here, which would correctly resolve any + // stack trace thats attached to the error. However these stack traces are not + // that useful in practice, and if these events have a stack trace, they will + // group the same depending on stack trace, whereas without a stack trace, they + // group by the chained errors, which is much better in our case. let error: &dyn std::error::Error = err.as_ref(); tracing::error!(error, "Symbolication failed"); - sentry_anyhow::capture_anyhow(&err); sentry::end_session_with_status(SessionStatus::Crashed); SymbolicationResponse::Failed { From 2d53bec2c46a03e5e1d6c35fd3ed31eb67035308 Mon Sep 17 00:00:00 2001 From: Arpad Borsos Date: Fri, 7 Apr 2023 15:57:20 +0200 Subject: [PATCH 2/2] comment --- .../src/services/symbolication/process_minidump.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/symbolicator-service/src/services/symbolication/process_minidump.rs b/crates/symbolicator-service/src/services/symbolication/process_minidump.rs index 6e6543b59..eb9133373 100644 --- a/crates/symbolicator-service/src/services/symbolication/process_minidump.rs +++ b/crates/symbolicator-service/src/services/symbolication/process_minidump.rs @@ -537,7 +537,7 @@ fn read_minidump(path: &Path) -> Result { Ok(md) } -/// Replaces invalid file names by a `` marker, otherwise returns a `String` for non-empty file names. +/// Returns an owned version of `file_name`, or `None` if it is empty. fn non_empty_file_name(file_name: &str) -> Option { if file_name.is_empty() { return None;