Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Do not fail processing minidumps on invalid code_file #1133

Merged
merged 2 commits into from
Apr 7, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

### Fixes

- Reject minidumps containing invalid file names. ([#1047](https://github.com/getsentry/symbolicator/pull/1047))
- Mask invalid file names in minidumps. ([#1047](https://github.com/getsentry/symbolicator/pull/1047), [#1133](https://github.com/getsentry/symbolicator/pull/1133))
- Update `minidump-processor` so minidumps with a 0-sized module are being processed. ([#1131](https://github.com/getsentry/symbolicator/pull/1131))

### Dependencies
Expand Down
1 change: 0 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@ use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::time::Instant;

use anyhow::{bail, Context, Result};
use anyhow::Result;
use async_trait::async_trait;
use chrono::{DateTime, Utc};
use minidump::system_info::Os;
use minidump::{MinidumpContext, MinidumpModuleList, MinidumpSystemInfo};
use minidump::{MinidumpContext, MinidumpSystemInfo};
use minidump::{MinidumpModule, Module};
use minidump_processor::{
FileError, FileKind, FillSymbolError, FrameSymbolizer, FrameWalker, ProcessState,
Expand Down Expand Up @@ -178,13 +178,14 @@ impl SymbolicatorSymbolProvider {
let sources = self.sources.clone();
let scope = self.scope.clone();

let code_file = non_empty_file_name(&module.code_file());
let debug_file = module.debug_file().as_deref().and_then(non_empty_file_name);

let identifier = ObjectId {
code_id: key.code_id.clone(),
code_file: Some(module.code_file().into_owned()),
code_file,
debug_id: key.debug_id,
debug_file: module
.debug_file()
.map(|debug_file| debug_file.into_owned()),
debug_file,
debug_checksum: None,
object_type: self.object_type,
};
Expand Down Expand Up @@ -249,18 +250,15 @@ fn object_info_from_minidump_module(ty: ObjectType, module: &MinidumpModule) ->
.code_identifier()
.filter(|code_id| !code_id.is_nil())
.map(|code_id| code_id.to_string().to_lowercase());
let code_file = module.code_file();
let code_file = match code_file.is_empty() {
true => None,
false => Some(code_file.into_owned()),
};
let code_file = non_empty_file_name(&module.code_file());
let debug_file = module.debug_file().as_deref().and_then(non_empty_file_name);

CompleteObjectInfo::from(RawObjectInfo {
ty,
code_id,
code_file,
debug_id: module.debug_identifier().map(|c| c.breakpad().to_string()),
debug_file: module.debug_file().map(|c| c.into_owned()),
debug_file,
debug_checksum: None,
image_addr: HexValue(module.base_address()),
image_size: match module.size() {
Expand Down Expand Up @@ -304,20 +302,24 @@ async fn stackwalk(
None => Registers::new(),
};

// Trim infinite recursions explicitly because those do not
// correlate to minidump size. Every other kind of bloated
// input data we know is already trimmed/rejected by raw
// byte size alone.
let frame_count = thread.frames.len().min(20000);
let mut frames = Vec::with_capacity(frame_count);
for frame in thread.frames.iter().take(frame_count) {
frames.push(RawFrame {
instruction_addr: HexValue(frame.resume_address),
package: frame.module.as_ref().map(|m| m.code_file().into_owned()),
trust: frame.trust.into(),
..RawFrame::default()
});
}
// We trim stack traces to 256 frames from the top. A similar limit is also in place in
// relay / store normalization, so any excess frames will be thrown away by Sentry anyway.
let frames = thread
.frames
.into_iter()
.take(256)
.map(|frame| {
let package = frame
.module
.and_then(|module| non_empty_file_name(&module.code_file()));
RawFrame {
instruction_addr: HexValue(frame.resume_address),
package,
trust: frame.trust.into(),
..RawFrame::default()
}
})
.collect();

stacktraces.push(RawStacktrace {
is_requesting: requesting_thread_index.map(|r| r == index),
Expand Down Expand Up @@ -432,25 +434,6 @@ impl SymbolicationActor {
}
};

let modules = minidump
.get_stream::<MinidumpModuleList>()
.context("Failed to read minidump module list")?;

for module in modules.iter() {
let code_file = module.code_file();
if file_name_is_invalid(&code_file) {
tracing::error!(%code_file, "Minidump module has an invalid code file name");
bail!("Minidump rejected due to invalid code file name")
}

if let Some(debug_file) = module.debug_file() {
if file_name_is_invalid(&debug_file) {
tracing::error!(%debug_file, "Minidump module has an invalid debug file name");
bail!("Minidump rejected due to invalid debug file name")
}
}
}

let stackwalk_future = stackwalk(
self.cficaches.clone(),
&minidump,
Expand Down Expand Up @@ -554,39 +537,12 @@ fn read_minidump(path: &Path) -> Result<Minidump> {
Ok(md)
}

/// Checks whether a file name contains a control character or its last segment
/// is longer than 255 bytes.
fn file_name_is_invalid(file_name: &str) -> bool {
if file_name.contains(|c: char| c.is_control()) {
return true;
}

let last_segment = file_name
.rsplit_once(&['/', '\\'])
.map(|(_init, last)| last)
.unwrap_or(file_name);

last_segment.len() > 255
}

#[cfg(test)]
mod tests {
use super::file_name_is_invalid;

#[test]
fn invalid_file_names() {
let too_long = "foobar".repeat(50);

assert!(file_name_is_invalid(&too_long));

let still_too_long = format!("a/few\\segments/{too_long}");

assert!(file_name_is_invalid(&still_too_long));

let not_too_long = format!("{too_long}/last");

assert!(!file_name_is_invalid(&not_too_long));
/// Replaces invalid file names by a `<invalid>` marker, otherwise returns a `String` for non-empty file names.
Swatinem marked this conversation as resolved.
Show resolved Hide resolved
fn non_empty_file_name(file_name: &str) -> Option<String> {
if file_name.is_empty() {
return None;
}
Some(file_name.to_owned())
}

#[cfg(skip)]
Expand Down
10 changes: 5 additions & 5 deletions crates/symbolicator-sources/src/paths.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ fn get_lldb_path(identifier: &ObjectId) -> Option<String> {
}

fn get_pdb_symstore_path(identifier: &ObjectId, ssqp_casing: bool) -> Option<String> {
let debug_file = identifier.debug_file_basename()?;
let debug_file = identifier.validated_debug_file_basename()?;
let debug_id = identifier.debug_id.as_ref()?;

let debug_file = if ssqp_casing {
Expand All @@ -74,7 +74,7 @@ fn get_pdb_symstore_path(identifier: &ObjectId, ssqp_casing: bool) -> Option<Str
}

fn get_pe_symstore_path(identifier: &ObjectId, ssqp_casing: bool) -> Option<String> {
let code_file = identifier.code_file_basename()?;
let code_file = identifier.validated_code_file_basename()?;
let code_id = identifier.code_id.as_ref()?.as_str();

let code_file = if ssqp_casing {
Expand Down Expand Up @@ -103,7 +103,7 @@ fn get_breakpad_path(identifier: &ObjectId) -> Option<String> {
return None;
}

let debug_file = identifier.debug_file_basename()?;
let debug_file = identifier.validated_debug_file_basename()?;
let debug_id = identifier.debug_id.as_ref()?;
let new_debug_file = debug_file
.strip_suffix(".exe")
Expand Down Expand Up @@ -223,7 +223,7 @@ fn get_symstore_path(
match filetype {
FileType::ElfCode => {
let code_id = identifier.code_id.as_ref()?;
let code_file = identifier.code_file_basename()?;
let code_file = identifier.validated_code_file_basename()?;
let code_file = if ssqp_casing {
Cow::Owned(code_file.to_lowercase())
} else {
Expand All @@ -237,7 +237,7 @@ fn get_symstore_path(
}

FileType::MachCode => {
let code_file = identifier.code_file_basename()?;
let code_file = identifier.validated_code_file_basename()?;
let code_file = if ssqp_casing {
Cow::Owned(code_file.to_lowercase())
} else {
Expand Down
43 changes: 43 additions & 0 deletions crates/symbolicator-sources/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,4 +143,47 @@ impl ObjectId {
pub fn debug_file_basename(&self) -> Option<&str> {
Some(split_path(self.debug_file.as_ref()?).1)
}

/// Validated basename of the `code_file` field.
pub fn validated_code_file_basename(&self) -> Option<&str> {
valid_file_name(self.code_file.as_ref()?)
}

/// Validated basename of the `debug_file` field.
pub fn validated_debug_file_basename(&self) -> Option<&str> {
valid_file_name(self.debug_file.as_ref()?)
}
}

/// Returns the "valid" basename of the file.
///
/// The basename is valid if:
/// - it is not longer than 255 bytes and
/// - does not contain a control character
fn valid_file_name(file_name: &str) -> Option<&str> {
let basename = split_path(file_name).1;
if basename.len() < 256 && !basename.contains(|c: char| c.is_control()) {
return Some(basename);
}
None
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn valid_file_names() {
let too_long = "foobar".repeat(50);
assert_eq!(valid_file_name(&too_long), None);

let still_too_long = format!("a/few\\segments/{too_long}");
assert_eq!(valid_file_name(&still_too_long), None);

let not_too_long = format!("{too_long}/last");
assert_eq!(valid_file_name(&not_too_long), Some("last"));

let corrupt_prefix = "some\ninvalid\0stuff\r/correct";
assert_eq!(valid_file_name(corrupt_prefix), Some("correct"));
}
}
1 change: 0 additions & 1 deletion crates/symbolicator/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ axum-server = "0.4.0"
console = "0.15.0"
futures = "0.3.12"
hostname = "0.3.1"
sentry-anyhow = { version = "0.30.0", features = ["backtrace"] }
sentry = { version = "0.30.0", features = ["anyhow", "debug-images", "tracing", "tower", "tower-http"] }
serde = { version = "1.0.137", features = ["derive", "rc"] }
serde_json = "1.0.81"
Expand Down
12 changes: 5 additions & 7 deletions crates/symbolicator/src/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -415,15 +415,13 @@ impl RequestService {
SymbolicationResponse::Completed(Box::new(response))
}
Ok(Err(err)) => {
// NOTE: This will double-report this error to Sentry.
// We want the `tracing::error` here for local command line output, but we also
// want the `capture_anyhow` which will give a full error with a symbolicated backtrace.
// So until we can actually get a `Backtrace` out of an `Error`, and `anyhow`
// implements that functionality through its `Error` impl, we are stuck with this for now.
// See <https://github.com/rust-lang/rust/issues/99301>
// NOTE: We could use `capture_anyhow` here, which would correctly resolve any
// stack trace thats attached to the error. However these stack traces are not
// that useful in practice, and if these events have a stack trace, they will
// group the same depending on stack trace, whereas without a stack trace, they
// group by the chained errors, which is much better in our case.
let error: &dyn std::error::Error = err.as_ref();
tracing::error!(error, "Symbolication failed");
sentry_anyhow::capture_anyhow(&err);

sentry::end_session_with_status(SessionStatus::Crashed);
SymbolicationResponse::Failed {
Expand Down