diff --git a/Cargo.toml b/Cargo.toml index 3bd4bd0c..4767cfaa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,6 +24,8 @@ nix = "0.23" parking_lot = "0.11" tempfile = "3.1" thiserror = "1.0" +findshlibs = "0.10" +cfg-if = "1.0" inferno = { version = "0.10", default-features = false, features = ["nameattr"], optional = true } prost = { version = "0.9", optional = true } diff --git a/README.md b/README.md index 8575b356..e3df7a37 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,13 @@ FRAME: backtrace::backtrace::trace::h3e91a3123a3049a5 -> FRAME: pprof::profiler: FRAME: backtrace::backtrace::trace::h3e91a3123a3049a5 -> FRAME: pprof::profiler::perf_signal_handler::h7b995c4ab2e66493 -> FRAME: Unknown -> FRAME: prime_number::main::h47f1058543990c8b -> FRAME: std::rt::lang_start::{{closure}}::h4262e250f8024b06 -> FRAME: std::rt::lang_start_internal::{{closure}}::h812f70926ebbddd0 -> std::panicking::try::do_call::h3210e2ce6a68897b -> FRAME: __rust_maybe_catch_panic -> FRAME: std::panicking::try::h28c2e2ec1c3871ce -> std::panic::catch_unwind::h05e542185e35aabf -> std::rt::lang_start_internal::hd7efcfd33686f472 -> FRAME: main -> FRAME: __libc_start_main -> FRAME: _start -> FRAME: Unknown -> THREAD: prime_number 1 ``` + +## Features + +- `cpp` enables demangling of C++ symbols. +- `flamegraph` enables the flamegraph report format. +- `protobuf` enables the pprof protobuf report format. + ## Flamegraph ```toml @@ -206,6 +213,12 @@ Unfortunately, there is no 100% robust stack tracing method. [Some related resea > libgcc's unwind method is not safe to use from signal handlers. One particular cause of deadlock is when profiling tick happens when program is propagating thrown exception. +This can be avoided by adding a blocklist, which makes the profiler skip samples taken inside the listed libraries: + +```rust +let guard = pprof::ProfilerGuardBuilder::default().frequency(1000).blocklist(&["libc", "libgcc", "pthread"]).build().unwrap(); +``` + ### Signal Safety Signal safety is hard to guarantee. But it's not *that* hard. 
diff --git a/examples/backtrace_while_sampling.rs b/examples/backtrace_while_sampling.rs new file mode 100644 index 00000000..90b6d613 --- /dev/null +++ b/examples/backtrace_while_sampling.rs @@ -0,0 +1,34 @@ +// Copyright 2021 TiKV Project Authors. Licensed under Apache-2.0. + +use pprof; +use std::fs::File; + +fn deep_recursive(depth: i32) { + if depth > 0 { + deep_recursive(depth - 1); + } else { + backtrace::Backtrace::new(); + } +} + +fn main() { + let guard = pprof::ProfilerGuardBuilder::default() + .frequency(1000) + .blocklist(&["libc", "libgcc", "pthread"]) + .build() + .unwrap(); + + for _ in 0..10000 { + deep_recursive(20); + } + + match guard.report().build() { + Ok(report) => { + let file = File::create("flamegraph.svg").unwrap(); + report.flamegraph(file).unwrap(); + + println!("report: {:?}", &report); + } + Err(_) => {} + }; +} diff --git a/examples/malloc_hook.rs b/examples/malloc_hook.rs index 4d23fc33..2957c36d 100644 --- a/examples/malloc_hook.rs +++ b/examples/malloc_hook.rs @@ -7,6 +7,7 @@ use std::ffi::c_void; #[cfg(not(target_os = "linux"))] #[allow(clippy::wrong_self_convention)] +#[allow(non_upper_case_globals)] static mut __malloc_hook: Option *mut c_void> = None; extern "C" { diff --git a/src/collector.rs b/src/collector.rs index ec11c1b9..71b897e9 100644 --- a/src/collector.rs +++ b/src/collector.rs @@ -247,12 +247,25 @@ impl Collector { } } +#[cfg(test)] +mod test_utils { + use super::*; + use std::collections::BTreeMap; + + pub fn add_map(hashmap: &mut BTreeMap, entry: &Entry) { + match hashmap.get_mut(&entry.item) { + None => { + hashmap.insert(entry.item, entry.count); + } + Some(count) => *count += entry.count, + } + } +} + #[cfg(test)] mod tests { use super::*; - use std::cell::RefCell; use std::collections::BTreeMap; - use std::ffi::c_void; #[test] fn stack_hash_counter() { @@ -272,15 +285,6 @@ mod tests { }); } - fn add_map(hashmap: &mut BTreeMap, entry: &Entry) { - match hashmap.get_mut(&entry.item) { - None => { - 
hashmap.insert(entry.item, entry.count); - } - Some(count) => *count += entry.count, - } - } - #[test] fn evict_test() { let mut stack_hash_counter = StackHashCounter::::default(); @@ -291,14 +295,14 @@ mod tests { match stack_hash_counter.add(item, 1) { None => {} Some(evict) => { - add_map(&mut real_map, &evict); + test_utils::add_map(&mut real_map, &evict); } } } } stack_hash_counter.iter().for_each(|entry| { - add_map(&mut real_map, &entry); + test_utils::add_map(&mut real_map, &entry); }); for item in 0..(1 << 10) * 4 { @@ -326,7 +330,7 @@ mod tests { } collector.try_iter().unwrap().for_each(|entry| { - add_map(&mut real_map, &entry); + test_utils::add_map(&mut real_map, &entry); }); for item in 0..(1 << 12) * 4 { @@ -341,10 +345,15 @@ mod tests { } } } +} - #[cfg(not(target_os = "linux"))] - #[allow(clippy::wrong_self_convention)] - static mut __malloc_hook: Option *mut c_void> = None; +#[cfg(test)] +#[cfg(target_os = "linux")] +mod malloc_free_test { + use super::*; + use std::cell::RefCell; + use std::collections::BTreeMap; + use std::ffi::c_void; extern "C" { #[cfg(target_os = "linux")] @@ -397,7 +406,7 @@ mod tests { }); collector.try_iter().unwrap().for_each(|entry| { - add_map(&mut real_map, &entry); + test_utils::add_map(&mut real_map, &entry); }); for item in 0..(1 << 10) * 4 { diff --git a/src/lib.rs b/src/lib.rs index 32da691a..54ec07c4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,9 +2,9 @@ //! pprof-rs is an integrated profiler for rust program. //! -//! This crate provides a programable interface to start/stop/report a profiler dynamically. With the -//! help of this crate, you can easily integrate a profiler into your rust program in a modern, convenient -//! way. +//! This crate provides a programmable interface to start/stop/report a profiler +//! dynamically. With the help of this crate, you can easily integrate a +//! profiler into your Rust program in a modern, convenient way. //! //! A sample usage is: //! @@ -21,7 +21,22 @@ //!}; //! 
``` //! -//! You can find more details in [README.md](https://github.com/tikv/pprof-rs/blob/master/README.md) +//! Further configuration can be supplied through `ProfilerGuardBuilder`: +//! +//! ```rust +//! let guard = pprof::ProfilerGuardBuilder::default().frequency(1000).blocklist(&["libc", "libgcc", "pthread"]).build().unwrap(); +//! ``` +//! +//! `frequency` sets the sampling frequency, and `blocklist` makes the +//! profiler ignore every sample whose first frame comes from a library whose +//! name contains one of these strings. +//! +//! Skipping `libc`, `libgcc` and `libpthread` can avoid the possible +//! deadlock inside `_Unwind_Backtrace` and helps keep the signal handler +//! signal-safe. +//! +//! You can find more details in +//! [README.md](https://github.com/tikv/pprof-rs/blob/master/README.md) /// Define the MAX supported stack depth. TODO: make this variable mutable. pub const MAX_DEPTH: usize = 32; @@ -40,7 +55,7 @@ mod timer; pub use self::collector::{Collector, StackHashCounter}; pub use self::error::{Error, Result}; pub use self::frames::{Frames, Symbol}; -pub use self::profiler::ProfilerGuard; +pub use self::profiler::{ProfilerGuard, ProfilerGuardBuilder}; pub use self::report::{Report, ReportBuilder}; #[cfg(feature = "flamegraph")] diff --git a/src/profiler.rs b/src/profiler.rs index ca990a62..529603c8 100644 --- a/src/profiler.rs +++ b/src/profiler.rs @@ -7,6 +7,9 @@ use backtrace::Frame; use nix::sys::signal; use parking_lot::RwLock; +#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] +use findshlibs::{Segment, SharedLibrary, TargetSharedLibrary}; + use crate::collector::Collector; use crate::error::{Error, Result}; use crate::frames::UnresolvedFrames; @@ -23,6 +26,92 @@ pub struct Profiler { sample_counter: i32, running: bool, + + #[cfg(all(any(target_arch = "x86_64", target_arch = "aarch64")))] + blocklist_segments: Vec<(usize, usize)>, +} + +pub struct ProfilerGuardBuilder { + frequency: c_int, + #[cfg(all(any(target_arch = "x86_64", 
target_arch = "aarch64")))]
+    blocklist_segments: Vec<(usize, usize)>,
+}
+
+impl Default for ProfilerGuardBuilder {
+    /// Default configuration: a frequency of 99 and an empty blocklist.
+    fn default() -> ProfilerGuardBuilder {
+        ProfilerGuardBuilder {
+            frequency: 99,
+
+            #[cfg(all(any(target_arch = "x86_64", target_arch = "aarch64")))]
+            blocklist_segments: Vec::new(),
+        }
+    }
+}
+
+impl ProfilerGuardBuilder {
+    /// Set the sampling frequency; `build` passes it to `Timer::new`.
+    pub fn frequency(self, frequency: c_int) -> Self {
+        Self { frequency, ..self }
+    }
+
+    /// Skip samples taken inside shared libraries whose name contains any of
+    /// the `blocklist` strings. Each segment of a matching library is recorded
+    /// as a half-open `[start, end)` address range.
+    #[cfg(all(any(target_arch = "x86_64", target_arch = "aarch64")))]
+    pub fn blocklist<T: AsRef<str>>(self, blocklist: &[T]) -> Self {
+        let mut blocklist_segments = Vec::new();
+        TargetSharedLibrary::each(|shlib| {
+            // A library name that `to_str` cannot convert never matches.
+            let in_blocklist = shlib.name().to_str().map_or(false, |name| {
+                blocklist
+                    .iter()
+                    .any(|blocked_name| name.contains(blocked_name.as_ref()))
+            });
+            if in_blocklist {
+                blocklist_segments.extend(shlib.segments().map(|seg| {
+                    let start = seg.actual_virtual_memory_address(shlib).0;
+                    (start, start + seg.len())
+                }));
+            }
+        });
+
+        Self {
+            blocklist_segments,
+            ..self
+        }
+    }
+
+    /// Install this configuration into the global profiler and start it.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`Error::CreatingError`] when the global profiler could not be
+    /// created (the cause is logged), or the error reported by `start`.
+    pub fn build(self) -> Result<ProfilerGuard<'static>> {
+        trigger_lazy();
+
+        match PROFILER.write().as_mut() {
+            Err(err) => {
+                log::error!("Error in creating profiler: {}", err);
+                Err(Error::CreatingError)
+            }
+            Ok(profiler) => {
+                #[cfg(all(any(target_arch = "x86_64", target_arch = "aarch64")))]
+                {
+                    profiler.blocklist_segments = self.blocklist_segments;
+                }
+
+                match profiler.start() {
+                    Ok(()) => Ok(ProfilerGuard::<'static> {
+                        profiler: &PROFILER,
+                        timer: Some(Timer::new(self.frequency)),
+                    }),
+                    Err(err) => Err(err),
+                }
+            }
+        }
+    }
 }
 
 /// RAII structure used to stop profiling when dropped. It is the only interface to access profiler.
@@ -39,21 +128,7 @@ fn trigger_lazy() {
 impl ProfilerGuard<'_> {
     /// Start profiling with given sample frequency.
pub fn new(frequency: c_int) -> Result> { - trigger_lazy(); - - match PROFILER.write().as_mut() { - Err(err) => { - log::error!("Error in creating profiler: {}", err); - Err(Error::CreatingError) - } - Ok(profiler) => match profiler.start() { - Ok(()) => Ok(ProfilerGuard::<'static> { - profiler: &PROFILER, - timer: Some(Timer::new(frequency)), - }), - Err(err) => Err(err), - }, - } + ProfilerGuardBuilder::default().frequency(frequency).build() } /// Generate a report @@ -118,9 +193,53 @@ fn write_thread_name(current_thread: libc::pthread_t, name: &mut [libc::c_char]) #[no_mangle] #[allow(clippy::uninit_assumed_init)] -extern "C" fn perf_signal_handler(_signal: c_int) { +#[cfg_attr( + not(all(any(target_arch = "x86_64", target_arch = "aarch64"))), + allow(unused_variables) +)] +extern "C" fn perf_signal_handler( + _signal: c_int, + _siginfo: *mut libc::siginfo_t, + ucontext: *mut libc::c_void, +) { if let Some(mut guard) = PROFILER.try_write() { if let Ok(profiler) = guard.as_mut() { + #[cfg(all(any(target_arch = "x86_64", target_arch = "aarch64")))] + if !ucontext.is_null() { + let ucontext: *mut libc::ucontext_t = ucontext as *mut libc::ucontext_t; + + #[cfg(all(target_arch = "x86_64", target_os = "linux"))] + let addr = + unsafe { (*ucontext).uc_mcontext.gregs[libc::REG_RIP as usize] as usize }; + + #[cfg(all(target_arch = "x86_64", target_os = "macos"))] + let addr = unsafe { + let mcontext = (*ucontext).uc_mcontext; + if mcontext.is_null() { + 0 + } else { + (*mcontext).__ss.__rip as usize + } + }; + + #[cfg(all(target_arch = "aarch64", target_os = "linux"))] + let addr = unsafe { (*ucontext).uc_mcontext.pc as usize }; + + #[cfg(all(target_arch = "aarch64", target_os = "macos"))] + let addr = unsafe { + let mcontext = (*ucontext).uc_mcontext; + if mcontext.is_null() { + 0 + } else { + (*mcontext).__ss.__pc as usize + } + }; + + if profiler.is_blocklisted(addr) { + return; + } + } + let mut bt: [Frame; MAX_DEPTH] = unsafe { 
std::mem::MaybeUninit::uninit().assume_init() };
             let mut index = 0;
@@ -155,8 +274,21 @@ impl Profiler {
             data: Collector::new()?,
             sample_counter: 0,
             running: false,
+
+            #[cfg(all(any(target_arch = "x86_64", target_arch = "aarch64")))]
+            blocklist_segments: Vec::new(),
         })
     }
+
+    /// Whether `addr` lies in a blocklisted segment. Each `(start, end)` pair
+    /// is the half-open range `[start, end)`, so `start` itself is included.
+    /// Called from `perf_signal_handler`; it only scans the precomputed list.
+    #[cfg(all(any(target_arch = "x86_64", target_arch = "aarch64")))]
+    fn is_blocklisted(&self, addr: usize) -> bool {
+        self.blocklist_segments
+            .iter()
+            .any(|&(start, end)| (start..end).contains(&addr))
+    }
 }
 
 impl Profiler {
@@ -193,8 +325,13 @@ impl Profiler {
     }
 
     fn register_signal_handler(&self) -> Result<()> {
-        let handler = signal::SigHandler::Handler(perf_signal_handler);
-        unsafe { signal::signal(signal::SIGPROF, handler) }?;
+        let handler = signal::SigHandler::SigAction(perf_signal_handler);
+        let sigaction = signal::SigAction::new(
+            handler,
+            signal::SaFlags::SA_SIGINFO,
+            signal::SigSet::empty(),
+        );
+        unsafe { signal::sigaction(signal::SIGPROF, &sigaction) }?;
         Ok(())
     }
 
@@ -216,17 +353,16 @@ impl Profiler {
 }
 
 #[cfg(test)]
+#[cfg(target_os = "linux")]
 mod tests {
     use super::*;
+
     use std::cell::RefCell;
     use std::ffi::c_void;
 
-    #[cfg(not(target_os = "linux"))]
-    #[allow(clippy::wrong_self_convention)]
-    static mut __malloc_hook: Option *mut c_void> = None;
+    use std::ptr::null_mut;
 
     extern "C" {
-        #[cfg(target_os = "linux")]
         static mut __malloc_hook: Option *mut c_void>;
 
         fn malloc(size: usize) -> *mut c_void;
@@ -293,6 +429,7 @@ mod tests {
         prime_numbers
     }
 
+    #[cfg(target_os = "linux")]
     #[test]
     fn malloc_free() {
         trigger_lazy();
@@ -307,7 +444,7 @@ mod tests {
         for i in 2..50000 {
             if is_prime_number(i, &prime_numbers) {
                 _v += 1;
-                perf_signal_handler(27);
+                perf_signal_handler(27, null_mut(), null_mut());
             }
         }
         unsafe {