Skip to content

Commit

Permalink
aya: add guardrails for valid combinations of perf_event type and config
Browse files Browse the repository at this point in the history
Add guardrails for setting the event type and config of perf_event
programs. The `PerfEventConfig` enum now defines the event `type` and
`config` of interest.

Remove public re-exports, and add idiomatic Rust types for:
- perf_hw_id => HardwareEvent
- perf_sw_ids => SoftwareEvent
- perf_hw_cache_id => HwCacheEvent
- perf_hw_cache_op_id => HwCacheOp
- perf_hw_cache_op_result_id => HwCacheResult

The motivation behind this is mainly for the `type` and `config` fields
of `bpf_link_info.perf_event.event`. The newly added enums are planned
to also be used in the `bpf_link_info` metadata.

Although the `Breakpoint`/`PERF_TYPE_BREAKPOINT` variant exists, it is not
fully implemented. Its only usage at the moment is in link info.
  • Loading branch information
tyrone-wu committed Sep 25, 2024
1 parent 045032b commit dec30be
Show file tree
Hide file tree
Showing 3 changed files with 478 additions and 119 deletions.
2 changes: 1 addition & 1 deletion aya/src/programs/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ pub use crate::programs::{
links::{CgroupAttachMode, Link},
lirc_mode2::LircMode2,
lsm::Lsm,
perf_event::{PerfEvent, PerfEventScope, PerfTypeId, SamplePolicy},
perf_event::{PerfEvent, PerfEventScope, SamplePolicy},
probe::ProbeKind,
raw_trace_point::RawTracePoint,
sk_lookup::SkLookup,
Expand Down
257 changes: 224 additions & 33 deletions aya/src/programs/perf_event.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,11 @@

use std::os::fd::AsFd as _;

pub use crate::generated::{
perf_hw_cache_id, perf_hw_cache_op_id, perf_hw_cache_op_result_id, perf_hw_id, perf_sw_ids,
};
use crate::{
generated::{
bpf_link_type,
bpf_prog_type::BPF_PROG_TYPE_PERF_EVENT,
perf_hw_cache_id, perf_hw_cache_op_id, perf_hw_cache_op_result_id, perf_hw_id, perf_sw_ids,
perf_type_id::{
PERF_TYPE_BREAKPOINT, PERF_TYPE_HARDWARE, PERF_TYPE_HW_CACHE, PERF_TYPE_RAW,
PERF_TYPE_SOFTWARE, PERF_TYPE_TRACEPOINT,
Expand All @@ -23,22 +21,200 @@ use crate::{
sys::{bpf_link_get_info_by_fd, perf_event_open, SyscallError},
};

/// The type of perf event
#[repr(u32)]
/// The type of perf event and their respective configuration.
#[doc(alias = "perf_type_id")]
#[derive(Debug, Clone)]
pub enum PerfTypeId {
/// PERF_TYPE_HARDWARE
Hardware = PERF_TYPE_HARDWARE as u32,
/// PERF_TYPE_SOFTWARE
Software = PERF_TYPE_SOFTWARE as u32,
/// PERF_TYPE_TRACEPOINT
TracePoint = PERF_TYPE_TRACEPOINT as u32,
/// PERF_TYPE_HW_CACHE
HwCache = PERF_TYPE_HW_CACHE as u32,
/// PERF_TYPE_RAW
Raw = PERF_TYPE_RAW as u32,
/// PERF_TYPE_BREAKPOINT
Breakpoint = PERF_TYPE_BREAKPOINT as u32,
pub enum PerfEventConfig {
/// The hardware event to report.
#[doc(alias = "PERF_TYPE_HARDWARE")]
Hardware(HardwareEvent),
/// The software event to report.
#[doc(alias = "PERF_TYPE_SOFTWARE")]
Software(SoftwareEvent),
/// The kernel trace point event to report.
#[doc(alias = "PERF_TYPE_TRACEPOINT")]
TracePoint {
/// The ID of the tracing event. This can be obtained from
/// `/sys/kernel/debug/tracing/events/*/*/id` if `ftrace` is enabled in the kernel.
event_id: u64,
},
/// The hardware cache event to report.
#[doc(alias = "PERF_TYPE_HW_CACHE")]
HwCache {
/// The hardware cache event.
event: HwCacheEvent,
/// The hardware cache operation.
operation: HwCacheOp,
/// The hardware cache result of interest.
result: HwCacheResult,
},
/// The "raw" implementation-specific event to report.
#[doc(alias = "PERF_TYPE_RAW")]
Raw {
/// The "raw" event value, for events that are not covered by the "generalized" events.
/// These are CPU implementation-defined events.
event_id: u64,
},
/// A hardware breakpoint.
///
/// Note: this variant is not fully implemented at the moment. When attached, the event
/// `config` is always set to `0`.
// TODO: Variant not fully implemented due to additional `perf_event_attr` fields like
// `bp_type`, `bp_addr`, etc.
#[doc(alias = "PERF_TYPE_BREAKPOINT")]
Breakpoint,
/// The dynamic PMU (Performance Monitor Unit) event to report.
///
/// Available PMUs may be found under `/sys/bus/event_source/devices`.
Pmu {
/// The PMU type.
///
/// This value can be extracted from `/sys/bus/event_source/devices/*/type`.
pmu_type: u32,
/// The PMU config option.
///
/// This value can be extracted from `/sys/bus/event_source/devices/*/format/`, where the
/// `config:<value>` indicates the bit position to set.
///
/// For example, `config:3` => `config = 1 << 3`.
config: u64,
},
}

/// The "generalized" hardware CPU events provided by the kernel.
///
/// Each variant's discriminant is the matching `perf_hw_id` constant widened to `u64`; it is
/// used as the event `config` when wrapped in [`PerfEventConfig::Hardware`].
#[repr(u64)]
#[doc(alias = "perf_hw_id")]
#[derive(Debug, Clone, Copy)]
pub enum HardwareEvent {
/// Total CPU cycles.
#[doc(alias = "PERF_COUNT_HW_CPU_CYCLES")]
CpuCycles = perf_hw_id::PERF_COUNT_HW_CPU_CYCLES as u64,
/// Number of retired instructions.
#[doc(alias = "PERF_COUNT_HW_INSTRUCTIONS")]
Instructions = perf_hw_id::PERF_COUNT_HW_INSTRUCTIONS as u64,
/// Number of cache accesses.
#[doc(alias = "PERF_COUNT_HW_CACHE_REFERENCES")]
CacheReferences = perf_hw_id::PERF_COUNT_HW_CACHE_REFERENCES as u64,
/// Number of cache misses.
#[doc(alias = "PERF_COUNT_HW_CACHE_MISSES")]
CacheMisses = perf_hw_id::PERF_COUNT_HW_CACHE_MISSES as u64,
/// Number of retired branch instructions.
#[doc(alias = "PERF_COUNT_HW_BRANCH_INSTRUCTIONS")]
BranchInstructions = perf_hw_id::PERF_COUNT_HW_BRANCH_INSTRUCTIONS as u64,
/// Number of mispredicted branch instructions.
#[doc(alias = "PERF_COUNT_HW_BRANCH_MISSES")]
BranchMisses = perf_hw_id::PERF_COUNT_HW_BRANCH_MISSES as u64,
/// Number of bus cycles.
#[doc(alias = "PERF_COUNT_HW_BUS_CYCLES")]
BusCycles = perf_hw_id::PERF_COUNT_HW_BUS_CYCLES as u64,
/// Number of stalled cycles during issue.
#[doc(alias = "PERF_COUNT_HW_STALLED_CYCLES_FRONTEND")]
StalledCyclesFrontend = perf_hw_id::PERF_COUNT_HW_STALLED_CYCLES_FRONTEND as u64,
/// Number of stalled cycles during retirement.
#[doc(alias = "PERF_COUNT_HW_STALLED_CYCLES_BACKEND")]
StalledCyclesBackend = perf_hw_id::PERF_COUNT_HW_STALLED_CYCLES_BACKEND as u64,
/// Total CPU cycles, unaffected by CPU frequency scaling.
#[doc(alias = "PERF_COUNT_HW_REF_CPU_CYCLES")]
RefCpuCycles = perf_hw_id::PERF_COUNT_HW_REF_CPU_CYCLES as u64,
}

/// The software-defined events provided by the kernel.
///
/// Each variant's discriminant is the matching `perf_sw_ids` constant widened to `u64`; it is
/// used as the event `config` when wrapped in [`PerfEventConfig::Software`].
#[repr(u64)]
#[doc(alias = "perf_sw_ids")]
#[derive(Debug, Clone, Copy)]
pub enum SoftwareEvent {
/// The CPU clock timer.
#[doc(alias = "PERF_COUNT_SW_CPU_CLOCK")]
CpuClock = perf_sw_ids::PERF_COUNT_SW_CPU_CLOCK as u64,
/// The clock count specific to the task that is running.
#[doc(alias = "PERF_COUNT_SW_TASK_CLOCK")]
TaskClock = perf_sw_ids::PERF_COUNT_SW_TASK_CLOCK as u64,
/// Number of page faults.
#[doc(alias = "PERF_COUNT_SW_PAGE_FAULTS")]
PageFaults = perf_sw_ids::PERF_COUNT_SW_PAGE_FAULTS as u64,
/// Number of context switches.
#[doc(alias = "PERF_COUNT_SW_CONTEXT_SWITCHES")]
ContextSwitches = perf_sw_ids::PERF_COUNT_SW_CONTEXT_SWITCHES as u64,
/// Number of times the process has migrated to a new CPU.
#[doc(alias = "PERF_COUNT_SW_CPU_MIGRATIONS")]
CpuMigrations = perf_sw_ids::PERF_COUNT_SW_CPU_MIGRATIONS as u64,
/// Number of minor page faults.
#[doc(alias = "PERF_COUNT_SW_PAGE_FAULTS_MIN")]
PageFaultsMin = perf_sw_ids::PERF_COUNT_SW_PAGE_FAULTS_MIN as u64,
/// Number of major page faults.
#[doc(alias = "PERF_COUNT_SW_PAGE_FAULTS_MAJ")]
PageFaultsMaj = perf_sw_ids::PERF_COUNT_SW_PAGE_FAULTS_MAJ as u64,
/// Number of alignment faults.
#[doc(alias = "PERF_COUNT_SW_ALIGNMENT_FAULTS")]
AlignmentFaults = perf_sw_ids::PERF_COUNT_SW_ALIGNMENT_FAULTS as u64,
/// Number of emulation faults.
#[doc(alias = "PERF_COUNT_SW_EMULATION_FAULTS")]
EmulationFaults = perf_sw_ids::PERF_COUNT_SW_EMULATION_FAULTS as u64,
/// Placeholder event that counts nothing.
#[doc(alias = "PERF_COUNT_SW_DUMMY")]
Dummy = perf_sw_ids::PERF_COUNT_SW_DUMMY as u64,
/// Generates raw sample data from BPF.
#[doc(alias = "PERF_COUNT_SW_BPF_OUTPUT")]
BpfOutput = perf_sw_ids::PERF_COUNT_SW_BPF_OUTPUT as u64,
/// Number of context switches to a task in a different cgroup.
#[doc(alias = "PERF_COUNT_SW_CGROUP_SWITCHES")]
CgroupSwitches = perf_sw_ids::PERF_COUNT_SW_CGROUP_SWITCHES as u64,
}

/// The hardware CPU cache events.
///
/// Each variant's discriminant is the matching `perf_hw_cache_id` constant widened to `u64`.
/// For [`PerfEventConfig::HwCache`], this value is OR-ed unshifted into the event `config`,
/// alongside the shifted [`HwCacheOp`] and [`HwCacheResult`] values.
#[repr(u64)]
#[doc(alias = "perf_hw_cache_id")]
#[derive(Debug, Clone, Copy)]
pub enum HwCacheEvent {
/// Measures Level 1 data cache.
#[doc(alias = "PERF_COUNT_HW_CACHE_L1D")]
L1d = perf_hw_cache_id::PERF_COUNT_HW_CACHE_L1D as u64,
/// Measures Level 1 instruction cache.
#[doc(alias = "PERF_COUNT_HW_CACHE_L1I")]
L1i = perf_hw_cache_id::PERF_COUNT_HW_CACHE_L1I as u64,
/// Measures Last-level cache.
#[doc(alias = "PERF_COUNT_HW_CACHE_LL")]
Ll = perf_hw_cache_id::PERF_COUNT_HW_CACHE_LL as u64,
/// Measures Data TLB (Translation Lookaside Buffer).
#[doc(alias = "PERF_COUNT_HW_CACHE_DTLB")]
Dtlb = perf_hw_cache_id::PERF_COUNT_HW_CACHE_DTLB as u64,
/// Measures Instruction TLB (Translation Lookaside Buffer).
#[doc(alias = "PERF_COUNT_HW_CACHE_ITLB")]
Itlb = perf_hw_cache_id::PERF_COUNT_HW_CACHE_ITLB as u64,
/// Measures branch prediction.
#[doc(alias = "PERF_COUNT_HW_CACHE_BPU")]
Bpu = perf_hw_cache_id::PERF_COUNT_HW_CACHE_BPU as u64,
/// Measures local memory accesses.
#[doc(alias = "PERF_COUNT_HW_CACHE_NODE")]
Node = perf_hw_cache_id::PERF_COUNT_HW_CACHE_NODE as u64,
}

/// The hardware CPU cache operations.
///
/// Each variant's discriminant is the matching `perf_hw_cache_op_id` constant widened to
/// `u64`. For [`PerfEventConfig::HwCache`], this value is shifted left by 8 bits before being
/// OR-ed into the event `config`.
#[repr(u64)]
#[doc(alias = "perf_hw_cache_op_id")]
#[derive(Debug, Clone, Copy)]
pub enum HwCacheOp {
/// Read access.
#[doc(alias = "PERF_COUNT_HW_CACHE_OP_READ")]
Read = perf_hw_cache_op_id::PERF_COUNT_HW_CACHE_OP_READ as u64,
/// Write access.
#[doc(alias = "PERF_COUNT_HW_CACHE_OP_WRITE")]
Write = perf_hw_cache_op_id::PERF_COUNT_HW_CACHE_OP_WRITE as u64,
/// Prefetch access.
#[doc(alias = "PERF_COUNT_HW_CACHE_OP_PREFETCH")]
Prefetch = perf_hw_cache_op_id::PERF_COUNT_HW_CACHE_OP_PREFETCH as u64,
}

/// The hardware CPU cache result.
///
/// Each variant's discriminant is the matching `perf_hw_cache_op_result_id` constant widened
/// to `u64`. For [`PerfEventConfig::HwCache`], this value is shifted left by 16 bits before
/// being OR-ed into the event `config`.
#[repr(u64)]
#[doc(alias = "perf_hw_cache_op_result_id")]
#[derive(Debug, Clone, Copy)]
pub enum HwCacheResult {
/// Cache accesses.
#[doc(alias = "PERF_COUNT_HW_CACHE_RESULT_ACCESS")]
Access = perf_hw_cache_op_result_id::PERF_COUNT_HW_CACHE_RESULT_ACCESS as u64,
/// Cache misses.
#[doc(alias = "PERF_COUNT_HW_CACHE_RESULT_MISS")]
Miss = perf_hw_cache_op_result_id::PERF_COUNT_HW_CACHE_RESULT_MISS as u64,
}

/// Sample Policy
Expand Down Expand Up @@ -100,19 +276,21 @@ pub enum PerfEventScope {
/// # #[error(transparent)]
/// # Ebpf(#[from] aya::EbpfError)
/// # }
/// # let mut bpf = aya::Ebpf::load(&[])?;
/// use aya::util::online_cpus;
/// use aya::programs::perf_event::{
/// perf_sw_ids::PERF_COUNT_SW_CPU_CLOCK, PerfEvent, PerfEventScope, PerfTypeId, SamplePolicy,
/// use aya::{
/// util::online_cpus,
/// programs::perf_event::{
/// PerfEvent, PerfEventConfig, PerfEventScope, SamplePolicy, SoftwareEvent,
/// },
/// };
///
/// # let mut bpf = aya::Ebpf::load(&[])?;
/// let prog: &mut PerfEvent = bpf.program_mut("observe_cpu_clock").unwrap().try_into()?;
/// prog.load()?;
///
/// let perf_type = PerfEventConfig::Software(SoftwareEvent::CpuClock);
/// for cpu in online_cpus()? {
/// prog.attach(
/// PerfTypeId::Software,
/// PERF_COUNT_SW_CPU_CLOCK as u64,
/// perf_type.clone(),
/// PerfEventScope::AllProcessesOneCpu { cpu },
/// SamplePolicy::Period(1000000),
/// true,
Expand All @@ -134,25 +312,38 @@ impl PerfEvent {

/// Attaches to the given perf event.
///
/// The possible values and encoding of the `config` argument depends on the
/// `perf_type`. See `perf_sw_ids`, `perf_hw_id`, `perf_hw_cache_id`,
/// `perf_hw_cache_op_id` and `perf_hw_cache_op_result_id`.
///
/// The `scope` argument determines which processes are sampled. If `inherit`
/// is true, any new processes spawned by those processes will also
/// The [`perf_type`](PerfEventConfig) defines the event `type` and `config` of interest.
/// The [`scope`](PerfEventScope) argument determines which processes are sampled.
/// If `inherit` is `true`, any new processes spawned by those processes will also
/// automatically get sampled.
///
/// The returned value can be used to detach, see [PerfEvent::detach].
pub fn attach(
&mut self,
perf_type: PerfTypeId,
config: u64,
perf_type: PerfEventConfig,
scope: PerfEventScope,
sample_policy: SamplePolicy,
inherit: bool,
) -> Result<PerfEventLinkId, ProgramError> {
let prog_fd = self.fd()?;
let prog_fd = prog_fd.as_fd();

let (event_type, config) = match perf_type {
PerfEventConfig::Hardware(hw_event) => (PERF_TYPE_HARDWARE as u32, hw_event as u64),
PerfEventConfig::Software(sw_event) => (PERF_TYPE_SOFTWARE as u32, sw_event as u64),
PerfEventConfig::TracePoint { event_id } => (PERF_TYPE_TRACEPOINT as u32, event_id),
PerfEventConfig::HwCache {
event,
operation,
result,
} => (
PERF_TYPE_HW_CACHE as u32,
(event as u64) | ((operation as u64) << 8) | ((result as u64) << 16),
),
PerfEventConfig::Raw { event_id } => (PERF_TYPE_RAW as u32, event_id),
PerfEventConfig::Breakpoint => (PERF_TYPE_BREAKPOINT as u32, 0),
PerfEventConfig::Pmu { pmu_type, config } => (pmu_type, config),
};
let (sample_period, sample_frequency) = match sample_policy {
SamplePolicy::Period(period) => (period, None),
SamplePolicy::Frequency(frequency) => (0, Some(frequency)),
Expand All @@ -165,7 +356,7 @@ impl PerfEvent {
PerfEventScope::AllProcessesOneCpu { cpu } => (-1, cpu as i32),
};
let fd = perf_event_open(
perf_type as u32,
event_type,
config,
pid,
cpu,
Expand Down
Loading

0 comments on commit dec30be

Please sign in to comment.