Skip to content

Commit

Permalink
perf(s2n-quic-dc): Optimize path secret Entry size (#2329)
Browse files Browse the repository at this point in the history
* Start tracking size of Entry

* Add init bench for hmac keys

* Remove caching of hmac control key

This shrinks the Entry state by ~700 bytes.
  • Loading branch information
Mark-Simulacrum authored Sep 27, 2024
1 parent c561869 commit 91f1606
Show file tree
Hide file tree
Showing 8 changed files with 170 additions and 8 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ jobs:
- rust: stable
os: ubuntu-latest
target: native
env: S2N_QUIC_PLATFORM_FEATURES_OVERRIDE="mtu_disc,pktinfo,tos,socket_msg"
env: S2N_QUIC_PLATFORM_FEATURES_OVERRIDE="mtu_disc,pktinfo,tos,socket_msg" >> $GITHUB_ENV; echo S2N_QUIC_RUN_VERSION_SPECIFIC_TESTS=1
steps:
- uses: ilammy/setup-nasm@v1
- uses: actions/checkout@v4
Expand Down
2 changes: 2 additions & 0 deletions dc/s2n-quic-dc/benches/src/crypto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@ use criterion::Criterion;

pub mod encrypt;
pub mod hkdf;
pub mod hmac;

pub fn benchmarks(c: &mut Criterion) {
encrypt::benchmarks(c);
hkdf::benchmarks(c);
hmac::benchmarks(c);
}
32 changes: 32 additions & 0 deletions dc/s2n-quic-dc/benches/src/crypto/hmac.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

use criterion::{black_box, Criterion, Throughput};

pub fn benchmarks(c: &mut Criterion) {
init(c);
}

fn init(c: &mut Criterion) {
let mut group = c.benchmark_group("crypto/hmac/init");

group.throughput(Throughput::Elements(1));

let algs = [
("sha256", aws_lc_rs::hmac::HMAC_SHA256),
("sha384", aws_lc_rs::hmac::HMAC_SHA384),
("sha512", aws_lc_rs::hmac::HMAC_SHA512),
];

for (alg_name, alg) in algs {
group.bench_function(format!("{alg_name}_init"), |b| {
let key_len = aws_lc_rs::hkdf::KeyType::len(&alg);
let mut key = vec![0u8; key_len];
aws_lc_rs::rand::fill(&mut key).unwrap();
let key = black_box(&key);
b.iter(move || {
let _ = black_box(aws_lc_rs::hmac::Key::new(alg, &key));
});
});
}
}
56 changes: 54 additions & 2 deletions dc/s2n-quic-dc/src/path/secret/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -437,7 +437,7 @@ impl Map {

match packet {
control::Packet::StaleKey(packet) => {
let Some(packet) = packet.authenticate(key) else {
let Some(packet) = packet.authenticate(&key) else {
return;
};
state.mark_live(self.state.cleaner.epoch());
Expand All @@ -447,7 +447,7 @@ impl Map {
.fetch_add(1, Ordering::Relaxed);
}
control::Packet::ReplayDetected(packet) => {
let Some(_packet) = packet.authenticate(key) else {
let Some(_packet) = packet.authenticate(&key) else {
return;
};
self.state
Expand Down Expand Up @@ -673,6 +673,58 @@ pub(super) struct Entry {
parameters: ApplicationParams,
}

impl SizeOf for Instant {}
impl SizeOf for u32 {}
impl SizeOf for SocketAddr {}
impl SizeOf for AtomicU64 {}

impl SizeOf for IsRetired {}
impl SizeOf for ApplicationParams {}

impl SizeOf for Entry {
fn size(&self) -> usize {
let Entry {
creation_time,
rehandshake_delta_secs,
peer,
secret,
retired,
used_at,
sender,
receiver,
parameters,
} = self;
creation_time.size()
+ rehandshake_delta_secs.size()
+ peer.size()
+ secret.size()
+ retired.size()
+ used_at.size()
+ sender.size()
+ receiver.size()
+ parameters.size()
}
}

/// Provide an approximation of the size of Self, including any heap indirection (e.g., a vec
/// backed by a megabyte is a megabyte in `size`, not 24 bytes).
///
/// Approximation because we don't currently attempt to account for (as an example) padding. It's
/// too annoying to do that.
#[cfg_attr(not(test), allow(unused))]
pub(crate) trait SizeOf: Sized {
fn size(&self) -> usize {
// If we don't need drop, it's very likely that this type is fully contained in size_of
// Self. This simplifies implementing this trait for e.g. std types.
assert!(
!std::mem::needs_drop::<Self>(),
"{:?} requires custom SizeOf impl",
std::any::type_name::<Self>()
);
std::mem::size_of::<Self>()
}
}

// Retired is 0 if not yet retired. Otherwise it stores the background cleaner epoch at which it
// retired; that epoch increments roughly once per minute.
#[derive(Default)]
Expand Down
9 changes: 9 additions & 0 deletions dc/s2n-quic-dc/src/path/secret/map/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -308,3 +308,12 @@ fn check_invariants_no_overflow() {
}
})
}

#[test]
#[cfg(all(target_pointer_width = "64", target_os = "linux"))]
fn entry_size() {
// This gates to running only on specific GHA to reduce false positives.
if std::env::var("S2N_QUIC_RUN_VERSION_SPECIFIC_TESTS").is_ok() {
assert_eq!(fake_entry(0).size(), 350);
}
}
32 changes: 32 additions & 0 deletions dc/s2n-quic-dc/src/path/secret/receiver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,38 @@ pub struct State {
shared: Option<Arc<Shared>>,
}

impl super::map::SizeOf for Mutex<SlidingWindow> {
fn size(&self) -> usize {
// If we don't need drop, it's very likely that this type is fully contained in size_of
// Self. This simplifies implementing this trait for e.g. std types.
//
// Mutex on macOS (at least) has a more expensive, pthread-based impl that allocates. But
// on Linux there's no extra allocation.
if cfg!(target_os = "linux") {
assert!(
!std::mem::needs_drop::<Self>(),
"{:?} requires custom SizeOf impl",
std::any::type_name::<Self>()
);
}
std::mem::size_of::<Self>()
}
}

impl super::map::SizeOf for State {
fn size(&self) -> usize {
let State {
min_key_id,
max_seen_key_id,
seen,
shared,
} = self;
// shared is shared across all State's (effectively) so we don't currently account for that
// allocation.
min_key_id.size() + max_seen_key_id.size() + seen.size() + std::mem::size_of_val(shared)
}
}

#[derive(Clone, Copy, Debug, PartialEq, Eq, thiserror::Error)]
pub enum Error {
/// This indicates that we know about this element and it *definitely* already exists.
Expand Down
24 changes: 24 additions & 0 deletions dc/s2n-quic-dc/src/path/secret/schedule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,30 @@ pub struct Secret {
ciphersuite: Ciphersuite,
}

impl super::map::SizeOf for Id {}
impl super::map::SizeOf for Prk {
fn size(&self) -> usize {
// FIXME: maybe don't use this type since it has overhead and needs this weird assert to
// check the mode?
assert!(format!("{:?}", self).contains("mode: Expand"));
std::mem::size_of::<Self>()
}
}
impl super::map::SizeOf for endpoint::Type {}
impl super::map::SizeOf for Ciphersuite {}

impl super::map::SizeOf for Secret {
fn size(&self) -> usize {
let Secret {
id,
prk,
endpoint,
ciphersuite,
} = self;
id.size() + prk.size() + endpoint.size() + ciphersuite.size()
}
}

impl Secret {
#[inline]
pub fn new(
Expand Down
21 changes: 16 additions & 5 deletions dc/s2n-quic-dc/src/path/secret/sender.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

use super::schedule;
use crate::{crypto::awslc::open, packet::secret_control};
use once_cell::sync::OnceCell;
use s2n_quic_core::varint::VarInt;
use std::sync::atomic::{AtomicU64, Ordering};

Expand All @@ -13,15 +12,25 @@ type StatelessReset = [u8; secret_control::TAG_LEN];
pub struct State {
current_id: AtomicU64,
pub(super) stateless_reset: StatelessReset,
control_secret: OnceCell<open::control::Secret>,
}

impl super::map::SizeOf for StatelessReset {}

impl super::map::SizeOf for State {
fn size(&self) -> usize {
let State {
current_id,
stateless_reset,
} = self;
current_id.size() + stateless_reset.size()
}
}

impl State {
pub fn new(stateless_reset: StatelessReset) -> Self {
Self {
current_id: AtomicU64::new(0),
stateless_reset,
control_secret: Default::default(),
}
}

Expand All @@ -47,8 +56,10 @@ impl State {
}

#[inline]
pub fn control_secret(&self, secret: &schedule::Secret) -> &open::control::Secret {
self.control_secret.get_or_init(|| secret.control_opener())
pub fn control_secret(&self, secret: &schedule::Secret) -> open::control::Secret {
// We don't try to cache this, hmac init is cheap (~200-600ns depending on algorithm) and
// the space requirement is huge (700+ bytes)
secret.control_opener()
}

/// Update the sender for a received stale key packet.
Expand Down

0 comments on commit 91f1606

Please sign in to comment.