Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a dedicated length-prefixing method to Hasher #94598

Merged
merged 2 commits into from
May 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions compiler/rustc_data_structures/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#![feature(generators)]
#![feature(let_else)]
#![feature(hash_raw_entry)]
#![feature(hasher_prefixfree_extras)]
#![feature(maybe_uninit_uninit_array)]
#![feature(min_specialization)]
#![feature(never_type)]
Expand Down
8 changes: 8 additions & 0 deletions compiler/rustc_data_structures/src/sip128.rs
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,14 @@ impl Hasher for SipHasher128 {
self.slice_write(msg);
}

#[inline]
fn write_str(&mut self, s: &str) {
    // A `str` is UTF-8 and therefore never contains the byte `0xFF`. Since
    // this hasher consumes its input byte-wise, one trailing `0xFF` is
    // enough of a terminator to keep the encoding prefix-free.
    let bytes = s.as_bytes();
    self.write(bytes);
    self.write_u8(u8::MAX);
}

// Always panics: per the message below, this hasher cannot provide a valid
// 64-bit hash, so `Hasher::finish` must not be used on it.
fn finish(&self) -> u64 {
panic!("SipHasher128 cannot provide valid 64 bit hashes")
}
Expand Down
11 changes: 11 additions & 0 deletions compiler/rustc_data_structures/src/stable_hasher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,17 @@ impl Hasher for StableHasher {
self.state.write(bytes);
}

// Hashes a `str` by handing it to the wrapped `state` hasher's `write_str`,
// so the string encoding is decided by that hasher rather than here.
#[inline]
fn write_str(&mut self, s: &str) {
self.state.write_str(s);
}

// Hashes a collection length used as a domain separator by routing it
// through this hasher's own `write_usize`.
#[inline]
fn write_length_prefix(&mut self, len: usize) {
// Our impl for `usize` will extend it if needed.
self.write_usize(len);
}

#[inline]
fn write_u8(&mut self, i: u8) {
self.state.write_u8(i);
Expand Down
6 changes: 6 additions & 0 deletions library/alloc/src/boxed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1369,6 +1369,12 @@ impl<T: ?Sized + Hasher, A: Allocator> Hasher for Box<T, A> {
// Forwards to the boxed hasher: `**self` reaches through the `&mut` and the
// `Box` to the inner `T`.
fn write_isize(&mut self, i: isize) {
(**self).write_isize(i)
}
// Forwards the length prefix to the boxed hasher's `write_length_prefix`.
fn write_length_prefix(&mut self, len: usize) {
(**self).write_length_prefix(len)
}
// Forwards string hashing to the boxed hasher's `write_str`.
fn write_str(&mut self, s: &str) {
(**self).write_str(s)
}
}

#[cfg(not(no_global_oom_handling))]
Expand Down
2 changes: 1 addition & 1 deletion library/alloc/src/collections/btree/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1990,7 +1990,7 @@ impl<'a, K: Ord + Copy, V: Copy> Extend<(&'a K, &'a V)> for BTreeMap<K, V> {
#[stable(feature = "rust1", since = "1.0.0")]
impl<K: Hash, V: Hash> Hash for BTreeMap<K, V> {
fn hash<H: Hasher>(&self, state: &mut H) {
self.len().hash(state);
state.write_length_prefix(self.len());
for elt in self {
elt.hash(state);
}
Expand Down
2 changes: 1 addition & 1 deletion library/alloc/src/collections/linked_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1944,7 +1944,7 @@ impl<T: fmt::Debug> fmt::Debug for LinkedList<T> {
#[stable(feature = "rust1", since = "1.0.0")]
impl<T: Hash> Hash for LinkedList<T> {
fn hash<H: Hasher>(&self, state: &mut H) {
self.len().hash(state);
state.write_length_prefix(self.len());
for elt in self {
elt.hash(state);
}
Expand Down
2 changes: 1 addition & 1 deletion library/alloc/src/collections/vec_deque/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2899,7 +2899,7 @@ impl<T: Ord, A: Allocator> Ord for VecDeque<T, A> {
#[stable(feature = "rust1", since = "1.0.0")]
impl<T: Hash, A: Allocator> Hash for VecDeque<T, A> {
fn hash<H: Hasher>(&self, state: &mut H) {
self.len().hash(state);
state.write_length_prefix(self.len());
// It's not possible to use Hash::hash_slice on slices
// returned by as_slices method as their length can vary
// in otherwise identical deques.
Expand Down
1 change: 1 addition & 0 deletions library/alloc/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@
#![feature(extend_one)]
#![feature(fmt_internals)]
#![feature(fn_traits)]
#![feature(hasher_prefixfree_extras)]
#![feature(inplace_iteration)]
#![feature(iter_advance_by)]
#![feature(layout_for_ptr)]
Expand Down
138 changes: 135 additions & 3 deletions library/core/src/hash/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,12 @@ pub trait Hasher {
///
/// println!("Hash is {:x}!", hasher.finish());
/// ```
///
/// # Note to Implementers
///
/// You generally should not do length-prefixing as part of implementing
/// this method. It's up to the [`Hash`] implementation to call
/// [`Hasher::write_length_prefix`] before sequences that need it.
#[stable(feature = "rust1", since = "1.0.0")]
fn write(&mut self, bytes: &[u8]);

Expand Down Expand Up @@ -409,6 +415,127 @@ pub trait Hasher {
fn write_isize(&mut self, i: isize) {
    // Cast to the unsigned type of the same width and reuse the `usize`
    // path.
    let bits = i as usize;
    self.write_usize(bits)
}

/// Writes a length prefix into this hasher, as part of being prefix-free.
///
/// If you're implementing [`Hash`] for a custom collection, call this before
/// writing its contents to this `Hasher`. That way
/// `(collection![1, 2, 3], collection![4, 5])` and
/// `(collection![1, 2], collection![3, 4, 5])` will provide different
/// sequences of values to the `Hasher`.
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lost a full-stop .

///
/// The `impl<T> Hash for [T]` includes a call to this method, so if you're
/// hashing a slice (or array or vector) via its `Hash::hash` method,
/// you should **not** call this yourself.
///
/// This method is only for providing domain separation. If you want to
/// hash a `usize` that represents part of the *data*, then it's important
/// that you pass it to [`Hasher::write_usize`] instead of to this method.
///
/// # Examples
///
/// ```
/// #![feature(hasher_prefixfree_extras)]
/// # // Stubs to make the `impl` below pass the compiler
/// # struct MyCollection<T>(Option<T>);
/// # impl<T> MyCollection<T> {
/// # fn len(&self) -> usize { todo!() }
/// # }
/// # impl<'a, T> IntoIterator for &'a MyCollection<T> {
/// # type Item = T;
/// # type IntoIter = std::iter::Empty<T>;
/// # fn into_iter(self) -> Self::IntoIter { todo!() }
/// # }
///
/// use std::hash::{Hash, Hasher};
/// impl<T: Hash> Hash for MyCollection<T> {
/// fn hash<H: Hasher>(&self, state: &mut H) {
/// state.write_length_prefix(self.len());
/// for elt in self {
/// elt.hash(state);
/// }
/// }
/// }
/// ```
///
/// # Note to Implementers
///
/// If you've decided that your `Hasher` is willing to be susceptible to
/// Hash-DoS attacks, then you might consider skipping hashing some or all
/// of the `len` provided in the name of increased performance.
#[inline]
#[unstable(feature = "hasher_prefixfree_extras", issue = "96762")]
fn write_length_prefix(&mut self, len: usize) {
// Default behaviour: hash the whole length through `write_usize`.
self.write_usize(len);
}

/// Writes a single `str` into this hasher.
///
/// If you're implementing [`Hash`], you generally do not need to call this,
/// as the `impl Hash for str` does, so you should prefer that instead.
///
/// This includes the domain separator for prefix-freedom, so you should
/// **not** call `Self::write_length_prefix` before calling this.
///
/// # Note to Implementers
///
/// There are at least two reasonable default ways to implement this.
/// Which one will be the default is not yet decided, so for now
/// you probably want to override it specifically.
///
/// ## The general answer
///
/// It's always correct to implement this with a length prefix:
///
/// ```
/// # #![feature(hasher_prefixfree_extras)]
/// # struct Foo;
/// # impl std::hash::Hasher for Foo {
/// # fn finish(&self) -> u64 { unimplemented!() }
/// # fn write(&mut self, _bytes: &[u8]) { unimplemented!() }
/// fn write_str(&mut self, s: &str) {
/// self.write_length_prefix(s.len());
/// self.write(s.as_bytes());
/// }
/// # }
/// ```
///
/// And, if your `Hasher` works in `usize` chunks, this is likely a very
/// efficient way to do it, as anything more complicated may well end up
/// slower than just running the round with the length.
///
/// ## If your `Hasher` works byte-wise
///
/// One nice thing about `str` being UTF-8 is that the `b'\xFF'` byte
/// never happens. That means that you can append that to the byte stream
/// being hashed and maintain prefix-freedom:
///
/// ```
/// # #![feature(hasher_prefixfree_extras)]
/// # struct Foo;
/// # impl std::hash::Hasher for Foo {
/// # fn finish(&self) -> u64 { unimplemented!() }
/// # fn write(&mut self, _bytes: &[u8]) { unimplemented!() }
/// fn write_str(&mut self, s: &str) {
/// self.write(s.as_bytes());
/// self.write_u8(0xff);
/// }
/// # }
/// ```
///
/// This does require that your implementation not add extra padding, and
/// thus generally requires that you maintain a buffer, running a round
/// only once that buffer is full (or `finish` is called).
///
/// That's because if `write` pads data out to a fixed chunk size, it's
/// likely that it does it in such a way that `"a"` and `"a\x00"` would
/// end up hashing the same sequence of things, introducing conflicts.
#[inline]
#[unstable(feature = "hasher_prefixfree_extras", issue = "96762")]
fn write_str(&mut self, s: &str) {
    // Default encoding: the string's UTF-8 bytes followed by a single
    // `0xff` byte, which never occurs inside a `str`.
    let utf8 = s.as_bytes();
    self.write(utf8);
    self.write_u8(0xff);
}
}

#[stable(feature = "indirect_hasher_impl", since = "1.22.0")]
Expand Down Expand Up @@ -455,6 +582,12 @@ impl<H: Hasher + ?Sized> Hasher for &mut H {
// Forwards to the referenced hasher: `**self` reaches through both `&mut`
// layers to the underlying `H`.
fn write_isize(&mut self, i: isize) {
(**self).write_isize(i)
}
// Forwards the length prefix to the referenced hasher's
// `write_length_prefix`.
fn write_length_prefix(&mut self, len: usize) {
(**self).write_length_prefix(len)
}
// Forwards string hashing to the referenced hasher's `write_str`.
fn write_str(&mut self, s: &str) {
(**self).write_str(s)
}
}

/// A trait for creating instances of [`Hasher`].
Expand Down Expand Up @@ -709,8 +842,7 @@ mod impls {
impl Hash for str {
    // Hashes the string through `Hasher::write_str`, which lets each hasher
    // choose its own prefix-free string encoding. The earlier hard-coded
    // form (raw bytes followed by `0xff`) is superseded by this call and
    // must not be kept alongside it, or the string would be hashed twice.
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        state.write_str(self);
    }
}

Expand Down Expand Up @@ -767,7 +899,7 @@ mod impls {
impl<T: Hash> Hash for [T] {
    // Hashes a slice as a length prefix followed by its elements. The length
    // goes through `Hasher::write_length_prefix` only; the earlier
    // `self.len().hash(state)` form is superseded by that call, and keeping
    // both would hash the length twice.
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        state.write_length_prefix(self.len());
        Hash::hash_slice(self, state)
    }
}
Expand Down
18 changes: 18 additions & 0 deletions library/core/src/hash/sip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,11 @@ impl super::Hasher for SipHasher {
self.0.hasher.write(msg)
}

// Forwards string hashing to the inner `hasher` of the wrapped value.
#[inline]
fn write_str(&mut self, s: &str) {
self.0.hasher.write_str(s);
}

#[inline]
fn finish(&self) -> u64 {
self.0.hasher.finish()
Expand All @@ -246,6 +251,11 @@ impl super::Hasher for SipHasher13 {
self.hasher.write(msg)
}

// Forwards string hashing to the inner `hasher` field.
#[inline]
fn write_str(&mut self, s: &str) {
self.hasher.write_str(s);
}

#[inline]
fn finish(&self) -> u64 {
self.hasher.finish()
Expand Down Expand Up @@ -307,6 +317,14 @@ impl<S: Sip> super::Hasher for Hasher<S> {
self.ntail = left;
}

#[inline]
fn write_str(&mut self, s: &str) {
    // A `str` is UTF-8 and therefore never contains the byte `0xFF`. Since
    // this hasher consumes its input byte-wise, one trailing `0xFF` is
    // enough of a terminator to keep the encoding prefix-free.
    let bytes = s.as_bytes();
    self.write(bytes);
    self.write_u8(u8::MAX);
}

#[inline]
fn finish(&self) -> u64 {
let mut state = self.state;
Expand Down
4 changes: 4 additions & 0 deletions library/core/tests/hash/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ impl Hasher for MyHasher {
self.hash += *byte as u64;
}
}
fn write_str(&mut self, s: &str) {
    // String bytes first, then a single `0xFF` terminator byte.
    let bytes = s.as_bytes();
    self.write(bytes);
    self.write_u8(u8::MAX);
}
// Returns the value held in the `hash` field as the final result.
fn finish(&self) -> u64 {
self.hash
}
Expand Down
1 change: 1 addition & 0 deletions library/core/tests/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#![feature(future_join)]
#![feature(future_poll_fn)]
#![feature(array_from_fn)]
#![feature(hasher_prefixfree_extras)]
#![feature(hashmap_internals)]
#![feature(try_find)]
#![feature(inline_const)]
Expand Down
8 changes: 8 additions & 0 deletions library/std/src/collections/hash/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3006,11 +3006,19 @@ impl Default for DefaultHasher {

#[stable(feature = "hashmap_default_hasher", since = "1.13.0")]
impl Hasher for DefaultHasher {
// The underlying `SipHasher13` doesn't override the other
// `write_*` methods, so it's ok not to forward them here.

// Feeds the raw bytes to the wrapped hasher.
#[inline]
fn write(&mut self, msg: &[u8]) {
self.0.write(msg)
}

// Forwards string hashing to the wrapped hasher's `write_str`.
#[inline]
fn write_str(&mut self, s: &str) {
self.0.write_str(s);
}

#[inline]
fn finish(&self) -> u64 {
self.0.finish()
Expand Down
1 change: 1 addition & 0 deletions library/std/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,7 @@
#![feature(exact_size_is_empty)]
#![feature(extend_one)]
#![feature(float_minimum_maximum)]
#![feature(hasher_prefixfree_extras)]
#![feature(hashmap_internals)]
#![feature(int_error_internals)]
#![feature(maybe_uninit_slice)]
Expand Down