Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use a sharded symbol interner #89073

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -4036,6 +4036,7 @@ version = "0.1.0"
dependencies = [
"proc-macro2",
"quote",
"rustc-hash",
"syn",
"synstructure",
]
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_macros/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ proc-macro = true
synstructure = "0.12.1"
syn = { version = "1", features = ["full"] }
proc-macro2 = "1"
rustc-hash = "1.1.0"
quote = "1"
73 changes: 52 additions & 21 deletions compiler/rustc_macros/src/symbols.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,9 @@
//! ```

use proc_macro2::{Span, TokenStream};
use quote::quote;
use quote::{quote, ToTokens};
use std::collections::HashMap;
use std::hash::{Hash, Hasher};
use syn::parse::{Parse, ParseStream, Result};
use syn::{braced, punctuated::Punctuated, Ident, LitStr, Token};

Expand Down Expand Up @@ -100,6 +101,46 @@ impl Errors {
}
}

struct PrefillStream {
prefill: [Vec<String>; 256],
}

impl PrefillStream {
fn new() -> Self {
const EMPTY_VEC: Vec<String> = Vec::new();
PrefillStream { prefill: [EMPTY_VEC; 256] }
}

fn add_symbol(&mut self, symbol: String) -> u32 {
// Warning: hasher has to be kept in sync with rustc_span::symbols to ensure that all
// pre-filled symbols are assigned the correct shard.
let mut state = rustc_hash::FxHasher::default();
symbol.hash(&mut state);
let hash = state.finish();

let shard = (hash & 0xff) as usize;
let index = self.prefill[shard].len();

self.prefill[shard].push(symbol);

(index << 8) as u32 | shard as u32
}
}

impl ToTokens for PrefillStream {
fn to_tokens(&self, tokens: &mut TokenStream) {
for shard in &self.prefill {
let mut shard_stream = quote! {};
for symbol in shard {
shard_stream.extend(quote! { #symbol, });
}
tokens.extend(quote! {
&[#shard_stream],
});
}
}
}

pub fn symbols(input: TokenStream) -> TokenStream {
let (mut output, errors) = symbols_with_errors(input);

Expand All @@ -126,8 +167,8 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec<syn::Error>) {

let mut keyword_stream = quote! {};
let mut symbols_stream = quote! {};
let mut prefill_stream = quote! {};
let mut counter = 0u32;
let mut digit_stream = quote! {};
let mut prefill_stream = PrefillStream::new();
let mut keys =
HashMap::<String, Span>::with_capacity(input.keywords.len() + input.symbols.len() + 10);
let mut prev_key: Option<(Span, String)> = None;
Expand Down Expand Up @@ -157,13 +198,10 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec<syn::Error>) {
let value = &keyword.value;
let value_string = value.value();
check_dup(keyword.name.span(), &value_string, &mut errors);
prefill_stream.extend(quote! {
#value,
});
let sym = prefill_stream.add_symbol(value_string);
keyword_stream.extend(quote! {
pub const #name: Symbol = Symbol::new(#counter);
pub const #name: Symbol = Symbol::new(#sym);
});
counter += 1;
}

// Generate the listed symbols.
Expand All @@ -176,30 +214,21 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec<syn::Error>) {
check_dup(symbol.name.span(), &value, &mut errors);
check_order(symbol.name.span(), &name.to_string(), &mut errors);

prefill_stream.extend(quote! {
#value,
});
let sym = prefill_stream.add_symbol(value);
symbols_stream.extend(quote! {
pub const #name: Symbol = Symbol::new(#counter);
pub const #name: Symbol = Symbol::new(#sym);
});
counter += 1;
}

// Generate symbols for the strings "0", "1", ..., "9".
let digits_base = counter;
counter += 10;
for n in 0..10 {
let n = n.to_string();
check_dup(Span::call_site(), &n, &mut errors);
prefill_stream.extend(quote! {
#n,
});
let sym = prefill_stream.add_symbol(n);
digit_stream.extend(quote! { Symbol::new(#sym), });
}
let _ = counter; // for future use

let output = quote! {
const SYMBOL_DIGITS_BASE: u32 = #digits_base;

#[doc(hidden)]
#[allow(non_upper_case_globals)]
mod kw_generated {
Expand All @@ -214,6 +243,8 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec<syn::Error>) {
#symbols_stream
}

const SYMBOL_DIGITS: [Symbol; 10] = [#digit_stream];

impl Interner {
pub(crate) fn fresh() -> Self {
Interner::prefill(&[
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_span/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#![doc(html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/")]
#![feature(array_windows)]
#![feature(crate_visibility_modifier)]
#![feature(hash_raw_entry)]
#![feature(if_let_guard)]
#![feature(negative_impls)]
#![feature(nll)]
Expand Down
60 changes: 41 additions & 19 deletions compiler/rustc_span/src/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@
//! type, and vice versa.

use rustc_arena::DroplessArena;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::fx::{FxHashMap, FxHasher};
use rustc_data_structures::stable_hasher::{HashStable, StableHasher, ToStableHashKey};
use rustc_data_structures::sync::Lock;
use rustc_macros::HashStable_Generic;
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};

use std::cmp::{Ord, PartialEq, PartialOrd};
use std::collections::hash_map::RawEntryMut;
use std::fmt;
use std::hash::{Hash, Hasher};
use std::str;
Expand Down Expand Up @@ -1698,8 +1699,7 @@ impl<CTX> ToStableHashKey<CTX> for Symbol {
}
}

#[derive(Default)]
pub(crate) struct Interner(Lock<InternerInner>);
pub(crate) struct Interner([Lock<InternerShard>; 256]);

// The `&'static str`s in this type actually point into the arena.
//
Expand All @@ -1710,46 +1710,68 @@ pub(crate) struct Interner(Lock<InternerInner>);
// This type is private to prevent accidentally constructing more than one `Interner` on the same
// thread, which makes it easy to mixup `Symbol`s between `Interner`s.
#[derive(Default)]
struct InternerInner {
struct InternerShard {
arena: DroplessArena,
names: FxHashMap<&'static str, Symbol>,
strings: Vec<&'static str>,
}

impl Interner {
fn prefill(init: &[&'static str]) -> Self {
Interner(Lock::new(InternerInner {
strings: init.into(),
names: init.iter().copied().zip((0..).map(Symbol::new)).collect(),
..Default::default()
#[cfg(test)]
fn empty_for_test() -> Self {
Self([(); 256].map(|()| Lock::new(InternerShard::default())))
}

fn prefill(init: &[&[&'static str]; 256]) -> Self {
let mut i = 0;
Interner(init.map(|init| {
let shard = Lock::new(InternerShard {
strings: init.into(),
names: init
.iter()
.copied()
.zip((0..).map(|idx| Symbol::new(idx << 8 | i)))
.collect(),
..Default::default()
});
i += 1;
shard
}))
}

#[inline]
fn intern(&self, string: &str) -> Symbol {
let mut inner = self.0.lock();
if let Some(&name) = inner.names.get(string) {
return name;
}
// Warning: hasher has to be kept in sync with rustc_macros::symbols to ensure that all
// pre-filled symbols are assigned the correct shard.
let mut state = FxHasher::default();
string.hash(&mut state);
let hash = state.finish();

let mut shard = self.0[(hash & 0xff) as usize].lock();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't you hash >> 8 or similar here? Otherwise the hashmap is going to be colliding constantly since the low bits are used to index into the backing storage, IIRC.

Copy link
Member Author

@bjorn3 bjorn3 Sep 18, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The symbol integer is created as index_within_shard << 8 | shard_num. The shard num is determined by hash & 0xff.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe I'm missing something, but shard.names.raw_entry_mut().from_key_hashed_nocheck(hash, string) looks like it's using just the plain hash, not shifting based on the sharding. I think the low bits there will always be the same, which seems like a problem.

let shard = &mut *shard;
let vac = match shard.names.raw_entry_mut().from_key_hashed_nocheck(hash, string) {
RawEntryMut::Occupied(occ) => return *occ.get(),
RawEntryMut::Vacant(vac) => vac,
};

let name = Symbol::new(inner.strings.len() as u32);
let name = Symbol::new(((shard.strings.len() as u32) << 8) | (hash as u32 & 0xff));

// `from_utf8_unchecked` is safe since we just allocated a `&str` which is known to be
// UTF-8.
let string: &str =
unsafe { str::from_utf8_unchecked(inner.arena.alloc_slice(string.as_bytes())) };
unsafe { str::from_utf8_unchecked(shard.arena.alloc_slice(string.as_bytes())) };
// It is safe to extend the arena allocation to `'static` because we only access
// these while the arena is still alive.
let string: &'static str = unsafe { &*(string as *const str) };
inner.strings.push(string);
inner.names.insert(string, name);
shard.strings.push(string);
vac.insert(string, name);
name
}

// Get the symbol as a string. `Symbol::as_str()` should be used in
// preference to this function.
fn get(&self, symbol: Symbol) -> &str {
self.0.lock().strings[symbol.0.as_usize()]
self.0[symbol.0.as_usize() & 0xff].lock().strings[symbol.0.as_usize() >> 8]
}
}

Expand Down Expand Up @@ -1784,7 +1806,7 @@ pub mod sym {
pub fn integer<N: TryInto<usize> + Copy + ToString>(n: N) -> Symbol {
if let Result::Ok(idx) = n.try_into() {
if idx < 10 {
return Symbol::new(super::SYMBOL_DIGITS_BASE + idx as u32);
return super::SYMBOL_DIGITS[idx];
}
}
Symbol::intern(&n.to_string())
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_span/src/symbol/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::create_default_session_globals_then;

#[test]
fn interner_tests() {
let i = Interner::default();
let i = Interner::empty_for_test();
// first one is zero:
assert_eq!(i.intern("dog"), Symbol::new(0));
// re-use gets the same entry:
Expand Down