From 4304f75d7218ec3537f636e3ff3ccf42f9f1113b Mon Sep 17 00:00:00 2001 From: bjorn3 Date: Sat, 18 Sep 2021 19:19:34 +0200 Subject: [PATCH] Use a sharded symbol interner This should reduce contention when accessing the symbol interner from multiple threads when using parallel rustc. --- Cargo.lock | 1 + compiler/rustc_macros/Cargo.toml | 1 + compiler/rustc_macros/src/symbols.rs | 73 ++++++++++++++++++------- compiler/rustc_span/src/lib.rs | 1 + compiler/rustc_span/src/symbol.rs | 60 +++++++++++++------- compiler/rustc_span/src/symbol/tests.rs | 2 +- 6 files changed, 97 insertions(+), 41 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c9ceb06fa7d7b..64252b50c2c92 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4036,6 +4036,7 @@ version = "0.1.0" dependencies = [ "proc-macro2", "quote", + "rustc-hash", "syn", "synstructure", ] diff --git a/compiler/rustc_macros/Cargo.toml b/compiler/rustc_macros/Cargo.toml index e4dddbab06755..5550be57cf5b2 100644 --- a/compiler/rustc_macros/Cargo.toml +++ b/compiler/rustc_macros/Cargo.toml @@ -10,4 +10,5 @@ proc-macro = true synstructure = "0.12.1" syn = { version = "1", features = ["full"] } proc-macro2 = "1" +rustc-hash = "1.1.0" quote = "1" diff --git a/compiler/rustc_macros/src/symbols.rs b/compiler/rustc_macros/src/symbols.rs index 1b245f2a75060..450371394138e 100644 --- a/compiler/rustc_macros/src/symbols.rs +++ b/compiler/rustc_macros/src/symbols.rs @@ -23,8 +23,9 @@ //! ``` use proc_macro2::{Span, TokenStream}; -use quote::quote; +use quote::{quote, ToTokens}; use std::collections::HashMap; +use std::hash::{Hash, Hasher}; use syn::parse::{Parse, ParseStream, Result}; use syn::{braced, punctuated::Punctuated, Ident, LitStr, Token}; @@ -100,6 +101,46 @@ impl Errors { } } +struct PrefillStream { + prefill: [Vec; 256], +} + +impl PrefillStream { + fn new() -> Self { + const EMPTY_VEC: Vec = Vec::new(); + PrefillStream { prefill: [EMPTY_VEC; 256] } + } + + fn add_symbol(&mut self, symbol: String) -> u32 { + // Warning: hasher has to be kept in sync with rustc_span::symbols to ensure that all + // pre-filled symbols are assigned the correct shard. + let mut state = rustc_hash::FxHasher::default(); + symbol.hash(&mut state); + let hash = state.finish(); + + let shard = (hash & 0xff) as usize; + let index = self.prefill[shard].len(); + + self.prefill[shard].push(symbol); + + (index << 8) as u32 | shard as u32 + } +} + +impl ToTokens for PrefillStream { + fn to_tokens(&self, tokens: &mut TokenStream) { + for shard in &self.prefill { + let mut shard_stream = quote! {}; + for symbol in shard { + shard_stream.extend(quote! { #symbol, }); + } + tokens.extend(quote! { + &[#shard_stream], + }); + } + } +} + pub fn symbols(input: TokenStream) -> TokenStream { let (mut output, errors) = symbols_with_errors(input); @@ -126,8 +167,8 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec) { let mut keyword_stream = quote! {}; let mut symbols_stream = quote! {}; - let mut prefill_stream = quote! {}; - let mut counter = 0u32; + let mut digit_stream = quote! {}; + let mut prefill_stream = PrefillStream::new(); let mut keys = HashMap::::with_capacity(input.keywords.len() + input.symbols.len() + 10); let mut prev_key: Option<(Span, String)> = None; @@ -157,13 +198,10 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec) { let value = &keyword.value; let value_string = value.value(); check_dup(keyword.name.span(), &value_string, &mut errors); - prefill_stream.extend(quote! { - #value, - }); + let sym = prefill_stream.add_symbol(value_string); keyword_stream.extend(quote! { - pub const #name: Symbol = Symbol::new(#counter); + pub const #name: Symbol = Symbol::new(#sym); }); - counter += 1; } // Generate the listed symbols. @@ -176,30 +214,21 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec) { check_dup(symbol.name.span(), &value, &mut errors); check_order(symbol.name.span(), &name.to_string(), &mut errors); - prefill_stream.extend(quote! { - #value, - }); + let sym = prefill_stream.add_symbol(value); symbols_stream.extend(quote! { - pub const #name: Symbol = Symbol::new(#counter); + pub const #name: Symbol = Symbol::new(#sym); }); - counter += 1; } // Generate symbols for the strings "0", "1", ..., "9". - let digits_base = counter; - counter += 10; for n in 0..10 { let n = n.to_string(); check_dup(Span::call_site(), &n, &mut errors); - prefill_stream.extend(quote! { - #n, - }); + let sym = prefill_stream.add_symbol(n); + digit_stream.extend(quote! { Symbol::new(#sym), }); } - let _ = counter; // for future use let output = quote! { - const SYMBOL_DIGITS_BASE: u32 = #digits_base; - #[doc(hidden)] #[allow(non_upper_case_globals)] mod kw_generated { @@ -214,6 +243,8 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec) { #symbols_stream } + const SYMBOL_DIGITS: [Symbol; 10] = [#digit_stream]; + impl Interner { pub(crate) fn fresh() -> Self { Interner::prefill(&[ diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs index 9c5469f635f71..9c77b53053669 100644 --- a/compiler/rustc_span/src/lib.rs +++ b/compiler/rustc_span/src/lib.rs @@ -16,6 +16,7 @@ #![doc(html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/")] #![feature(array_windows)] #![feature(crate_visibility_modifier)] +#![feature(hash_raw_entry)] #![feature(if_let_guard)] #![feature(negative_impls)] #![feature(nll)] diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index 78846d8ffb26b..e7d9947cd0de3 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -3,13 +3,14 @@ //! type, and vice versa. use rustc_arena::DroplessArena; -use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::fx::{FxHashMap, FxHasher}; use rustc_data_structures::stable_hasher::{HashStable, StableHasher, ToStableHashKey}; use rustc_data_structures::sync::Lock; use rustc_macros::HashStable_Generic; use rustc_serialize::{Decodable, Decoder, Encodable, Encoder}; use std::cmp::{Ord, PartialEq, PartialOrd}; +use std::collections::hash_map::RawEntryMut; use std::fmt; use std::hash::{Hash, Hasher}; use std::str; @@ -1698,8 +1699,7 @@ impl ToStableHashKey for Symbol { } } -#[derive(Default)] -pub(crate) struct Interner(Lock); +pub(crate) struct Interner([Lock; 256]); // The `&'static str`s in this type actually point into the arena. // @@ -1710,46 +1710,68 @@ pub(crate) struct Interner(Lock); // This type is private to prevent accidentally constructing more than one `Interner` on the same // thread, which makes it easy to mixup `Symbol`s between `Interner`s. #[derive(Default)] -struct InternerInner { +struct InternerShard { arena: DroplessArena, names: FxHashMap<&'static str, Symbol>, strings: Vec<&'static str>, } impl Interner { - fn prefill(init: &[&'static str]) -> Self { - Interner(Lock::new(InternerInner { - strings: init.into(), - names: init.iter().copied().zip((0..).map(Symbol::new)).collect(), - ..Default::default() + #[cfg(test)] + fn empty_for_test() -> Self { + Self([(); 256].map(|()| Lock::new(InternerShard::default()))) + } + + fn prefill(init: &[&[&'static str]; 256]) -> Self { + let mut i = 0; + Interner(init.map(|init| { + let shard = Lock::new(InternerShard { + strings: init.into(), + names: init + .iter() + .copied() + .zip((0..).map(|idx| Symbol::new(idx << 8 | i))) + .collect(), + ..Default::default() + }); + i += 1; + shard })) } #[inline] fn intern(&self, string: &str) -> Symbol { - let mut inner = self.0.lock(); - if let Some(&name) = inner.names.get(string) { - return name; - } + // Warning: hasher has to be kept in sync with rustc_macros::symbols to ensure that all + // pre-filled symbols are assigned the correct shard. + let mut state = FxHasher::default(); + string.hash(&mut state); + let hash = state.finish(); + + let mut shard = self.0[(hash & 0xff) as usize].lock(); + let shard = &mut *shard; + let vac = match shard.names.raw_entry_mut().from_key_hashed_nocheck(hash, string) { + RawEntryMut::Occupied(occ) => return *occ.get(), + RawEntryMut::Vacant(vac) => vac, + }; - let name = Symbol::new(inner.strings.len() as u32); + let name = Symbol::new(((shard.strings.len() as u32) << 8) | (hash as u32 & 0xff)); // `from_utf8_unchecked` is safe since we just allocated a `&str` which is known to be // UTF-8. let string: &str = - unsafe { str::from_utf8_unchecked(inner.arena.alloc_slice(string.as_bytes())) }; + unsafe { str::from_utf8_unchecked(shard.arena.alloc_slice(string.as_bytes())) }; // It is safe to extend the arena allocation to `'static` because we only access // these while the arena is still alive. let string: &'static str = unsafe { &*(string as *const str) }; - inner.strings.push(string); - inner.names.insert(string, name); + shard.strings.push(string); + vac.insert(string, name); name } // Get the symbol as a string. `Symbol::as_str()` should be used in // preference to this function. fn get(&self, symbol: Symbol) -> &str { - self.0.lock().strings[symbol.0.as_usize()] + self.0[symbol.0.as_usize() & 0xff].lock().strings[symbol.0.as_usize() >> 8] } } @@ -1784,7 +1806,7 @@ pub mod sym { pub fn integer + Copy + ToString>(n: N) -> Symbol { if let Result::Ok(idx) = n.try_into() { if idx < 10 { - return Symbol::new(super::SYMBOL_DIGITS_BASE + idx as u32); + return super::SYMBOL_DIGITS[idx]; } } Symbol::intern(&n.to_string()) diff --git a/compiler/rustc_span/src/symbol/tests.rs b/compiler/rustc_span/src/symbol/tests.rs index 0958fce5fee30..872680587e95e 100644 --- a/compiler/rustc_span/src/symbol/tests.rs +++ b/compiler/rustc_span/src/symbol/tests.rs @@ -4,7 +4,7 @@ use crate::create_default_session_globals_then; #[test] fn interner_tests() { - let i = Interner::default(); + let i = Interner::empty_for_test(); // first one is zero: assert_eq!(i.intern("dog"), Symbol::new(0)); // re-use gets the same entry: