From 9117ea975834a86dadcb9ebbc40dd9a9fb0f78ae Mon Sep 17 00:00:00 2001 From: Michael Woerister Date: Thu, 27 Oct 2022 13:23:26 +0000 Subject: [PATCH] Introduce UnordMap, UnordSet, and UnordBag (see MCP 533) MCP 533: https://github.com/rust-lang/compiler-team/issues/533 Also, as an example, substitute UnordMap for FxHashMap in used_trait_imports query result. --- compiler/rustc_data_structures/src/lib.rs | 2 + compiler/rustc_data_structures/src/unord.rs | 382 ++++++++++++++++++ .../rustc_hir_analysis/src/check_unused.rs | 7 +- compiler/rustc_hir_typeck/src/lib.rs | 4 +- compiler/rustc_middle/src/arena.rs | 2 +- compiler/rustc_middle/src/query/mod.rs | 2 +- compiler/rustc_middle/src/ty/context.rs | 3 +- compiler/rustc_middle/src/ty/query.rs | 1 + .../rustc_query_impl/src/on_disk_cache.rs | 5 +- src/librustdoc/core.rs | 4 +- 10 files changed, 400 insertions(+), 12 deletions(-) create mode 100644 compiler/rustc_data_structures/src/unord.rs diff --git a/compiler/rustc_data_structures/src/lib.rs b/compiler/rustc_data_structures/src/lib.rs index 467ac401d086b..3a2000233c5d1 100644 --- a/compiler/rustc_data_structures/src/lib.rs +++ b/compiler/rustc_data_structures/src/lib.rs @@ -22,6 +22,7 @@ #![feature(new_uninit)] #![feature(once_cell)] #![feature(rustc_attrs)] +#![feature(negative_impls)] #![feature(test)] #![feature(thread_id_value)] #![feature(vec_into_raw_parts)] @@ -86,6 +87,7 @@ pub mod steal; pub mod tagged_ptr; pub mod temp_dir; pub mod unhash; +pub mod unord; pub use ena::undo_log; pub use ena::unify; diff --git a/compiler/rustc_data_structures/src/unord.rs b/compiler/rustc_data_structures/src/unord.rs new file mode 100644 index 0000000000000..c015f1232cd92 --- /dev/null +++ b/compiler/rustc_data_structures/src/unord.rs @@ -0,0 +1,382 @@ +//! This module contains collection types that don't expose their internal +//! ordering. This is a useful property for deterministic computations, such +//! as required by the query system. + +use rustc_hash::{FxHashMap, FxHashSet}; +use smallvec::SmallVec; +use std::{ + borrow::Borrow, + hash::Hash, + iter::{Product, Sum}, +}; + +use crate::{ + fingerprint::Fingerprint, + stable_hasher::{HashStable, StableHasher, ToStableHashKey}, +}; + +/// `UnordItems` is the order-less version of `Iterator`. It only contains methods +/// that don't (easily) expose an ordering of the underlying items. +/// +/// Most methods take an `Fn` where the `Iterator`-version takes an `FnMut`. This +/// is to reduce the risk of accidentally leaking the internal order via the closure +/// environment. Otherwise one could easily do something like +/// +/// ```rust,ignore (pseudo code) +/// let mut ordered = vec![]; +/// unordered_items.all(|x| ordered.push(x)); +/// ``` +/// +/// It's still possible to do the same thing with an `Fn` by using interior mutability, +/// but the chance of doing it accidentally is reduced. +pub struct UnordItems>(I); + +impl> UnordItems { + #[inline] + pub fn map U>(self, f: F) -> UnordItems> { + UnordItems(self.0.map(f)) + } + + #[inline] + pub fn all bool>(mut self, f: F) -> bool { + self.0.all(f) + } + + #[inline] + pub fn any bool>(mut self, f: F) -> bool { + self.0.any(f) + } + + #[inline] + pub fn filter bool>(self, f: F) -> UnordItems> { + UnordItems(self.0.filter(f)) + } + + #[inline] + pub fn filter_map Option>( + self, + f: F, + ) -> UnordItems> { + UnordItems(self.0.filter_map(f)) + } + + #[inline] + pub fn max(self) -> Option + where + T: Ord, + { + self.0.max() + } + + #[inline] + pub fn min(self) -> Option + where + T: Ord, + { + self.0.min() + } + + #[inline] + pub fn sum(self) -> S + where + S: Sum, + { + self.0.sum() + } + + #[inline] + pub fn product(self) -> S + where + S: Product, + { + self.0.product() + } + + #[inline] + pub fn count(self) -> usize { + self.0.count() + } +} + +impl<'a, T: Clone + 'a, I: Iterator> UnordItems<&'a T, I> { + #[inline] + pub fn cloned(self) -> UnordItems> { + UnordItems(self.0.cloned()) + } +} + +impl<'a, T: Copy + 'a, I: Iterator> UnordItems<&'a T, I> { + #[inline] + pub fn copied(self) -> UnordItems> { + UnordItems(self.0.copied()) + } +} + +impl> UnordItems { + pub fn into_sorted(self, hcx: &HCX) -> Vec + where + T: ToStableHashKey, + { + let mut items: Vec = self.0.collect(); + items.sort_by_cached_key(|x| x.to_stable_hash_key(hcx)); + items + } + + pub fn into_sorted_small_vec(self, hcx: &HCX) -> SmallVec<[T; LEN]> + where + T: ToStableHashKey, + { + let mut items: SmallVec<[T; LEN]> = self.0.collect(); + items.sort_by_cached_key(|x| x.to_stable_hash_key(hcx)); + items + } +} + +/// This is a set collection type that tries very hard to not expose +/// any internal iteration. This is a useful property when trying to +/// uphold the determinism invariants imposed by the query system. +/// +/// This collection type is a good choice for set-like collections the +/// keys of which don't have a semantic ordering. +/// +/// See [MCP 533](https://github.com/rust-lang/compiler-team/issues/533) +/// for more information. +#[derive(Debug, Eq, PartialEq, Clone, Encodable, Decodable)] +pub struct UnordSet { + inner: FxHashSet, +} + +impl Default for UnordSet { + fn default() -> Self { + Self { inner: FxHashSet::default() } + } +} + +impl UnordSet { + #[inline] + pub fn new() -> Self { + Self { inner: Default::default() } + } + + #[inline] + pub fn len(&self) -> usize { + self.inner.len() + } + + #[inline] + pub fn insert(&mut self, v: V) -> bool { + self.inner.insert(v) + } + + #[inline] + pub fn contains(&self, v: &Q) -> bool + where + V: Borrow, + Q: Hash + Eq, + { + self.inner.contains(v) + } + + #[inline] + pub fn items<'a>(&'a self) -> UnordItems<&'a V, impl Iterator> { + UnordItems(self.inner.iter()) + } + + #[inline] + pub fn into_items(self) -> UnordItems> { + UnordItems(self.inner.into_iter()) + } + + // We can safely extend this UnordSet from a set of unordered values because that + // won't expose the internal ordering anywhere. + #[inline] + pub fn extend>(&mut self, items: UnordItems) { + self.inner.extend(items.0) + } +} + +impl Extend for UnordSet { + fn extend>(&mut self, iter: T) { + self.inner.extend(iter) + } +} + +impl> HashStable for UnordSet { + #[inline] + fn hash_stable(&self, hcx: &mut HCX, hasher: &mut StableHasher) { + hash_iter_order_independent(self.inner.iter(), hcx, hasher); + } +} + +/// This is a map collection type that tries very hard to not expose +/// any internal iteration. This is a useful property when trying to +/// uphold the determinism invariants imposed by the query system. +/// +/// This collection type is a good choice for map-like collections the +/// keys of which don't have a semantic ordering. +/// +/// See [MCP 533](https://github.com/rust-lang/compiler-team/issues/533) +/// for more information. +#[derive(Debug, Eq, PartialEq, Clone, Encodable, Decodable)] +pub struct UnordMap { + inner: FxHashMap, +} + +impl Default for UnordMap { + fn default() -> Self { + Self { inner: FxHashMap::default() } + } +} + +impl Extend<(K, V)> for UnordMap { + fn extend>(&mut self, iter: T) { + self.inner.extend(iter) + } +} + +impl UnordMap { + #[inline] + pub fn len(&self) -> usize { + self.inner.len() + } + + #[inline] + pub fn insert(&mut self, k: K, v: V) -> Option { + self.inner.insert(k, v) + } + + #[inline] + pub fn contains_key(&self, k: &Q) -> bool + where + K: Borrow, + Q: Hash + Eq, + { + self.inner.contains_key(k) + } + + #[inline] + pub fn items<'a>(&'a self) -> UnordItems<(&'a K, &'a V), impl Iterator> { + UnordItems(self.inner.iter()) + } + + #[inline] + pub fn into_items(self) -> UnordItems<(K, V), impl Iterator> { + UnordItems(self.inner.into_iter()) + } + + // We can safely extend this UnordMap from a set of unordered values because that + // won't expose the internal ordering anywhere. + #[inline] + pub fn extend>(&mut self, items: UnordItems<(K, V), I>) { + self.inner.extend(items.0) + } +} + +impl, V: HashStable> HashStable for UnordMap { + #[inline] + fn hash_stable(&self, hcx: &mut HCX, hasher: &mut StableHasher) { + hash_iter_order_independent(self.inner.iter(), hcx, hasher); + } +} + +/// This is a collection type that tries very hard to not expose +/// any internal iteration. This is a useful property when trying to +/// uphold the determinism invariants imposed by the query system. +/// +/// This collection type is a good choice for collections the +/// keys of which don't have a semantic ordering and don't implement +/// `Hash` or `Eq`. +/// +/// See [MCP 533](https://github.com/rust-lang/compiler-team/issues/533) +/// for more information. +#[derive(Default, Debug, Eq, PartialEq, Clone, Encodable, Decodable)] +pub struct UnordBag { + inner: Vec, +} + +impl UnordBag { + #[inline] + pub fn new() -> Self { + Self { inner: Default::default() } + } + + #[inline] + pub fn len(&self) -> usize { + self.inner.len() + } + + #[inline] + pub fn push(&mut self, v: V) { + self.inner.push(v); + } + + #[inline] + pub fn items<'a>(&'a self) -> UnordItems<&'a V, impl Iterator> { + UnordItems(self.inner.iter()) + } + + #[inline] + pub fn into_items(self) -> UnordItems> { + UnordItems(self.inner.into_iter()) + } + + // We can safely extend this UnordSet from a set of unordered values because that + // won't expose the internal ordering anywhere. + #[inline] + pub fn extend>(&mut self, items: UnordItems) { + self.inner.extend(items.0) + } +} + +impl Extend for UnordBag { + fn extend>(&mut self, iter: I) { + self.inner.extend(iter) + } +} + +impl> HashStable for UnordBag { + #[inline] + fn hash_stable(&self, hcx: &mut HCX, hasher: &mut StableHasher) { + hash_iter_order_independent(self.inner.iter(), hcx, hasher); + } +} + +fn hash_iter_order_independent< + HCX, + T: HashStable, + I: Iterator + ExactSizeIterator, +>( + mut it: I, + hcx: &mut HCX, + hasher: &mut StableHasher, +) { + let len = it.len(); + len.hash_stable(hcx, hasher); + + match len { + 0 => { + // We're done + } + 1 => { + // No need to instantiate a hasher + it.next().unwrap().hash_stable(hcx, hasher); + } + _ => { + let mut accumulator = Fingerprint::ZERO; + for item in it { + let mut item_hasher = StableHasher::new(); + item.hash_stable(hcx, &mut item_hasher); + let item_fingerprint: Fingerprint = item_hasher.finish(); + accumulator = accumulator.combine_commutative(item_fingerprint); + } + accumulator.hash_stable(hcx, hasher); + } + } +} + +// Do not implement IntoIterator for the collections in this module. +// They only exist to hide iteration order in the first place. +impl !IntoIterator for UnordBag {} +impl !IntoIterator for UnordSet {} +impl !IntoIterator for UnordMap {} +impl !IntoIterator for UnordItems {} diff --git a/compiler/rustc_hir_analysis/src/check_unused.rs b/compiler/rustc_hir_analysis/src/check_unused.rs index 922833f85806c..d3df259075274 100644 --- a/compiler/rustc_hir_analysis/src/check_unused.rs +++ b/compiler/rustc_hir_analysis/src/check_unused.rs @@ -1,5 +1,6 @@ use crate::errors::{ExternCrateNotIdiomatic, UnusedExternCrate}; -use rustc_data_structures::fx::{FxHashMap, FxHashSet}; +use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::unord::UnordSet; use rustc_hir as hir; use rustc_hir::def::DefKind; use rustc_hir::def_id::{DefId, LocalDefId}; @@ -8,12 +9,12 @@ use rustc_session::lint; use rustc_span::{Span, Symbol}; pub fn check_crate(tcx: TyCtxt<'_>) { - let mut used_trait_imports: FxHashSet = FxHashSet::default(); + let mut used_trait_imports: UnordSet = Default::default(); for item_def_id in tcx.hir().body_owners() { let imports = tcx.used_trait_imports(item_def_id); debug!("GatherVisitor: item_def_id={:?} with imports {:#?}", item_def_id, imports); - used_trait_imports.extend(imports.iter()); + used_trait_imports.extend(imports.items().copied()); } for &id in tcx.maybe_unused_trait_imports(()) { diff --git a/compiler/rustc_hir_typeck/src/lib.rs b/compiler/rustc_hir_typeck/src/lib.rs index e862d577573b4..959c54866453d 100644 --- a/compiler/rustc_hir_typeck/src/lib.rs +++ b/compiler/rustc_hir_typeck/src/lib.rs @@ -52,7 +52,7 @@ pub use inherited::{Inherited, InheritedBuilder}; use crate::check::check_fn; use crate::coercion::DynamicCoerceMany; use crate::gather_locals::GatherLocalsVisitor; -use rustc_data_structures::fx::FxHashSet; +use rustc_data_structures::unord::UnordSet; use rustc_errors::{struct_span_err, MultiSpan}; use rustc_hir as hir; use rustc_hir::def::Res; @@ -174,7 +174,7 @@ fn has_typeck_results(tcx: TyCtxt<'_>, def_id: DefId) -> bool { } } -fn used_trait_imports(tcx: TyCtxt<'_>, def_id: LocalDefId) -> &FxHashSet { +fn used_trait_imports(tcx: TyCtxt<'_>, def_id: LocalDefId) -> &UnordSet { &*tcx.typeck(def_id).used_trait_imports } diff --git a/compiler/rustc_middle/src/arena.rs b/compiler/rustc_middle/src/arena.rs index d2847e4bc12a7..c94879c9f2139 100644 --- a/compiler/rustc_middle/src/arena.rs +++ b/compiler/rustc_middle/src/arena.rs @@ -96,7 +96,7 @@ macro_rules! arena_types { // since we need to allocate this type on both the `rustc_hir` arena // (during lowering) and the `librustc_middle` arena (for decoding MIR) [decode] asm_template: rustc_ast::InlineAsmTemplatePiece, - [decode] used_trait_imports: rustc_data_structures::fx::FxHashSet, + [decode] used_trait_imports: rustc_data_structures::unord::UnordSet, [decode] is_late_bound_map: rustc_data_structures::fx::FxIndexSet, [decode] impl_source: rustc_middle::traits::ImplSource<'tcx, ()>, diff --git a/compiler/rustc_middle/src/query/mod.rs b/compiler/rustc_middle/src/query/mod.rs index 67c85ef0d3b50..5ee5adc371052 100644 --- a/compiler/rustc_middle/src/query/mod.rs +++ b/compiler/rustc_middle/src/query/mod.rs @@ -912,7 +912,7 @@ rustc_queries! { cache_on_disk_if { true } } - query used_trait_imports(key: LocalDefId) -> &'tcx FxHashSet { + query used_trait_imports(key: LocalDefId) -> &'tcx UnordSet { desc { |tcx| "finding used_trait_imports `{}`", tcx.def_path_str(key.to_def_id()) } cache_on_disk_if { true } } diff --git a/compiler/rustc_middle/src/ty/context.rs b/compiler/rustc_middle/src/ty/context.rs index 94e3f3b63c813..3d7e2a0839abc 100644 --- a/compiler/rustc_middle/src/ty/context.rs +++ b/compiler/rustc_middle/src/ty/context.rs @@ -34,6 +34,7 @@ use rustc_data_structures::sharded::{IntoPointer, ShardedHashMap}; use rustc_data_structures::stable_hasher::{HashStable, StableHasher}; use rustc_data_structures::steal::Steal; use rustc_data_structures::sync::{self, Lock, Lrc, ReadGuard, RwLock, WorkerLocal}; +use rustc_data_structures::unord::UnordSet; use rustc_data_structures::vec_map::VecMap; use rustc_errors::{ DecorateLint, DiagnosticBuilder, DiagnosticMessage, ErrorGuaranteed, MultiSpan, @@ -531,7 +532,7 @@ pub struct TypeckResults<'tcx> { /// This is used for warning unused imports. During type /// checking, this `Lrc` should not be cloned: it must have a ref-count /// of 1 so that we can insert things into the set mutably. - pub used_trait_imports: Lrc>, + pub used_trait_imports: Lrc>, /// If any errors occurred while type-checking this body, /// this field will be set to `Some(ErrorGuaranteed)`. diff --git a/compiler/rustc_middle/src/ty/query.rs b/compiler/rustc_middle/src/ty/query.rs index 9c97ce34f29e4..1715837203c23 100644 --- a/compiler/rustc_middle/src/ty/query.rs +++ b/compiler/rustc_middle/src/ty/query.rs @@ -40,6 +40,7 @@ use rustc_data_structures::fx::{FxHashMap, FxHashSet, FxIndexMap, FxIndexSet}; use rustc_data_structures::steal::Steal; use rustc_data_structures::svh::Svh; use rustc_data_structures::sync::Lrc; +use rustc_data_structures::unord::UnordSet; use rustc_errors::ErrorGuaranteed; use rustc_hir as hir; use rustc_hir::def::DefKind; diff --git a/compiler/rustc_query_impl/src/on_disk_cache.rs b/compiler/rustc_query_impl/src/on_disk_cache.rs index a592165011273..8b14ce210a205 100644 --- a/compiler/rustc_query_impl/src/on_disk_cache.rs +++ b/compiler/rustc_query_impl/src/on_disk_cache.rs @@ -1,8 +1,9 @@ use crate::QueryCtxt; -use rustc_data_structures::fx::{FxHashMap, FxHashSet, FxIndexSet}; +use rustc_data_structures::fx::{FxHashMap, FxIndexSet}; use rustc_data_structures::memmap::Mmap; use rustc_data_structures::sync::{HashMapExt, Lock, Lrc, RwLock}; use rustc_data_structures::unhash::UnhashMap; +use rustc_data_structures::unord::UnordSet; use rustc_hir::def_id::{CrateNum, DefId, DefIndex, LocalDefId, StableCrateId, LOCAL_CRATE}; use rustc_hir::definitions::DefPathHash; use rustc_index::vec::{Idx, IndexVec}; @@ -792,7 +793,7 @@ impl<'a, 'tcx> Decodable> for DefId { } } -impl<'a, 'tcx> Decodable> for &'tcx FxHashSet { +impl<'a, 'tcx> Decodable> for &'tcx UnordSet { fn decode(d: &mut CacheDecoder<'a, 'tcx>) -> Self { RefDecodable::decode(d) } diff --git a/src/librustdoc/core.rs b/src/librustdoc/core.rs index 8232353f915b9..642acb1fea9c8 100644 --- a/src/librustdoc/core.rs +++ b/src/librustdoc/core.rs @@ -1,6 +1,7 @@ use rustc_ast::NodeId; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_data_structures::sync::{self, Lrc}; +use rustc_data_structures::unord::UnordSet; use rustc_errors::emitter::{Emitter, EmitterWriter}; use rustc_errors::json::JsonEmitter; use rustc_feature::UnstableFeatures; @@ -288,8 +289,7 @@ pub(crate) fn create_config( providers.typeck_item_bodies = |_, _| {}; // hack so that `used_trait_imports` won't try to call typeck providers.used_trait_imports = |_, _| { - static EMPTY_SET: LazyLock> = - LazyLock::new(FxHashSet::default); + static EMPTY_SET: LazyLock> = LazyLock::new(UnordSet::default); &EMPTY_SET }; // In case typeck does end up being called, don't ICE in case there were name resolution errors