From 3273003912272b3e9e44c535ba785bae980348bb Mon Sep 17 00:00:00 2001 From: arthurprs Date: Fri, 3 Mar 2017 20:31:54 +0100 Subject: [PATCH] Reduce size overhead of adaptive hashmap Exposes a boolean flag in RawTable and uses it instead of a bool field in HashMap. Fixes: #40042 --- src/libstd/collections/hash/map.rs | 24 +++------ src/libstd/collections/hash/table.rs | 74 +++++++++++++++++++++++++--- 2 files changed, 74 insertions(+), 24 deletions(-) diff --git a/src/libstd/collections/hash/map.rs b/src/libstd/collections/hash/map.rs index f0738fe9b7033..f9b0ec479d701 100644 --- a/src/libstd/collections/hash/map.rs +++ b/src/libstd/collections/hash/map.rs @@ -396,8 +396,6 @@ pub struct HashMap { table: RawTable, resize_policy: DefaultResizePolicy, - - long_probes: bool, } /// Search for a pre-hashed key. @@ -655,7 +653,6 @@ impl HashMap hash_builder: hash_builder, resize_policy: DefaultResizePolicy::new(), table: RawTable::new(0), - long_probes: false, } } @@ -688,7 +685,6 @@ impl HashMap hash_builder: hash_builder, resize_policy: resize_policy, table: RawTable::new(raw_cap), - long_probes: false, } } @@ -746,7 +742,7 @@ impl HashMap let min_cap = self.len().checked_add(additional).expect("reserve overflow"); let raw_cap = self.resize_policy.raw_capacity(min_cap); self.resize(raw_cap); - } else if self.long_probes && remaining <= self.len() { + } else if self.table.tag() && remaining <= self.len() { // Probe sequence is too long and table is half full, // resize early to reduce probing length. 
let new_capacity = self.table.capacity() * 2; @@ -763,7 +759,6 @@ impl HashMap assert!(self.table.size() <= new_raw_cap); assert!(new_raw_cap.is_power_of_two() || new_raw_cap == 0); - self.long_probes = false; let mut old_table = replace(&mut self.table, RawTable::new(new_raw_cap)); let old_size = old_table.size(); @@ -844,8 +839,7 @@ impl HashMap /// If the key already exists, the hashtable will be returned untouched /// and a reference to the existing element will be returned. fn insert_hashed_nocheck(&mut self, hash: SafeHash, k: K, v: V) -> Option { - let entry = search_hashed(&mut self.table, hash, |key| *key == k) - .into_entry(k, &mut self.long_probes); + let entry = search_hashed(&mut self.table, hash, |key| *key == k).into_entry(k); match entry { Some(Occupied(mut elem)) => Some(elem.insert(v)), Some(Vacant(elem)) => { @@ -1002,7 +996,7 @@ impl HashMap self.reserve(1); let hash = self.make_hash(&key); search_hashed(&mut self.table, hash, |q| q.eq(&key)) - .into_entry(key, &mut self.long_probes).expect("unreachable") + .into_entry(key).expect("unreachable") } /// Returns the number of elements in the map. 
@@ -1456,7 +1450,7 @@ impl InternalEntry { impl<'a, K, V> InternalEntry> { #[inline] - fn into_entry(self, key: K, long_probes: &'a mut bool) -> Option> { + fn into_entry(self, key: K) -> Option> { match self { InternalEntry::Occupied { elem } => { Some(Occupied(OccupiedEntry { @@ -1469,7 +1463,6 @@ impl<'a, K, V> InternalEntry> { hash: hash, key: key, elem: elem, - long_probes: long_probes, })) } InternalEntry::TableIsEmpty => None, @@ -1542,7 +1535,6 @@ pub struct VacantEntry<'a, K: 'a, V: 'a> { hash: SafeHash, key: K, elem: VacantEntryState>, - long_probes: &'a mut bool, } #[stable(feature= "debug_hash_map", since = "1.12.0")] @@ -2117,15 +2109,15 @@ impl<'a, K: 'a, V: 'a> VacantEntry<'a, K, V> { #[stable(feature = "rust1", since = "1.0.0")] pub fn insert(self, value: V) -> &'a mut V { match self.elem { - NeqElem(bucket, disp) => { + NeqElem(mut bucket, disp) => { if disp >= DISPLACEMENT_THRESHOLD { - *self.long_probes = true; + bucket.table_mut().set_tag(true); } robin_hood(bucket, disp, self.hash, self.key, value) }, - NoElem(bucket, disp) => { + NoElem(mut bucket, disp) => { if disp >= DISPLACEMENT_THRESHOLD { - *self.long_probes = true; + bucket.table_mut().set_tag(true); } bucket.put(self.hash, self.key, value).into_mut_refs().1 }, diff --git a/src/libstd/collections/hash/table.rs b/src/libstd/collections/hash/table.rs index 9e92b4750145e..0e225b2964f63 100644 --- a/src/libstd/collections/hash/table.rs +++ b/src/libstd/collections/hash/table.rs @@ -34,6 +34,42 @@ type HashUint = usize; const EMPTY_BUCKET: HashUint = 0; +/// Special `Unique` that uses the lower bit of the pointer +/// to expose a boolean tag. +/// Note: when the pointer is initialized to EMPTY `.ptr()` will return +/// null and the tag functions shouldn't be used. 
+struct TaggedHashUintPtr(Unique); + +impl TaggedHashUintPtr { + #[inline] + unsafe fn new(ptr: *mut HashUint) -> Self { + debug_assert!(ptr as usize & 1 == 0 || ptr as usize == EMPTY as usize); + TaggedHashUintPtr(Unique::new(ptr)) + } + + #[inline] + fn set_tag(&mut self, value: bool) { + let usize_ptr = &*self.0 as *const *mut HashUint as *mut usize; + unsafe { + if value { + *usize_ptr |= 1; + } else { + *usize_ptr &= !1; + } + } + } + + #[inline] + fn tag(&self) -> bool { + (*self.0 as usize) & 1 == 1 + } + + #[inline] + fn ptr(&self) -> *mut HashUint { + (*self.0 as usize & !1) as *mut HashUint + } +} + /// The raw hashtable, providing safe-ish access to the unzipped and highly /// optimized arrays of hashes, and key-value pairs. /// @@ -72,10 +108,14 @@ const EMPTY_BUCKET: HashUint = 0; /// around just the "table" part of the hashtable. It enforces some /// invariants at the type level and employs some performance trickery, /// but in general is just a tricked out `Vec>`. +/// +/// The hashtable also exposes a special boolean tag. The tag defaults to false +/// when the RawTable is created and is accessible with the `tag` and `set_tag` +/// functions. pub struct RawTable { capacity: usize, size: usize, - hashes: Unique, + hashes: TaggedHashUintPtr, // Because K/V do not appear directly in any of the types in the struct, // inform rustc that in fact instances of K and V are reachable from here. @@ -208,6 +248,10 @@ impl FullBucket { pub fn table(&self) -> &M { &self.table } + /// Borrow a mutable reference to the table. + pub fn table_mut(&mut self) -> &mut M { + &mut self.table + } /// Move out the reference to the table. pub fn into_table(self) -> M { self.table @@ -227,6 +271,10 @@ impl EmptyBucket { pub fn table(&self) -> &M { &self.table } + /// Borrow a mutable reference to the table. 
+ pub fn table_mut(&mut self) -> &mut M { + &mut self.table + } } impl Bucket { @@ -687,7 +735,7 @@ impl RawTable { return RawTable { size: 0, capacity: 0, - hashes: Unique::new(EMPTY as *mut HashUint), + hashes: TaggedHashUintPtr::new(EMPTY as *mut HashUint), marker: marker::PhantomData, }; } @@ -728,7 +776,7 @@ impl RawTable { RawTable { capacity: capacity, size: 0, - hashes: Unique::new(hashes), + hashes: TaggedHashUintPtr::new(hashes), marker: marker::PhantomData, } } @@ -737,13 +785,13 @@ impl RawTable { let hashes_size = self.capacity * size_of::(); let pairs_size = self.capacity * size_of::<(K, V)>(); - let buffer = *self.hashes as *mut u8; + let buffer = self.hashes.ptr() as *mut u8; let (pairs_offset, _, oflo) = calculate_offsets(hashes_size, pairs_size, align_of::<(K, V)>()); debug_assert!(!oflo, "capacity overflow"); unsafe { RawBucket { - hash: *self.hashes, + hash: self.hashes.ptr(), pair: buffer.offset(pairs_offset as isize) as *const _, _marker: marker::PhantomData, } @@ -755,7 +803,7 @@ impl RawTable { pub fn new(capacity: usize) -> RawTable { unsafe { let ret = RawTable::new_uninitialized(capacity); - ptr::write_bytes(*ret.hashes, 0, capacity); + ptr::write_bytes(ret.hashes.ptr(), 0, capacity); ret } } @@ -774,7 +822,7 @@ impl RawTable { fn raw_buckets(&self) -> RawBuckets { RawBuckets { raw: self.first_bucket_raw(), - hashes_end: unsafe { self.hashes.offset(self.capacity as isize) }, + hashes_end: unsafe { self.hashes.ptr().offset(self.capacity as isize) }, marker: marker::PhantomData, } } @@ -832,6 +880,16 @@ impl RawTable { marker: marker::PhantomData, } } + + /// Set the table tag + pub fn set_tag(&mut self, value: bool) { + self.hashes.set_tag(value) + } + + /// Get the table tag + pub fn tag(&self) -> bool { + self.hashes.tag() + } } /// A raw iterator. The basis for some other iterators in this module. 
Although @@ -1156,7 +1214,7 @@ unsafe impl<#[may_dangle] K, #[may_dangle] V> Drop for RawTable { debug_assert!(!oflo, "should be impossible"); unsafe { - deallocate(*self.hashes as *mut u8, size, align); + deallocate(self.hashes.ptr() as *mut u8, size, align); // Remember how everything was allocated out of one buffer // during initialization? We only need one call to free here. }