Faster optimized frozen dictionary creation (3/n) (#87688)
* Avoid the need for the `Action<int, int> storeDestIndexFromSrcIndex` delegate by writing the destination indexes into the provided hash code buffer and moving that responsibility to the caller (1-4% gain)

* For cases where the key is an integer and we know the input is already unique (because it comes from a dictionary or a hash set), there is no need to create another hash set

Also, in cases where all of the hash codes simply turn out to be unique, we can iterate over a span rather than a hash set

+9% time gain for scenarios where the key is an integer, plus a 10-20% drop in allocations
Up to +5% gain where string keys turn out to have unique hash codes

Co-authored-by: Stephen Toub <[email protected]>
adamsitnik and stephentoub authored Jun 22, 2023
1 parent 4586587 commit b878df7
Showing 7 changed files with 110 additions and 62 deletions.
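Before the per-file hunks, here is the shape of the new contract in isolation: FrozenHashTable.Create no longer invokes a per-entry callback; it overwrites the caller-provided hash-code buffer with each entry's destination index, and the caller then permutes its own arrays. The following is a minimal, self-contained C# sketch of that contract only; DemoCreate, its naive bucket grouping, and the sample keys are hypothetical simplifications, not the runtime's implementation.

using System;

internal static class BufferReuseDemo
{
    // Toy stand-in for FrozenHashTable.Create: assigns each entry a destination index
    // so that entries sharing a bucket become contiguous, then overwrites
    // hashCodes[srcIndex] with that destination index. The real method also builds the
    // bucket table and chooses the bucket count; this sketch models only the buffer reuse.
    private static void DemoCreate(Span<int> hashCodes, int numBuckets)
    {
        // Keep a private copy so the toy quadratic grouping below stays correct
        // while the caller's buffer is being overwritten.
        int[] codes = hashCodes.ToArray();

        int destIndex = 0;
        for (uint bucket = 0; bucket < (uint)numBuckets; bucket++)
        {
            for (int srcIndex = 0; srcIndex < codes.Length; srcIndex++)
            {
                if ((uint)codes[srcIndex] % (uint)numBuckets == bucket)
                {
                    hashCodes[srcIndex] = destIndex++;
                }
            }
        }
    }

    public static void Main()
    {
        string[] keys = { "apple", "banana", "cherry", "date" };
        int[] values = { 1, 2, 3, 4 };

        int[] hashCodes = new int[keys.Length];
        for (int i = 0; i < keys.Length; i++)
        {
            hashCodes[i] = StringComparer.Ordinal.GetHashCode(keys[i]);
        }

        DemoCreate(hashCodes, numBuckets: 3);

        // Caller-side permutation that replaces the old storeDestIndexFromSrcIndex delegate:
        // hashCodes[srcIndex] now holds the destination index for entry srcIndex.
        string[] frozenKeys = new string[keys.Length];
        int[] frozenValues = new int[values.Length];
        for (int srcIndex = 0; srcIndex < hashCodes.Length; srcIndex++)
        {
            int destIndex = hashCodes[srcIndex];
            frozenKeys[destIndex] = keys[srcIndex];
            frozenValues[destIndex] = values[srcIndex];
        }

        Console.WriteLine(string.Join(", ", frozenKeys));
    }
}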
FrozenHashTable.cs
@@ -36,23 +36,21 @@ private FrozenHashTable(int[] hashCodes, Bucket[] buckets, ulong fastModMultipli
}

/// <summary>Initializes a frozen hash table.</summary>
/// <param name="hashCodes">Pre-calculated hash codes.</param>
/// <param name="storeDestIndexFromSrcIndex">A delegate that assigns the index to a specific entry. It's passed the destination and source indices.</param>
/// <param name="hashCodes">Pre-calculated hash codes. When the method returns, the buffer has been overwritten with each entry's destination index.</param>
/// <param name="optimizeForReading">true to spend additional effort tuning for subsequent read speed on the table; false to prioritize construction time.</param>
/// <param name="hashCodesAreUnique">true when the input hash codes are already unique (for example, because they come from a dictionary or hash set of integers).</param>
/// <remarks>
/// This method will iterate through the incoming entries and will invoke the hasher on each once.
/// It will then determine the optimal number of hash buckets to allocate and will populate the
/// bucket table. In the process of doing so, it calls out to the <paramref name="storeDestIndexFromSrcIndex"/> to indicate
/// the resulting index for that entry. <see cref="FindMatchingEntries(int, out int, out int)"/>
/// then uses this index to reference individual entries by indexing into <see cref="HashCodes"/>.
/// bucket table. The caller is responsible for consuming the destination indexes written to <paramref name="hashCodes"/> and updating the destination (if desired).
/// <see cref="FindMatchingEntries(int, out int, out int)"/> then uses these indexes to reference individual entries by indexing into <see cref="HashCodes"/>.
/// </remarks>
/// <returns>A frozen hash table.</returns>
public static FrozenHashTable Create(ReadOnlySpan<int> hashCodes, Action<int, int> storeDestIndexFromSrcIndex, bool optimizeForReading = true)
public static FrozenHashTable Create(Span<int> hashCodes, bool optimizeForReading = true, bool hashCodesAreUnique = false)
{
// Determine how many buckets to use. This might be fewer than the number of entries
// if any entries have identical hashcodes (not just different hashcodes that might
// map to the same bucket).
int numBuckets = CalcNumBuckets(hashCodes, optimizeForReading);
int numBuckets = CalcNumBuckets(hashCodes, optimizeForReading, hashCodesAreUnique);
ulong fastModMultiplier = HashHelpers.GetFastModMultiplier((uint)numBuckets);

// Create two spans:
@@ -100,8 +98,10 @@ public static FrozenHashTable Create(ReadOnlySpan<int> hashCodes, Action<int, in
bucketStart = count;
while (index >= 0)
{
hashtableHashcodes[count] = hashCodes[index];
storeDestIndexFromSrcIndex(count, index);
ref int hashCode = ref hashCodes[index];
hashtableHashcodes[count] = hashCode;
// The hash code has been read for the last time, so reuse the buffer slot to store the destination index.
hashCode = count;
count++;
bucketCount++;

@@ -144,9 +144,10 @@ public void FindMatchingEntries(int hashCode, out int startIndex, out int endInd
/// sizes, starting at the exact number of hash codes and incrementing the bucket count by 1 per trial,
/// this is a trade-off between speed of determining a good number of buckets and maximal density.
/// </remarks>
private static int CalcNumBuckets(ReadOnlySpan<int> hashCodes, bool optimizeForReading)
private static int CalcNumBuckets(ReadOnlySpan<int> hashCodes, bool optimizeForReading, bool hashCodesAreUnique)
{
Debug.Assert(hashCodes.Length != 0);
Debug.Assert(!hashCodesAreUnique || new HashSet<int>(hashCodes.ToArray()).Count == hashCodes.Length);

const double AcceptableCollisionRate = 0.05; // What is a satisfactory rate of hash collisions?
const int LargeInputSizeThreshold = 1000; // What is the limit for an input to be considered "small"?
@@ -159,38 +160,44 @@ private static int CalcNumBuckets(ReadOnlySpan<int> hashCodes, bool optimizeForR
}

// Filter out duplicate codes, since no increase in buckets will avoid collisions from duplicate input hash codes.
var codes =
HashSet<int>? codes = null;
int uniqueCodesCount = hashCodes.Length;
if (!hashCodesAreUnique)
{
codes =
#if NETCOREAPP2_0_OR_GREATER
new HashSet<int>(hashCodes.Length);
new HashSet<int>(hashCodes.Length);
#else
new HashSet<int>();
new HashSet<int>();
#endif
foreach (int hashCode in hashCodes)
{
codes.Add(hashCode);
foreach (int hashCode in hashCodes)
{
codes.Add(hashCode);
}
uniqueCodesCount = codes.Count;
}
Debug.Assert(codes.Count != 0);
Debug.Assert(uniqueCodesCount != 0);

// In our precomputed primes table, find the index of the smallest prime that's at least as large as our number of
// hash codes. If there are more codes than in our precomputed primes table, which accommodates millions of values,
// give up and just use the next prime.
ReadOnlySpan<int> primes = HashHelpers.Primes;
int minPrimeIndexInclusive = 0;
while ((uint)minPrimeIndexInclusive < (uint)primes.Length && codes.Count > primes[minPrimeIndexInclusive])
while ((uint)minPrimeIndexInclusive < (uint)primes.Length && uniqueCodesCount > primes[minPrimeIndexInclusive])
{
minPrimeIndexInclusive++;
}

if (minPrimeIndexInclusive >= primes.Length)
{
return HashHelpers.GetPrime(codes.Count);
return HashHelpers.GetPrime(uniqueCodesCount);
}

// Determine the largest number of buckets we're willing to use, based on a multiple of the number of inputs.
// For smaller inputs, we allow for a larger multiplier.
int maxNumBuckets =
codes.Count *
(codes.Count >= LargeInputSizeThreshold ? MaxLargeBucketTableMultiplier : MaxSmallBucketTableMultiplier);
uniqueCodesCount *
(uniqueCodesCount >= LargeInputSizeThreshold ? MaxLargeBucketTableMultiplier : MaxSmallBucketTableMultiplier);

// Find the index of the smallest prime that accommodates our max buckets.
int maxPrimeIndexExclusive = minPrimeIndexInclusive;
@@ -209,7 +216,7 @@ private static int CalcNumBuckets(ReadOnlySpan<int> hashCodes, bool optimizeForR
int[] seenBuckets = ArrayPool<int>.Shared.Rent((maxNumBuckets / BitsPerInt32) + 1);

int bestNumBuckets = maxNumBuckets;
int bestNumCollisions = codes.Count;
int bestNumCollisions = uniqueCodesCount;

// Iterate through each available prime between the min and max discovered. For each, compute
// the collision ratio.
@@ -222,22 +229,48 @@ private static int CalcNumBuckets(ReadOnlySpan<int> hashCodes, bool optimizeForR
// Determine the bucket for each hash code and mark it as seen. If it was already seen,
// track it as a collision.
int numCollisions = 0;
foreach (int code in codes)

if (codes is not null && uniqueCodesCount != hashCodes.Length)
{
uint bucketNum = (uint)code % (uint)numBuckets;
if ((seenBuckets[bucketNum / BitsPerInt32] & (1 << (int)bucketNum)) != 0)
foreach (int code in codes)
{
numCollisions++;
if (numCollisions >= bestNumCollisions)
uint bucketNum = (uint)code % (uint)numBuckets;
if ((seenBuckets[bucketNum / BitsPerInt32] & (1 << (int)bucketNum)) != 0)
{
numCollisions++;
if (numCollisions >= bestNumCollisions)
{
// If we've already hit the previously known best number of collisions,
// there's no point in continuing as worst case we'd just use that.
break;
}
}
else
{
// If we've already hit the previously known best number of collisions,
// there's no point in continuing as worst case we'd just use that.
break;
seenBuckets[bucketNum / BitsPerInt32] |= 1 << (int)bucketNum;
}
}
else
}
else
{
// All of the hash codes in hashCodes are unique. In such a scenario, it's faster to iterate over a span.
foreach (int code in hashCodes)
{
seenBuckets[bucketNum / BitsPerInt32] |= 1 << (int)bucketNum;
uint bucketNum = (uint)code % (uint)numBuckets;
if ((seenBuckets[bucketNum / BitsPerInt32] & (1 << (int)bucketNum)) != 0)
{
numCollisions++;
if (numCollisions >= bestNumCollisions)
{
// If we've already hit the previously known best number of collisions,
// there's no point in continuing as worst case we'd just use that.
break;
}
}
else
{
seenBuckets[bucketNum / BitsPerInt32] |= 1 << (int)bucketNum;
}
}
}

@@ -247,7 +280,7 @@ private static int CalcNumBuckets(ReadOnlySpan<int> hashCodes, bool optimizeForR
{
bestNumBuckets = numBuckets;

if (numCollisions / (double)codes.Count <= AcceptableCollisionRate)
if (numCollisions / (double)uniqueCodesCount <= AcceptableCollisionRate)
{
break;
}
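The reworked CalcNumBuckets above skips building a dedup HashSet when the caller guarantees unique hash codes and, in that case, iterates the span directly; for each candidate prime bucket count it then counts collisions with a packed bit array and stops once the collision rate is acceptable. A rough, self-contained sketch of that inner collision count follows; CountCollisions and the sample data are illustrative only, not the runtime's code.

using System;

internal static class CollisionCountDemo
{
    private const int BitsPerInt32 = 32;

    // Counts how many of the (already de-duplicated) hash codes land in a bucket
    // that some earlier code already claimed, for a given candidate bucket count.
    public static int CountCollisions(ReadOnlySpan<int> uniqueHashCodes, int numBuckets)
    {
        // One bit per bucket, packed into ints, mirroring the rented seenBuckets array.
        int[] seenBuckets = new int[(numBuckets / BitsPerInt32) + 1];

        int numCollisions = 0;
        foreach (int code in uniqueHashCodes)
        {
            uint bucketNum = (uint)code % (uint)numBuckets;

            // C# masks an int shift count to its low 5 bits, so 1 << (int)bucketNum
            // is effectively 1 << (bucketNum % 32), matching the bucketNum / 32 word index.
            if ((seenBuckets[bucketNum / BitsPerInt32] & (1 << (int)bucketNum)) != 0)
            {
                numCollisions++;
            }
            else
            {
                seenBuckets[bucketNum / BitsPerInt32] |= 1 << (int)bucketNum;
            }
        }

        return numCollisions;
    }

    public static void Main()
    {
        int[] codes = { 3, 17, 20, 34 };
        // With 17 buckets: 20 collides with 3 (both bucket 3) and 34 with 17 (both bucket 0).
        Console.WriteLine(CountCollisions(codes, numBuckets: 17)); // 2
    }
}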
Int32FrozenDictionary.cs
@@ -35,9 +35,14 @@ internal Int32FrozenDictionary(Dictionary<int, TValue> source) : base(EqualityCo
hashCodes[i] = entries[i].Key;
}

_hashTable = FrozenHashTable.Create(
hashCodes,
(destIndex, srcIndex) => _values[destIndex] = entries[srcIndex].Value);
_hashTable = FrozenHashTable.Create(hashCodes, hashCodesAreUnique: true);

for (int srcIndex = 0; srcIndex < hashCodes.Length; srcIndex++)
{
int destIndex = hashCodes[srcIndex];

_values[destIndex] = entries[srcIndex].Value;
}

ArrayPool<int>.Shared.Return(arrayPoolHashCodes);
}
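The hashCodesAreUnique: true argument above is justified because the hash codes are the dictionary's keys themselves (hashCodes[i] = entries[i].Key) and a dictionary's keys are unique by definition, so CalcNumBuckets can skip its de-duplication pass. Using the key as its own hash code is also consistent with the default comparer, since an Int32 hashes to its own value. A tiny demonstration of that last point (demo only, not runtime code):

using System;
using System.Collections.Generic;

internal static class IntHashDemo
{
    public static void Main()
    {
        // Int32.GetHashCode returns the value itself, and EqualityComparer<int>.Default defers to it.
        Console.WriteLine((42).GetHashCode());                             // 42
        Console.WriteLine((-7).GetHashCode());                             // -7
        Console.WriteLine(EqualityComparer<int>.Default.GetHashCode(-7));  // -7
    }
}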
Int32FrozenSet.cs
@@ -25,9 +25,7 @@ internal Int32FrozenSet(HashSet<int> source) : base(EqualityComparer<int>.Defaul
int[] entries = ArrayPool<int>.Shared.Rent(count);
source.CopyTo(entries);

_hashTable = FrozenHashTable.Create(
new ReadOnlySpan<int>(entries, 0, count),
static delegate { });
_hashTable = FrozenHashTable.Create(new Span<int>(entries, 0, count), hashCodesAreUnique: true);

ArrayPool<int>.Shared.Return(entries);
}
ItemsFrozenSet.cs
@@ -30,10 +30,14 @@ protected ItemsFrozenSet(HashSet<T> source, bool optimizeForReading = true) : ba
hashCodes[i] = entries[i] is T t ? Comparer.GetHashCode(t) : 0;
}

_hashTable = FrozenHashTable.Create(
hashCodes,
(destIndex, srcIndex) => _items[destIndex] = entries[srcIndex],
optimizeForReading);
_hashTable = FrozenHashTable.Create(hashCodes, optimizeForReading);

for (int srcIndex = 0; srcIndex < hashCodes.Length; srcIndex++)
{
int destIndex = hashCodes[srcIndex];

_items[destIndex] = entries[srcIndex];
}

ArrayPool<int>.Shared.Return(arrayPoolHashCodes);
}
KeysAndValuesFrozenDictionary.cs
@@ -32,14 +32,15 @@ protected KeysAndValuesFrozenDictionary(Dictionary<TKey, TValue> source, bool op
hashCodes[i] = Comparer.GetHashCode(entries[i].Key);
}

_hashTable = FrozenHashTable.Create(
hashCodes,
(destIndex, srcIndex) =>
{
_keys[destIndex] = entries[srcIndex].Key;
_values[destIndex] = entries[srcIndex].Value;
},
optimizeForReading);
_hashTable = FrozenHashTable.Create(hashCodes, optimizeForReading);

for (int srcIndex = 0; srcIndex < hashCodes.Length; srcIndex++)
{
int destIndex = hashCodes[srcIndex];

_keys[destIndex] = entries[srcIndex].Key;
_values[destIndex] = entries[srcIndex].Value;
}

ArrayPool<int>.Shared.Return(arrayPoolHashCodes);
}
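Part of the 1-4% gain called out in the commit message comes from dropping the per-entry callback: the removed lambda above captures this and entries, which forces a closure object plus a delegate allocation and an indirect call per entry, whereas the replacement loop over the destination indexes is allocation-free. A hypothetical side-by-side sketch of the two shapes (OldFill and the local arrays are illustrative, not runtime code):

using System;

internal static class DelegateVsLoopDemo
{
    // Simulates the old FrozenHashTable.Create shape: the table computes destination
    // indexes and calls back into the caller once per entry.
    private static void OldFill(int[] destIndexes, Action<int, int> storeDestIndexFromSrcIndex)
    {
        for (int srcIndex = 0; srcIndex < destIndexes.Length; srcIndex++)
        {
            storeDestIndexFromSrcIndex(destIndexes[srcIndex], srcIndex);
        }
    }

    public static void Main()
    {
        int[] destIndexes = { 2, 0, 1 };
        string[] entries = { "a", "b", "c" };

        // Old shape: the lambda captures 'entries' and 'items', so the compiler emits a
        // closure object and allocates a delegate, and each entry costs an indirect call.
        string[] items = new string[entries.Length];
        OldFill(destIndexes, (destIndex, srcIndex) => items[destIndex] = entries[srcIndex]);

        // New shape: Create has already written the destination index for each source index
        // into the buffer, so the caller permutes with a plain, allocation-free loop.
        string[] items2 = new string[entries.Length];
        for (int srcIndex = 0; srcIndex < destIndexes.Length; srcIndex++)
        {
            int destIndex = destIndexes[srcIndex];
            items2[destIndex] = entries[srcIndex];
        }

        Console.WriteLine(string.Join(",", items));  // b,c,a
        Console.WriteLine(string.Join(",", items2)); // b,c,a
    }
}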
OrdinalStringFrozenDictionary.cs
@@ -47,13 +47,15 @@ internal OrdinalStringFrozenDictionary(
hashCodes[i] = GetHashCode(keys[i]);
}

_hashTable = FrozenHashTable.Create(
hashCodes,
(destIndex, srcIndex) =>
{
_keys[destIndex] = keys[srcIndex];
_values[destIndex] = values[srcIndex];
});
_hashTable = FrozenHashTable.Create(hashCodes);

for (int srcIndex = 0; srcIndex < hashCodes.Length; srcIndex++)
{
int destIndex = hashCodes[srcIndex];

_keys[destIndex] = keys[srcIndex];
_values[destIndex] = values[srcIndex];
}

ArrayPool<int>.Shared.Return(arrayPoolHashCodes);
}
OrdinalStringFrozenSet.cs
@@ -38,9 +38,14 @@ internal OrdinalStringFrozenSet(
hashCodes[i] = GetHashCode(entries[i]);
}

_hashTable = FrozenHashTable.Create(
hashCodes,
(destIndex, srcIndex) => _items[destIndex] = entries[srcIndex]);
_hashTable = FrozenHashTable.Create(hashCodes);

for (int srcIndex = 0; srcIndex < hashCodes.Length; srcIndex++)
{
int destIndex = hashCodes[srcIndex];

_items[destIndex] = entries[srcIndex];
}

ArrayPool<int>.Shared.Return(arrayPoolHashCodes);
}
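These internal constructors are reached through the public factory methods in System.Collections.Frozen. A usage sketch, assuming the .NET 8 ToFrozenDictionary/ToFrozenSet API shape; for large enough inputs the int-keyed dictionary exercises the Int32FrozenDictionary path that now passes hashCodesAreUnique: true, and the ordinal string set exercises the ordinal-string path changed above (very small inputs may instead use dedicated small-collection implementations).

using System;
using System.Collections.Frozen;
using System.Collections.Generic;

internal static class FrozenUsageDemo
{
    public static void Main()
    {
        // Int32 keys: the keys double as their own hash codes and are unique by construction.
        var source = new Dictionary<int, string> { [1] = "one", [2] = "two", [3] = "three" };
        FrozenDictionary<int, string> frozen = source.ToFrozenDictionary();
        Console.WriteLine(frozen[2]); // two

        // Ordinal string comparer: string hash codes are precomputed once during construction.
        var names = new HashSet<string>(StringComparer.Ordinal) { "alpha", "beta", "gamma" };
        FrozenSet<string> frozenNames = names.ToFrozenSet(StringComparer.Ordinal);
        Console.WriteLine(frozenNames.Contains("beta")); // True
    }
}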
