Skip to content

Commit

Permalink
Faster optimized frozen dictionary creation (6/6) (dotnet#88093)
Browse files Browse the repository at this point in the history
* don't use custom ToArray for small frozen collections, up to 50% gain for creation time for collections with <= 4 items

* for some primitive integer types GetHashCode returns the value itself cast to int, so when we receive a Dictionary/HashSet keyed by one of them we know that all hash codes are unique and we can avoid some work later

10-15% CPU time gain and 15-20% allocation reduction for FrozenDictionary and FrozenHashSet where TKey is uint, short, ushort, byte, sbyte

* move Length Buckets code to a dedicated helper type to reduce code duplication and decrease code size

* add tests for Frozen Dictionaries with key being uint, short, ushort, byte, sbyte, nint and nuint

* fix discovered bug: IntPtr started implementing IComparable<IntPtr> in .NET 5
  • Loading branch information
adamsitnik authored Jul 5, 2023
1 parent 84fd859 commit 76a8f4f
Show file tree
Hide file tree
Showing 14 changed files with 255 additions and 209 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -406,8 +406,10 @@ public void ICollection_Generic_Contains_ValidValueOnCollectionContainingThatVal
public void ICollection_Generic_Contains_DefaultValueOnCollectionNotContainingDefaultValue(int count)
{
ICollection<T> collection = GenericICollectionFactory(count);
if (DefaultValueAllowed)
if (DefaultValueAllowed && default(T) is null) // it's true only for reference types and for Nullable<T>
{
Assert.False(collection.Contains(default(T)));
}
}

[Theory]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ public void IDictionary_Generic_ItemGet_MissingNonDefaultKey_ThrowsKeyNotFoundEx
[MemberData(nameof(ValidCollectionSizes))]
public void IDictionary_Generic_ItemGet_MissingDefaultKey_ThrowsKeyNotFoundException(int count)
{
if (DefaultValueAllowed)
if (DefaultValueAllowed && !IsReadOnly)
{
IDictionary<TKey, TValue> dictionary = GenericIDictionaryFactory(count);
TKey missingKey = default(TKey);
Expand Down Expand Up @@ -733,11 +733,14 @@ public void IDictionary_Generic_ContainsKey_DefaultKeyNotContainedInDictionary(i
IDictionary<TKey, TValue> dictionary = GenericIDictionaryFactory(count);
if (DefaultValueAllowed)
{
// returns false
TKey missingKey = default(TKey);
while (dictionary.ContainsKey(missingKey))
dictionary.Remove(missingKey);
Assert.False(dictionary.ContainsKey(missingKey));
if (!IsReadOnly)
{
// returns false
TKey missingKey = default(TKey);
while (dictionary.ContainsKey(missingKey))
dictionary.Remove(missingKey);
Assert.False(dictionary.ContainsKey(missingKey));
}
}
else
{
Expand Down Expand Up @@ -934,10 +937,13 @@ public void IDictionary_Generic_TryGetValue_DefaultKeyNotContainedInDictionary(i
TValue outValue;
if (DefaultValueAllowed)
{
TKey missingKey = default(TKey);
while (dictionary.ContainsKey(missingKey))
dictionary.Remove(missingKey);
Assert.False(dictionary.TryGetValue(missingKey, out outValue));
if (!IsReadOnly)
{
TKey missingKey = default(TKey);
while (dictionary.ContainsKey(missingKey))
dictionary.Remove(missingKey);
Assert.False(dictionary.TryGetValue(missingKey, out outValue));
}
}
else
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ The System.Collections.Immutable library is built-in as part of the shared frame
<Compile Include="System\Collections\Frozen\Int32\Int32FrozenSet.cs" />
<Compile Include="System\Collections\Frozen\String\Hashing.cs" />
<Compile Include="System\Collections\Frozen\String\KeyAnalyzer.cs" />
<Compile Include="System\Collections\Frozen\String\LengthBuckets.cs" />
<Compile Include="System\Collections\Frozen\String\LengthBucketsFrozenDictionary.cs" />
<Compile Include="System\Collections\Frozen\String\LengthBucketsFrozenSet.cs" />
<Compile Include="System\Collections\Frozen\String\OrdinalStringFrozenDictionary.cs" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,6 @@ public static bool IsKnownComparable<T>() =>
typeof(T) == typeof(uint) ||
typeof(T) == typeof(long) ||
typeof(T) == typeof(ulong) ||
typeof(T) == typeof(nint) ||
typeof(T) == typeof(nuint) ||
typeof(T) == typeof(decimal) ||
typeof(T) == typeof(float) ||
typeof(T) == typeof(double) ||
Expand All @@ -68,6 +66,8 @@ public static bool IsKnownComparable<T>() =>
#endif
#if NET5_0_OR_GREATER
typeof(T) == typeof(Half) ||
typeof(T) == typeof(nint) ||
typeof(T) == typeof(nuint) ||
#endif
#if NET6_0_OR_GREATER
typeof(T) == typeof(DateOnly) ||
Expand All @@ -78,5 +78,13 @@ public static bool IsKnownComparable<T>() =>
typeof(T) == typeof(UInt128) ||
#endif
typeof(T).IsEnum;

// for these types GetHashCode returns their value casted to int, so when we receive a Dictionary/HashSet where there are key
// we know that all hash codes are unique and we can avoid some work later
internal static bool KeysAreHashCodes<T>()
=> typeof(T) == typeof(int) || typeof(T) == typeof(uint)
|| typeof(T) == typeof(short) || typeof(T) == typeof(ushort)
|| typeof(T) == typeof(byte) || typeof(T) == typeof(sbyte)
|| ((typeof(T) == typeof(nint) || typeof(T) == typeof(nuint)) && IntPtr.Size == 4);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ internal abstract class ItemsFrozenSet<T, TThisWrapper> : FrozenSetInternalBase<
private protected readonly FrozenHashTable _hashTable;
private protected readonly T[] _items;

protected ItemsFrozenSet(HashSet<T> source) : base(source.Comparer)
protected ItemsFrozenSet(HashSet<T> source, bool keysAreHashCodes = false) : base(source.Comparer)
{
Debug.Assert(source.Count != 0);

Expand All @@ -30,7 +30,7 @@ protected ItemsFrozenSet(HashSet<T> source) : base(source.Comparer)
hashCodes[i] = entries[i] is T t ? Comparer.GetHashCode(t) : 0;
}

_hashTable = FrozenHashTable.Create(hashCodes);
_hashTable = FrozenHashTable.Create(hashCodes, keysAreHashCodes);

for (int srcIndex = 0; srcIndex < hashCodes.Length; srcIndex++)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ internal abstract class KeysAndValuesFrozenDictionary<TKey, TValue> : FrozenDict
private protected readonly TKey[] _keys;
private protected readonly TValue[] _values;

protected KeysAndValuesFrozenDictionary(Dictionary<TKey, TValue> source) : base(source.Comparer)
protected KeysAndValuesFrozenDictionary(Dictionary<TKey, TValue> source, bool keysAreHashCodes = false) : base(source.Comparer)
{
Debug.Assert(source.Count != 0);

Expand All @@ -32,7 +32,7 @@ protected KeysAndValuesFrozenDictionary(Dictionary<TKey, TValue> source) : base(
hashCodes[i] = Comparer.GetHashCode(entries[i].Key);
}

_hashTable = FrozenHashTable.Create(hashCodes);
_hashTable = FrozenHashTable.Create(hashCodes, keysAreHashCodes);

for (int srcIndex = 0; srcIndex < hashCodes.Length; srcIndex++)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Diagnostics;
using System.Linq;
using System.Runtime.CompilerServices;

namespace System.Collections.Frozen
Expand All @@ -24,8 +25,8 @@ internal SmallFrozenDictionary(Dictionary<TKey, TValue> source) : base(source.Co
{
Debug.Assert(source.Count != 0);

_keys = source.Keys.ToArray(source.Count);
_values = source.Values.ToArray(source.Count);
_keys = source.Keys.ToArray();
_values = source.Values.ToArray();
}

private protected override TKey[] KeysCore => _keys;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ internal sealed class SmallFrozenSet<T> : FrozenSetInternalBase<T, SmallFrozenSe

internal SmallFrozenSet(HashSet<T> source) : base(source.Comparer)
{
_items = source.ToArray(source.Count);
_items = source.ToArray();
}

private protected override T[] ItemsCore => _items;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Buffers;
using System.Collections.Generic;
using System.Diagnostics;

namespace System.Collections.Frozen
{
internal static class LengthBuckets
{
/// <summary>The maximum number of items allowed per bucket. The larger the value, the longer it can take to search a bucket, which is sequentially examined.</summary>
internal const int MaxPerLength = 5;
/// <summary>Allowed ratio between buckets with values and total buckets. Under this ratio, this implementation won't be used due to too much wasted space.</summary>
private const double EmptyLengthsRatio = 0.2;

internal static int[]? CreateLengthBucketsArrayIfAppropriate(string[] keys, IEqualityComparer<string> comparer, int minLength, int maxLength)
{
Debug.Assert(comparer == EqualityComparer<string>.Default || comparer == StringComparer.Ordinal || comparer == StringComparer.OrdinalIgnoreCase);
Debug.Assert(minLength >= 0 && maxLength >= minLength);

// If without even looking at the keys we know that some bucket will exceed the max per-bucket
// limit (pigeon hole principle), we can early-exit out without doing any further work.
int spread = maxLength - minLength + 1;
if (keys.Length / spread > MaxPerLength)
{
return null;
}

int arraySize = spread * MaxPerLength;
#if NET6_0_OR_GREATER
if (arraySize > Array.MaxLength)
#else
if (arraySize > 0X7FFFFFC7)
#endif
{
// In the future we may lower the value, as it may be quite unlikely
// to have a LOT of strings of different sizes.
return null;
}

// Instead of creating a dictionary of lists or a multi-dimensional array
// we rent a single dimension array, where every bucket has five slots.
// The bucket starts at (key.Length - minLength) * 5.
// Each value is an index of the key from _keys array
// or just -1, which represents "null".
int[] buckets = ArrayPool<int>.Shared.Rent(arraySize);
buckets.AsSpan(0, arraySize).Fill(-1);

int nonEmptyCount = 0;
for (int i = 0; i < keys.Length; i++)
{
string key = keys[i];
int startIndex = (key.Length - minLength) * MaxPerLength;
int endIndex = startIndex + MaxPerLength;
int index = startIndex;

while (index < endIndex)
{
ref int bucket = ref buckets[index];
if (bucket < 0)
{
if (index == startIndex)
{
nonEmptyCount++;
}

bucket = i;
break;
}

index++;
}

if (index == endIndex)
{
// If we've already hit the max per-bucket limit, bail.
ArrayPool<int>.Shared.Return(buckets);
return null;
}
}

// If there would be too much empty space in the lookup array, bail.
if (nonEmptyCount / (double)spread < EmptyLengthsRatio)
{
ArrayPool<int>.Shared.Return(buckets);
return null;
}

#if NET6_0_OR_GREATER
// We don't need an array with every value initialized to zero if we are just about to overwrite every value anyway.
int[] copy = GC.AllocateUninitializedArray<int>(arraySize);
Array.Copy(buckets, copy, arraySize);
#else
int[] copy = buckets.AsSpan(0, arraySize).ToArray();
#endif
ArrayPool<int>.Shared.Return(buckets);

return copy;
}
}
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Buffers;
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.CompilerServices;
Expand All @@ -11,11 +10,6 @@ namespace System.Collections.Frozen
/// <summary>Provides a frozen dictionary implementation where strings are grouped by their lengths.</summary>
internal sealed class LengthBucketsFrozenDictionary<TValue> : FrozenDictionary<string, TValue>
{
/// <summary>Allowed ratio between buckets with values and total buckets. Under this ratio, this implementation won't be used due to too much wasted space.</summary>
private const double EmptyLengthsRatio = 0.2;
/// <summary>The maximum number of items allowed per bucket. The larger the value, the longer it can take to search a bucket, which is sequentially examined.</summary>
private const int MaxPerLength = 5;

private readonly int[] _lengthBuckets;
private readonly int _minLength;
private readonly string[] _keys;
Expand All @@ -39,87 +33,14 @@ private LengthBucketsFrozenDictionary(
string[] keys, TValue[] values, IEqualityComparer<string> comparer, int minLength, int maxLength)
{
Debug.Assert(keys.Length != 0 && keys.Length == values.Length);
Debug.Assert(comparer == EqualityComparer<string>.Default || comparer == StringComparer.Ordinal || comparer == StringComparer.OrdinalIgnoreCase);
Debug.Assert(minLength >= 0 && maxLength >= minLength);

// If without even looking at the keys we know that some bucket will exceed the max per-bucket
// limit (pigeon hole principle), we can early-exit out without doing any further work.
int spread = maxLength - minLength + 1;
if (keys.Length / spread > MaxPerLength)
int[]? lengthBuckets = LengthBuckets.CreateLengthBucketsArrayIfAppropriate(keys, comparer, minLength, maxLength);
if (lengthBuckets is null)
{
return null;
}

int arraySize = spread * MaxPerLength;
#if NET6_0_OR_GREATER
if (arraySize > Array.MaxLength)
#else
if (arraySize > 0X7FFFFFC7)
#endif
{
// In the future we may lower the value, as it may be quite unlikely
// to have a LOT of strings of different sizes.
return null;
}

// Instead of creating a dictionary of lists or a multi-dimensional array
// we rent a single dimension array, where every bucket has five slots.
// The bucket starts at (key.Length - minLength) * 5.
// Each value is an index of the key from _keys array
// or just -1, which represents "null".
int[] buckets = ArrayPool<int>.Shared.Rent(arraySize);
buckets.AsSpan(0, arraySize).Fill(-1);

int nonEmptyCount = 0;
for (int i = 0; i < keys.Length; i++)
{
string key = keys[i];
int startIndex = (key.Length - minLength) * MaxPerLength;
int endIndex = startIndex + MaxPerLength;
int index = startIndex;

while (index < endIndex)
{
ref int bucket = ref buckets[index];
if (bucket < 0)
{
if (index == startIndex)
{
nonEmptyCount++;
}

bucket = i;
break;
}

index++;
}

if (index == endIndex)
{
// If we've already hit the max per-bucket limit, bail.
ArrayPool<int>.Shared.Return(buckets);
return null;
}
}

// If there would be too much empty space in the lookup array, bail.
if (nonEmptyCount / (double)spread < EmptyLengthsRatio)
{
ArrayPool<int>.Shared.Return(buckets);
return null;
}

#if NET6_0_OR_GREATER
// We don't need an array with every value initialized to zero if we are just about to overwrite every value anyway.
int[] copy = GC.AllocateUninitializedArray<int>(arraySize);
Array.Copy(buckets, copy, arraySize);
#else
int[] copy = buckets.AsSpan(0, arraySize).ToArray();
#endif
ArrayPool<int>.Shared.Return(buckets);

return new LengthBucketsFrozenDictionary<TValue>(keys, values, copy, minLength, comparer);
return new LengthBucketsFrozenDictionary<TValue>(keys, values, lengthBuckets, minLength, comparer);
}

/// <inheritdoc />
Expand All @@ -138,8 +59,8 @@ private LengthBucketsFrozenDictionary(
private protected override ref readonly TValue GetValueRefOrNullRefCore(string key)
{
// If the length doesn't have an associated bucket, the key isn't in the dictionary.
int bucketIndex = (key.Length - _minLength) * MaxPerLength;
int bucketEndIndex = bucketIndex + MaxPerLength;
int bucketIndex = (key.Length - _minLength) * LengthBuckets.MaxPerLength;
int bucketEndIndex = bucketIndex + LengthBuckets.MaxPerLength;
int[] lengthBuckets = _lengthBuckets;
if (bucketIndex >= 0 && bucketEndIndex <= lengthBuckets.Length)
{
Expand Down
Loading

0 comments on commit 76a8f4f

Please sign in to comment.