Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Spanify some Utf8String and Utf8StringBuilder use #102101

Merged
merged 26 commits into from
May 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
56490e8
Address TODOs
PaulusParssinen May 10, 2024
ea1e855
Make Utf8String readonly
PaulusParssinen May 10, 2024
bc4e50e
Make Utf8StringBuilder.Append do two Encoding.UTF8 calls instead of many
PaulusParssinen May 10, 2024
ca24552
Use UTF8 literals for the appended constant strings in ILC
PaulusParssinen May 10, 2024
8d3e0db
Use UTF8 literals for the appended constant strings in R2R
PaulusParssinen May 10, 2024
7431393
Use pattern match in Utf8String.Equals(object)
PaulusParssinen May 10, 2024
bd9e6ef
Use SequenceEquals in Utf8String.Equals(Utf8String)
PaulusParssinen May 10, 2024
cb0eb90
Use CommonPrefixLength in Utf8String.Compare(Utf8String, Utf8String)
PaulusParssinen May 10, 2024
2ed3a64
Remove unused Utf8StringBuilder.LastCharBoundary
PaulusParssinen May 10, 2024
ace1494
Remove Utf8StringBuilder.UnderlyingArray
PaulusParssinen May 11, 2024
0002c62
Revert "Remove Utf8StringBuilder.UnderlyingArray"
PaulusParssinen May 11, 2024
fe86998
Use SequenceCompareTo
PaulusParssinen May 11, 2024
51b63e3
UnderlyingArray -> AsSpan()
PaulusParssinen May 11, 2024
bafd05f
Only return filled-portion of the buffer in Utf8StringBuilder.AsSpan
PaulusParssinen May 11, 2024
77a600d
Write null-terminator for the augmentationString.
PaulusParssinen May 11, 2024
2e050a3
Utf8StringBuilder.Append(char) -> Utf8StringBuilder.Append(byte)
PaulusParssinen May 11, 2024
5dd1cbf
Remove unnecessary cast
PaulusParssinen May 11, 2024
19106c7
Update src/coreclr/tools/Common/Internal/Text/Utf8StringBuilder.cs
jkotas May 11, 2024
00d4e54
Update src/coreclr/tools/Common/Internal/Text/Utf8StringBuilder.cs
jkotas May 11, 2024
8a97513
Use UTF8 literals for single characters too
PaulusParssinen May 11, 2024
bb4477b
Revert "Use UTF8 literals for single characters too"
PaulusParssinen May 11, 2024
7838413
Revert "Remove unnecessary cast"
PaulusParssinen May 11, 2024
1f80778
Revert "Utf8StringBuilder.Append(char) -> Utf8StringBuilder.Append(by…
PaulusParssinen May 11, 2024
b7c0fdb
Add Ascii.IsValid assert to Utf8StringBuilder.Append
PaulusParssinen May 11, 2024
ae917bc
Merge branch 'main' into spanify-internal-text-utf8string
jkotas May 11, 2024
b007b64
Merge branch 'main' into spanify-internal-text-utf8string
MichalStrehovsky May 13, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/coreclr/tools/Common/Compiler/NativeAotNameMangler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@ public override Utf8String GetMangledMethodName(MethodDesc method)

Utf8StringBuilder sb = new Utf8StringBuilder();
sb.Append(GetMangledTypeName(method.OwningType));
sb.Append("__");
sb.Append("__"u8);
sb.Append(GetUnqualifiedMangledMethodName(method));
utf8MangledName = sb.ToUtf8String();

Expand Down Expand Up @@ -391,7 +391,7 @@ private Utf8String GetPrefixMangledSignatureName(IPrefixMangledSignature prefixM

for (int i = 0; i < signature.Length; i++)
{
sb.Append("__");
sb.Append("__"u8);
string sigArgName = GetMangledTypeName(signature[i]);
sb.Append(sigArgName);
}
Expand Down Expand Up @@ -451,7 +451,7 @@ private Utf8String ComputeUnqualifiedMangledMethodName(MethodDesc method)
{
string instArgName = GetMangledTypeName(inst[i]);
if (i > 0)
sb.Append("__");
sb.Append("__"u8);
sb.Append(instArgName);
}

Expand Down
69 changes: 7 additions & 62 deletions src/coreclr/tools/Common/Internal/Text/Utf8String.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@

namespace Internal.Text
{
public struct Utf8String : IEquatable<Utf8String>, IComparable<Utf8String>
public readonly struct Utf8String : IEquatable<Utf8String>, IComparable<Utf8String>
{
private byte[] _value;
private readonly byte[] _value;

public Utf8String(byte[] underlyingArray)
{
Expand All @@ -21,8 +21,6 @@ public Utf8String(string s)
_value = Encoding.UTF8.GetBytes(s);
}

// TODO: This should return ReadOnlySpan<byte> instead once available
public byte[] UnderlyingArray => _value;
public int Length => _value.Length;

// For now, define implicit conversions between string and Utf8String to aid the transition
Expand All @@ -32,14 +30,16 @@ public static implicit operator Utf8String(string s)
return new Utf8String(s);
}

public ReadOnlySpan<byte> AsSpan() => _value;

public override string ToString()
{
return Encoding.UTF8.GetString(_value);
}

public override bool Equals(object obj)
{
return (obj is Utf8String) && Equals((Utf8String)obj);
return (obj is Utf8String utf8String) && Equals(utf8String);
}

public override unsafe int GetHashCode()
Expand Down Expand Up @@ -72,67 +72,12 @@ public override unsafe int GetHashCode()

public bool Equals(Utf8String other)
{
int length = _value.Length;
if (length != other.Length)
return false;

if (_value == other._value)
return true;

unsafe
{
fixed (byte* ap = _value) fixed (byte* bp = other._value)
{
byte* a = ap;
byte* b = bp;

while (length >= 4)
{
if (*(int*)a != *(int*)b) return false;
a += 4; b += 4; length -= 4;
}
if (length >= 2)
{
if (*(short*)a != *(short*)b) return false;
a += 2; b += 2; length -= 2;
}
if (length > 0)
{
if (*a != *b) return false;
}
return true;
}
}
return AsSpan().SequenceEqual(other.AsSpan());
}

private static int Compare(Utf8String strA, Utf8String strB)
{
int length = Math.Min(strA.Length, strB.Length);

unsafe
{
fixed (byte* ap = strA._value)
fixed (byte* bp = strB._value)
{
byte* a = ap;
byte* b = bp;

while (length > 0)
{
if (*a != *b)
return *a - *b;
a += 1;
b += 1;
length -= 1;
}

// At this point, we have compared all the characters in at least one string.
// The longer string will be larger.
// We could optimize and compare lengths before iterating strings, but we want
// Foo and Foo1 to be sorted adjacent to eachother.
return strA.Length - strB.Length;
}
}
return strA.AsSpan().SequenceCompareTo(strB.AsSpan());
}

public int CompareTo(Utf8String other)
Expand Down
79 changes: 13 additions & 66 deletions src/coreclr/tools/Common/Internal/Text/Utf8StringBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ public Utf8StringBuilder()
{
}

// TODO: This should return ReadOnlySpan<byte> instead once available
public byte[] UnderlyingArray => _buffer;
public int Length => _length;

public ReadOnlySpan<byte> AsSpan() => _buffer.AsSpan(0, _length);
PaulusParssinen marked this conversation as resolved.
Show resolved Hide resolved

public Utf8StringBuilder Clear()
{
_length = 0;
Expand All @@ -35,39 +35,33 @@ public Utf8StringBuilder Truncate(int newLength)

public Utf8StringBuilder Append(Utf8String value)
{
return Append(value.UnderlyingArray);
return Append(value.AsSpan());
}

public Utf8StringBuilder Append(byte[] value)
public Utf8StringBuilder Append(ReadOnlySpan<byte> value)
{
Ensure(value.Length);
Buffer.BlockCopy(value, 0, _buffer, _length, value.Length);
value.CopyTo(_buffer.AsSpan(_length));
_length += value.Length;
return this;
}

public Utf8StringBuilder Append(char value)
{
Debug.Assert(Ascii.IsValid(value));

Ensure(1);
if (value > 0x7F)
return Append(Encoding.UTF8.GetBytes(new char[] { value }));
_buffer[_length++] = (byte)value;
return this;
}

public Utf8StringBuilder Append(string value)
PaulusParssinen marked this conversation as resolved.
Show resolved Hide resolved
{
Ensure(value.Length);
int length = Encoding.UTF8.GetByteCount(value);
Ensure(length);

byte[] buffer = _buffer;
for (int i = 0; i < value.Length; i++)
{
char c = value[i];
if (c > 0x7F)
return Append(Encoding.UTF8.GetBytes(value));
buffer[_length+i] = (byte)c;
}
_length += value.Length;
Encoding.UTF8.GetBytes(value, _buffer.AsSpan(_length));
_length += length;

return this;
}
Expand All @@ -84,9 +78,7 @@ public string ToString(int start)

public Utf8String ToUtf8String()
{
var ret = new byte[_length];
Buffer.BlockCopy(_buffer, 0, ret, 0, _length);
return new Utf8String(ret);
return new Utf8String(AsSpan().ToArray());
}

private void Ensure(int extraSpace)
Expand All @@ -99,53 +91,8 @@ private void Grow(int extraSpace)
{
int newSize = Math.Max(2 * _buffer.Length, _length + extraSpace);
byte[] newBuffer = new byte[newSize];
Buffer.BlockCopy(_buffer, 0, newBuffer, 0, _length);
AsSpan().CopyTo(newBuffer);
_buffer = newBuffer;
}

// Find the boundary of the last character prior to a position
// If pos points to the last byte of a char, then return pos; Otherwise,
// return the position of the last byte of the preceding char.
public int LastCharBoundary(int pos)
{
Debug.Assert(pos < _length);

if (_buffer[pos] < 128 /*10000000*/)
{
// This is a single byte character
return pos;
}

int origPos = pos;

// Skip following bytes of a multi-byte character until the first byte is seen
while (_buffer[pos] < 192 /*11000000*/)
{
pos--;
}

if (pos == origPos - 3)
{
// We just skipped a four-byte character
Debug.Assert(_buffer[pos] >= 240 /*11110000*/);
return origPos;
}

if (pos == origPos - 2 && _buffer[pos] < 240 && _buffer[pos] >= 224 /*11100000*/)
{
// We just skipped a three-byte character
return origPos;
}

if (pos == origPos - 1 && _buffer[pos] < 224)
{
// We just skipped a two-byte character
Debug.Assert(_buffer[pos] >= 192 /*11000000*/);
return origPos;
}

// We were in the middle of a multi-byte character
return pos - 1;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -306,20 +306,20 @@ public static DelegateCreationInfo Create(TypeDesc delegateType, MethodDesc targ

public void AppendMangledName(NameMangler nameMangler, Utf8StringBuilder sb)
{
sb.Append("__DelegateCtor_");
sb.Append("__DelegateCtor_"u8);
if (TargetNeedsVTableLookup)
sb.Append("FromVtbl_");
sb.Append("FromVtbl_"u8);
Constructor.AppendMangledName(nameMangler, sb);
sb.Append("__");
sb.Append("__"u8);
sb.Append(nameMangler.GetMangledMethodName(_targetMethod));
if (_constrainedType != null)
{
sb.Append("__");
sb.Append("__"u8);
nameMangler.GetMangledTypeName(_constrainedType);
}
if (Thunk != null)
{
sb.Append("__");
sb.Append("__"u8);
Thunk.AppendMangledName(nameMangler, sb);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ public ArrayMapNode(ExternalReferencesTableNode externalReferences)

public void AppendMangledName(NameMangler nameMangler, Utf8StringBuilder sb)
{
sb.Append(nameMangler.CompilationUnitPrefix).Append("__array_type_map");
sb.Append(nameMangler.CompilationUnitPrefix).Append("__array_type_map"u8);
}
public int Offset => 0;
public override bool IsShareable => false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ public EmbeddedPointerIndirectionWithSymbolNode(ArrayOfEmbeddedPointersNode<TTar

public override void AppendMangledName(NameMangler nameMangler, Utf8StringBuilder sb)
{
sb.Append(nameMangler.CompilationUnitPrefix).Append(_parentNode._startSymbolMangledName).Append("_").Append(_id.ToStringInvariant());
sb.Append(nameMangler.CompilationUnitPrefix).Append(_parentNode._startSymbolMangledName).Append("_"u8).Append(_id.ToStringInvariant());
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public class ArrayOfFrozenObjectsNode : DehydratableObjectNode, ISymbolDefinitio
int INodeWithSize.Size => _size.Value;

public void AppendMangledName(NameMangler nameMangler, Utf8StringBuilder sb)
=> sb.Append(nameMangler.CompilationUnitPrefix).Append("__FrozenSegmentStart");
=> sb.Append(nameMangler.CompilationUnitPrefix).Append("__FrozenSegmentStart"u8);

public int Offset => 0;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ public ByRefTypeMapNode(ExternalReferencesTableNode externalReferences)

public void AppendMangledName(NameMangler nameMangler, Utf8StringBuilder sb)
{
sb.Append(nameMangler.CompilationUnitPrefix).Append("__byref_type_map");
sb.Append(nameMangler.CompilationUnitPrefix).Append("__byref_type_map"u8);
}
public int Offset => 0;
public override bool IsShareable => false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ public ClassConstructorContextMap(ExternalReferencesTableNode externalReferences

public void AppendMangledName(NameMangler nameMangler, Utf8StringBuilder sb)
{
sb.Append(nameMangler.CompilationUnitPrefix).Append("__type_to_cctorContext_map");
sb.Append(nameMangler.CompilationUnitPrefix).Append("__type_to_cctorContext_map"u8);
}
public int Offset => 0;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ internal sealed class DehydratedDataNode : ObjectNode, ISymbolDefinitionNode, IN

public void AppendMangledName(NameMangler nameMangler, Utf8StringBuilder sb)
{
sb.Append(nameMangler.CompilationUnitPrefix).Append("__dehydrated_data");
sb.Append(nameMangler.CompilationUnitPrefix).Append("__dehydrated_data"u8);
}

protected override string GetName(NodeFactory factory) => this.GetMangledName(factory.NameMangler);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public DelegateMarshallingStubMapNode(ExternalReferencesTableNode externalRefere

public void AppendMangledName(NameMangler nameMangler, Utf8StringBuilder sb)
{
sb.Append(nameMangler.CompilationUnitPrefix).Append("__delegate_marshalling_stub_map");
sb.Append(nameMangler.CompilationUnitPrefix).Append("__delegate_marshalling_stub_map"u8);
}
public int Offset => 0;
public override bool IsShareable => false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1469,7 +1469,7 @@ public override ObjectNodeSection GetSection(NodeFactory factory)

public override bool StaticDependenciesAreComputed => true;
public void AppendMangledName(NameMangler nameMangler, Utf8StringBuilder sb)
=> sb.Append("__writableData").Append(nameMangler.GetMangledTypeName(_type.Type));
=> sb.Append("__writableData"u8).Append(nameMangler.GetMangledTypeName(_type.Type));
public int Offset => 0;
public override bool IsShareable => true;
public override bool ShouldSkipEmittingObjectNode(NodeFactory factory) => _type.ShouldSkipEmittingObjectNode(factory);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ public override ObjectNodeSection GetSection(NodeFactory factory)

public void AppendMangledName(NameMangler nameMangler, Utf8StringBuilder sb)
{
sb.Append("__optionalfields_");
sb.Append("__optionalfields_"u8);
_owner.AppendMangledName(nameMangler, sb);
}
public int Offset => 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ public override void EncodeData(ref ObjectDataBuilder dataBuilder, NodeFactory f

public virtual void AppendMangledName(NameMangler nameMangler, Utf8StringBuilder sb)
{
sb.Append("_embedded_ptr_");
sb.Append("_embedded_ptr_"u8);
Target.AppendMangledName(nameMangler, sb);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ public ExactMethodInstantiationsNode(ExternalReferencesTableNode externalReferen

public void AppendMangledName(NameMangler nameMangler, Utf8StringBuilder sb)
{
sb.Append(nameMangler.CompilationUnitPrefix).Append("__exact_method_instantiations");
sb.Append(nameMangler.CompilationUnitPrefix).Append("__exact_method_instantiations"u8);
}

int INodeWithSize.Size => _size.Value;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ public FrozenRuntimeTypeNode(TypeDesc type, bool constructed)

public override void AppendMangledName(NameMangler nameMangler, Utf8StringBuilder sb)
{
sb.Append(nameMangler.CompilationUnitPrefix).Append("__RuntimeType_").Append(nameMangler.GetMangledTypeName(_type));
sb.Append(nameMangler.CompilationUnitPrefix).Append("__RuntimeType_"u8).Append(nameMangler.GetMangledTypeName(_type));
}

protected override int ContentSize => ObjectType.InstanceByteCount.AsInt;
Expand Down
Loading