Skip to content

Commit

Permalink
Optimize HttpUtility.UrlEncodeToBytes for (string, Encoding) overload. (#102805)
Browse files: browse the repository at this point in the history

* Optimize HttpUtility.UrlEncodeToBytes for (string, Encoding) overload.

* feedback

* use SearchValues

* Remove IsUrlSafeChar

* Split UrlEncode to potentially inlinable function

* Address feedback

* remove fallback
  • Loading branch information
TrayanZapryanov authored Jun 3, 2024
1 parent b23dea6 commit e730d83
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 85 deletions.
15 changes: 3 additions & 12 deletions src/libraries/System.Web.HttpUtility/src/System/Web/HttpUtility.cs
Original file line number Diff line number Diff line change
Expand Up @@ -213,22 +213,13 @@ public static NameValueCollection ParseQueryString(string query, Encoding encodi
}

// Decodes a URL-encoded byte array. A null input yields null; otherwise the work is
// delegated to HttpEncoder.UrlDecode.
// NOTE(review): the two declarations below look like the before/after sides of a single
// diff line (the second drops the explicit AsSpan call) - confirm against the commit
// before treating both as live code.
[return: NotNullIfNotNull(nameof(bytes))]
public static byte[]? UrlDecodeToBytes(byte[]? bytes) => bytes == null ? null : HttpEncoder.UrlDecode(bytes.AsSpan(0, bytes.Length));
public static byte[]? UrlDecodeToBytes(byte[]? bytes) => bytes == null ? null : HttpEncoder.UrlDecode(bytes);

// URL-encodes a string into bytes using encoding e. A null string yields null.
// NOTE(review): this span appears to hold both sides of a diff - a block-bodied form that
// calls e.GetBytes and then HttpEncoder.UrlEncode on the byte array, followed by an
// expression-bodied form that hands the string and encoding to HttpEncoder.UrlEncode
// directly. Confirm which one is live before editing.
[return: NotNullIfNotNull(nameof(str))]
public static byte[]? UrlEncodeToBytes(string? str, Encoding e)
{
if (str == null)
{
return null;
}

// Convert to bytes first, then encode the whole array (offset 0, full length).
byte[] bytes = e.GetBytes(str);
return HttpEncoder.UrlEncode(bytes, 0, bytes.Length, alwaysCreateNewReturnValue: false);
}
// Single-expression form: null passes through, everything else goes to HttpEncoder.UrlEncode(str, e).
public static byte[]? UrlEncodeToBytes(string? str, Encoding e) => str == null ? null : HttpEncoder.UrlEncode(str, e);

// URL-encodes count bytes of the array starting at offset by delegating to HttpEncoder.UrlEncode.
// NOTE(review): the two declarations below look like the before/after sides of a single
// diff line; the second no longer passes the alwaysCreateNewReturnValue argument.
// Confirm against the commit which one is live.
[return: NotNullIfNotNull(nameof(bytes))]
public static byte[]? UrlEncodeToBytes(byte[]? bytes, int offset, int count) => HttpEncoder.UrlEncode(bytes, offset, count, alwaysCreateNewReturnValue: true);
public static byte[]? UrlEncodeToBytes(byte[]? bytes, int offset, int count) => HttpEncoder.UrlEncode(bytes, offset, count);

[Obsolete("This method produces non-standards-compliant output and has interoperability issues. The preferred alternative is UrlEncode(String).")]
[return: NotNullIfNotNull(nameof(str))]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Buffers;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Globalization;
Expand All @@ -13,6 +14,12 @@ namespace System.Web.Util
internal static class HttpEncoder
{
private const int MaxStackAllocUrlLength = 256;
// Size, in bytes, of the stack buffers used by UrlDecode(ReadOnlySpan<byte>) and
// UrlEncode(string, Encoding); inputs that may exceed it use a heap array instead.
private const int StackallocThreshold = 512;

// Set of safe chars, from RFC 1738.4 minus '+'
// Built from a UTF-8 literal: the ASCII letters and digits plus '!', '(', ')', '*', '-', '.' and '_'.
// Note that ' ' is not in the set; the encoding routines test for it separately.
private static readonly SearchValues<byte> s_urlSafeBytes = SearchValues.Create(
"!()*-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz"u8);

private static void AppendCharAsUnicodeJavaScript(StringBuilder builder, char c)
{
builder.Append($"\\u{(int)c:x4}");
Expand Down Expand Up @@ -218,8 +225,6 @@ internal static string JavaScriptStringEncode(string? value)

internal static byte[] UrlDecode(ReadOnlySpan<byte> bytes)
{
const int StackallocThreshold = 512;

int decodedBytesCount = 0;
int count = bytes.Length;
Span<byte> decodedBytes = count <= StackallocThreshold ? stackalloc byte[StackallocThreshold] : new byte[count];
Expand Down Expand Up @@ -401,71 +406,40 @@ internal static string UrlDecode(ReadOnlySpan<char> value, Encoding encoding)
}

// Wrapper over the three-argument UrlEncode. When alwaysCreateNewReturnValue is set and the
// encoder handed back the caller's own array instance, a clone is returned instead so the
// caller never receives an alias of its input.
[return: NotNullIfNotNull(nameof(bytes))]
internal static byte[]? UrlEncode(byte[]? bytes, int offset, int count, bool alwaysCreateNewReturnValue)
{
    byte[]? result = UrlEncode(bytes, offset, count);

    // Nothing to protect: either copying was not requested or there is no result at all.
    if (!alwaysCreateNewReturnValue || result == null)
    {
        return result;
    }

    // Reference comparison: only clone when the result IS the input array.
    if (result == bytes)
    {
        return (byte[])result.Clone();
    }

    return result;
}

// URL-encodes count bytes of the array starting at offset.
// Returns null when ValidateUrlEncodingParameters returns false for the arguments.
// NOTE(review): this span appears to interleave two versions of the method - an older
// private one whose body begins by counting spaces and unsafe characters, and a newer
// internal one that forwards the validated window to the span-based overload. The two
// signature lines and the counting loop followed directly by the forwarding return are
// not meant to coexist; confirm against the commit which lines are live.
[return: NotNullIfNotNull(nameof(bytes))]
private static byte[]? UrlEncode(byte[]? bytes, int offset, int count)
internal static byte[]? UrlEncode(byte[]? bytes, int offset, int count)
{
if (!ValidateUrlEncodingParameters(bytes, offset, count))
{
return null;
}

int cSpaces = 0;
int cUnsafe = 0;

// count them first
for (int i = 0; i < count; i++)
{
// Each byte is widened to a char so it can be passed to the char-based safety check.
char ch = (char)bytes[offset + i];

if (ch == ' ')
{
cSpaces++;
}
else if (!HttpEncoderUtility.IsUrlSafeChar(ch))
{
cUnsafe++;
}
}
// Forward only the requested window to the span-based overload.
return UrlEncode(bytes.AsSpan(offset, count));
}

// URL-encodes a span of bytes into an array.
// When NeedsEncoding reports nothing to change, the input is copied out unchanged with
// ToArray; otherwise the (span, cUnsafe) overload performs the expansion.
// NOTE(review): this span appears to interleave old and new lines of a diff. The
// cSpaces/offset/count checks and the BlockCopy branch refer to variables that are not
// declared in this method and look like leftovers of the previous array-based body;
// confirm against the commit which lines are live.
private static byte[] UrlEncode(ReadOnlySpan<byte> bytes)
{
// nothing to expand?
if (cSpaces == 0 && cUnsafe == 0)
if (!NeedsEncoding(bytes, out int cUnsafe))
{
// DevDiv 912606: respect "offset" and "count"
if (0 == offset && bytes.Length == count)
{
return bytes;
}
else
{
byte[] subarray = new byte[count];
Buffer.BlockCopy(bytes, offset, subarray, 0, count);
return subarray;
}
// Span-based form of the same idea: hand back a copy of exactly the bytes received.
return bytes.ToArray();
}

// cUnsafe was filled in by NeedsEncoding and sizes the expanded output.
return UrlEncode(bytes, cUnsafe);
}

private static byte[] UrlEncode(ReadOnlySpan<byte> bytes, int cUnsafe)
{
// expand not 'safe' characters into %XX, spaces to +s
byte[] expandedBytes = new byte[count + cUnsafe * 2];
byte[] expandedBytes = new byte[bytes.Length + cUnsafe * 2];
int pos = 0;

for (int i = 0; i < count; i++)
foreach (byte b in bytes)
{
byte b = bytes[offset + i];
char ch = (char)b;

if (HttpEncoderUtility.IsUrlSafeChar(ch))
if (s_urlSafeBytes.Contains(b))
{
expandedBytes[pos++] = b;
}
else if (ch == ' ')
else if (b == ' ')
{
expandedBytes[pos++] = (byte)'+';
}
Expand All @@ -480,6 +454,43 @@ internal static string UrlDecode(ReadOnlySpan<char> value, Encoding encoding)
return expandedBytes;
}

// Reports whether the span contains any byte outside s_urlSafeBytes (a space counts).
// On a true result, cUnsafe holds the number of bytes that are neither URL-safe nor a
// space; on a false result it is 0.
private static bool NeedsEncoding(ReadOnlySpan<byte> bytes, out int cUnsafe)
{
    cUnsafe = 0;

    // Locate the first byte that is not in the safe set; everything before it is known safe.
    int firstNonSafe = bytes.IndexOfAnyExcept(s_urlSafeBytes);
    if (firstNonSafe >= 0)
    {
        ReadOnlySpan<byte> tail = bytes.Slice(firstNonSafe);
        for (int idx = 0; idx < tail.Length; idx++)
        {
            byte current = tail[idx];

            // Spaces and safe bytes keep their one-byte width, so they are not counted.
            if (current == ' ' || s_urlSafeBytes.Contains(current))
            {
                continue;
            }

            cUnsafe++;
        }

        return true;
    }

    return false;
}

// Converts str to bytes with encoding e and URL-encodes the result.
// When the encoding's worst-case byte count fits in StackallocThreshold, the intermediate
// bytes live in a stack buffer; otherwise a heap array is produced by e.GetBytes and is
// returned as-is when NeedsEncoding reports nothing to change.
internal static byte[] UrlEncode(string str, Encoding e)
{
    bool fitsOnStack = e.GetMaxByteCount(str.Length) <= StackallocThreshold;

    if (!fitsOnStack)
    {
        byte[] heapBytes = e.GetBytes(str);

        if (NeedsEncoding(heapBytes, out int unsafeCount))
        {
            return UrlEncode(heapBytes, unsafeCount);
        }

        // Already URL-safe: the freshly created array can be returned directly.
        return heapBytes;
    }

    Span<byte> scratch = stackalloc byte[StackallocThreshold];
    int written = e.GetBytes(str, scratch);

    // Only the first 'written' bytes of the scratch buffer carry data.
    return UrlEncode(scratch.Slice(0, written));
}

// Helper to encode the non-ASCII url characters only
private static string UrlEncodeNonAscii(string str, Encoding e)
{
Expand Down Expand Up @@ -550,7 +561,7 @@ private static byte[] UrlEncodeNonAscii(byte[] bytes, int offset, int count)

if ((ch & 0xff80) == 0)
{ // 7 bit?
if (HttpEncoderUtility.IsUrlSafeChar(ch))
if (s_urlSafeBytes.Contains((byte)ch))
{
sb.Append(ch);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,29 +8,6 @@ namespace System.Web.Util
{
internal static class HttpEncoderUtility
{
// Set of safe chars, from RFC 1738.4 minus '+'
// True for ASCII letters and digits and for the punctuation - _ . ! * ( ); false otherwise.
public static bool IsUrlSafeChar(char ch) =>
    char.IsAsciiLetterOrDigit(ch)
    || ch is '-' or '_' or '.' or '!' or '*' or '(' or ')';

// Helper to encode spaces only: each ' ' becomes "%20".
// A null input, or a string without any space, is returned unchanged.
[return: NotNullIfNotNull(nameof(str))]
internal static string? UrlEncodeSpaces(string? str)
{
    if (str == null || !str.Contains(' '))
    {
        return str;
    }

    return str.Replace(" ", "%20");
}
Expand Down

0 comments on commit e730d83

Please sign in to comment.