Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize HttpUtility.UrlEncodeToBytes for (string, Encoding) overload. #102805

Merged
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 3 additions & 12 deletions src/libraries/System.Web.HttpUtility/src/System/Web/HttpUtility.cs
Original file line number Diff line number Diff line change
Expand Up @@ -213,22 +213,13 @@ public static NameValueCollection ParseQueryString(string query, Encoding encodi
}

[return: NotNullIfNotNull(nameof(bytes))]
public static byte[]? UrlDecodeToBytes(byte[]? bytes) => bytes == null ? null : HttpEncoder.UrlDecode(bytes.AsSpan(0, bytes.Length));
public static byte[]? UrlDecodeToBytes(byte[]? bytes) => bytes == null ? null : HttpEncoder.UrlDecode(bytes);

[return: NotNullIfNotNull(nameof(str))]
public static byte[]? UrlEncodeToBytes(string? str, Encoding e)
{
if (str == null)
{
return null;
}

byte[] bytes = e.GetBytes(str);
return HttpEncoder.UrlEncode(bytes, 0, bytes.Length, alwaysCreateNewReturnValue: false);
}
public static byte[]? UrlEncodeToBytes(string? str, Encoding e) => str == null ? null : HttpEncoder.UrlEncode(str, e);

[return: NotNullIfNotNull(nameof(bytes))]
public static byte[]? UrlEncodeToBytes(byte[]? bytes, int offset, int count) => HttpEncoder.UrlEncode(bytes, offset, count, alwaysCreateNewReturnValue: true);
public static byte[]? UrlEncodeToBytes(byte[]? bytes, int offset, int count) => HttpEncoder.UrlEncode(bytes, offset, count);

[Obsolete("This method produces non-standards-compliant output and has interoperability issues. The preferred alternative is UrlEncode(String).")]
[return: NotNullIfNotNull(nameof(str))]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,18 +1,22 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Buffers;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Globalization;
using System.IO;
using System.Net;
using System.Runtime.CompilerServices;
using System.Text;

namespace System.Web.Util
{
internal static class HttpEncoder
{
private const int MaxStackAllocUrlLength = 256;
private const int StackallocThreshold = 512;

private static void AppendCharAsUnicodeJavaScript(StringBuilder builder, char c)
{
builder.Append($"\\u{(int)c:x4}");
Expand Down Expand Up @@ -218,8 +222,6 @@ internal static string JavaScriptStringEncode(string? value)

internal static byte[] UrlDecode(ReadOnlySpan<byte> bytes)
{
const int StackallocThreshold = 512;

int decodedBytesCount = 0;
int count = bytes.Length;
Span<byte> decodedBytes = count <= StackallocThreshold ? stackalloc byte[StackallocThreshold] : new byte[count];
Expand Down Expand Up @@ -401,71 +403,40 @@ internal static string UrlDecode(ReadOnlySpan<char> value, Encoding encoding)
}

// URL-encodes the requested range of bytes via the (bytes, offset, count) overload.
// When alwaysCreateNewReturnValue is set and that overload handed back the very same
// array instance that was passed in, a clone is returned instead so the caller never
// receives its own input array.
[return: NotNullIfNotNull(nameof(bytes))]
internal static byte[]? UrlEncode(byte[]? bytes, int offset, int count, bool alwaysCreateNewReturnValue)
{
    byte[]? result = UrlEncode(bytes, offset, count);

    // Nothing to protect: either a copy was not requested, there is no result,
    // or the result is already a different array than the input.
    if (!alwaysCreateNewReturnValue || result is null || !ReferenceEquals(result, bytes))
    {
        return result;
    }

    return (byte[])result.Clone();
}

[return: NotNullIfNotNull(nameof(bytes))]
private static byte[]? UrlEncode(byte[]? bytes, int offset, int count)
internal static byte[]? UrlEncode(byte[]? bytes, int offset, int count)
{
if (!ValidateUrlEncodingParameters(bytes, offset, count))
{
return null;
}

int cSpaces = 0;
int cUnsafe = 0;

// count them first
for (int i = 0; i < count; i++)
{
char ch = (char)bytes[offset + i];

if (ch == ' ')
{
cSpaces++;
}
else if (!HttpEncoderUtility.IsUrlSafeChar(ch))
{
cUnsafe++;
}
}
return UrlEncode(bytes.AsSpan(offset, count));
}

private static byte[] UrlEncode(ReadOnlySpan<byte> bytes)
{
// nothing to expand?
if (cSpaces == 0 && cUnsafe == 0)
if (!NeedsEncoding(bytes, out int cUnsafe))
{
// DevDiv 912606: respect "offset" and "count"
if (0 == offset && bytes.Length == count)
{
return bytes;
}
else
{
byte[] subarray = new byte[count];
Buffer.BlockCopy(bytes, offset, subarray, 0, count);
return subarray;
}
return bytes.ToArray();
}

return UrlEncode(bytes, cUnsafe);
}

private static byte[] UrlEncode(ReadOnlySpan<byte> bytes, int cUnsafe)
{
// expand unsafe characters into %XX, spaces into '+'
byte[] expandedBytes = new byte[count + cUnsafe * 2];
byte[] expandedBytes = new byte[bytes.Length + cUnsafe * 2];
int pos = 0;

for (int i = 0; i < count; i++)
foreach (byte b in bytes)
{
byte b = bytes[offset + i];
char ch = (char)b;

if (HttpEncoderUtility.IsUrlSafeChar(ch))
if (s_urlSafeBytes.Contains(b))
{
expandedBytes[pos++] = b;
}
else if (ch == ' ')
else if (b == 32) // ' '
TrayanZapryanov marked this conversation as resolved.
Show resolved Hide resolved
{
expandedBytes[pos++] = (byte)'+';
}
Expand All @@ -480,6 +451,56 @@ internal static string UrlDecode(ReadOnlySpan<char> value, Encoding encoding)
return expandedBytes;
}

// Set of safe chars, from RFC 1738.4 minus '+'
private static readonly SearchValues<byte> s_urlSafeBytes = SearchValues.Create(
"!()*-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz"u8);
TrayanZapryanov marked this conversation as resolved.
Show resolved Hide resolved
// Reports whether URL encoding would change the given bytes.
// Returns false (cUnsafe == 0) when every byte is contained in s_urlSafeBytes.
// Otherwise returns true and sets cUnsafe to the number of bytes that are neither
// URL-safe nor a space; spaces make the result true but are not counted.
private static bool NeedsEncoding(ReadOnlySpan<byte> bytes, out int cUnsafe)
{
    int firstNotSafe = bytes.IndexOfAnyExcept(s_urlSafeBytes);
    if (firstNotSafe < 0)
    {
        cUnsafe = 0;
        return false;
    }

    // Everything before firstNotSafe is known to be safe, so only scan the tail.
    ReadOnlySpan<byte> tail = bytes.Slice(firstNotSafe);
    int unsafeCount = 0;

    for (int idx = 0; idx < tail.Length; idx++)
    {
        byte current = tail[idx];
        if (current == ' ' || s_urlSafeBytes.Contains(current))
        {
            continue;
        }

        unsafeCount++;
    }

    cUnsafe = unsafeCount;
    return true;
}

// Converts str to bytes using encoding e and URL-encodes the result.
// Short inputs whose worst-case encoded size fits in StackallocThreshold bytes are
// encoded into a stack buffer, so the only heap allocation is the returned array.
// Longer inputs go through Fallback, which encodes into a heap array and returns that
// array directly when nothing needs to be escaped.
internal static byte[] UrlEncode(string str, Encoding e)
{
    // Check the character count first: Encoding.GetMaxByteCount can throw
    // ArgumentOutOfRangeException when the worst-case size overflows int, which would
    // reject very long strings that e.GetBytes(str) is still able to encode.
    if (str.Length <= StackallocThreshold && e.GetMaxByteCount(str.Length) <= StackallocThreshold)
    {
        Span<byte> byteSpan = stackalloc byte[StackallocThreshold];
        int encodedBytes = e.GetBytes(str, byteSpan);

        return UrlEncode(byteSpan.Slice(0, encodedBytes));
    }

    return Fallback(str, e);

    // Kept out of line so the stackalloc path above stays small.
    [MethodImpl(MethodImplOptions.NoInlining)]
    static byte[] Fallback(string str, Encoding e)
    {
        byte[] bytes = e.GetBytes(str);
        if (!NeedsEncoding(bytes, out int cUnsafe))
        {
            // return encoded byte[] if nothing to expand
            return bytes;
        }

        return UrlEncode(bytes, cUnsafe);
    }
}

// Helper to encode the non-ASCII url characters only
private static string UrlEncodeNonAscii(string str, Encoding e)
{
Expand Down Expand Up @@ -550,7 +571,7 @@ private static byte[] UrlEncodeNonAscii(byte[] bytes, int offset, int count)

if ((ch & 0xff80) == 0)
{ // 7 bit?
if (HttpEncoderUtility.IsUrlSafeChar(ch))
if (s_urlSafeBytes.Contains((byte)ch))
{
sb.Append(ch);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,29 +8,6 @@ namespace System.Web.Util
{
internal static class HttpEncoderUtility
{
// Set of safe chars, from RFC 1738.4 minus '+'
// True for ASCII letters and digits and for the punctuation - _ . ! * ( )
public static bool IsUrlSafeChar(char ch) =>
    char.IsAsciiLetterOrDigit(ch)
    || ch is '-' or '_' or '.' or '!' or '*' or '(' or ')';

// Helper to encode spaces only: every ' ' becomes "%20".
// A null input, or an input without any space, is returned unchanged.
[return: NotNullIfNotNull(nameof(str))]
internal static string? UrlEncodeSpaces(string? str)
{
    if (str == null || !str.Contains(' '))
    {
        return str;
    }

    return str.Replace(" ", "%20");
}
Expand Down