From d77a7588cf4f356955e85d91e638a01120195e6b Mon Sep 17 00:00:00 2001 From: Timothy Gu Date: Fri, 3 Feb 2017 17:34:47 -0800 Subject: [PATCH] url: spec-compliant URLSearchParams serializer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR-URL: https://github.com/nodejs/node/pull/11626 Reviewed-By: Joyee Cheung Reviewed-By: Michaƫl Zasso Reviewed-By: James M Snell Reviewed-By: Daijiro Wachi --- ...cy-vs-whatwg-url-searchparams-serialize.js | 2 +- lib/internal/url.js | 105 ++++++++++++++++-- test/fixtures/url-tests.js | 2 +- test/parallel/test-whatwg-url-constructor.js | 6 +- ...est-whatwg-url-searchparams-constructor.js | 4 +- ...est-whatwg-url-searchparams-stringifier.js | 20 ++-- test/parallel/test-whatwg-url-searchparams.js | 2 +- 7 files changed, 111 insertions(+), 30 deletions(-) diff --git a/benchmark/url/legacy-vs-whatwg-url-searchparams-serialize.js b/benchmark/url/legacy-vs-whatwg-url-searchparams-serialize.js index 7e56b5fba6e4f8..2b8d2c36a810b3 100644 --- a/benchmark/url/legacy-vs-whatwg-url-searchparams-serialize.js +++ b/benchmark/url/legacy-vs-whatwg-url-searchparams-serialize.js @@ -7,7 +7,7 @@ const inputs = require('../fixtures/url-inputs.js').searchParams; const bench = common.createBenchmark(main, { type: Object.keys(inputs), method: ['legacy', 'whatwg'], - n: [1e5] + n: [1e6] }); function useLegacy(n, input, prop) { diff --git a/lib/internal/url.js b/lib/internal/url.js index 4af9e34f91b4cb..005f5b66476752 100644 --- a/lib/internal/url.js +++ b/lib/internal/url.js @@ -1,7 +1,7 @@ 'use strict'; const util = require('util'); -const { StorageObject } = require('internal/querystring'); +const { hexTable, StorageObject } = require('internal/querystring'); const binding = process.binding('url'); const context = Symbol('context'); const cannotBeBase = Symbol('cannot-be-base'); @@ -597,18 +597,99 @@ function getParamsFromObject(obj) { return values; } -function getObjectFromParams(array) { - const obj = new StorageObject(); - for (var i = 0; i < array.length; i += 2) { - const name = array[i]; - const value = array[i + 1]; - if (obj[name]) { - obj[name].push(value); - } else { - obj[name] = [value]; +// Adapted from querystring's implementation. +// Ref: https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer +const noEscape = [ +//0, 1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00 - 0x0F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10 - 0x1F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, // 0x20 - 0x2F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, // 0x30 - 0x3F + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 - 0x4F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, // 0x50 - 0x5F + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 - 0x6F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 // 0x70 - 0x7F +]; + +// Special version of hexTable that uses `+` for U+0020 SPACE. +const paramHexTable = hexTable.slice(); +paramHexTable[0x20] = '+'; + +function escapeParam(str) { + const len = str.length; + if (len === 0) + return ''; + + var out = ''; + var lastPos = 0; + + for (var i = 0; i < len; i++) { + var c = str.charCodeAt(i); + + // ASCII + if (c < 0x80) { + if (noEscape[c] === 1) + continue; + if (lastPos < i) + out += str.slice(lastPos, i); + lastPos = i + 1; + out += paramHexTable[c]; + continue; + } + + if (lastPos < i) + out += str.slice(lastPos, i); + + // Multi-byte characters ... + if (c < 0x800) { + lastPos = i + 1; + out += paramHexTable[0xC0 | (c >> 6)] + + paramHexTable[0x80 | (c & 0x3F)]; + continue; + } + if (c < 0xD800 || c >= 0xE000) { + lastPos = i + 1; + out += paramHexTable[0xE0 | (c >> 12)] + + paramHexTable[0x80 | ((c >> 6) & 0x3F)] + + paramHexTable[0x80 | (c & 0x3F)]; + continue; } + // Surrogate pair + ++i; + var c2; + if (i < len) + c2 = str.charCodeAt(i) & 0x3FF; + else { + // This branch should never happen because all URLSearchParams entries + // should already be converted to USVString. But, included for + // completion's sake anyway. + c2 = 0; + } + lastPos = i + 1; + c = 0x10000 + (((c & 0x3FF) << 10) | c2); + out += paramHexTable[0xF0 | (c >> 18)] + + paramHexTable[0x80 | ((c >> 12) & 0x3F)] + + paramHexTable[0x80 | ((c >> 6) & 0x3F)] + + paramHexTable[0x80 | (c & 0x3F)]; } - return obj; + if (lastPos === 0) + return str; + if (lastPos < len) + return out + str.slice(lastPos); + return out; +} + +// application/x-www-form-urlencoded serializer +// Ref: https://url.spec.whatwg.org/#concept-urlencoded-serializer +function serializeParams(array) { + const len = array.length; + if (len === 0) + return ''; + + var output = `${escapeParam(array[0])}=${escapeParam(array[1])}`; + for (var i = 2; i < len; i += 2) + output += `&${escapeParam(array[i])}=${escapeParam(array[i + 1])}`; + return output; } // Mainly to mitigate func-name-matching ESLint rule @@ -993,7 +1074,7 @@ defineIDLClass(URLSearchParams.prototype, 'URLSearchParams', { throw new TypeError('Value of `this` is not a URLSearchParams'); } - return querystring.stringify(getObjectFromParams(this[searchParams])); + return serializeParams(this[searchParams]); } }); diff --git a/test/fixtures/url-tests.js b/test/fixtures/url-tests.js index 0e510eb366d0f2..a4e7de9f26b199 100644 --- a/test/fixtures/url-tests.js +++ b/test/fixtures/url-tests.js @@ -4639,7 +4639,7 @@ module.exports = "port": "", "pathname": "/foo/bar", "search": "??a=b&c=d", - // "searchParams": "%3Fa=b&c=d", + "searchParams": "%3Fa=b&c=d", "hash": "" }, "# Scheme only", diff --git a/test/parallel/test-whatwg-url-constructor.js b/test/parallel/test-whatwg-url-constructor.js index c5d70b3f4c1544..c2773b9af105fb 100644 --- a/test/parallel/test-whatwg-url-constructor.js +++ b/test/parallel/test-whatwg-url-constructor.js @@ -120,12 +120,12 @@ function runURLSearchParamTests() { // And in the other direction, altering searchParams propagates // back to 'search'. searchParams.append('i', ' j ') - // assert_equals(url.search, '?e=f&g=h&i=+j+') - // assert_equals(url.searchParams.toString(), 'e=f&g=h&i=+j+') + assert_equals(url.search, '?e=f&g=h&i=+j+') + assert_equals(url.searchParams.toString(), 'e=f&g=h&i=+j+') assert_equals(searchParams.get('i'), ' j ') searchParams.set('e', 'updated') - // assert_equals(url.search, '?e=updated&g=h&i=+j+') + assert_equals(url.search, '?e=updated&g=h&i=+j+') assert_equals(searchParams.get('e'), 'updated') var url2 = bURL('http://example.org/file??a=b&c=d') diff --git a/test/parallel/test-whatwg-url-searchparams-constructor.js b/test/parallel/test-whatwg-url-searchparams-constructor.js index 236d01396095f1..da459fe99c7fb8 100644 --- a/test/parallel/test-whatwg-url-searchparams-constructor.js +++ b/test/parallel/test-whatwg-url-searchparams-constructor.js @@ -11,7 +11,7 @@ const { /* eslint-disable */ var params; // Strict mode fix for WPT. /* WPT Refs: - https://github.com/w3c/web-platform-tests/blob/405394a/url/urlsearchparams-constructor.html + https://github.com/w3c/web-platform-tests/blob/e94c604916/url/urlsearchparams-constructor.html License: http://www.w3.org/Consortium/Legal/2008/04-testsuite-copyright.html */ test(function() { @@ -154,7 +154,7 @@ test(function() { }, "Constructor with sequence of sequences of strings"); [ -// { "input": {"+": "%C2"}, "output": [[" ", "\uFFFD"]], "name": "object with +" }, + { "input": {"+": "%C2"}, "output": [["+", "%C2"]], "name": "object with +" }, { "input": {c: "x", a: "?"}, "output": [["c", "x"], ["a", "?"]], "name": "object with two keys" }, { "input": [["c", "x"], ["a", "?"]], "output": [["c", "x"], ["a", "?"]], "name": "array with two keys" } ].forEach((val) => { diff --git a/test/parallel/test-whatwg-url-searchparams-stringifier.js b/test/parallel/test-whatwg-url-searchparams-stringifier.js index 5f751b1c503d5b..ac09979e027b7c 100644 --- a/test/parallel/test-whatwg-url-searchparams-stringifier.js +++ b/test/parallel/test-whatwg-url-searchparams-stringifier.js @@ -10,14 +10,14 @@ const { test, assert_equals } = common.WPT; https://github.com/w3c/web-platform-tests/blob/8791bed/url/urlsearchparams-stringifier.html License: http://www.w3.org/Consortium/Legal/2008/04-testsuite-copyright.html */ -// test(function() { -// var params = new URLSearchParams(); -// params.append('a', 'b c'); -// assert_equals(params + '', 'a=b+c'); -// params.delete('a'); -// params.append('a b', 'c'); -// assert_equals(params + '', 'a+b=c'); -// }, 'Serialize space'); +test(function() { + var params = new URLSearchParams(); + params.append('a', 'b c'); + assert_equals(params + '', 'a=b+c'); + params.delete('a'); + params.append('a b', 'c'); + assert_equals(params + '', 'a+b=c'); +}, 'Serialize space'); test(function() { var params = new URLSearchParams(); @@ -114,8 +114,8 @@ test(function() { var params; params = new URLSearchParams('a=b&c=d&&e&&'); assert_equals(params.toString(), 'a=b&c=d&e='); - // params = new URLSearchParams('a = b &a=b&c=d%20'); - // assert_equals(params.toString(), 'a+=+b+&a=b&c=d+'); + params = new URLSearchParams('a = b &a=b&c=d%20'); + assert_equals(params.toString(), 'a+=+b+&a=b&c=d+'); // The lone '=' _does_ survive the roundtrip. params = new URLSearchParams('a=&a=b'); assert_equals(params.toString(), 'a=&a=b'); diff --git a/test/parallel/test-whatwg-url-searchparams.js b/test/parallel/test-whatwg-url-searchparams.js index e0d1826596704c..7d6df646407269 100644 --- a/test/parallel/test-whatwg-url-searchparams.js +++ b/test/parallel/test-whatwg-url-searchparams.js @@ -7,7 +7,7 @@ const URL = require('url').URL; // Tests below are not from WPT. const serialized = 'a=a&a=1&a=true&a=undefined&a=null&a=%EF%BF%BD' + '&a=%EF%BF%BD&a=%F0%9F%98%80&a=%EF%BF%BD%EF%BF%BD' + - '&a=%5Bobject%20Object%5D'; + '&a=%5Bobject+Object%5D'; const values = ['a', 1, true, undefined, null, '\uD83D', '\uDE00', '\uD83D\uDE00', '\uDE00\uD83D', {}]; const normalizedValues = ['a', '1', 'true', 'undefined', 'null', '\uFFFD',