Skip to content

Commit

Permalink
core: correctly truncate unicode strings (#14911)
Browse files Browse the repository at this point in the history
  • Loading branch information
connorjclark authored Mar 22, 2023
1 parent 6ee4c1f commit 90797a1
Show file tree
Hide file tree
Showing 11 changed files with 132 additions and 39 deletions.
4 changes: 2 additions & 2 deletions cli/test/smokehouse/test-definitions/byte-efficiency.js
Original file line number Diff line number Diff line change
Expand Up @@ -192,12 +192,12 @@ const expectations = {
},
{
// /some-custom-url.js,
url: 'inline: \n function unusedFunction() {\n // Un...',
url: 'inline: \n function unusedFunction() {\n // U…',
wastedBytes: '6690 +/- 100',
wastedPercent: '99.6 +/- 0.1',
},
{
url: 'inline: \n // Used block #1\n // FILLER DATA JUS...',
url: 'inline: \n // Used block #1\n // FILLER DATA JU…',
wastedBytes: '6569 +/- 100',
wastedPercent: 100,
},
Expand Down
2 changes: 1 addition & 1 deletion core/audits/audit.js
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ class Audit {
const lineNumber = lineIndex + 1;
/** @type LH.Audit.Details.SnippetValue['lines'][0] */
const lineDetail = {
content: line.slice(0, maxLineLength),
content: Util.truncate(line, maxLineLength),
lineNumber,
};
if (line.length > maxLineLength) {
Expand Down
3 changes: 2 additions & 1 deletion core/audits/byte-efficiency/unminified-javascript.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import {ByteEfficiencyAudit} from './byte-efficiency-audit.js';
import * as i18n from '../../lib/i18n/i18n.js';
import {computeJSTokenLength as computeTokenLength} from '../../lib/minification-estimator.js';
import {getRequestForScript, isInline} from '../../lib/script-helpers.js';
import {Util} from '../../../shared/util.js';

const UIStrings = {
/** Imperative title of a Lighthouse audit that tells the user to minify the page’s JS code to reduce file size. This is displayed in a list of audit titles that Lighthouse generates. */
Expand Down Expand Up @@ -84,7 +85,7 @@ class UnminifiedJavaScript extends ByteEfficiencyAudit {
const networkRecord = getRequestForScript(networkRecords, script);

const displayUrl = isInline(script) ?
`inline: ${script.content.substring(0, 40)}...` :
`inline: ${Util.truncate(script.content, 40)}` :
script.url;
try {
const result = UnminifiedJavaScript.computeWaste(script.content, displayUrl, networkRecord);
Expand Down
15 changes: 8 additions & 7 deletions core/computed/unused-css.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import {makeComputedArtifact} from './computed-artifact.js';
import {ByteEfficiencyAudit} from '../audits/byte-efficiency/byte-efficiency-audit.js';
import {NetworkRecords} from './network-records.js';
import {Util} from '../../shared/util.js';

const PREVIEW_LENGTH = 100;

Expand Down Expand Up @@ -87,8 +88,7 @@ class UnusedCSS {
* @return {string}
*/
static determineContentPreview(content) {
let preview = (content || '')
.slice(0, PREVIEW_LENGTH * 5)
let preview = Util.truncate(content || '', PREVIEW_LENGTH * 5, '')
.replace(/( {2,}|\t)+/g, ' ') // remove leading indentation if present
.replace(/\n\s+}/g, '\n}') // completely remove indentation of closing braces
.trim(); // trim the leading whitespace
Expand All @@ -101,16 +101,17 @@ class UnusedCSS {
firstRuleStart > firstRuleEnd ||
firstRuleStart > PREVIEW_LENGTH) {
// We couldn't determine the first rule-set or it's not within the preview
preview = preview.slice(0, PREVIEW_LENGTH) + '...';
preview = Util.truncate(preview, PREVIEW_LENGTH);
} else if (firstRuleEnd < PREVIEW_LENGTH) {
// The entire first rule-set fits within the preview
preview = preview.slice(0, firstRuleEnd + 1) + ' ...';
preview = preview.slice(0, firstRuleEnd + 1) + ' ';
} else {
// The first rule-set doesn't fit within the preview, just show as many as we can
const lastSemicolonIndex = preview.slice(0, PREVIEW_LENGTH).lastIndexOf(';');
const truncated = Util.truncate(preview, PREVIEW_LENGTH, '');
const lastSemicolonIndex = truncated.lastIndexOf(';');
preview = lastSemicolonIndex < firstRuleStart ?
preview.slice(0, PREVIEW_LENGTH) + '... } ...' :
preview.slice(0, lastSemicolonIndex + 1) + ' ... } ...';
truncated + ' } ' :
preview.slice(0, lastSemicolonIndex + 1) + ' } ';
}
}

Expand Down
15 changes: 15 additions & 0 deletions core/gather/driver/prepare.js
Original file line number Diff line number Diff line change
Expand Up @@ -167,10 +167,23 @@ async function prepareTargetForTimespanMode(driver, settings) {
disableThrottling: false,
blockedUrlPatterns: undefined,
});
await warmUpIntlSegmenter(driver);

log.timeEnd(status);
}

/**
* Ensure the `Intl.Segmenter` created in `pageFunctions.truncate` is cached by v8 before
* recording the trace begins.
*
* @param {LH.Gatherer.FRTransitionalDriver} driver
*/
async function warmUpIntlSegmenter(driver) {
await driver.executionContext.evaluate(pageFunctions.truncate, {
args: ['aaa', 2],
});
}

/**
* Prepares a target to be analyzed in navigation mode by enabling protocol domains, emulation, and new document
* handlers for global APIs or error handling.
Expand All @@ -197,6 +210,8 @@ async function prepareTargetForNavigationMode(driver, settings) {
await shimRequestIdleCallbackOnNewDocument(driver, settings);
}

await warmUpIntlSegmenter(driver);

log.timeEnd(status);
}

Expand Down
63 changes: 41 additions & 22 deletions core/lib/page-functions.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
*/

import {Util} from '../../shared/util.js';

/**
* @fileoverview
* Helper functions that are passed by `toString()` by Driver to be evaluated in target page.
Expand Down Expand Up @@ -132,10 +134,9 @@ function getOuterHTMLSnippet(element, ignoreAttrs = [], snippetCharacterLimit =
}

// Elide attribute value if too long.
if (attributeValue.length > ATTRIBUTE_CHAR_LIMIT) {
attributeValue = attributeValue.slice(0, ATTRIBUTE_CHAR_LIMIT - 1) + '…';
dirty = true;
}
const truncatedString = truncate(attributeValue, ATTRIBUTE_CHAR_LIMIT);
if (truncatedString !== attributeValue) dirty = true;
attributeValue = truncatedString;

if (dirty) {
// Style attributes can be blocked by the CSP if they are set via `setAttribute`.
Expand Down Expand Up @@ -372,22 +373,6 @@ function isPositionFixed(element) {
* @return {string | null}
*/
function getNodeLabel(element) {
// Inline so that audits that import getNodeLabel don't
// also need to import truncate
/**
* @param {string} str
* @param {number} maxLength
* @return {string}
*/
function truncate(str, maxLength) {
if (str.length <= maxLength) {
return str;
}
// Take advantage of string iterator multi-byte character awareness.
// Regular `.slice` will ignore unicode character boundaries and lead to malformed text.
return Array.from(str).slice(0, maxLength - 1).join('') + '…';
}

const tagName = element.tagName.toLowerCase();
// html and body content is too broad to be useful, since they contain all page content
if (tagName !== 'html' && tagName !== 'body') {
Expand Down Expand Up @@ -518,14 +503,47 @@ function getNodeDetails(element) {
return details;
}

/**
*
* @param {string} string
* @param {number} characterLimit
* @return {string}
*/
function truncate(string, characterLimit) {
return Util.truncate(string, characterLimit);
}

/** @type {string} */
const truncateRawString = truncate.toString();
truncate.toString = () => `function truncate(string, characterLimit) {
const Util = { ${Util.truncate} };
return (${truncateRawString})(string, characterLimit);
}`;

/** @type {string} */
const getNodeLabelRawString = getNodeLabel.toString();
getNodeLabel.toString = () => `function getNodeLabel(element) {
${truncate};
return (${getNodeLabelRawString})(element);
}`;

/** @type {string} */
const getOuterHTMLSnippetRawString = getOuterHTMLSnippet.toString();
// eslint-disable-next-line max-len
getOuterHTMLSnippet.toString = () => `function getOuterHTMLSnippet(element, ignoreAttrs = [], snippetCharacterLimit = 500) {
${truncate};
return (${getOuterHTMLSnippetRawString})(element, ignoreAttrs, snippetCharacterLimit);
}`;

/** @type {string} */
const getNodeDetailsRawString = getNodeDetails.toString();
getNodeDetails.toString = () => `function getNodeDetails(element) {
${truncate};
${getNodePath};
${getNodeSelector};
${getBoundingClientRect};
${getOuterHTMLSnippet};
${getNodeLabel};
${getOuterHTMLSnippetRawString};
${getNodeLabelRawString};
return (${getNodeDetailsRawString})(element);
}`;

Expand All @@ -541,4 +559,5 @@ export const pageFunctions = {
isPositionFixed,
wrapRequestIdleCallback,
getBoundingClientRect,
truncate,
};
2 changes: 1 addition & 1 deletion core/lib/url-utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ class UrlUtils {
static elideDataURI(url) {
try {
const parsed = new URL(url);
return parsed.protocol === 'data:' ? url.slice(0, 100) : url;
return parsed.protocol === 'data:' ? Util.truncate(url, 100) : url;
} catch (e) {
return url;
}
Expand Down
10 changes: 6 additions & 4 deletions core/test/computed/unused-css-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

import assert from 'assert/strict';

import {expect} from 'expect';

import {UnusedCSS} from '../../computed/unused-css.js';

describe('UnusedCSS computed artifact', () => {
Expand All @@ -22,7 +24,7 @@ describe('UnusedCSS computed artifact', () => {
describe('#determineContentPreview', () => {
function assertLinesContained(actual, expected) {
expected.split('\n').forEach(line => {
assert.ok(actual.includes(line.trim()), `${line} is found in preview`);
expect(actual).toContain(line.trim());
});
}

Expand Down Expand Up @@ -50,7 +52,7 @@ describe('UnusedCSS computed artifact', () => {
assertLinesContained(preview(longContent), `
body {
color: white;
} ...
}
`.trim());
});

Expand All @@ -66,7 +68,7 @@ describe('UnusedCSS computed artifact', () => {
assertLinesContained(preview(longContent), `
body {
color: white;
font-size: 20px; ... } ...
font-size: 20px; }
`.trim());
});

Expand All @@ -77,7 +79,7 @@ describe('UnusedCSS computed artifact', () => {
*/
`.trim();

assert.ok(/aaa\.\.\./.test(preview(longContent)));
assert.ok(/aaa/.test(preview(longContent)));
});
});

Expand Down
22 changes: 22 additions & 0 deletions shared/test/util-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -211,4 +211,26 @@ describe('util helpers', () => {
]);
});
});

describe('truncate', () => {
it('truncates based on visual characters', () => {
expect(Util.truncate('aaa', 30)).toEqual('aaa');
expect(Util.truncate('aaa', 3)).toEqual('aaa');
expect(Util.truncate('aaa', 2)).toEqual('a…');
expect(Util.truncate('aaa🥳', 4)).toEqual('aaa🥳');
expect(Util.truncate('aaa🥳', 3)).toEqual('aa…');
expect(Util.truncate('aaa👨‍👨‍👦‍👦', 4)).toEqual('aaa👨‍👨‍👦‍👦');
expect(Util.truncate('aaa👨‍👨‍👦‍👦', 3)).toEqual('aa…');
expect(Util.truncate('देवनागरी', 5)).toEqual('देवनागरी');
expect(Util.truncate('देवनागरी', 4)).toEqual('देवना…');

expect(Util.truncate('aaa', 3, '')).toEqual('aaa');
expect(Util.truncate('aaa', 2, '')).toEqual('aa');

expect(Util.truncate('aaaaa', 5, '...')).toEqual('aaaaa');
expect(Util.truncate('aaaaa', 4, '...')).toEqual('a...');
expect(Util.truncate('aaaaa', 3, '...')).toEqual('...');
expect(Util.truncate('aaaaa', 1, '...')).toEqual('...');
});
});
});
2 changes: 1 addition & 1 deletion shared/tsconfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"extends": "../tsconfig-base.json",
"compilerOptions": {
// Limit defs to base JS.
"lib": ["es2020"],
"lib": ["es2020", "es2022.intl"],
// Only include `@types/node` from node_modules/.
"types": ["node"],
// "listFiles": true,
Expand Down
33 changes: 33 additions & 0 deletions shared/util.js
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,39 @@ class Util {
return segments;
}

/**
* @param {string} string
* @param {number} characterLimit
* @param {string} ellipseSuffix
*/
static truncate(string, characterLimit, ellipseSuffix = '…') {
// Early return for the case where there are fewer bytes than the character limit.
if (string.length <= characterLimit) {
return string;
}

const segmenter = new Intl.Segmenter(undefined, {granularity: 'grapheme'});
const iterator = segmenter.segment(string)[Symbol.iterator]();

let lastSegmentIndex = 0;
for (let i = 0; i <= characterLimit - ellipseSuffix.length; i++) {
const result = iterator.next();
if (result.done) {
return string;
}

lastSegmentIndex = result.value.index;
}

for (let i = 0; i < ellipseSuffix.length; i++) {
if (iterator.next().done) {
return string;
}
}

return string.slice(0, lastSegmentIndex) + ellipseSuffix;
}

/**
* @param {URL} parsedUrl
* @param {{numPathParts?: number, preserveQuery?: boolean, preserveHost?: boolean}=} options
Expand Down

0 comments on commit 90797a1

Please sign in to comment.