-
Notifications
You must be signed in to change notification settings - Fork 9.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
core: correctly truncate unicode strings #14911
Changes from all commits
54c9e3f
201d506
7f8a37d
0be3408
d997981
de6a394
2407a0c
da726e9
4203cc9
b709724
14fcf2a
5893c35
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,6 +7,7 @@ | |
import {makeComputedArtifact} from './computed-artifact.js'; | ||
import {ByteEfficiencyAudit} from '../audits/byte-efficiency/byte-efficiency-audit.js'; | ||
import {NetworkRecords} from './network-records.js'; | ||
import {Util} from '../../shared/util.js'; | ||
|
||
const PREVIEW_LENGTH = 100; | ||
|
||
|
@@ -87,8 +88,7 @@ class UnusedCSS { | |
* @return {string} | ||
*/ | ||
static determineContentPreview(content) { | ||
let preview = (content || '') | ||
.slice(0, PREVIEW_LENGTH * 5) | ||
let preview = Util.truncate(content || '', PREVIEW_LENGTH * 5, '') | ||
.replace(/( {2,}|\t)+/g, ' ') // remove leading indentation if present | ||
.replace(/\n\s+}/g, '\n}') // completely remove indentation of closing braces | ||
.trim(); // trim the leading whitespace | ||
|
@@ -101,16 +101,17 @@ class UnusedCSS { | |
firstRuleStart > firstRuleEnd || | ||
firstRuleStart > PREVIEW_LENGTH) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if |
||
// We couldn't determine the first rule-set or it's not within the preview | ||
preview = preview.slice(0, PREVIEW_LENGTH) + '...'; | ||
preview = Util.truncate(preview, PREVIEW_LENGTH); | ||
} else if (firstRuleEnd < PREVIEW_LENGTH) { | ||
// The entire first rule-set fits within the preview | ||
preview = preview.slice(0, firstRuleEnd + 1) + ' ...'; | ||
preview = preview.slice(0, firstRuleEnd + 1) + ' …'; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why not use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because |
||
} else { | ||
// The first rule-set doesn't fit within the preview, just show as many as we can | ||
const lastSemicolonIndex = preview.slice(0, PREVIEW_LENGTH).lastIndexOf(';'); | ||
const truncated = Util.truncate(preview, PREVIEW_LENGTH, ''); | ||
const lastSemicolonIndex = truncated.lastIndexOf(';'); | ||
preview = lastSemicolonIndex < firstRuleStart ? | ||
preview.slice(0, PREVIEW_LENGTH) + '... } ...' : | ||
preview.slice(0, lastSemicolonIndex + 1) + ' ... } ...'; | ||
truncated + '… } …' : | ||
preview.slice(0, lastSemicolonIndex + 1) + ' … } …'; | ||
} | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -138,7 +138,7 @@ class UrlUtils { | |
static elideDataURI(url) { | ||
try { | ||
const parsed = new URL(url); | ||
return parsed.protocol === 'data:' ? url.slice(0, 100) : url; | ||
return parsed.protocol === 'data:' ? Util.truncate(url, 100) : url; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. FWIW data URLs can only contain ASCII characters. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Good point, but at least inclusion of the ellipsis suffix is an improvement here. |
||
} catch (e) { | ||
return url; | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -153,6 +153,39 @@ class Util { | |
return segments; | ||
} | ||
|
||
/** | ||
* Shortens `string` to at most `characterLimit` grapheme clusters, cutting only on | ||
* grapheme boundaries (via `Intl.Segmenter`) so multi-code-unit characters are never split. | ||
* When truncation happens, `ellipseSuffix` is appended and counted against the limit. | ||
* A string that already fits is returned unchanged, without the suffix. | ||
* NOTE(review): the suffix is measured with `ellipseSuffix.length` (UTF-16 code units), so the | ||
* limit is only exact when each suffix character is a single code unit - confirm for custom suffixes. | ||
* | ||
* @param {string} string | ||
* @param {number} characterLimit | ||
* @param {string} ellipseSuffix | ||
* @return {string} | ||
*/ | ||
static truncate(string, characterLimit, ellipseSuffix = '…') { | ||
// Early return: a string with no more UTF-16 code units than the limit cannot exceed it in graphemes either. | ||
if (string.length <= characterLimit) { | ||
connorjclark marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return string; | ||
} | ||
|
||
// Walk the string one grapheme cluster at a time instead of one code unit at a time. | ||
const segmenter = new Intl.Segmenter(undefined, {granularity: 'grapheme'}); | ||
const iterator = segmenter.segment(string)[Symbol.iterator](); | ||
|
||
// Consume (characterLimit - ellipseSuffix.length + 1) graphemes. The start index of the last one | ||
// consumed is the cut point, leaving room for the suffix within the limit. | ||
let lastSegmentIndex = 0; | ||
for (let i = 0; i <= characterLimit - ellipseSuffix.length; i++) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. technically |
||
const result = iterator.next(); | ||
if (result.done) { | ||
// Ran out of graphemes before reaching the cut point: the string already fits. | ||
return string; | ||
} | ||
|
||
lastSegmentIndex = result.value.index; | ||
} | ||
|
||
// Look ahead by the suffix length: if the string ends here it has at most characterLimit | ||
// graphemes, so return it whole rather than swapping its tail for the suffix. | ||
for (let i = 0; i < ellipseSuffix.length; i++) { | ||
if (iterator.next().done) { | ||
return string; | ||
} | ||
} | ||
|
||
// The string is too long: keep everything before the cut point and append the suffix. | ||
return string.slice(0, lastSegmentIndex) + ellipseSuffix; | ||
} | ||
|
||
/** | ||
* @param {URL} parsedUrl | ||
* @param {{numPathParts?: number, preserveQuery?: boolean, preserveHost?: boolean}=} options | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
feels weird that this isn't a direct export