Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

core(unminified-javascript): replace esprima with custom tokenizer #5925

Merged
merged 7 commits into from
Sep 27, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 2 additions & 52 deletions lighthouse-core/audits/byte-efficiency/unminified-css.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
const ByteEfficiencyAudit = require('./byte-efficiency-audit');
const UnusedCSSRules = require('./unused-css-rules');
const i18n = require('../../lib/i18n/i18n.js');
const computeTokenLength = require('../../lib/minification-estimator').computeCSSTokenLength;

const UIStrings = {
/** Imperative title of a Lighthouse audit that tells the user to minify (remove whitespace) the page's CSS code. This is displayed in a list of audit titles that Lighthouse generates. */
Expand Down Expand Up @@ -46,58 +47,7 @@ class UnminifiedCSS extends ByteEfficiencyAudit {
* @return {number}
*/
static computeTokenLength(content) {
let totalTokenLength = 0;
let isInComment = false;
let isInLicenseComment = false;
let isInString = false;
let stringOpenChar = null;

for (let i = 0; i < content.length; i++) {
const twoChars = content.substr(i, 2);
const char = twoChars.charAt(0);

const isWhitespace = char === ' ' || char === '\n' || char === '\t';
const isAStringOpenChar = char === `'` || char === '"';

if (isInComment) {
if (isInLicenseComment) totalTokenLength++;

if (twoChars === '*/') {
if (isInLicenseComment) totalTokenLength++;
isInComment = false;
i++;
}
} else if (isInString) {
totalTokenLength++;
if (char === '\\') {
totalTokenLength++;
i++;
} else if (char === stringOpenChar) {
isInString = false;
}
} else {
if (twoChars === '/*') {
isInComment = true;
isInLicenseComment = content.charAt(i + 2) === '!';
if (isInLicenseComment) totalTokenLength += 2;
i++;
} else if (isAStringOpenChar) {
isInString = true;
stringOpenChar = char;
totalTokenLength++;
} else if (!isWhitespace) {
totalTokenLength++;
}
}
}

// If the content contained unbalanced comments, it's either invalid or we had a parsing error.
// Report the token length as the entire string so it will be ignored.
if (isInComment || isInString) {
return content.length;
}

return totalTokenLength;
return computeTokenLength(content);
}

/**
Expand Down
14 changes: 2 additions & 12 deletions lighthouse-core/audits/byte-efficiency/unminified-javascript.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
'use strict';

const ByteEfficiencyAudit = require('./byte-efficiency-audit');
const esprima = require('esprima');
const i18n = require('../../lib/i18n/i18n.js');
const computeTokenLength = require('../../lib/minification-estimator').computeJSTokenLength;

const UIStrings = {
/** Imperative title of a Lighthouse audit that tells the user to minify the page’s JS code to reduce file size. This is displayed in a list of audit titles that Lighthouse generates. */
Expand Down Expand Up @@ -53,17 +53,7 @@ class UnminifiedJavaScript extends ByteEfficiencyAudit {
*/
static computeWaste(scriptContent, networkRecord) {
const contentLength = scriptContent.length;
let totalTokenLength = 0;

/** @type {Array<esprima.Token> & {errors: Error[]}} */
const tokens = (esprima.tokenize(scriptContent, {tolerant: true}));
if (!tokens.length && tokens.errors && tokens.errors.length) {
throw tokens.errors[0];
}

for (const token of tokens) {
totalTokenLength += token.value.length;
}
const totalTokenLength = computeTokenLength(scriptContent);

const totalBytes = ByteEfficiencyAudit.estimateTransferSize(networkRecord, contentLength,
'Script');
Expand Down
162 changes: 162 additions & 0 deletions lighthouse-core/lib/minification-estimator.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
/**
* @license Copyright 2018 Google Inc. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
*/
'use strict';

// https://www.ecma-international.org/ecma-262/9.0/index.html#sec-punctuators
// eslint-disable-next-line max-len
const PUNCTUATOR_REGEX = /(return|{|\(|\[|\.\.\.|;|,|<|>|<=|>=|==|!=|===|!==|\+|-|\*|%|\*\*|\+\+|--|<<|>>|>>>|&|\||\^|!|~|&&|\|\||\?|:|=|\+=|-=|\*=|%=|\*\*=|<<=|>>=|>>>=|&=|\|=|\^=|=>|\/|\/=|\})$/;
const WHITESPACE_REGEX = /( |\n|\t)+$/;

/**
* Look backwards from `startPosition` in `content` for an ECMAScript punctuator.
* This is used to differentiate a RegExp from a divide statement.
* If a punctuator immediately precedes a lone `/`, the `/` must be the start of a RegExp.
*
* @param {string} content
* @param {number} startPosition
*/
function hasPunctuatorBefore(content, startPosition) {
for (let i = startPosition; i > 0; i--) {
// Try to grab at least 6 characters so we can check for `return`
const sliceStart = Math.max(0, i - 6);
const precedingCharacters = content.slice(sliceStart, i);
// Skip over any ending whitespace
if (WHITESPACE_REGEX.test(precedingCharacters)) continue;
// Check if it's a punctuator
return PUNCTUATOR_REGEX.test(precedingCharacters);
}

// The beginning of the content counts too for our purposes.
// i.e. a script can't start with a divide symbol
return true;
}


/**
*
* @param {string} content
* @param {{singlelineComments: boolean, regex: boolean}} features
*/
function computeTokenLength(content, features) {
let totalTokenLength = 0;
let isInSinglelineComment = false;
let isInMultilineComment = false;
let isInLicenseComment = false;
let isInString = false;
let isInRegex = false;
let stringOpenChar = null;

for (let i = 0; i < content.length; i++) {
const twoChars = content.substr(i, 2);
const char = twoChars.charAt(0);

const isWhitespace = char === ' ' || char === '\n' || char === '\t';
const isAStringOpenChar = char === `'` || char === '"' || char === '`';

if (isInSinglelineComment) {
if (char === '\n') {
// End the comment when you hit a newline
isInSinglelineComment = false;
}
} else if (isInMultilineComment) {
// License comments count
if (isInLicenseComment) totalTokenLength++;

if (twoChars === '*/') {
// License comments count, account for the '/' character we're skipping over
if (isInLicenseComment) totalTokenLength++;
// End the comment when we hit the closing sequence
isInMultilineComment = false;
// Skip over the '/' character since we've already processed it
i++;
}
} else if (isInString) {
// String characters count
totalTokenLength++;

if (char === '\\') {
// Skip over any escaped characters
totalTokenLength++;
i++;
} else if (char === stringOpenChar) {
// End the string when we hit the same stringOpenCharacter
isInString = false;
// console.log(i, 'exiting string', stringOpenChar)
}
} else if (isInRegex) {
// Regex characters count
totalTokenLength++;

if (char === '\\') {
// Skip over any escaped characters
totalTokenLength++;
i++;
} else if (char === '/') {
// End the string when we hit the regex close character
isInRegex = false;
// console.log(i, 'leaving regex', char)
}
} else {
// We're not in any particular token mode, look for the start of different
if (twoChars === '/*') {
// Start the multi-line comment
isInMultilineComment = true;
// Check if it's a license comment so we know whether to count it
isInLicenseComment = content.charAt(i + 2) === '!';
// += 2 because we are processing 2 characters, not just 1
if (isInLicenseComment) totalTokenLength += 2;
// Skip over the '*' character since we've already processed it
i++;
} else if (twoChars === '//' && features.singlelineComments) {
// Start the single-line comment
isInSinglelineComment = true;
isInMultilineComment = false;
isInLicenseComment = false;
// Skip over the second '/' character since we've already processed it
i++;
} else if (char === '/' && features.regex && hasPunctuatorBefore(content, i)) {
// Start the regex
isInRegex = true;
// Regex characters count
totalTokenLength++;
} else if (isAStringOpenChar) {
// Start the string
isInString = true;
// Save the open character for later so we know when to close it
stringOpenChar = char;
// String characters count
totalTokenLength++;
} else if (!isWhitespace) {
// All non-whitespace characters count
totalTokenLength++;
}
}
}

// If the content contained unbalanced comments, it's either invalid or we had a parsing error.
// Report the token length as the entire string so it will be ignored.
if (isInMultilineComment || isInString) {
return content.length;
}

return totalTokenLength;
}

/**
* @param {string} content
*/
function computeJSTokenLength(content) {
return computeTokenLength(content, {singlelineComments: true, regex: true});
}

/**
* @param {string} content
*/
function computeCSSTokenLength(content) {
return computeTokenLength(content, {singlelineComments: false, regex: false});
}

module.exports = {computeJSTokenLength, computeCSSTokenLength};
103 changes: 0 additions & 103 deletions lighthouse-core/test/audits/byte-efficiency/unminified-css-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,109 +13,6 @@ const assert = require('assert');

const resourceType = 'Stylesheet';
describe('Page uses optimized css', () => {
describe('#computeTokenLength', () => {
it('should compute length of meaningful content', () => {
const full = `
/*
* a complicated comment
* that is
* several
* lines
*/
.my-class {
/* a simple comment */
width: 100px;
height: 100px;
}
`;

const minified = '.my-class{width:100px;height:100px;}';
assert.equal(UnminifiedCssAudit.computeTokenLength(full), minified.length);
});

it('should handle string edge cases', () => {
const pairs = [
['.my-class { content: "/*"; }', '.my-class{content:"/*";}'],
['.my-class { content: \'/* */\'; }', '.my-class{content:\'/* */\';}'],
['.my-class { content: "/*\\\\a"; }', '.my-class{content:"/*\\\\a";}'],
['.my-class { content: "/*\\"a"; }', '.my-class{content:"/*\\"a";}'],
['.my-class { content: "hello }', '.my-class { content: "hello }'],
['.my-class { content: "hello" }', '.my-class{content:"hello"}'],
];

for (const [full, minified] of pairs) {
assert.equal(
UnminifiedCssAudit.computeTokenLength(full),
minified.length,
`did not handle ${full} properly`
);
}
});

it('should handle comment edge cases', () => {
const full = `
/* here is a cool "string I found" */
.my-class {
content: "/*";
}
`;

const minified = '.my-class{content:"/*";}';
assert.equal(UnminifiedCssAudit.computeTokenLength(full), minified.length);
});

it('should handle license comments', () => {
const full = `
/*!
* @LICENSE
* Apache 2.0
*/
.my-class {
width: 100px;
}
`;

const minified = `/*!
* @LICENSE
* Apache 2.0
*/.my-class{width:100px;}`;
assert.equal(UnminifiedCssAudit.computeTokenLength(full), minified.length);
});

it('should handle unbalanced comments', () => {
const full = `
/*
.my-class {
width: 100px;
}
`;

assert.equal(UnminifiedCssAudit.computeTokenLength(full), full.length);
});

it('should handle data URIs', () => {
const uri = 'data:image/jpeg;base64,asdfadiosgjwiojasfaasd';
const full = `
.my-other-class {
background: data("${uri}");
height: 100px;
}
`;

const minified = `.my-other-class{background:data("${uri}");height:100px;}`;
assert.equal(UnminifiedCssAudit.computeTokenLength(full), minified.length);
});

it('should handle reeally long strings', () => {
let hugeCss = '';
for (let i = 0; i < 10000; i++) {
hugeCss += `.my-class-${i} { width: 100px; height: 100px; }\n`;
}

assert.ok(UnminifiedCssAudit.computeTokenLength(hugeCss) < 0.9 * hugeCss.length);
});
});

it('fails when given unminified stylesheets', () => {
const auditResult = UnminifiedCssAudit.audit_(
{
Expand Down
Loading