Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

unsafe html should be a breaking flaw #3192

Merged
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions build/constants.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ const VALID_FLAW_CHECKS = new Set([
"bad_pre_tags",
"sectioning",
"heading_links",
"unsafe_html",
]);

// TODO (far future): Switch to "error" when number of flaws drops.
Expand Down
198 changes: 132 additions & 66 deletions build/flaws.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,11 @@ const imageminSvgo = require("imagemin-svgo");
const sanitizeFilename = require("sanitize-filename");

const { Archive, Document, Redirect, Image } = require("../content");
const { FLAW_LEVELS } = require("./constants");
const { FLAW_LEVELS, VALID_FLAW_CHECKS } = require("./constants");
const {
INTERACTIVE_EXAMPLES_BASE_URL,
LIVE_SAMPLES_BASE_URL,
} = require("../kumascript/src/constants");
const { packageBCD } = require("./resolve-bcd");
const {
findMatchesInText,
Expand All @@ -22,26 +26,134 @@ const {
const { humanFileSize } = require("./utils");
const { VALID_MIME_TYPES } = require("../filecheck/constants");

function injectFlaws(doc, $, options, { rawContent }) {
function injectFlaws(doc, $, options, document) {
if (doc.isArchive) return;

injectBrokenLinksFlaws(
options.flawLevels.get("broken_links"),
doc,
$,
rawContent
);
const flawChecks = [
["unsafe_html", injectUnsafeHTMLFlaws, false],
["broken_links", injectBrokenLinksFlaws, true],
["bad_bcd_queries", injectBadBCDQueriesFlaws, false],
["bad_pre_tags", injectPreTagFlaws, false],
["heading_links", injectHeadingLinksFlaws, false],
];

escattone marked this conversation as resolved.
Show resolved Hide resolved
// Note that some flaw checking functions need to always run. Even if we're not
// recording the flaws, the checks that it does are important for regular
// building.

for (const [flawName, func, alwaysRun] of flawChecks) {
// Sanity check the list of flaw names that they're all recognized.
// Basically a cheap enum check.
if (!VALID_FLAW_CHECKS.has(flawName)) {
throw new Error(`'${flawName}' is not a valid flaw check name`);
}

injectBadBCDQueriesFlaws(options.flawLevels.get("bad_bcd_queries"), doc, $);
const level = options.flawLevels.get(flawName);
if (!alwaysRun && level === FLAW_LEVELS.IGNORE) {
continue;
}

injectPreTagFlaws(options.flawLevels.get("bad_pre_tags"), doc, $, rawContent);
// The flaw injection function will mutate the `doc.flaws` object.
func(doc, $, document, level);

injectHeadingLinksFlaws(
options.flawLevels.get("heading_links"),
doc,
$,
rawContent
);
if (
level === FLAW_LEVELS.ERROR &&
doc.flaws[flawName] &&
doc.flaws[flawName].length > 0
) {
throw new Error(
`${flawName} flaws: ${doc.flaws[flawName].map((f) => f.explanation)}`
);
}
}
}

function injectUnsafeHTMLFlaws(doc, $, { rawContent }) {
function addFlaw(element, explanation) {
if (!("unsafe_html" in doc.flaws)) {
doc.flaws.unsafe_html = [];
}
const id = `unsafe_html${doc.flaws.unsafe_html.length + 1}`;
let html = $.html($(element));
$(element).replaceWith($("<code>").addClass("unsafe-html").text(html));
// Some nasty tags are so broken they can make the HTML become more or less
// the whole page. E.g. `<script\x20type="text/javascript">`.
if (html.length > 100) {
html = html.slice(0, Math.min(html.indexOf("\n"), 100)) + "…";
}
// Perhaps in the future we can make it possibly fixable to delete it.
const fixable = false;
const suggestion = null;

const flaw = {
explanation,
id,
fixable,
html,
suggestion,
};
for (const { line, column } of findMatchesInText(html, rawContent)) {
// This might not find anything because the HTML might have mutated
// slightly because of how cheerio parses it. But it doesn't hurt to try.
flaw.line = line;
flaw.column = column;
}

doc.flaws.unsafe_html.push(flaw);
}

const safeIFrameSrcs = [
LIVE_SAMPLES_BASE_URL.toLowerCase(),
INTERACTIVE_EXAMPLES_BASE_URL.toLowerCase(),
// EmbedGHLiveSample.ejs
"https://mdn.github.io",
// EmbedYouTube.ejs
"https://www.youtube-nocookie.com",
// JSFiddleEmbed.ejs
"https://jsfiddle.net",
// EmbedTest262ReportResultsTable.ejs
"https://test262.report",
];

$("script, embed, object, iframe").each((i, element) => {
const { tagName } = element;
if (tagName === "iframe") {
// For iframes we only check the 'src' value
const src = $(element).attr("src");
if (!safeIFrameSrcs.find((s) => src.toLowerCase().startsWith(s))) {
addFlaw(element, `Unsafe <iframe> 'src' value (${src})`);
}
} else {
addFlaw(element, `<${tagName}> tag found`);
}
});

$("*").each((i, element) => {
const { tagName } = element;
// E.g. `<script\x20type="text/javascript">javascript:alert(1);</script>`
if (tagName.startsWith("script")) {
addFlaw(element, `possible <script> tag`);
}

const checkValueAttributes = new Set(["style", "href"]);
for (const key in element.attribs) {
// No need to lowercase the `key` because it's already always lowercased
// by cheerio.
// This regex will match on `\xa0onload` and `onmousover` but
// not `fond` or `stompon`.
if (/(\\x[a-f0-9]{2}|\b)on\w+/.test(key)) {
addFlaw(element, `'${key}' on-handler found in ${tagName}`);
} else if (checkValueAttributes.has(key)) {
const value = element.attribs[key];
if (value && /(^|\\x[a-f0-9]{2})javascript:/i.test(value)) {
addFlaw(
element,
`'javascript:' expression found inside 'style' attribute in ${tagName}`
);
}
}
}
});
}

function injectSectionFlaws(doc, flaws, options) {
Expand All @@ -62,7 +174,7 @@ function injectSectionFlaws(doc, flaws, options) {

// The 'broken_links' flaw check looks for internal links that
// link to a document that's going to fail with a 404 Not Found.
function injectBrokenLinksFlaws(level, doc, $, rawContent) {
function injectBrokenLinksFlaws(doc, $, { rawContent }, level) {
// This is needed because the same href can occur multiple time.
// For example:
// <a href="/foo/bar">
Expand Down Expand Up @@ -219,15 +331,6 @@ function injectBrokenLinksFlaws(level, doc, $, rawContent) {
}
}
});
if (
level === FLAW_LEVELS.ERROR &&
doc.flaws.broken_links &&
doc.flaws.broken_links.length
) {
throw new Error(
`broken_links flaws: ${doc.flaws.broken_links.map(JSON.stringify)}`
);
}
}

// Bad BCD queries are when the `<div class="bc-data">` tags have an
Expand All @@ -236,9 +339,7 @@ function injectBrokenLinksFlaws(level, doc, $, rawContent) {
//
// <div class="bc-data" id="bcd:never.ever.heard.of">
//
function injectBadBCDQueriesFlaws(level, doc, $) {
if (level === FLAW_LEVELS.IGNORE) return;

function injectBadBCDQueriesFlaws(doc, $) {
$("div.bc-data").each((i, element) => {
const dataQuery = $(element).attr("id");
if (!dataQuery) {
Expand All @@ -265,22 +366,9 @@ function injectBadBCDQueriesFlaws(level, doc, $) {
}
}
});
if (
level === FLAW_LEVELS.ERROR &&
doc.flaws.bad_bcd_queries &&
doc.flaws.bad_bcd_queries.length
) {
throw new Error(
`bad_bcd_queries flaws: ${doc.flaws.bad_bcd_queries.map(
(f) => f.explanation
)}`
);
}
}

function injectPreTagFlaws(level, doc, $, rawContent) {
if (level === FLAW_LEVELS.IGNORE) return;

function injectPreTagFlaws(doc, $, { rawContent }) {
function escapeHTML(s) {
return s
.replace(/&/g, "&amp;")
Expand Down Expand Up @@ -393,26 +481,14 @@ function injectPreTagFlaws(level, doc, $, rawContent) {
if (noFixablePreTagFlawsYet()) {
// another flaw check here
}

if (
level === FLAW_LEVELS.ERROR &&
doc.flaws.bad_pre_tags &&
doc.flaws.bad_pre_tags.length
) {
throw new Error(
`bad_pre_tags flaws: ${doc.flaws.bad_pre_tags.map(JSON.stringify)}`
);
}
}

// You're not allowed to have `<a>` elements inside `<h2>` or `<h3>` elements
// because those will be rendered out as "links to themselves".
// I.e. a source of `<h2 id="foo">Foo</h2>` renders out as:
// `<h2 id="foo"><a href="#foo">Foo</a></h2>` in the final HTML. That makes
// it easy to (perma)link to specific headings in the document.
function injectHeadingLinksFlaws(level, doc, $, rawContent) {
if (level === FLAW_LEVELS.IGNORE) return;

function injectHeadingLinksFlaws(doc, $, { rawContent }) {
function addFlaw($heading) {
if (!("heading_links" in doc.flaws)) {
doc.flaws.heading_links = [];
Expand Down Expand Up @@ -464,16 +540,6 @@ function injectHeadingLinksFlaws(level, doc, $, rawContent) {
$("h2 a, h3 a").each((i, element) => {
addFlaw($(element).parent());
});

if (
level === FLAW_LEVELS.ERROR &&
doc.flaws.heading_links &&
doc.flaws.heading_links.length
) {
throw new Error(
`heading_links flaws: ${doc.flaws.heading_links.map(JSON.stringify)}`
);
}
}

async function fixFixableFlaws(doc, options, document) {
Expand Down
32 changes: 32 additions & 0 deletions client/src/document/toolbar/flaws.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import {
BadPreTagFlaw,
SectioningFlaw,
HeadingLinksFlaw,
UnsafeHTMLFlaw,
} from "../types";
import "./flaws.scss";

Expand Down Expand Up @@ -271,6 +272,10 @@ function Flaws({
flaws={doc.flaws.heading_links}
/>
);
case "unsafe_html":
return (
<UnsafeHTML key="unsafe_html" flaws={doc.flaws.unsafe_html} />
);
case "sectioning":
return <Sectioning key="sectioning" flaws={doc.flaws.sectioning} />;
default:
Expand Down Expand Up @@ -976,3 +981,30 @@ function HeadingLinks({
</div>
);
}

function UnsafeHTML({ flaws }: { flaws: UnsafeHTMLFlaw[] }) {
// The UI for this flaw can be a bit "simplistic" because by default this
// flaw will error rather than warn.
return (
<div className="flaw">
<h3>⚠️ {humanizeFlawName("unsafe_html")} ⚠️</h3>
<ul>
{flaws.map((flaw, i) => {
const key = flaw.id;
return (
<li key={key}>
<b>{flaw.explanation}</b>{" "}
{flaw.line && flaw.column && (
<>
line {flaw.line}:{flaw.column}
</>
)}{" "}
{flaw.fixable && <FixableFlawBadge />} <br />
<b>HTML:</b> <pre className="example-bad">{flaw.html}</pre> <br />
</li>
);
})}
</ul>
</div>
);
}
7 changes: 7 additions & 0 deletions client/src/document/types.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,12 @@ export interface HeadingLinksFlaw extends GenericFlaw {
column: number | null;
}

export interface UnsafeHTMLFlaw extends GenericFlaw {
html: string;
line: number | null;
column: number | null;
}

export interface MacroErrorMessage extends GenericFlaw {
name: string;
error: {
Expand All @@ -85,6 +91,7 @@ type Flaws = {
sectioning: SectioningFlaw[];
image_widths: ImageWidthFlaw[];
heading_links: HeadingLinksFlaw[];
unsafe_html: UnsafeHTMLFlaw[];
};

export type Translation = {
Expand Down
1 change: 1 addition & 0 deletions client/src/flaw-utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ export function humanizeFlawName(name) {
bad_bcd_queries: "Bad BCD queries",
bad_bcd_links: "Bad BCD links",
bad_pre_tags: "Bad <pre> tags",
unsafe_html: "Unsafe HTML",
};
function fallback() {
return name.charAt(0).toUpperCase() + name.slice(1).replace(/_/g, " ");
Expand Down
16 changes: 0 additions & 16 deletions kumascript/src/api/util.js
Original file line number Diff line number Diff line change
Expand Up @@ -78,22 +78,6 @@ class HTMLTool {
return this;
}

removeOnEventHandlers() {
// Remove ALL on-event handlers.
this.$("*").each((i, e) => {
// Since "e.attribs" is an object with a "null"
// prototype, "key in e.attribs" is equivalent to
// "key of Object.keys(e.attribs)" since we don't
// have to worry about keys from the prototype.
for (const key in e.attribs) {
if (key.startsWith("on")) {
delete e.attribs[key];
}
}
});
return this;
}

injectSectionIDs() {
let idCount = 0;
const $ = this.$;
Expand Down
1 change: 0 additions & 1 deletion kumascript/src/api/wiki.js
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ module.exports = {
// First, we need to inject section ID's since the section
// extraction often depends on them.
tool.injectSectionIDs();
tool.removeOnEventHandlers();
tool.removeNoIncludes();

if (section) {
Expand Down
Loading