diff --git a/package-lock.json b/package-lock.json index 209d32a5..b01ad423 100644 --- a/package-lock.json +++ b/package-lock.json @@ -229,6 +229,11 @@ "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.0.tgz", "integrity": "sha1-ibTRmasr7kneFk6gK4nORi1xt2c=" }, + "bcp-47-match": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/bcp-47-match/-/bcp-47-match-1.0.2.tgz", + "integrity": "sha512-LugfCkkRdq/h8vVcQjhKxgm+c84AKHMvyNkFyy/jxCnTsEqj2lULoEm9ooUWbwWDwL2rok9GoQXMu7iETJO+uw==" + }, "bcrypt-pbkdf": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/bcrypt-pbkdf/-/bcrypt-pbkdf-1.0.2.tgz", @@ -237,6 +242,11 @@ "tweetnacl": "^0.14.3" } }, + "boolbase": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", + "integrity": "sha1-aN/1++YMUes3cl6p4+0xDcwed24=" + }, "brace-expansion": { "version": "1.1.11", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", @@ -544,6 +554,11 @@ } } }, + "css-selector-parser": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/css-selector-parser/-/css-selector-parser-1.3.0.tgz", + "integrity": "sha1-XxrUPi2O77/cME/NOaUhZklD4+s=" + }, "cssom": { "version": "0.4.4", "resolved": "https://registry.npmjs.org/cssom/-/cssom-0.4.4.tgz", @@ -620,6 +635,11 @@ "repeat-string": "^1.5.4" } }, + "direction": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/direction/-/direction-1.0.4.tgz", + "integrity": "sha512-GYqKi1aH7PJXxdhTeZBFrg8vUBeKXi+cNprXsC1kpJcbcVnV9wBsrOu1cQEdG0WeQwlfHiy3XvnKfIrJ2R0NzQ==" + }, "doctrine": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/doctrine/-/doctrine-3.0.0.tgz", @@ -1087,6 +1107,11 @@ "write": "1.0.3" } }, + "flatmap": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/flatmap/-/flatmap-0.0.3.tgz", + "integrity": "sha1-Hxik2TgVLUlZZfnJWNkjqy3WabQ=" + }, "flatted": { "version": "2.0.1", "resolved": 
"https://registry.npmjs.org/flatted/-/flatted-2.0.1.tgz", @@ -1348,6 +1373,37 @@ "zwitch": "^1.0.0" } }, + "hast-util-select": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/hast-util-select/-/hast-util-select-3.0.1.tgz", + "integrity": "sha512-at6Tl9T5PwfcGrKar38Kw+GjQueuq1bj4dFV1x9HT8c8aw0WpinjTiPAbvBmsYRhHlOZJxOqCRWszZFUaJ0M/A==", + "requires": { + "bcp-47-match": "^1.0.0", + "comma-separated-tokens": "^1.0.2", + "css-selector-parser": "^1.3.0", + "direction": "^1.0.2", + "hast-util-has-property": "^1.0.0", + "hast-util-is-element": "^1.0.0", + "hast-util-to-string": "^1.0.1", + "hast-util-whitespace": "^1.0.0", + "not": "^0.1.0", + "nth-check": "^1.0.1", + "property-information": "^5.0.0", + "space-separated-tokens": "^1.1.0", + "unist-util-visit": "^1.3.1", + "zwitch": "^1.0.0" + }, + "dependencies": { + "unist-util-visit": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/unist-util-visit/-/unist-util-visit-1.4.1.tgz", + "integrity": "sha512-AvGNk7Bb//EmJZyhtRUnNMEpId/AZ5Ph/KUpTI09WHQuDZHKovQ1oEv3mfmKpWKtoMzyMC4GLBm1Zy5k12fjIw==", + "requires": { + "unist-util-visit-parents": "^2.0.0" + } + } + } + }, "hast-util-to-html": { "version": "6.0.2", "resolved": "https://registry.npmjs.org/hast-util-to-html/-/hast-util-to-html-6.0.2.tgz", @@ -1436,6 +1492,11 @@ "zwitch": "^1.0.0" } }, + "hast-util-to-string": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/hast-util-to-string/-/hast-util-to-string-1.0.2.tgz", + "integrity": "sha512-fQNr0n5KJmZW1TmBfXbc4DO0ucZmseUw3T6K4PDsUUTMtTGGLZMUYRB8mOKgPgtw7rtICdxxpRQZmWwo8KxlOA==" + }, "hast-util-to-text": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/hast-util-to-text/-/hast-util-to-text-1.0.1.tgz", @@ -2153,6 +2214,19 @@ "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.0.tgz", "integrity": "sha1-5jNFY4bUqlWGP2dqerDaqP3ssP0=" }, + "not": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/not/-/not-0.1.0.tgz", + "integrity": 
"sha1-yWkcF0bFXc++VMvYvU/wQbwrUZ0=" + }, + "nth-check": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-1.0.2.tgz", + "integrity": "sha512-WeBOdju8SnzPN5vTUJYxYUxLeXpCaVP5i5e0LF8fg7WORF2Wd7wFX/pk0tYZk7s8T+J7VLy0Da6J1+wCT0AtHg==", + "requires": { + "boolbase": "~1.0.0" + } + }, "number-is-nan": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/number-is-nan/-/number-is-nan-1.0.1.tgz", @@ -3259,6 +3333,22 @@ "object-assign": "^4.1.0" } }, + "unist-util-filter": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/unist-util-filter/-/unist-util-filter-2.0.2.tgz", + "integrity": "sha512-yHcR4YpvFzKx8EN9i2HhTORIR3zYC6YwsW6b2nuk8EI/3gps2YC008S+KJRyNuPViqScYQdrI67ceOryNL9EUQ==", + "requires": { + "flatmap": "0.0.3", + "unist-util-is": "^4.0.0" + }, + "dependencies": { + "unist-util-is": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/unist-util-is/-/unist-util-is-4.0.2.tgz", + "integrity": "sha512-Ofx8uf6haexJwI1gxWMGg6I/dLnF2yE+KibhD3/diOqY2TinLcqHXCV6OI5gFVn3xQqDH+u0M625pfKwIwgBKQ==" + } + } + }, "unist-util-find-after": { "version": "2.0.4", "resolved": "https://registry.npmjs.org/unist-util-find-after/-/unist-util-find-after-2.0.4.tgz", diff --git a/package.json b/package.json index e460fc9a..a25f7d68 100644 --- a/package.json +++ b/package.json @@ -31,6 +31,8 @@ "gray-matter": "4.0.2", "hast-util-find-and-replace": "^2.0.0", "hast-util-has-property": "^1.0.3", + "hast-util-select": "^3.0.1", + "hast-util-to-string": "^1.0.2", "hast-util-to-text": "^1.0.1", "js-yaml": "3.13.1", "jsdom": "16.2.0", @@ -50,6 +52,7 @@ "to-vfile": "^6.0.0", "unified": "^8.4.2", "unified-lint-rule": "^1.0.4", + "unist-util-filter": "^2.0.2", "unist-util-visit": "^2.0.2", "vfile": "^4.0.2", "vfile-reporter": "^6.0.0", diff --git a/scripts/scraper-ng/index.js b/scripts/scraper-ng/index.js index 32ab2857..92f82c42 100644 --- a/scripts/scraper-ng/index.js +++ b/scripts/scraper-ng/index.js @@ -7,6 +7,7 @@ const 
limiter = require("./rate-limiter"); const mdnUrl = require("./mdn-url"); const summaryReporter = require("./vfile-reporter-summary"); const toVFile = require("./url-to-vfile"); +const VMessage = require("vfile-message"); const examplePage = "https://developer.mozilla.org/en-US/docs/Web/HTML/Element/div"; @@ -65,7 +66,15 @@ async function run() { const file = await toVFile(url); const hasFileErrors = file.messages.length > 0; if (!hasFileErrors) { - await processor.process(file); + try { + await processor.process(file); + } catch (err) { + // If a VMessage gets thrown, then this should not interrupt subsequent + // files, but other errors should break everything + if (!(err instanceof VMessage)) { + throw err; + } + } } return file; }); diff --git a/scripts/scraper-ng/preset.js b/scripts/scraper-ng/preset.js index 2672ace9..bca4dd61 100644 --- a/scripts/scraper-ng/preset.js +++ b/scripts/scraper-ng/preset.js @@ -1,5 +1,4 @@ -const allowedMacros = ["Compat"]; -const requiredMacros = ["Compat"]; +const allowedMacros = ["Compat", "EmbedInteractiveExample", "SpecName"]; module.exports = { settings: {}, @@ -11,7 +10,7 @@ module.exports = { // For rules that don't need settings of any kind: // [require('./rules/rule-name')] [require("./rules/file-require-recipe")], - [require("./rules/html-require-macros"), ["error", requiredMacros]], - [require("./rules/html-warn-macros"), allowedMacros] + [require("./rules/html-warn-macros"), allowedMacros], + [require("./rules/html-require-recipe-ingredients")] ] }; diff --git a/scripts/scraper-ng/rules/file-require-recipe.js b/scripts/scraper-ng/rules/file-require-recipe.js index 28afbb55..a116c266 100644 --- a/scripts/scraper-ng/rules/file-require-recipe.js +++ b/scripts/scraper-ng/rules/file-require-recipe.js @@ -45,7 +45,7 @@ function msg(file, text, ruleId, note) { message.fatal = true; message.ruleId = ruleId; message.note = note; - return message; + throw message; } module.exports = requireRecipe; diff --git 
a/scripts/scraper-ng/rules/html-require-macros.js b/scripts/scraper-ng/rules/html-require-macros.js deleted file mode 100644 index a123952f..00000000 --- a/scripts/scraper-ng/rules/html-require-macros.js +++ /dev/null @@ -1,32 +0,0 @@ -const rule = require("unified-lint-rule"); -const visit = require("unist-util-visit"); - -const normalizeMacroName = require("../normalize-macro-name"); - -/** - * Require one or more named macros. - */ -function requireMacros(tree, file, required = []) { - visit( - tree, - node => - node.type === "text" && - node.data && - required.map(normalizeMacroName).includes(node.data.macroName), - node => { - required = required.filter( - macro => node.data.macroName !== normalizeMacroName(macro) - ); - - if (!required.length) { - return visit.EXIT; - } - } - ); - - for (const macro of required) { - file.message(`${macro} macro call required but not found`); - } -} - -module.exports = rule("html-require-macros", requireMacros); diff --git a/scripts/scraper-ng/rules/html-require-recipe-ingredients/index.js b/scripts/scraper-ng/rules/html-require-recipe-ingredients/index.js new file mode 100644 index 00000000..ddb53785 --- /dev/null +++ b/scripts/scraper-ng/rules/html-require-recipe-ingredients/index.js @@ -0,0 +1,55 @@ +const fs = require("fs"); +const path = require("path"); + +const yaml = require("js-yaml"); + +const ingredientHandlers = require("./ingredient-handlers"); + +/** + * A unified plugin that issues an error on pages that are missing ingredients. 
+ */ +function requireRecipeIngredientsPlugin() { + return function warnOnMissingRecipeIngredients(tree, file) { + const recipeName = path.basename(file.data.recipePath, ".yaml"); + const recipe = loadRecipe(file.data.recipePath); + + const requiredBody = recipe.body.filter( + ingredientName => + !(ingredientName.endsWith("?") || ingredientName.endsWith(".*")) + ); + + for (const ingredient of requiredBody) { + const info = { + recipeName, + ingredient + }; + if (ingredient in ingredientHandlers) { + ingredientHandlers[ingredient](tree, file, info); + } else { + ingredientHandlers.default(tree, file, info); + } + } + }; +} + +const recipesCache = {}; + +/** + * Load a recipe object from a path. + * + * @param {String} path - the path to a recipe YAML file + * @returns {Object} - the loaded recipe object + */ +function loadRecipe(path) { + if (path === undefined) { + return undefined; + } + + if (recipesCache[path] === undefined) { + recipesCache[path] = yaml.safeLoad(fs.readFileSync(path)); + } + + return recipesCache[path]; +} + +module.exports = requireRecipeIngredientsPlugin; diff --git a/scripts/scraper-ng/rules/html-require-recipe-ingredients/ingredient-handlers.js b/scripts/scraper-ng/rules/html-require-recipe-ingredients/ingredient-handlers.js new file mode 100644 index 00000000..6f144403 --- /dev/null +++ b/scripts/scraper-ng/rules/html-require-recipe-ingredients/ingredient-handlers.js @@ -0,0 +1,282 @@ +const { select } = require("hast-util-select"); +const filter = require("unist-util-filter"); +const toString = require("hast-util-to-string"); +const visit = require("unist-util-visit"); + +const normalizeMacroName = require("../../normalize-macro-name"); + +const ruleNamespace = "html-require-ingredient"; + +/** + * Functions to check for recipe ingredients in Kuma page sources. + * + * The key is the name of a recipe ingredient (e.g., + * `data.browser_compatbiility` or `prose.syntax`) and the value is a function + * to process a tree and file for that. 
+ * + * Handler functions must take three arguments: a hast tree, a VFile, and a + * context object. The context object has two entries: + * + * - `ingredient` - the name of the ingredient + * - `recipeName` - the name of the recipe + * + * Handler functions may log messages against the file. + * + */ +const ingredientHandlers = { + default: (tree, file, context) => { + const { recipeName, ingredient } = context; + const rule = `${recipeName}/${ingredient}`; + const origin = `${ruleNamespace}:${rule}`; + + file.message(`Linting ${ingredient} ingredient is unimplemented`, origin); + }, + "data.browser_compatibility": (tree, file, context) => { + const id = "Browser_compatibility"; + const body = select(`body`, tree); + const heading = select(`h2#${id}`, body); + + if (heading === null) { + const message = file.message( + `Expected h2#${id} for ${context.recipeName}: ${context.ingredient}`, + body, + `${ruleNamespace}:${context.recipeName}/${context.ingredient}/expected-heading` + ); + message.fatal = true; + logMissingIngredient(file, context); + return; + } + + let macroCount = 0; + visit( + sliceSection(heading, body), + node => isMacro(node, "Compat"), + () => { + macroCount += 1; + } + ); + + if (macroCount !== 1) { + logMissingIngredient(file, context); + } + }, + "data.examples": requireTopLevelHeading("Examples"), + "data.specifications": (tree, file, context) => { + const id = "Specifications"; + const body = select(`body`, tree); + + const heading = select(`h2#${id}`, body); + if (heading === null) { + const message = file.message( + `Expected h2#${id} ${context.recipeName}: ${context.ingredient}`, + body, + `${ruleNamespace}:${context.recipeName}/${context.ingredient}/expected-heading` + ); + message.fatal = true; + logMissingIngredient(file, context); + return; + } + + let sectionOk = false; + visit(sliceSection(heading, body), "text", node => { + if (isMacro(node, "SpecName")) { + sectionOk = true; + return visit.SKIP; + } + + if 
(node.value.includes("Not part of any standard")) { + sectionOk = true; + return visit.SKIP; + } + }); + + if (!sectionOk) { + const message = file.message( + `Expected SpecName macro for ${context.recipeName}: ${context.ingredient}`, + heading, + `${ruleNamespace}:${context.recipeName}/${context.ingredient}/expected-macro` + ); + message.fatal = true; + logMissingIngredient(file, context); + } + }, + "prose.description": requireTopLevelHeading("Description"), + "prose.error_type": requireTopLevelHeading("Error_type"), + "prose.message": requireTopLevelHeading("Message"), + "prose.see_also": requireTopLevelHeading("See_also"), + "prose.short_description": (tree, file, context) => { + // Short descriptions are complicated! + // + // A short description is understood to be either an seoSummary or + // the first

<p> that precedes: + // + // - an interactive example macro + // - an <h2> + // - the end of the document + // + // whichever comes first, but excluding <p>
's that are admonitions (warnings + // or notes). + + const body = select("body", tree); + + if (select("span.seoSummary", tree) !== null) { + return; + } + + // Slice the tree to the nodes between the first element in and + // a terminating node (interactive example or h2) or the end of the + // document + const introSection = sliceBetween( + select(":first-child", body), + node => { + if (node.tagName === "h2") { + return true; + } + + let containsInteractiveExample = false; + visit( + node, + node => isMacro(node, "EmbedInteractiveExample"), + () => { + containsInteractiveExample = true; + return visit.EXIT; + } + ); + return containsInteractiveExample; + }, + body + ); + + // Remove admonition paragraphs + const isAdmonition = node => + node.tagName === "p" && + node.properties.className && + (node.properties.className.includes("warning") || + node.properties.className.includes("note")); + const filtered = filter(introSection, node => !isAdmonition(node)); + + // Get the first paragraph left over + const shortDescriptionP = select("p", filtered); + + if (shortDescriptionP === null) { + logMissingIngredient(file, context); + return; + } + + // Check if the paragraph actually contains text + const shortDescriptionText = toString(shortDescriptionP).trim(); + + // See if there's any text remaining + if (!shortDescriptionText.length) { + logMissingIngredient(file, context); + } + }, + "prose.syntax": requireTopLevelHeading("Syntax"), + "prose.what_went_wrong": requireTopLevelHeading("What_went_wrong") +}; + +/** + * A convenience function that returns ingredient handlers for checking the existence of a certain H2 in a hast tree. 
+ * + * @param {String} ingredient - an ingredient name + * @param {String} id - an id of an H2 to look for in the hast tree + * @returns {Function} a function + */ +function requireTopLevelHeading(id) { + return (tree, file, context) => { + const heading = select(`h2#${id}`, tree); + if (heading === null) { + logMissingIngredient(file, context); + } + }; +} + +/** + * Get a subset of `tree` starting with `startNode` and ending just before the + * next H2 element (or the end of the document, if it doesn't exist). + * + * @param {String} startNode - the starting node (e.g., some section heading) + * @param {Object} tree - a hast tree + * @returns {Object} a hast tree + */ +function sliceSection(startNode, tree) { + return sliceBetween(startNode, node => node.tagName === "h2", tree); +} + +/** + * Get a subset of `tree` starting with `startNode` and ending just before the + * first node that passes `endCondition`. + * + * @param {Object} startNode - the starting node (e.g., some section heading) + * @param {Function} endCondition - a function that takes a node as an argument + * and returns a boolean (e.g., to stop at a specific node, use `(node) => node + * === someNode`) + * @param {Object} tree - a hast tree + * @returns {Object} a hast tree + */ +function sliceBetween(startNode, endCondition, tree) { + const newRoot = { type: "root", children: [] }; + + let inBounds = false; + visit(tree, node => { + if (node === startNode) { + inBounds = true; + newRoot.children.push(node); + return visit.SKIP; + } + + if (inBounds) { + if (endCondition(node)) { + return visit.EXIT; + } + + newRoot.children.push(node); + return visit.SKIP; + } + }); + + return newRoot; +} + +/** + * Test if `node` is a macro call and, optionally, whether it calls a specific macro name. + * + * For use with `unist-util-visit` and similar. 
+ * + * @param {Object} node - the node to test + * @param {string} [macroName] - the name of the macro + * @returns {Boolean} `true` or `false` + */ +function isMacro(node, macroName) { + const isMacroType = + node.type === "text" && + node.data !== undefined && + node.data.macroName !== undefined; + + return ( + isMacroType && + (macroName === undefined || + node.data.macroName === normalizeMacroName(macroName)) + ); +} + +/** + * Log a message when a file is missing an ingredient. + * + * @param {VFile} file - a VFile + * @param {Object} context - a context object with recipe name and ingredient + * strings + */ +function logMissingIngredient(file, context) { + const { recipeName, ingredient } = context; + const rule = `${recipeName}/${ingredient}`; + const origin = `${ruleNamespace}:${rule}`; + + const message = file.message( + `Missing from ${recipeName}: ${ingredient}`, + origin + ); + message.fatal = true; +} + +module.exports = ingredientHandlers;