From 8422ac5072617f7bf139afadf5f98b4d66184ff3 Mon Sep 17 00:00:00 2001 From: Tanner Date: Mon, 2 Aug 2021 11:51:12 -0700 Subject: [PATCH 01/41] Add one more short circuit AND now that we split by h4 tags as well --- build/index.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/build/index.js b/build/index.js index ec2efa8e7b55..313a0b276d73 100644 --- a/build/index.js +++ b/build/index.js @@ -250,7 +250,8 @@ function makeTOC(doc) { section.type === "specifications") && section.value.id && section.value.title && - !section.value.isH3 + !section.value.isH3 && + !section.value.isH4 ) { return { text: section.value.title, id: section.value.id }; } From 45777eea9b7660c751c6b55fa258e0bfc2833a1f Mon Sep 17 00:00:00 2001 From: Tanner Date: Mon, 2 Aug 2021 12:04:18 -0700 Subject: [PATCH 02/41] Add logic to document-extractor.js to split section by h2, h3 OR h4 --- build/document-extractor.js | 60 ++++++++++++++++++++++++++++++------- 1 file changed, 50 insertions(+), 10 deletions(-) diff --git a/build/document-extractor.js b/build/document-extractor.js index ddebaeff6fdd..6ab5d3376652 100644 --- a/build/document-extractor.js +++ b/build/document-extractor.js @@ -49,7 +49,11 @@ function extractSections($) { let c = 0; iterable.forEach((child) => { - if (child.tagName === "h2" || child.tagName === "h3") { + if ( + child.tagName === "h2" || + child.tagName === "h3" || + child.tagName === "h4" + ) { if (c) { const [subSections, subFlaws] = addSections(section.clone()); sections.push(...subSections); @@ -226,11 +230,11 @@ function addSections($) { const specialSections = _addSingleSpecialSection($); // The _addSingleSpecialSection() function will have sucked up the

<h2> or <h3>

- // and the `div.bc-data` or `div.bc-specs` to turn it into a special section. + // or

and the `div.bc-data` or `div.bc-specs` to turn it into a special section. // First remove that, then put whatever HTML is left as a prose // section underneath. - $.find("div.bc-data, h2, h3").remove(); - $.find("div.bc-specs, h2, h3").remove(); + $.find("div.bc-data, h2, h3, h4").remove(); + $.find("div.bc-specs, h2, h3, h4").remove(); const [proseSections, proseFlaws] = _addSectionProse($); specialSections.push(...proseSections); flaws.push(...proseFlaws); @@ -251,18 +255,23 @@ function _addSingleSpecialSection($) { let id = null; let title = null; let isH3 = false; + let isH4 = false; const h2s = $.find("h2"); + const h3s = $.find("h3"); if (h2s.length === 1) { id = h2s.attr("id"); title = h2s.text(); + } else if (h3s.length === 1) { + id = h3s.attr("id"); + title = h3s.text(); + isH3 = true; } else { - const h3s = $.find("h3"); - if (h3s.length === 1) { - id = h3s.attr("id"); - title = h3s.text(); - isH3 = true; - } + // Look for

s + const h4s = $.find("h4"); + id = h4s.attr("id"); + title = h4s.text(); + isH4 = true; } let dataQuery = null; @@ -296,6 +305,7 @@ function _addSingleSpecialSection($) { title, id, isH3, + isH4, data: null, query, browsers: null, @@ -313,6 +323,7 @@ function _addSingleSpecialSection($) { title, id, isH3, + isH4, query, specifications: [], }, @@ -390,6 +401,7 @@ function _addSingleSpecialSection($) { title, id, isH3, + isH4, data, query, browsers, @@ -444,6 +456,7 @@ function _addSingleSpecialSection($) { title, id, isH3, + isH4, specifications, query, }, @@ -457,6 +470,7 @@ function _addSectionProse($) { let title = null; let titleAsText = null; let isH3 = false; + let isH4 = false; const flaws = []; @@ -484,6 +498,7 @@ function _addSectionProse($) { } // If there was no

<h2>, look through all the <h3>

s. + let h3found = false; if (!h2found) { const h3s = $.find("h3"); for (const i of [...Array(h3s.length).keys()]) { @@ -503,12 +518,37 @@ function _addSectionProse($) { h3s.eq(i).remove(); } } + h3found = true; + } + } + + // if there was no

<h3>, look through all the <h4>

s. + if (!h3found) { + const h4s = $.find("h4"); + for (const i of [...Array(h4s.length).keys()]) { + if (i) { + // Excess! + flaws.push( + `Excess

tag that is NOT at root-level (id='${h4s + .eq(i) + .attr("id")}', text='${h4s.eq(i).text()}')` + ); + } else { + id = h4s.eq(i).attr("id"); + title = h4s.eq(i).html(); + titleAsText = h4s.eq(i).text(); + if (id && title) { + isH4 = true; + h4s.eq(i).remove(); + } + } } } const value = { id, title, isH3, + isH4, content: $.html().trim(), }; From 7dfc273ace039ec00ab7242b66267d2ed5da035c Mon Sep 17 00:00:00 2001 From: Tanner Date: Mon, 2 Aug 2021 12:05:00 -0700 Subject: [PATCH 03/41] Introduce isH4 and DisplayH4 to spec-section --- client/src/document/ingredients/spec-section.tsx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/client/src/document/ingredients/spec-section.tsx b/client/src/document/ingredients/spec-section.tsx index 719bf4517815..6ec14cad8d60 100644 --- a/client/src/document/ingredients/spec-section.tsx +++ b/client/src/document/ingredients/spec-section.tsx @@ -1,15 +1,17 @@ -import { DisplayH2, DisplayH3 } from "./utils"; +import { DisplayH2, DisplayH3, DisplayH4 } from "./utils"; export function SpecificationSection({ id, title, isH3, + isH4, specifications, query, }: { id: string; title: string; isH3: boolean; + isH4: boolean; specifications: Array<{ title: string; bcdSpecificationURL: string; @@ -21,6 +23,7 @@ export function SpecificationSection({ <> {title && !isH3 && } {title && isH3 && } + {title && isH4 && !isH3 && } {specifications.length > 0 ? 
( From 3f97e7bdef0747465dd175a9dd1ff0086f1351b0 Mon Sep 17 00:00:00 2001 From: Tanner Date: Mon, 2 Aug 2021 12:08:54 -0700 Subject: [PATCH 04/41] Introduce isH4 and DisplayH4 to lazy-bcd-table --- client/src/document/lazy-bcd-table.tsx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/client/src/document/lazy-bcd-table.tsx b/client/src/document/lazy-bcd-table.tsx index 70be2d37d3ea..a50ea455288d 100644 --- a/client/src/document/lazy-bcd-table.tsx +++ b/client/src/document/lazy-bcd-table.tsx @@ -1,7 +1,7 @@ import React, { lazy, Suspense, useEffect, useState } from "react"; import useSWR from "swr"; -import { DisplayH2, DisplayH3 } from "./ingredients/utils"; +import { DisplayH2, DisplayH3, DisplayH4 } from "./ingredients/utils"; import { Loading } from "../ui/atoms/loading"; // Because it's bad for web performance to lazy-load CSS during the initial render // (because the page is saying "Wait! Stop rendering, now that I've downloaded @@ -23,12 +23,14 @@ export function LazyBrowserCompatibilityTable({ id, title, isH3, + isH4, query, dataURL, }: { id: string; title: string; isH3: boolean; + isH4: boolean; query: string; dataURL: string | null; }) { @@ -36,6 +38,7 @@ export function LazyBrowserCompatibilityTable({ <> {title && !isH3 && } {title && isH3 && } + {title && isH4 && !isH3 && } {dataURL ? 
( ) : ( From 6dbbb3d375930e0dd6f40d0f979b705f14b0f527 Mon Sep 17 00:00:00 2001 From: Tanner Date: Mon, 2 Aug 2021 12:32:29 -0700 Subject: [PATCH 05/41] Add logic to conditionally render DisplayH4 component --- client/src/document/ingredients/prose.tsx | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/client/src/document/ingredients/prose.tsx b/client/src/document/ingredients/prose.tsx index be12c064e6c5..2764be401596 100644 --- a/client/src/document/ingredients/prose.tsx +++ b/client/src/document/ingredients/prose.tsx @@ -1,4 +1,4 @@ -import { DisplayH2, DisplayH3 } from "./utils"; +import { DisplayH2, DisplayH3, DisplayH4 } from "./utils"; export function Prose({ section }) { return
; @@ -7,13 +7,21 @@ export function Prose({ section }) { export function ProseWithHeading({ id, section }) { return ( <> - {section.isH3 ? ( + {section.isH4 && ( + + )} + {section.isH3 && !section.isH4 && ( - ) : ( + )} + {!section.isH4 && !section.isH3 && ( Date: Mon, 2 Aug 2021 12:33:12 -0700 Subject: [PATCH 06/41] Create DisplayH4 functional component in utils.tsx --- client/src/document/ingredients/utils.tsx | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/client/src/document/ingredients/utils.tsx b/client/src/document/ingredients/utils.tsx index 0987c11e3915..0d3f4af610d0 100644 --- a/client/src/document/ingredients/utils.tsx +++ b/client/src/document/ingredients/utils.tsx @@ -30,6 +30,22 @@ export function DisplayH3({ ); } +export function DisplayH4({ + id, + title, + titleAsText, +}: { + id: string; + title: string; + titleAsText?: string; +}) { + return ( +

+ +

+ ); +} + function Permalink({ id, title, From bb79fd943af61d59329cc526b0d0fa47fc4dd7a5 Mon Sep 17 00:00:00 2001 From: Tanner Date: Mon, 2 Aug 2021 12:34:37 -0700 Subject: [PATCH 07/41] Add h4 selector to make sure they receive the same styles as other direct heading links --- client/src/document/index.scss | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/client/src/document/index.scss b/client/src/document/index.scss index 4336e56f4f90..376ff4eb97e3 100644 --- a/client/src/document/index.scss +++ b/client/src/document/index.scss @@ -204,7 +204,8 @@ a.page-not-created { } h2, - h3 { + h3, + h4 { margin-bottom: ($base-unit * 2); a:link, From 530e09fa33cf573530162375b79d84c3b57e425e Mon Sep 17 00:00:00 2001 From: Tanner Date: Mon, 2 Aug 2021 22:45:05 -0700 Subject: [PATCH 08/41] Refactor child.tagName conditional check --- build/document-extractor.js | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/build/document-extractor.js b/build/document-extractor.js index 6ab5d3376652..7d9e5dceddff 100644 --- a/build/document-extractor.js +++ b/build/document-extractor.js @@ -48,12 +48,9 @@ function extractSections($) { const iterable = [...$("#_body")[0].childNodes]; let c = 0; + let tagNames = ["h2", "h3", "h4"]; iterable.forEach((child) => { - if ( - child.tagName === "h2" || - child.tagName === "h3" || - child.tagName === "h4" - ) { + if (tagNames.includes(child.tagName)) { if (c) { const [subSections, subFlaws] = addSections(section.clone()); sections.push(...subSections); From e192ecd58e9297d1da3b28ccfd90fbcf562b9262 Mon Sep 17 00:00:00 2001 From: Tanner Date: Tue, 3 Aug 2021 10:34:24 -0700 Subject: [PATCH 09/41] Use a set for the tagNames instead --- build/document-extractor.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/build/document-extractor.js b/build/document-extractor.js index 7d9e5dceddff..80d6feb819a0 100644 --- a/build/document-extractor.js +++ b/build/document-extractor.js @@ -48,9 +48,9 @@ 
function extractSections($) { const iterable = [...$("#_body")[0].childNodes]; let c = 0; - let tagNames = ["h2", "h3", "h4"]; + const tagNames = new Set(["h2", "h3", "h4"]); iterable.forEach((child) => { - if (tagNames.includes(child.tagName)) { + if (tagNames.has(child.tagName)) { if (c) { const [subSections, subFlaws] = addSections(section.clone()); sections.push(...subSections); From f407799a799e071ede2b80caf0f0832ea288080c Mon Sep 17 00:00:00 2001 From: Tanner Date: Tue, 3 Aug 2021 17:00:17 -0700 Subject: [PATCH 10/41] Condense each DisplayH* function into a single DisplayHeading component --- client/src/document/ingredients/utils.tsx | 43 ++++------------------- 1 file changed, 7 insertions(+), 36 deletions(-) diff --git a/client/src/document/ingredients/utils.tsx b/client/src/document/ingredients/utils.tsx index 0d3f4af610d0..0fe4e1aaef1b 100644 --- a/client/src/document/ingredients/utils.tsx +++ b/client/src/document/ingredients/utils.tsx @@ -1,48 +1,19 @@ -export function DisplayH2({ +export function DisplayHeading({ + level, id, title, titleAsText, }: { + level: number; id: string; title: string; titleAsText?: string; }) { + const Tag = `h${level}` as keyof JSX.IntrinsicElements; return ( -

- -

- ); -} - -export function DisplayH3({ - id, - title, - titleAsText, -}: { - id: string; - title: string; - titleAsText?: string; -}) { - return ( -

- -

- ); -} - -export function DisplayH4({ - id, - title, - titleAsText, -}: { - id: string; - title: string; - titleAsText?: string; -}) { - return ( -

- -

+ + + ); } From 2135fc3f93c4603d2b4d751e5ad4aac4e5ad34c0 Mon Sep 17 00:00:00 2001 From: Tanner Date: Tue, 3 Aug 2021 17:01:06 -0700 Subject: [PATCH 11/41] Render a single DisplayHeading component based on its level --- client/src/document/ingredients/prose.tsx | 29 ++++++----------------- 1 file changed, 7 insertions(+), 22 deletions(-) diff --git a/client/src/document/ingredients/prose.tsx b/client/src/document/ingredients/prose.tsx index 2764be401596..23004dee0392 100644 --- a/client/src/document/ingredients/prose.tsx +++ b/client/src/document/ingredients/prose.tsx @@ -1,4 +1,4 @@ -import { DisplayH2, DisplayH3, DisplayH4 } from "./utils"; +import { DisplayHeading } from "./utils"; export function Prose({ section }) { return
; @@ -7,27 +7,12 @@ export function Prose({ section }) { export function ProseWithHeading({ id, section }) { return ( <> - {section.isH4 && ( - - )} - {section.isH3 && !section.isH4 && ( - - )} - {!section.isH4 && !section.isH3 && ( - - )} + ); From 745b975b1dde42d2b4c0ce37c8b4075367638d70 Mon Sep 17 00:00:00 2001 From: Tanner Date: Tue, 3 Aug 2021 17:02:37 -0700 Subject: [PATCH 12/41] Replace DisplayH2,H3,H4 component calls with a single DisplayHeading --- client/src/document/ingredients/spec-section.tsx | 6 ++---- client/src/document/lazy-bcd-table.tsx | 7 +++---- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/client/src/document/ingredients/spec-section.tsx b/client/src/document/ingredients/spec-section.tsx index 6ec14cad8d60..addb1d308b12 100644 --- a/client/src/document/ingredients/spec-section.tsx +++ b/client/src/document/ingredients/spec-section.tsx @@ -1,4 +1,4 @@ -import { DisplayH2, DisplayH3, DisplayH4 } from "./utils"; +import { DisplayHeading } from "./utils"; export function SpecificationSection({ id, @@ -21,9 +21,7 @@ export function SpecificationSection({ }) { return ( <> - {title && !isH3 && } - {title && isH3 && } - {title && isH4 && !isH3 && } + {} {specifications.length > 0 ? (
diff --git a/client/src/document/lazy-bcd-table.tsx b/client/src/document/lazy-bcd-table.tsx index a50ea455288d..0f0c319e3144 100644 --- a/client/src/document/lazy-bcd-table.tsx +++ b/client/src/document/lazy-bcd-table.tsx @@ -1,7 +1,7 @@ import React, { lazy, Suspense, useEffect, useState } from "react"; import useSWR from "swr"; -import { DisplayH2, DisplayH3, DisplayH4 } from "./ingredients/utils"; +import { DisplayHeading } from "./ingredients/utils"; import { Loading } from "../ui/atoms/loading"; // Because it's bad for web performance to lazy-load CSS during the initial render // (because the page is saying "Wait! Stop rendering, now that I've downloaded @@ -36,9 +36,8 @@ export function LazyBrowserCompatibilityTable({ }) { return ( <> - {title && !isH3 && } - {title && isH3 && } - {title && isH4 && !isH3 && } + {} + {dataURL ? ( ) : ( From c1f4cdf1e7b7db3b9beb47b8217b2d8697659a3e Mon Sep 17 00:00:00 2001 From: Tanner Date: Wed, 4 Aug 2021 11:11:13 -0700 Subject: [PATCH 13/41] Comment out flaws.push for excess

tags not at root-level --- build/document-extractor.js | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/build/document-extractor.js b/build/document-extractor.js index 80d6feb819a0..6ca27289d770 100644 --- a/build/document-extractor.js +++ b/build/document-extractor.js @@ -525,11 +525,16 @@ function _addSectionProse($) { for (const i of [...Array(h4s.length).keys()]) { if (i) { // Excess! - flaws.push( - `Excess

tag that is NOT at root-level (id='${h4s - .eq(i) - .attr("id")}', text='${h4s.eq(i).text()}')` - ); + // In light of the transition to Markdown + // and converting any

elements within + //
containers to , + // we don't need to push a flaw at the moment for h4 + // tags that aren't at root-level + // flaws.push( + // `Excess

tag that is NOT at root-level (id='${h4s + // .eq(i) + // .attr("id")}', text='${h4s.eq(i).text()}')` + // ); } else { id = h4s.eq(i).attr("id"); title = h4s.eq(i).html(); From c8c0168718145defcd6663d1398de69e7881b264 Mon Sep 17 00:00:00 2001 From: Tanner Date: Thu, 19 Aug 2021 22:59:16 -0700 Subject: [PATCH 14/41] Create template for test fixture --- .../en-us/web/split_section_by_heading/index.html | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 testing/content/files/en-us/web/split_section_by_heading/index.html diff --git a/testing/content/files/en-us/web/split_section_by_heading/index.html b/testing/content/files/en-us/web/split_section_by_heading/index.html new file mode 100644 index 000000000000..cee9f9882124 --- /dev/null +++ b/testing/content/files/en-us/web/split_section_by_heading/index.html @@ -0,0 +1,14 @@ +--- +title: Split section by heading +slug: Web/Split_section_by_heading +--- + +

This page contains a few h2, h3 and h4 tags at root-level that will be direct linkified.

+ + +

Some heading

+

Foo buzz

+

Foo bar

+

Another heading

+

stuff

+

Final heading

From f0d5dd1f436cc6d5d726c86d77168879bccdd414 Mon Sep 17 00:00:00 2001 From: Tanner Date: Thu, 19 Aug 2021 23:01:18 -0700 Subject: [PATCH 15/41] Add jest tests --- testing/tests/index.test.js | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/testing/tests/index.test.js b/testing/tests/index.test.js index 3335a865290f..9a5e8fb10462 100644 --- a/testing/tests/index.test.js +++ b/testing/tests/index.test.js @@ -1730,6 +1730,41 @@ test("notecards are correctly transformed by the formatNotecards utility", () => ); }); +test("sections should be split by h2, h3 or h4", () => { + const builtFolder = path.join( + buildRoot, + "en-us", + "docs", + "web", + "split_section_by_heading" + ); + + const jsonFile = path.join(builtFolder, "index.json"); + const { doc } = JSON.parse(fs.readFileSync(jsonFile)); + expect(doc.flaws.length).toBeFalsy(); + expect(doc.title).toBe("Split section by heading"); + + const htmlFile = path.join(builtFolder, "index.html"); + const html = fs.readFileSync(htmlFile, "utf-8"); + const $ = cheerio.load(html); + + expect($("h2#some_heading").text()).toBe("Some heading"); + expect($("h2#some_heading").attr("id")).toBe("some_heading"); + expect($("h2#some_heading").html()).toBe( + 'Some heading' + ); + expect($("h3#another_heading").text()).toBe("Another heading"); + expect($("h3#another_heading").attr("id")).toBe("another_heading"); + expect($("h3#another_heading").html()).toBe( + 'Another heading' + ); + expect($("h4#final_heading").text()).toBe("Final heading"); + expect($("h4#final_heading").attr("id")).toBe("final_heading"); + expect($("h4#final_heading").html()).toBe( + 'Final heading' + ); +}); + test("homepage links and flaws", () => { const builtFolder = path.join( buildRoot, From 7306e90c691c4e6f9bc02326bad0ce4c560d2682 Mon Sep 17 00:00:00 2001 From: Tanner Date: Tue, 21 Jun 2022 19:33:26 -0700 Subject: [PATCH 16/41] Remove unused 'DisplayHeading' reference for now --- 
client/src/document/ingredients/prose.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/src/document/ingredients/prose.tsx b/client/src/document/ingredients/prose.tsx index 1a64efc06b9d..ae2fe60278d2 100644 --- a/client/src/document/ingredients/prose.tsx +++ b/client/src/document/ingredients/prose.tsx @@ -1,4 +1,4 @@ -import { DisplayH2, DisplayH3, DisplayHeading } from "./utils"; +import { DisplayH2, DisplayH3 } from "./utils"; export function Prose({ section }: { section: any }) { const { id } = section; From e29a4c94b20b03b3401a0f506497d07d5207447a Mon Sep 17 00:00:00 2001 From: Tanner Date: Tue, 21 Jun 2022 19:44:32 -0700 Subject: [PATCH 17/41] Replace Set.has() with RegExp.test() for performance --- build/document-extractor.js | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/build/document-extractor.js b/build/document-extractor.js index 0e1120dc0cc1..140098044ea7 100644 --- a/build/document-extractor.js +++ b/build/document-extractor.js @@ -41,11 +41,10 @@ function extractSections($) { .eq(0); const iterable = [...$("#_body")[0].childNodes]; - let c = 0; - const tagNames = new Set(["h2", "h3", "h4"]); + iterable.forEach((child) => { - if (tagNames.has(child.tagName)) { + if (/^h[2-4]$/.test(child.tagName)) { if (c) { const [subSections, subFlaws] = addSections(section.clone()); sections.push(...subSections); From 044a454856bc6aa52b0cb3329cde22c80d26ca8a Mon Sep 17 00:00:00 2001 From: Tanner Date: Tue, 21 Jun 2022 19:50:19 -0700 Subject: [PATCH 18/41] Remove unused DisplayH2/H3 references --- client/src/document/ingredients/spec-section.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/src/document/ingredients/spec-section.tsx b/client/src/document/ingredients/spec-section.tsx index e21854120b72..e78feeeece4e 100644 --- a/client/src/document/ingredients/spec-section.tsx +++ b/client/src/document/ingredients/spec-section.tsx @@ -1,4 +1,4 @@ -import { DisplayH2, DisplayH3, DisplayHeading 
} from "./utils"; +import { DisplayHeading } from "./utils"; import NoteCard from "../../ui/molecules/notecards"; export function SpecificationSection({ From 986af039a48a648ec9ec6802d099ef82652d384f Mon Sep 17 00:00:00 2001 From: Tanner Date: Tue, 21 Jun 2022 19:52:00 -0700 Subject: [PATCH 19/41] Use the DisplayHeading component --- client/src/document/ingredients/prose.tsx | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/client/src/document/ingredients/prose.tsx b/client/src/document/ingredients/prose.tsx index ae2fe60278d2..f5ba25dfaff9 100644 --- a/client/src/document/ingredients/prose.tsx +++ b/client/src/document/ingredients/prose.tsx @@ -1,4 +1,4 @@ -import { DisplayH2, DisplayH3 } from "./utils"; +import { DisplayHeading } from "./utils"; export function Prose({ section }: { section: any }) { const { id } = section; @@ -14,11 +14,10 @@ export function Prose({ section }: { section: any }) { return ; } - const DisplayHx = section.isH3 ? DisplayH3 : DisplayH2; - return (
- Date: Tue, 21 Jun 2022 19:56:01 -0700 Subject: [PATCH 20/41] Access isH4 and isH3 from the section --- client/src/document/ingredients/prose.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client/src/document/ingredients/prose.tsx b/client/src/document/ingredients/prose.tsx index f5ba25dfaff9..8a147a7095b5 100644 --- a/client/src/document/ingredients/prose.tsx +++ b/client/src/document/ingredients/prose.tsx @@ -17,7 +17,7 @@ export function Prose({ section }: { section: any }) { return (
Date: Sun, 4 Sep 2022 13:51:48 -0700 Subject: [PATCH 21/41] Refactor section splitting to reduce duplication --- testing/tests/index.test.ts | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/testing/tests/index.test.ts b/testing/tests/index.test.ts index cd2ad048d2c6..ff181be54605 100644 --- a/testing/tests/index.test.ts +++ b/testing/tests/index.test.ts @@ -1785,19 +1785,24 @@ test("sections should be split by h2, h3 or h4", () => { const html = fs.readFileSync(htmlFile, "utf-8"); const $ = cheerio.load(html); - expect($("h2#some_heading").text()).toBe("Some heading"); - expect($("h2#some_heading").attr("id")).toBe("some_heading"); - expect($("h2#some_heading").html()).toBe( + const h2 = $("h2#some_heading"); + expect(h2.text()).toBe("Some heading"); + expect(h2.attr("id")).toBe("some_heading"); + expect(h2.html()).toBe( 'Some heading' ); - expect($("h3#another_heading").text()).toBe("Another heading"); - expect($("h3#another_heading").attr("id")).toBe("another_heading"); - expect($("h3#another_heading").html()).toBe( + + const h3 = $("h3#another_heading"); + expect(h3.text()).toBe("Another heading"); + expect(h3.attr("id")).toBe("another_heading"); + expect(h3.html()).toBe( 'Another heading' ); - expect($("h4#final_heading").text()).toBe("Final heading"); - expect($("h4#final_heading").attr("id")).toBe("final_heading"); - expect($("h4#final_heading").html()).toBe( + + const h4 = $("h4#final_heading"); + expect(h4.text()).toBe("Final heading"); + expect(h4.attr("id")).toBe("final_heading"); + expect(h4.html()).toBe( 'Final heading' ); }); From f249a3de8a5a1b43aebc43908dc807666e096358 Mon Sep 17 00:00:00 2001 From: Tanner Date: Sun, 4 Sep 2022 13:58:38 -0700 Subject: [PATCH 22/41] Add missing encoding parameter to fs.readFileSync --- testing/tests/index.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/tests/index.test.ts b/testing/tests/index.test.ts index ff181be54605..88675395c90f 100644 
--- a/testing/tests/index.test.ts +++ b/testing/tests/index.test.ts @@ -1777,7 +1777,7 @@ test("sections should be split by h2, h3 or h4", () => { ); const jsonFile = path.join(builtFolder, "index.json"); - const { doc } = JSON.parse(fs.readFileSync(jsonFile)); + const { doc } = JSON.parse(fs.readFileSync(jsonFile, "utf-8")); expect(doc.flaws.length).toBeFalsy(); expect(doc.title).toBe("Split section by heading"); From 73476beef846c904f65fe213fe3d55d20ebe036c Mon Sep 17 00:00:00 2001 From: Tanner Date: Sun, 4 Sep 2022 14:15:45 -0700 Subject: [PATCH 23/41] Add missing isH4 property to Section types --- libs/types/document.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libs/types/document.ts b/libs/types/document.ts index 0447d04395c2..295aa4c4bb1d 100644 --- a/libs/types/document.ts +++ b/libs/types/document.ts @@ -165,6 +165,7 @@ export interface ProseSection { id: string | null; title: string | null; isH3: boolean; + isH4: boolean; content?: string; titleAsText?: string; }; @@ -175,6 +176,7 @@ export interface SpecificationsSection { id: string; title: string; isH3: boolean; + isH4: boolean; query: string; specifications: { bcdSpecificationURL: any; @@ -189,6 +191,7 @@ export interface BCDSection { id: string; title: string; isH3: boolean; + isH4: boolean; data?: BCD.Identifier | null; dataURL?: string; query: string; From a2946863b93cbc3cdd2dc338ec3563e64117ae9f Mon Sep 17 00:00:00 2001 From: Tanner Date: Sun, 4 Sep 2022 14:21:32 -0700 Subject: [PATCH 24/41] Add isH4 to the ProseSection value to match updated type definition --- build/document-extractor.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/build/document-extractor.ts b/build/document-extractor.ts index 4b8bbf49ecd8..77f7c182e192 100644 --- a/build/document-extractor.ts +++ b/build/document-extractor.ts @@ -754,6 +754,7 @@ function _addSectionProse( id, title, isH3, + isH4, content: $.html()?.trim(), }; From 032b95075c2b7a4ccb46f78d152c2b448472b849 Mon Sep 17 00:00:00 2001 From: Tanner 
Date: Sun, 4 Sep 2022 14:36:56 -0700 Subject: [PATCH 25/41] Remove .html() after extracting Cheerio element to variable --- testing/tests/index.test.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/testing/tests/index.test.ts b/testing/tests/index.test.ts index 88675395c90f..644d7e587995 100644 --- a/testing/tests/index.test.ts +++ b/testing/tests/index.test.ts @@ -1788,21 +1788,21 @@ test("sections should be split by h2, h3 or h4", () => { const h2 = $("h2#some_heading"); expect(h2.text()).toBe("Some heading"); expect(h2.attr("id")).toBe("some_heading"); - expect(h2.html()).toBe( + expect(h2).toBe( 'Some heading' ); const h3 = $("h3#another_heading"); expect(h3.text()).toBe("Another heading"); expect(h3.attr("id")).toBe("another_heading"); - expect(h3.html()).toBe( + expect(h3).toBe( 'Another heading' ); const h4 = $("h4#final_heading"); expect(h4.text()).toBe("Final heading"); expect(h4.attr("id")).toBe("final_heading"); - expect(h4.html()).toBe( + expect(h4).toBe( 'Final heading' ); }); From ccd7ffb5ec6bd5af642c7ae9ebb25aa89532247d Mon Sep 17 00:00:00 2001 From: Tanner Date: Sun, 4 Sep 2022 14:46:41 -0700 Subject: [PATCH 26/41] Experimenting with link matching for section splitting tests --- testing/tests/index.test.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/testing/tests/index.test.ts b/testing/tests/index.test.ts index 644d7e587995..4cc2329f0d74 100644 --- a/testing/tests/index.test.ts +++ b/testing/tests/index.test.ts @@ -1788,21 +1788,21 @@ test("sections should be split by h2, h3 or h4", () => { const h2 = $("h2#some_heading"); expect(h2.text()).toBe("Some heading"); expect(h2.attr("id")).toBe("some_heading"); - expect(h2).toBe( + expect($.html(h2)).toBe( 'Some heading' ); const h3 = $("h3#another_heading"); expect(h3.text()).toBe("Another heading"); expect(h3.attr("id")).toBe("another_heading"); - expect(h3).toBe( + expect($.html(h3)).toBe( 'Another heading' ); const h4 = $("h4#final_heading"); 
expect(h4.text()).toBe("Final heading"); expect(h4.attr("id")).toBe("final_heading"); - expect(h4).toBe( + expect($.html(h4)).toBe( 'Final heading' ); }); From a4efc65a5f409d4667921a38ff50592c8569996d Mon Sep 17 00:00:00 2001 From: Tanner Date: Sun, 4 Sep 2022 15:29:11 -0700 Subject: [PATCH 27/41] Debugging getting the innerHTML from a specific h2/h3/h4 element --- testing/tests/index.test.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/testing/tests/index.test.ts b/testing/tests/index.test.ts index 4cc2329f0d74..88675395c90f 100644 --- a/testing/tests/index.test.ts +++ b/testing/tests/index.test.ts @@ -1788,21 +1788,21 @@ test("sections should be split by h2, h3 or h4", () => { const h2 = $("h2#some_heading"); expect(h2.text()).toBe("Some heading"); expect(h2.attr("id")).toBe("some_heading"); - expect($.html(h2)).toBe( + expect(h2.html()).toBe( 'Some heading' ); const h3 = $("h3#another_heading"); expect(h3.text()).toBe("Another heading"); expect(h3.attr("id")).toBe("another_heading"); - expect($.html(h3)).toBe( + expect(h3.html()).toBe( 'Another heading' ); const h4 = $("h4#final_heading"); expect(h4.text()).toBe("Final heading"); expect(h4.attr("id")).toBe("final_heading"); - expect($.html(h4)).toBe( + expect(h4.html()).toBe( 'Final heading' ); }); From 0a051fc4c16135bc3d1799f755ae54f2e75ac782 Mon Sep 17 00:00:00 2001 From: Tanner Date: Sun, 4 Sep 2022 15:53:48 -0700 Subject: [PATCH 28/41] Add closure function to reduce duplication in heading extraction --- build/document-extractor.ts | 87 ++++++++++++++++++++++--------------- 1 file changed, 52 insertions(+), 35 deletions(-) diff --git a/build/document-extractor.ts b/build/document-extractor.ts index 77f7c182e192..41a0da4f3251 100644 --- a/build/document-extractor.ts +++ b/build/document-extractor.ts @@ -684,6 +684,30 @@ function _addSingleSpecialSection( } } +function extractHeadings($, headingLevel, id, title, titleAsText, flaws) { + const headings = $.find(headingLevel); + 
headings.each((i) => { + const heading = headings.eq(i); + if (i) { + // Excess! + flaws.push( + `Excess <${headingLevel}> tag that is NOT at root-level (id='${heading.attr( + "id" + )}', text='${heading.text()}')` + ); + } else { + id = heading.attr("id") ?? ""; + title = heading.html() ?? ""; + titleAsText = heading.text(); + if (id && title) { + if (headingLevel == "h3") isH3 = true; + if (headingLevel == "h4") isH4 = true; + heading.remove(); + } + } + }); +} + function _addSectionProse( $: cheerio.Cheerio ): SectionsAndFlaws { @@ -698,53 +722,46 @@ function _addSectionProse( // The way this works... // Given a section of HTML, try to extract a id, title, - let h2found = false; - const h2s = $.find("h2"); - h2s.each((i) => { - const h2 = h2s.eq(i); - - if (i) { - // Excess! - flaws.push( - `Excess

tag that is NOT at root-level (id='${h2.attr( - "id" - )}', text='${h2.text()}')` - ); - } else { - // First element - id = h2.attr("id") ?? ""; - title = h2.html() ?? ""; - titleAsText = h2.text(); - h2.remove(); - } - h2found = true; - }); - - // If there was no

<h2>, look through all the <h3>

s. - let h3found = false; - if (!h2found) { - const h3s = $.find("h3"); - h3s.each((i) => { - const h3 = h3s.eq(i); + // Closure function to reduce duplication for heading extraction + function extractHeadings(headingType) { + const headings = $.find(headingType); + headings.each((i) => { + const heading = headings.eq(i); if (i) { // Excess! flaws.push( - `Excess

tag that is NOT at root-level (id='${h3.attr( + `Excess <${headingType}> tag that is NOT at root-level (id='${heading.attr( "id" - )}', text='${h3.text()}')` + )}', text='${heading.text()}')` ); } else { - id = h3.attr("id") ?? ""; - title = h3.html() ?? ""; - titleAsText = h3.text(); + id = heading.attr("id") ?? ""; + title = heading.html() ?? ""; + titleAsText = heading.text(); if (id && title) { - isH3 = true; - h3.remove(); + if (headingType == "h2") h2found = true; + if (headingType == "h3") isH3 = true; + if (headingType == "h4") isH4 = true; + heading.remove(); } } }); } + let h2found = false; + extractHeadings("h2"); + + // If there was no

, look through all the

s. + let h3found = false; + if (!h2found) { + extractHeadings("h3"); + } + + // If there was no

, look through all the

s. + if (!h3found) { + extractHeadings("h4"); + } + if (id) { // Remove trailing underscores (https://github.com/mdn/yari/issues/5492). id = id.replace(/_+$/g, ""); From 08425eb6e54808a29ed0462211f926fdf10432ca Mon Sep 17 00:00:00 2001 From: Tanner Date: Sun, 4 Sep 2022 15:56:34 -0700 Subject: [PATCH 29/41] Remove stale function definition for extractHeadings --- build/document-extractor.ts | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/build/document-extractor.ts b/build/document-extractor.ts index 41a0da4f3251..03254a588902 100644 --- a/build/document-extractor.ts +++ b/build/document-extractor.ts @@ -684,30 +684,6 @@ function _addSingleSpecialSection( } } -function extractHeadings($, headingLevel, id, title, titleAsText, flaws) { - const headings = $.find(headingLevel); - headings.each((i) => { - const heading = headings.eq(i); - if (i) { - // Excess! - flaws.push( - `Excess <${headingLevel}> tag that is NOT at root-level (id='${heading.attr( - "id" - )}', text='${heading.text()}')` - ); - } else { - id = heading.attr("id") ?? ""; - title = heading.html() ?? 
""; - titleAsText = heading.text(); - if (id && title) { - if (headingLevel == "h3") isH3 = true; - if (headingLevel == "h4") isH4 = true; - heading.remove(); - } - } - }); -} - function _addSectionProse( $: cheerio.Cheerio ): SectionsAndFlaws { From 375e6cc6d70ad0246a5c76d0e6f124c94a198089 Mon Sep 17 00:00:00 2001 From: Tanner Date: Sun, 4 Sep 2022 16:07:43 -0700 Subject: [PATCH 30/41] Debugging section splitting tests --- build/document-extractor.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/build/document-extractor.ts b/build/document-extractor.ts index 03254a588902..f7b190864f8e 100644 --- a/build/document-extractor.ts +++ b/build/document-extractor.ts @@ -716,7 +716,10 @@ function _addSectionProse( titleAsText = heading.text(); if (id && title) { if (headingType == "h2") h2found = true; - if (headingType == "h3") isH3 = true; + if (headingType == "h3") { + h3found = true; + isH3 = true; + } if (headingType == "h4") isH4 = true; heading.remove(); } From c9af78934441b7d15ba3cebbe2c1ea58420d1359 Mon Sep 17 00:00:00 2001 From: Tanner Date: Sun, 4 Sep 2022 16:23:29 -0700 Subject: [PATCH 31/41] Testing section splitting --- build/document-extractor.ts | 54 +++++++++++++++++++++++++++++++------ 1 file changed, 46 insertions(+), 8 deletions(-) diff --git a/build/document-extractor.ts b/build/document-extractor.ts index f7b190864f8e..f2ddcabde6ed 100644 --- a/build/document-extractor.ts +++ b/build/document-extractor.ts @@ -715,25 +715,63 @@ function _addSectionProse( title = heading.html() ?? 
""; titleAsText = heading.text(); if (id && title) { - if (headingType == "h2") h2found = true; - if (headingType == "h3") { - h3found = true; - isH3 = true; - } - if (headingType == "h4") isH4 = true; heading.remove(); } } + if (headingType == "h2") h2found = true; + if (headingType == "h3") { + h3found = true; + isH3 = true; + } + if (headingType == "h4") isH4 = true; }); } let h2found = false; - extractHeadings("h2"); + const h2s = $.find("h2"); + h2s.each((i) => { + const h2 = h2s.eq(i); + + if (i) { + // Excess! + flaws.push( + `Excess

tag that is NOT at root-level (id='${h2.attr( + "id" + )}', text='${h2.text()}')` + ); + } else { + // First element + id = h2.attr("id") ?? ""; + title = h2.html() ?? ""; + titleAsText = h2.text(); + h2.remove(); + } + h2found = true; + }); // If there was no

, look through all the

s. let h3found = false; if (!h2found) { - extractHeadings("h3"); + const h3s = $.find("h3"); + h3s.each((i) => { + const h3 = h3s.eq(i); + if (i) { + // Excess! + flaws.push( + `Excess

tag that is NOT at root-level (id='${h3.attr( + "id" + )}', text='${h3.text()}')` + ); + } else { + id = h3.attr("id") ?? ""; + title = h3.html() ?? ""; + titleAsText = h3.text(); + if (id && title) { + h3.remove(); + } + } + h3found = true; + }); } // If there was no

, look through all the

s. From ddb074b9978ee38044dec7fbe22baf3ad285ef2b Mon Sep 17 00:00:00 2001 From: Tanner Date: Sun, 4 Sep 2022 16:29:56 -0700 Subject: [PATCH 32/41] Tinkering with helper function --- build/document-extractor.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/build/document-extractor.ts b/build/document-extractor.ts index f2ddcabde6ed..40f3cbf9fca2 100644 --- a/build/document-extractor.ts +++ b/build/document-extractor.ts @@ -714,9 +714,7 @@ function _addSectionProse( id = heading.attr("id") ?? ""; title = heading.html() ?? ""; titleAsText = heading.text(); - if (id && title) { - heading.remove(); - } + heading.remove(); } if (headingType == "h2") h2found = true; if (headingType == "h3") { From 0d41c0df248067ceb6787ba1753d4c1ed87689cc Mon Sep 17 00:00:00 2001 From: Tanner Date: Sun, 4 Sep 2022 16:39:22 -0700 Subject: [PATCH 33/41] Debugging --- build/document-extractor.ts | 74 ++++++++++++++++++++++++------------- 1 file changed, 48 insertions(+), 26 deletions(-) diff --git a/build/document-extractor.ts b/build/document-extractor.ts index 40f3cbf9fca2..781241ad5f93 100644 --- a/build/document-extractor.ts +++ b/build/document-extractor.ts @@ -699,31 +699,31 @@ function _addSectionProse( // Given a section of HTML, try to extract a id, title, // Closure function to reduce duplication for heading extraction - function extractHeadings(headingType) { - const headings = $.find(headingType); - headings.each((i) => { - const heading = headings.eq(i); - if (i) { - // Excess! - flaws.push( - `Excess <${headingType}> tag that is NOT at root-level (id='${heading.attr( - "id" - )}', text='${heading.text()}')` - ); - } else { - id = heading.attr("id") ?? ""; - title = heading.html() ?? 
""; - titleAsText = heading.text(); - heading.remove(); - } - if (headingType == "h2") h2found = true; - if (headingType == "h3") { - h3found = true; - isH3 = true; - } - if (headingType == "h4") isH4 = true; - }); - } + // function extractHeadings(headingType) { + // const headings = $.find(headingType); + // headings.each((i) => { + // const heading = headings.eq(i); + // if (i) { + // // Excess! + // flaws.push( + // `Excess <${headingType}> tag that is NOT at root-level (id='${heading.attr( + // "id" + // )}', text='${heading.text()}')` + // ); + // } else { + // id = heading.attr("id") ?? ""; + // title = heading.html() ?? ""; + // titleAsText = heading.text(); + // heading.remove(); + // } + // if (headingType == "h2") h2found = true; + // if (headingType == "h3") { + // h3found = true; + // isH3 = true; + // } + // if (headingType == "h4") isH4 = true; + // }); + // } let h2found = false; const h2s = $.find("h2"); @@ -765,6 +765,7 @@ function _addSectionProse( title = h3.html() ?? ""; titleAsText = h3.text(); if (id && title) { + isH3 = true; h3.remove(); } } @@ -774,7 +775,28 @@ function _addSectionProse( // If there was no

, look through all the

s. if (!h3found) { - extractHeadings("h4"); + const h4s = $.find("h4"); + h4s.each((i) => { + const h4 = h4s.eq(i); + if (i) { + // Excess! + flaws.push( + `Excess

tag that is NOT at root-level (id='${h4.attr( + "id" + )}', text='${h4.text()}')` + ); + } else { + id = h4.attr("id") ?? ""; + title = h4.html() ?? ""; + titleAsText = h4.text(); + if (id && title) { + isH4 = true; + h4.remove(); + } + } + // h4found = true; + }); + // extractHeadings("h4"); } if (id) { From 1107894363219a89d484ae4540a69b91f7bef561 Mon Sep 17 00:00:00 2001 From: Tanner Date: Sun, 4 Sep 2022 16:46:56 -0700 Subject: [PATCH 34/41] Fallback to original extraction logic --- build/document-extractor.ts | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/build/document-extractor.ts b/build/document-extractor.ts index 781241ad5f93..5102256726db 100644 --- a/build/document-extractor.ts +++ b/build/document-extractor.ts @@ -698,33 +698,6 @@ function _addSectionProse( // The way this works... // Given a section of HTML, try to extract a id, title, - // Closure function to reduce duplication for heading extraction - // function extractHeadings(headingType) { - // const headings = $.find(headingType); - // headings.each((i) => { - // const heading = headings.eq(i); - // if (i) { - // // Excess! - // flaws.push( - // `Excess <${headingType}> tag that is NOT at root-level (id='${heading.attr( - // "id" - // )}', text='${heading.text()}')` - // ); - // } else { - // id = heading.attr("id") ?? ""; - // title = heading.html() ?? 
""; - // titleAsText = heading.text(); - // heading.remove(); - // } - // if (headingType == "h2") h2found = true; - // if (headingType == "h3") { - // h3found = true; - // isH3 = true; - // } - // if (headingType == "h4") isH4 = true; - // }); - // } - let h2found = false; const h2s = $.find("h2"); h2s.each((i) => { From 9fece47ac577c9aae8d0a67752de19260a2f2a05 Mon Sep 17 00:00:00 2001 From: Tanner Date: Sun, 4 Sep 2022 16:54:40 -0700 Subject: [PATCH 35/41] Revert to original to debug --- build/document-extractor.ts | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/build/document-extractor.ts b/build/document-extractor.ts index 5102256726db..c4a38018dbf8 100644 --- a/build/document-extractor.ts +++ b/build/document-extractor.ts @@ -721,7 +721,6 @@ function _addSectionProse( }); // If there was no

, look through all the

s. - let h3found = false; if (!h2found) { const h3s = $.find("h3"); h3s.each((i) => { @@ -742,36 +741,9 @@ function _addSectionProse( h3.remove(); } } - h3found = true; }); } - // If there was no

, look through all the

s. - if (!h3found) { - const h4s = $.find("h4"); - h4s.each((i) => { - const h4 = h4s.eq(i); - if (i) { - // Excess! - flaws.push( - `Excess

tag that is NOT at root-level (id='${h4.attr( - "id" - )}', text='${h4.text()}')` - ); - } else { - id = h4.attr("id") ?? ""; - title = h4.html() ?? ""; - titleAsText = h4.text(); - if (id && title) { - isH4 = true; - h4.remove(); - } - } - // h4found = true; - }); - // extractHeadings("h4"); - } - if (id) { // Remove trailing underscores (https://github.com/mdn/yari/issues/5492). id = id.replace(/_+$/g, ""); From 7a839c0ea10b60437fd5ec1875c7a4deb283cd6d Mon Sep 17 00:00:00 2001 From: Tanner Date: Sun, 4 Sep 2022 17:03:44 -0700 Subject: [PATCH 36/41] Replicate existing extraction logic for h4s --- build/document-extractor.ts | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/build/document-extractor.ts b/build/document-extractor.ts index c4a38018dbf8..c84e25df271f 100644 --- a/build/document-extractor.ts +++ b/build/document-extractor.ts @@ -699,6 +699,8 @@ function _addSectionProse( // Given a section of HTML, try to extract a id, title, let h2found = false; + let h3found = false; + const h2s = $.find("h2"); h2s.each((i) => { const h2 = h2s.eq(i); @@ -741,6 +743,30 @@ function _addSectionProse( h3.remove(); } } + h3found = true; + }); + } + + if (!h3found) { + const h4s = $.find("h4"); + h4s.each((i) => { + const h4 = h4s.eq(i); + if (i) { + // Excess! + flaws.push( + `Excess

tag that is NOT at root-level (id='${h4.attr( + "id" + )}', text='${h4.text()}')` + ); + } else { + id = h4.attr("id") ?? ""; + title = h4.html() ?? ""; + titleAsText = h4.text(); + if (id && title) { + isH4 = true; + h4.remove(); + } + } }); } From f89d8b1855c82b80759561579b34e88b493d5314 Mon Sep 17 00:00:00 2001 From: Tanner Date: Sun, 4 Sep 2022 17:19:08 -0700 Subject: [PATCH 37/41] Debugging --- build/document-extractor.ts | 43 ++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/build/document-extractor.ts b/build/document-extractor.ts index c84e25df271f..840aad3c5397 100644 --- a/build/document-extractor.ts +++ b/build/document-extractor.ts @@ -699,8 +699,6 @@ function _addSectionProse( // Given a section of HTML, try to extract a id, title, let h2found = false; - let h3found = false; - const h2s = $.find("h2"); h2s.each((i) => { const h2 = h2s.eq(i); @@ -743,32 +741,29 @@ function _addSectionProse( h3.remove(); } } - h3found = true; }); } - if (!h3found) { - const h4s = $.find("h4"); - h4s.each((i) => { - const h4 = h4s.eq(i); - if (i) { - // Excess! - flaws.push( - `Excess

tag that is NOT at root-level (id='${h4.attr( - "id" - )}', text='${h4.text()}')` - ); - } else { - id = h4.attr("id") ?? ""; - title = h4.html() ?? ""; - titleAsText = h4.text(); - if (id && title) { - isH4 = true; - h4.remove(); - } + const h4s = $.find("h4"); + h4s.each((i) => { + const h4 = h4s.eq(i); + if (i) { + // Excess! + flaws.push( + `Excess

tag that is NOT at root-level (id='${h4.attr( + "id" + )}', text='${h4.text()}')` + ); + } else { + id = h4.attr("id") ?? ""; + title = h4.html() ?? ""; + titleAsText = h4.text(); + if (id && title) { + isH4 = true; + h4.remove(); } - }); - } + } + }); if (id) { // Remove trailing underscores (https://github.com/mdn/yari/issues/5492). From 86c61b7eb28dca6c7b42ac574619e8a3c038252f Mon Sep 17 00:00:00 2001 From: Tanner Date: Sun, 4 Sep 2022 17:26:01 -0700 Subject: [PATCH 38/41] Testing --- build/document-extractor.ts | 40 ++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/build/document-extractor.ts b/build/document-extractor.ts index 840aad3c5397..e480ba65c602 100644 --- a/build/document-extractor.ts +++ b/build/document-extractor.ts @@ -744,26 +744,26 @@ function _addSectionProse( }); } - const h4s = $.find("h4"); - h4s.each((i) => { - const h4 = h4s.eq(i); - if (i) { - // Excess! - flaws.push( - `Excess

tag that is NOT at root-level (id='${h4.attr( - "id" - )}', text='${h4.text()}')` - ); - } else { - id = h4.attr("id") ?? ""; - title = h4.html() ?? ""; - titleAsText = h4.text(); - if (id && title) { - isH4 = true; - h4.remove(); - } - } - }); + // const h4s = $.find("h4"); + // h4s.each((i) => { + // const h4 = h4s.eq(i); + // if (i) { + // // Excess! + // flaws.push( + // `Excess

tag that is NOT at root-level (id='${h4.attr( + // "id" + // )}', text='${h4.text()}')` + // ); + // } else { + // id = h4.attr("id") ?? ""; + // title = h4.html() ?? ""; + // titleAsText = h4.text(); + // if (id && title) { + // isH4 = true; + // h4.remove(); + // } + // } + // }); if (id) { // Remove trailing underscores (https://github.com/mdn/yari/issues/5492). From 94f6e6ec0a6115269f9c5fb9a17789dd7b5f2f78 Mon Sep 17 00:00:00 2001 From: Tanner Date: Sun, 4 Sep 2022 18:11:35 -0700 Subject: [PATCH 39/41] Testing extraction behavior --- build/document-extractor.ts | 84 +++++++++++++++++++------------------ 1 file changed, 43 insertions(+), 41 deletions(-) diff --git a/build/document-extractor.ts b/build/document-extractor.ts index e480ba65c602..d4e8223ac416 100644 --- a/build/document-extractor.ts +++ b/build/document-extractor.ts @@ -699,6 +699,8 @@ function _addSectionProse( // Given a section of HTML, try to extract a id, title, let h2found = false; + let h3found = false; + const h2s = $.find("h2"); h2s.each((i) => { const h2 = h2s.eq(i); @@ -721,49 +723,49 @@ function _addSectionProse( }); // If there was no

, look through all the

s. - if (!h2found) { - const h3s = $.find("h3"); - h3s.each((i) => { - const h3 = h3s.eq(i); - if (i) { - // Excess! - flaws.push( - `Excess

tag that is NOT at root-level (id='${h3.attr( - "id" - )}', text='${h3.text()}')` - ); - } else { - id = h3.attr("id") ?? ""; - title = h3.html() ?? ""; - titleAsText = h3.text(); - if (id && title) { - isH3 = true; - h3.remove(); - } + + const h3s = $.find("h3"); + h3s.each((i) => { + const h3 = h3s.eq(i); + if (i) { + // Excess! + flaws.push( + `Excess

tag that is NOT at root-level (id='${h3.attr( + "id" + )}', text='${h3.text()}')` + ); + } else { + id = h3.attr("id") ?? ""; + title = h3.html() ?? ""; + titleAsText = h3.text(); + if (id && title) { + isH3 = true; + h3.remove(); } - }); - } + } + h3found = true; + }); - // const h4s = $.find("h4"); - // h4s.each((i) => { - // const h4 = h4s.eq(i); - // if (i) { - // // Excess! - // flaws.push( - // `Excess

tag that is NOT at root-level (id='${h4.attr( - // "id" - // )}', text='${h4.text()}')` - // ); - // } else { - // id = h4.attr("id") ?? ""; - // title = h4.html() ?? ""; - // titleAsText = h4.text(); - // if (id && title) { - // isH4 = true; - // h4.remove(); - // } - // } - // }); + const h4s = $.find("h4"); + h4s.each((i) => { + const h4 = h4s.eq(i); + if (i) { + // Excess! + flaws.push( + `Excess

tag that is NOT at root-level (id='${h4.attr( + "id" + )}', text='${h4.text()}')` + ); + } else { + id = h4.attr("id") ?? ""; + title = h4.html() ?? ""; + titleAsText = h4.text(); + if (id && title) { + isH4 = true; + h4.remove(); + } + } + }); if (id) { // Remove trailing underscores (https://github.com/mdn/yari/issues/5492). From 4e50ec92a16e2c3f3798d15307162ef99d694ce5 Mon Sep 17 00:00:00 2001 From: Tanner Date: Sun, 4 Sep 2022 18:17:33 -0700 Subject: [PATCH 40/41] elements are getting removed, trying understand the flow better --- build/document-extractor.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/build/document-extractor.ts b/build/document-extractor.ts index d4e8223ac416..40e04a1de2da 100644 --- a/build/document-extractor.ts +++ b/build/document-extractor.ts @@ -717,7 +717,7 @@ function _addSectionProse( id = h2.attr("id") ?? ""; title = h2.html() ?? ""; titleAsText = h2.text(); - h2.remove(); + // h2.remove(); } h2found = true; }); @@ -740,7 +740,7 @@ function _addSectionProse( titleAsText = h3.text(); if (id && title) { isH3 = true; - h3.remove(); + // h3.remove(); } } h3found = true; @@ -762,7 +762,7 @@ function _addSectionProse( titleAsText = h4.text(); if (id && title) { isH4 = true; - h4.remove(); + // h4.remove(); } } }); From 1a117529857b26537d7965d8313d62fee384233e Mon Sep 17 00:00:00 2001 From: Tanner Date: Sun, 4 Sep 2022 18:28:21 -0700 Subject: [PATCH 41/41] Comment out the h4 extraction for now --- build/document-extractor.ts | 87 +++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 42 deletions(-) diff --git a/build/document-extractor.ts b/build/document-extractor.ts index 40e04a1de2da..7cc31739c315 100644 --- a/build/document-extractor.ts +++ b/build/document-extractor.ts @@ -717,55 +717,58 @@ function _addSectionProse( id = h2.attr("id") ?? ""; title = h2.html() ?? ""; titleAsText = h2.text(); - // h2.remove(); + h2.remove(); } h2found = true; }); // If there was no

, look through all the

s. - - const h3s = $.find("h3"); - h3s.each((i) => { - const h3 = h3s.eq(i); - if (i) { - // Excess! - flaws.push( - `Excess

tag that is NOT at root-level (id='${h3.attr( - "id" - )}', text='${h3.text()}')` - ); - } else { - id = h3.attr("id") ?? ""; - title = h3.html() ?? ""; - titleAsText = h3.text(); - if (id && title) { - isH3 = true; - // h3.remove(); + if (!h2found) { + const h3s = $.find("h3"); + h3s.each((i) => { + const h3 = h3s.eq(i); + if (i) { + // Excess! + flaws.push( + `Excess

tag that is NOT at root-level (id='${h3.attr( + "id" + )}', text='${h3.text()}')` + ); + } else { + id = h3.attr("id") ?? ""; + title = h3.html() ?? ""; + titleAsText = h3.text(); + if (id && title) { + isH3 = true; + h3.remove(); + } } - } - h3found = true; - }); + h3found = true; + }); + } - const h4s = $.find("h4"); - h4s.each((i) => { - const h4 = h4s.eq(i); - if (i) { - // Excess! - flaws.push( - `Excess

tag that is NOT at root-level (id='${h4.attr( - "id" - )}', text='${h4.text()}')` - ); - } else { - id = h4.attr("id") ?? ""; - title = h4.html() ?? ""; - titleAsText = h4.text(); - if (id && title) { - isH4 = true; - // h4.remove(); - } - } - }); + // TODO: h4 elements are not being linkified and the + // tests are just picking up the initial text instead of the tag + // const h4s = $.find("h4"); + // h4s.each((i) => { + // const h4 = h4s.eq(i); + // if (i) { + // // Excess! + // flaws.push( + // `Excess

tag that is NOT at root-level (id='${h4.attr( + // "id" + // )}', text='${h4.text()}')` + // ); + // } else { + // id = h4.attr("id") ?? ""; + // title = h4.html() ?? ""; + // titleAsText = h4.text(); + // if (id && title) { + // isH4 = true; + // h4.remove(); + // } + // } + // }); if (id) { // Remove trailing underscores (https://github.com/mdn/yari/issues/5492).