diff --git a/packages/epub-utils/package.json b/packages/epub-utils/package.json
index 519058ff..d2232bc7 100644
--- a/packages/epub-utils/package.json
+++ b/packages/epub-utils/package.json
@@ -21,7 +21,7 @@
     "fs-extra": "^6.0.1",
     "tmp": "^0.0.33",
     "winston": "^2.4.0",
-    "xmldom": "^0.1.27",
+    "xmldom": "https://github.com/fchasen/xmldom.git#a38f7ddb536ab74e9fb549477ba9f9b7ea2d0beb",
     "xpath": "^0.0.24"
   },
   "publishConfig": {
diff --git a/packages/epub-utils/src/epub-parse.js b/packages/epub-utils/src/epub-parse.js
index 520f3ffe..eb16f6a9 100644
--- a/packages/epub-utils/src/epub-parse.js
+++ b/packages/epub-utils/src/epub-parse.js
@@ -17,6 +17,13 @@ const path = require('path');
 const xpath = require('xpath');
 const winston = require('winston');
 
+// Error handler shared by all DOMParser instances: parser warnings and errors are logged via winston.warn, fatal errors via winston.error
+const errorHandler = {
+  warning: w => winston.warn(w),
+  error: e => winston.warn(e),
+  fatalError: fe => winston.error(fe),
+};
+
 function SpineItem() {
   this.filepath = "";
   this.relpath = "";
@@ -33,7 +40,7 @@ function EpubParser() {
 
 function parseNavDoc(fullpath, epubDir) {
   const content = fs.readFileSync(fullpath).toString();
-  const doc = new DOMParser().parseFromString(content);
+  const doc = new DOMParser({errorHandler}).parseFromString(content, 'application/xhtml+xml');
 
   // Remove all links
   const aElems = doc.getElementsByTagNameNS('http://www.w3.org/1999/xhtml', 'a');
@@ -127,7 +134,7 @@ EpubParser.prototype.parse = function(epubDir) {
 
 EpubParser.prototype.parseData = function(packageDocPath, epubDir) {
   const content = fs.readFileSync(packageDocPath).toString();
-  const doc = new DOMParser().parseFromString(content);
+  const doc = new DOMParser({errorHandler}).parseFromString(content, 'application/xml');
   const select = xpath.useNamespaces(
     { opf: 'http://www.idpf.org/2007/opf',
       dc: 'http://purl.org/dc/elements/1.1/'});
@@ -168,7 +175,7 @@ EpubParser.prototype.parseData = function(packageDocPath, epubDir) {
 
 EpubParser.prototype.parseContentDocTitle = function(filepath) {
   const content = fs.readFileSync(filepath).toString();
-  const doc = new DOMParser().parseFromString(content);
+  const doc = new DOMParser({errorHandler}).parseFromString(content, 'application/xhtml+xml');
   const select = xpath.useNamespaces({html: "http://www.w3.org/1999/xhtml", epub: "http://www.idpf.org/2007/ops"});
   const title = select('//html:title/text()', doc);
   if (title.length > 0) {
@@ -182,7 +189,7 @@ EpubParser.prototype.parseContentDocTitle = function(filepath) {
 EpubParser.prototype.calculatePackageDocPath = function(epubDir) {
   const containerFilePath = `${epubDir}/META-INF/container.xml`;
   const content = fs.readFileSync(containerFilePath).toString();
-  const doc = new DOMParser().parseFromString(content);
+  const doc = new DOMParser({errorHandler}).parseFromString(content, 'application/xml');
   const select = xpath.useNamespaces({ ocf: 'urn:oasis:names:tc:opendocument:xmlns:container' });
   const rootfiles = select('//ocf:rootfile[@media-type="application/oebps-package+xml"]/@full-path', doc);
   // just grab the first one as we're not handling the case of multiple renditions
diff --git a/tests/__tests__/cli.test.js b/tests/__tests__/cli.test.js
index ba62b91b..916569d5 100644
--- a/tests/__tests__/cli.test.js
+++ b/tests/__tests__/cli.test.js
@@ -96,7 +96,17 @@ describe('Running the CLI', () => {
       const log = stripAnsi(stdout);
       expect(/^warn:\s+The SVG Content Documents in this EPUB will be ignored\./m.test(log)).toBe(true);
     });
-  });
+  });
+
+  describe('does not raise a warning', () => {
+    test('when a named character reference is used in XHTML', () => {
+      const { stdout, stderr, status } = ace(['issue-182'], {
+        cwd: path.resolve(__dirname, '../data'),
+      });
+      const log = stripAnsi(stdout);
+      expect(/^warn:\s+\[xmldom error\] entity not found/m.test(log)).toBe(false);
+    });
+  });
 
   /*test('with return-2-on-validation-error set to true should exit with return code 2', () => {
     // TODO this test won't work until we can specify the CLI option to enable returning 2 on violation(s)
diff --git a/tests/__tests__/regression.test.js b/tests/__tests__/regression.test.js
index d87573cb..1c1751e2 100644
--- a/tests/__tests__/regression.test.js
+++ b/tests/__tests__/regression.test.js
@@ -75,3 +75,15 @@ test('issue #170: heading with `doc-subtitle` role were reported empty', async (
   const report = await ace('../data/issue-170');
   expect(report['earl:result']['earl:outcome']).toEqual('pass');
 });
+
+test('issue #182: named character references are parsed', async () => {
+  const report = await ace('../data/issue-182');
+  expect(report.assertions).toEqual(expect.arrayContaining([
+    expect.objectContaining({
+      "earl:testSubject": {
+        "url": "content_001.xhtml",
+        "dct:title": "Minimal – EPUB"
+      }
+    })
+  ]));
+});
diff --git a/tests/data/issue-182/EPUB/content_001.xhtml b/tests/data/issue-182/EPUB/content_001.xhtml
new file mode 100644
index 00000000..a0415f08
--- /dev/null
+++ b/tests/data/issue-182/EPUB/content_001.xhtml
@@ -0,0 +1,10 @@
+
+
+
+Call me Ishmael.
+ + diff --git a/tests/data/issue-182/EPUB/nav.xhtml b/tests/data/issue-182/EPUB/nav.xhtml new file mode 100644 index 00000000..1d538c19 --- /dev/null +++ b/tests/data/issue-182/EPUB/nav.xhtml @@ -0,0 +1,12 @@ + + +