Skip to content

Commit

Permalink
feat: allow SVG/MathML doctype declarations
Browse files Browse the repository at this point in the history
EPUB 3.3 now allows a reserved set of external identifiers in doctype
declarations of documents with select media types.

See: https://www.w3.org/TR/epub-33/#app-identifiers-allowed

This commit:
- adds those as special cases to the XML parser code
- totally removes entity fetching for EPUB 3.3
- keeps forbidding external entities in the internal subset

Fix #1192, Fix #1114
  • Loading branch information
rdeltour committed Jan 23, 2022
1 parent ab99f1d commit 6e44b39
Show file tree
Hide file tree
Showing 23 changed files with 215 additions and 10 deletions.
31 changes: 23 additions & 8 deletions src/main/java/com/adobe/epubcheck/xml/XMLParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -313,20 +313,18 @@ public InputSource resolveEntity(String publicId, String systemId)

String resourcePath = systemIdMap.get(systemId);

if (resourcePath != null)
// external entities are not resolved in EPUB 3
if (context.version == EPUBVersion.VERSION_3 || systemId.equals("about:legacy-compat")) {
return new InputSource(new StringReader(""));
}
else if (resourcePath != null)
{
InputStream resourceStream = ResourceUtil.getResourceStream(resourcePath);
InputSource source = new InputSource(resourceStream);
source.setPublicId(publicId);
source.setSystemId(systemId);
return source;
}
else if (systemId.equals("about:legacy-compat"))
{
// special case
return new InputSource(new StringReader(""));

}
else
{
// check for a system prop that turns off online fetching
Expand Down Expand Up @@ -797,7 +795,24 @@ else if (context.version == EPUBVersion.VERSION_3)
}
else if (publicId != null || systemId != null)
{
report.message(MessageId.OPF_073, getLocation());
// check if the declaration is allowed for the current media type
boolean isAllowed;
switch (mimeType)
{
case "image/svg+xml":
isAllowed = "-//W3C//DTD SVG 1.1//EN".equals(publicId) && "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd".equals(systemId);
break;
case "application/mathml+xml":
case "application/mathml-content+xml":
case "application/mathml-presentation+xml":
isAllowed = "-//W3C//DTD MathML 3.0//EN".equals(publicId) && "http://www.w3.org/Math/DTD/mathml3/mathml3.dtd".equals(systemId);
break;
default:
isAllowed= false;
}
if (!isAllowed) {
report.message(MessageId.OPF_073, getLocation());
}
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta charset="utf-8"/>
<title>Minimal EPUB</title>
</head>
<body>
<h1>Loomings</h1>
<p>Call me Ishmael.</p>
</body>
</html>
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE math PUBLIC "-//W3C//DTD MathML 3.0//EN" "http://www.w3.org/Math/DTD/mathml3/mathml3.dtd">
<math xmlns="http://www.w3.org/1998/Math/MathML" alttext="2x+y-z">
<mrow>
<mn>2</mn>
<mo> &#x2061;<!--INVISIBLE TIMES--></mo>
<mi>x</mi>
</mrow>
<mrow>
<mo>+</mo>
<mi>y</mi>
<mo>-</mo>
<mi>z</mi>
</mrow>
</math>
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE math PUBLIC "-//W3C//DTD MathML 3.0//EN" "http://www.w3.org/Math/DTD/mathml3/mathml3.dtd">
<math xmlns="http://www.w3.org/1998/Math/MathML" alttext="2x+y-z">
<mrow>
<mn>2</mn>
<mo> &#x2061;<!--INVISIBLE TIMES--></mo>
<mi>x</mi>
</mrow>
<mrow>
<mo>+</mo>
<mi>y</mi>
<mo>-</mo>
<mi>z</mi>
</mrow>
</math>
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE math PUBLIC "-//W3C//DTD MathML 3.0//EN" "http://www.w3.org/Math/DTD/mathml3/mathml3.dtd">
<math xmlns="http://www.w3.org/1998/Math/MathML" alttext="2x+y-z">
<mrow>
<mn>2</mn>
<mo> &#x2061;<!--INVISIBLE TIMES--></mo>
<mi>x</mi>
</mrow>
<mrow>
<mo>+</mo>
<mi>y</mi>
<mo>-</mo>
<mi>z</mi>
</mrow>
</math>
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<?xml version="1.0" encoding="UTF-8"?>
<package xmlns="http://www.idpf.org/2007/opf" version="3.0" xml:lang="en" unique-identifier="q">
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
<dc:title id="title">Minimal EPUB 3.0</dc:title>
<dc:language>en</dc:language>
<dc:identifier id="q">NOID</dc:identifier>
<meta property="dcterms:modified">2017-06-14T00:00:01Z</meta>
</metadata>
<manifest>
<item id="content_001" href="content_001.xhtml" media-type="application/xhtml+xml"/>
<item id="nav" href="nav.xhtml" media-type="application/xhtml+xml" properties="nav"/>
<item id="mathml-1" href="mathml-mediatype-1.xml" media-type="application/mathml+xml" fallback="content_001"/>
<item id="mathml-2" href="mathml-mediatype-2.xml" media-type="application/mathml-presentation+xml" fallback="content_001"/>
<item id="mathml-3" href="mathml-mediatype-3.xml" media-type="application/mathml-content+xml" fallback="content_001"/>
<item id="svg" href="svg.svg" media-type="image/svg+xml"/>
</manifest>
<spine>
<itemref idref="content_001" />
<itemref idref="svg"/>
<itemref idref="mathml-1"/>
<itemref idref="mathml-2"/>
<itemref idref="mathml-3"/>
</spine>
</package>
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8" ?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
<rootfiles>
<rootfile full-path="EPUB/package.opf" media-type="application/oebps-package+xml"/>
</rootfiles>
</container>
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta charset="utf-8"/>
<title>Minimal EPUB</title>
</head>
<body>
<h1 id="ch1">Loomings</h1>
<p>Call me Ishmael.</p>
</body>
</html>
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" xml:lang="en" lang="en">
<head>
<meta charset="utf-8"/>
<title>Minimal Nav</title>
</head>
<body>
<nav epub:type="toc">
<ol>
<li><a href="content_001.xhtml">content 001</a></li>
</ol>
</nav>
</body>
</html>
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<ncx xmlns:ncx="http://www.daisy.org/z3986/2005/ncx/"
xmlns="http://www.daisy.org/z3986/2005/ncx/"
version="2005-1"
xml:lang="en">
<head>
<meta name="dtb:uid" content="NOID"/>
<meta name="dtb:depth" content="1"/>
<meta name="dtb:totalPageCount" content="0"/>
<meta name="dtb:maxPageNumber" content="0"/>
</head>
<docTitle>
<text>NCX</text>
</docTitle>
<navMap>
<navPoint id="ch1" playOrder="1">
<navLabel>
<text>Chapter 1</text>
</navLabel>
<content src="content_001.xhtml#ch1"/>
</navPoint>
</navMap>
</ncx>
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8" ?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
<rootfiles>
<rootfile full-path="EPUB/package.opf" media-type="application/oebps-package+xml"/>
</rootfiles>
</container>
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
application/epub+zip
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" xml:lang="en" lang="en">
<head>
<meta charset="utf-8"/>
<title>Minimal Nav</title>
</head>
<body>
<nav epub:type="toc">
<ol>
<li><a href="content_001.xhtml">content 001</a></li>
</ol>
</nav>
</body>
</html>
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<package xmlns="http://www.idpf.org/2007/opf" version="3.0" xml:lang="en" unique-identifier="q">
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
<dc:title id="title">Minimal EPUB 3.0</dc:title>
<dc:language>en</dc:language>
<dc:identifier id="q">NOID</dc:identifier>
<meta property="dcterms:modified">2017-06-14T00:00:01Z</meta>
</metadata>
<manifest>
<item id="content_001" href="content_001.xhtml" media-type="application/xhtml+xml"/>
<item id="nav" href="nav.xhtml" media-type="application/xhtml+xml" properties="nav"/>
<item id="ncx" href="toc.ncx" media-type="application/x-dtbncx+xml" />
</manifest>
<spine toc="ncx">
<itemref idref="content_001" />
</spine>
</package>
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
application/epub+zip
13 changes: 11 additions & 2 deletions src/test/resources/epub3/resources-publication.feature
Original file line number Diff line number Diff line change
Expand Up @@ -254,8 +254,17 @@ Feature: EPUB 3 ▸ Publication Resources ▸ Full Publication Checks

## 3.3 XML Conformance

Scenario: Report an NCX file with a DOCTYPE declaration including the external identifier (issue 305)
When checking EPUB 'xml-ncx-doctype-external-identifier-error'
Scenario: Verify DOCTYPE declarations with allowed external identifiers
When checking EPUB 'xml-external-identifier-allowed-valid'
Then no errors or warnings are reported

Scenario: Report a DOCTYPE declaration with an allowed external identifier but not on the expected media type
When checking EPUB 'xml-external-identifier-bad-mediatype-error'
Then error OPF-073 is reported
And no other errors or warnings are reported

Scenario: Report a DOCTYPE declaration with an external identifier that is not allowed
When checking EPUB 'xml-external-identifier-disallowed-error'
Then error OPF-073 is reported
And no other errors or warnings are reported

Expand Down

0 comments on commit 6e44b39

Please sign in to comment.