Skip to content

Commit

Permalink
feat: report reserved string in XHTML custom attribute namespaces
Browse files Browse the repository at this point in the history
EPUB 3.3 disallows "w3.org" and "idpf.org" in domains of XHTML custom
attribute namespaces.

Fix #1190
  • Loading branch information
rdeltour committed Jan 23, 2022
1 parent 6e44b39 commit bc86db8
Show file tree
Hide file tree
Showing 7 changed files with 49 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ private void initialize()
severities.put(MessageId.HTM_051, Severity.WARNING);
severities.put(MessageId.HTM_052, Severity.ERROR);
severities.put(MessageId.HTM_053, Severity.INFO);
severities.put(MessageId.HTM_054, Severity.ERROR);

// Media
severities.put(MessageId.MED_001, Severity.ERROR);
Expand Down
1 change: 1 addition & 0 deletions src/main/java/com/adobe/epubcheck/messages/MessageId.java
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ public enum MessageId implements Comparable<MessageId>
HTM_051("HTM-051"),
HTM_052("HTM-052"),
HTM_053("HTM_053"),
HTM_054("HTM_054"),

// Messages associated with media (images, audio and video)
MED_001("MED-001"),
Expand Down
25 changes: 24 additions & 1 deletion src/main/java/com/adobe/epubcheck/xml/XMLParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Locale;
Expand Down Expand Up @@ -581,15 +583,20 @@ private Attributes preprocessAttributes(String elemNamespace, String elemName, S
{
if (context.version == EPUBVersion.VERSION_3)
{
String attrNamespace = attributes.getURI(i);
// Remove data-* attributes in both XHTML and SVG
if (isDataAttribute(attributes, i))
{
attributes.removeAttribute(i);
}
// Remove custom namespace attributes in XHTML
else if ("application/xhtml+xml".equals(context.mimeType)
&& isHTMLCustomNamespace(attributes.getURI(i)))
&& isHTMLCustomNamespace(attrNamespace))
{
String reserved = findReservedStringInHTMLCustomNamespace(attrNamespace);
if (reserved != null) {
report.message(MessageId.HTM_054, getLocation(), attrNamespace, reserved);
}
attributes.removeAttribute(i);
}
// Normalize case of case-insensitive attributes in XHTML
Expand Down Expand Up @@ -624,6 +631,22 @@ private static boolean isHTMLCustomNamespace(String namespace)
return !KNOWN_XHTML_NAMESPACES.contains(namespace.trim());
}

/**
 * Looks for a string that EPUB reserves ("w3.org" or "idpf.org") in the
 * host component of a custom attribute namespace.
 *
 * @param namespace
 *          the namespace URI of the attribute, may be <code>null</code>
 * @return the reserved string found in the namespace host ("w3.org" is
 *         checked first, then "idpf.org"), or <code>null</code> if the
 *         namespace is <code>null</code>, is not a syntactically valid URI,
 *         has no host component, or its host contains no reserved string
 */
private static String findReservedStringInHTMLCustomNamespace(String namespace)
{
  if (namespace == null)
  {
    return null;
  }
  try
  {
    // getHost() returns null for opaque URIs (e.g. "urn:foo:bar"), relative
    // references and registry-based authorities: such namespaces have no
    // domain to check, and must not cause a NullPointerException.
    String host = new URI(namespace.trim()).getHost();
    if (host == null)
    {
      return null;
    }
    // Host names are case-insensitive, so compare on the lower-cased form.
    host = host.toLowerCase(Locale.ROOT);
    if (host.contains("w3.org"))
    {
      return "w3.org";
    }
    if (host.contains("idpf.org"))
    {
      return "idpf.org";
    }
  } catch (URISyntaxException ignored)
  {
    // A namespace that cannot be parsed as a URI has no identifiable domain,
    // so there is nothing to report here.
  }
  return null;
}

private static boolean isCaseInsensitiveAttribute(Attributes attributes, int index)
{
return (attributes.getURI(index).isEmpty()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ HTM_050=Found epub:type="pagebreak" attribute in content document.
HTM_051=Found Microdata semantic enrichments but no RDFa. EDUPUB recommends using RDFa Lite.
HTM_052=The property "region-based" is only allowed on nav elements in Data Navigation Documents.
HTM_053=Found an external file link (file://) in file: "%1$s".
HTM_054=Custom attribute namespace ("%1$s") must not include the string "%2$s" in its domain.

#media
MED_001=Video poster must have core media image type.
Expand Down
6 changes: 5 additions & 1 deletion src/test/resources/epub3/content-document-xhtml.feature
Original file line number Diff line number Diff line change
Expand Up @@ -615,7 +615,11 @@ Feature: EPUB 3 ▸ Content Documents ▸ XHTML Document Checks
Scenario: Verify attributes in custom namespaces are ignored
When checking document 'attrs-custom-ns-valid.xhtml'
Then no errors or warnings are reported


Scenario: Report custom attributes using reserved strings in their namespace
When checking document 'attrs-custom-ns-reserved-error.xhtml'
Then error HTM-054 is reported 2 times
And no other errors or warnings are reported

## 2.5 HTML Deviations and Constraints

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta charset="utf-8" />
<title>Test</title>
</head>
<body w3:attr="disallowed" idpf:attr="disallowed" ok:attr="allowed"
xmlns:w3="http://example.w3.org" xmlns:idpf="http://example.idpf.org"
xmlns:ok="http://example.org/w3.org">
<!-- The "w3" and "idpf" prefixes are bound to namespaces whose host contains
     a reserved string ("w3.org", "idpf.org"). The "ok" prefix has "w3.org"
     only in the path of its namespace URI, with "example.org" as its host. -->
<h1>Test</h1>
</body>
</html>
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:foo="https://example.org" xml:lang="en" lang="en">
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta charset="utf-8"/>
<title>Attributes in custom namespace</title>
<meta charset="utf-8" />
<title>Test</title>
</head>
<body>
<body custom:attribute="allowed" xmlns:custom="http://example.org">
<h1>Test</h1>
<p foo:bar1="baz" foo:bar2="baz" foo:bar3="baz">custom attribute!</p>
</body>
</html>

0 comments on commit bc86db8

Please sign in to comment.