From 5949b6ceb03d0f80f719400403bea3804ffdbbee Mon Sep 17 00:00:00 2001 From: Romain Deltour Date: Fri, 26 Feb 2021 00:20:34 +0100 Subject: [PATCH] fix: do not treat escaped elements as hyperlinks in HTM-053 Fix the regex used to report "file:" hyperlinks as `HTM-053` (informative) to only consider HTML elements and not plain text. This regex-based parsing is still brittle, but we'll refactor this whole package later. For now this simple fix will do. Fixes #1182 --- .../com/adobe/epubcheck/ctc/FileLinkSearch.java | 2 +- .../resources/epub3/content-publication.feature | 6 ++++++ .../EPUB/content_001.xhtml | 12 ++++++++++++ .../EPUB/nav.xhtml | 14 ++++++++++++++ .../EPUB/package.opf | 16 ++++++++++++++++ .../META-INF/container.xml | 6 ++++++ .../mimetype | 1 + 7 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 src/test/resources/epub3/files/epub/content-xhtml-link-to-local-file-escaped-valid/EPUB/content_001.xhtml create mode 100644 src/test/resources/epub3/files/epub/content-xhtml-link-to-local-file-escaped-valid/EPUB/nav.xhtml create mode 100644 src/test/resources/epub3/files/epub/content-xhtml-link-to-local-file-escaped-valid/EPUB/package.opf create mode 100644 src/test/resources/epub3/files/epub/content-xhtml-link-to-local-file-escaped-valid/META-INF/container.xml create mode 100644 src/test/resources/epub3/files/epub/content-xhtml-link-to-local-file-escaped-valid/mimetype diff --git a/src/main/java/com/adobe/epubcheck/ctc/FileLinkSearch.java b/src/main/java/com/adobe/epubcheck/ctc/FileLinkSearch.java index 907b623b7..5aef376de 100644 --- a/src/main/java/com/adobe/epubcheck/ctc/FileLinkSearch.java +++ b/src/main/java/com/adobe/epubcheck/ctc/FileLinkSearch.java @@ -20,7 +20,7 @@ * ========================================================
*/ public class FileLinkSearch extends TextSearch { - private static final Pattern fileLinkPattern = Pattern.compile("href=[\"']file://"); + private static final Pattern fileLinkPattern = Pattern.compile("<a\\s([^>]*\\s)?href=[\"']file://"); public FileLinkSearch(EPUBVersion version, ZipFile zip, Report report) { diff --git a/src/test/resources/epub3/content-publication.feature b/src/test/resources/epub3/content-publication.feature index 81a2c20e6..db7c57a6d 100644 --- a/src/test/resources/epub3/content-publication.feature +++ b/src/test/resources/epub3/content-publication.feature @@ -66,6 +66,12 @@ Feature: EPUB 3 ▸ Content Documents ▸ Full Publication Checks When checking EPUB 'content-xhtml-link-to-local-file-valid' Then info HTM-053 is reported And no errors or warnings are reported + + Scenario: Do not report escaped hyperlinks to resources in the local file system + See issue #1182 + When checking EPUB 'content-xhtml-link-to-local-file-escaped-valid' + Then info HTM-053 is reported 0 times + And no errors or warnings are reported Scenario: Report a hyperlink to a resource missing from the publication When checking EPUB 'content-xhtml-link-to-missing-doc-error' diff --git a/src/test/resources/epub3/files/epub/content-xhtml-link-to-local-file-escaped-valid/EPUB/content_001.xhtml b/src/test/resources/epub3/files/epub/content-xhtml-link-to-local-file-escaped-valid/EPUB/content_001.xhtml new file mode 100644 index 000000000..d6f89fc1c --- /dev/null +++ b/src/test/resources/epub3/files/epub/content-xhtml-link-to-local-file-escaped-valid/EPUB/content_001.xhtml @@ -0,0 +1,12 @@ + + + + + Minimal EPUB + + +

Loomings

+

Call me Ishmael.

+ <a class="external" href="file:///C:/path/file.pdf">link to local file</a> + + diff --git a/src/test/resources/epub3/files/epub/content-xhtml-link-to-local-file-escaped-valid/EPUB/nav.xhtml b/src/test/resources/epub3/files/epub/content-xhtml-link-to-local-file-escaped-valid/EPUB/nav.xhtml new file mode 100644 index 000000000..240745e63 --- /dev/null +++ b/src/test/resources/epub3/files/epub/content-xhtml-link-to-local-file-escaped-valid/EPUB/nav.xhtml @@ -0,0 +1,14 @@ + + + + + Minimal Nav + + +
+ + diff --git a/src/test/resources/epub3/files/epub/content-xhtml-link-to-local-file-escaped-valid/EPUB/package.opf b/src/test/resources/epub3/files/epub/content-xhtml-link-to-local-file-escaped-valid/EPUB/package.opf new file mode 100644 index 000000000..0d1eec6e9 --- /dev/null +++ b/src/test/resources/epub3/files/epub/content-xhtml-link-to-local-file-escaped-valid/EPUB/package.opf @@ -0,0 +1,16 @@ + + + + Minimal EPUB 3.0 + en + NOID + 2017-06-14T00:00:01Z + + + + + + + + + \ No newline at end of file diff --git a/src/test/resources/epub3/files/epub/content-xhtml-link-to-local-file-escaped-valid/META-INF/container.xml b/src/test/resources/epub3/files/epub/content-xhtml-link-to-local-file-escaped-valid/META-INF/container.xml new file mode 100644 index 000000000..318782179 --- /dev/null +++ b/src/test/resources/epub3/files/epub/content-xhtml-link-to-local-file-escaped-valid/META-INF/container.xml @@ -0,0 +1,6 @@ + + + + + + diff --git a/src/test/resources/epub3/files/epub/content-xhtml-link-to-local-file-escaped-valid/mimetype b/src/test/resources/epub3/files/epub/content-xhtml-link-to-local-file-escaped-valid/mimetype new file mode 100644 index 000000000..57ef03f24 --- /dev/null +++ b/src/test/resources/epub3/files/epub/content-xhtml-link-to-local-file-escaped-valid/mimetype @@ -0,0 +1 @@ +application/epub+zip \ No newline at end of file