Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: improve detection of non-UTF-8 file names #1404

Merged
merged 1 commit into from
Dec 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,7 @@ private void initialize()
severities.put(MessageId.PKG_024, Severity.INFO);
severities.put(MessageId.PKG_025, Severity.ERROR);
severities.put(MessageId.PKG_026, Severity.ERROR);
severities.put(MessageId.PKG_027, Severity.FATAL);

// Resources
severities.put(MessageId.RSC_001, Severity.ERROR);
Expand Down
1 change: 1 addition & 0 deletions src/main/java/com/adobe/epubcheck/messages/MessageId.java
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,7 @@ public enum MessageId implements Comparable<MessageId>
PKG_024("PKG-024"),
PKG_025("PKG-025"),
PKG_026("PKG-026"),
PKG_027("PKG-027"),

// Messages relating to resources
RSC_001("RSC-001"),
Expand Down
139 changes: 76 additions & 63 deletions src/main/java/com/adobe/epubcheck/ocf/OCFChecker.java
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,11 @@ public void check()
// Check the OCF Container file structure
// --------------------------------------
//
checkContainerStructure(state);
if (!checkContainerStructure(state))
{
return;
}
;
OCFContainer container = state.getContainer();

//
Expand Down Expand Up @@ -270,83 +274,92 @@ private boolean checkContainerFile(OCFCheckerState state)
return true;
}

private void checkContainerStructure(OCFCheckerState state)
private boolean checkContainerStructure(OCFCheckerState state)
{
// Get a container
Iterable<OCFResource> resourcesProvider;
try
{
// FIXME 2022 build resourcesProvider depending on MIME type
resourcesProvider = new OCFZipResources(context.url);
} catch (IOException e)
{
// FIXME 2022 see how to propagate fatal IOError
report.message(MessageId.PKG_008, EPUBLocation.of(context), e.getLocalizedMessage());
return;
}
// Map to store the container resource files
Map<String, OCFResource> resources = new HashMap<>();
// List to store the container resource directories
List<String> directories = new LinkedList<>();

// Loop through the entries
OCFFilenameChecker filenameChecker = new OCFFilenameChecker(state.context().build());
for (OCFResource resource : resourcesProvider)
{
Preconditions.checkNotNull(resource.getPath());
Preconditions.checkNotNull(resource.getProperties());
// Get a container
Iterable<OCFResource> resourcesProvider = new OCFZipResources(context.url);
// Map to store the container resource files
Map<String, OCFResource> resources = new HashMap<>();
// List to store the container resource directories
List<String> directories = new LinkedList<>();

// Loop through the entries
OCFFilenameChecker filenameChecker = new OCFFilenameChecker(state.context().build());
// FIXME catch IAE MALFORMED entries
for (OCFResource resource : resourcesProvider)
{
Preconditions.checkNotNull(resource.getPath());
Preconditions.checkNotNull(resource.getProperties());

// FIXME 2022 report symbolic links and continue
// FIXME 2022 report symbolic links and continue

// Check duplicate entries
if (resources.containsKey(resource.getPath().toLowerCase(Locale.ROOT)))
{
context.report.message(MessageId.OPF_060, EPUBLocation.of(context), resource.getPath());
}
// Check duplicate entries after NFC normalization
else if (resources.containsKey(
Normalizer.normalize(resource.getPath().toLowerCase(Locale.ROOT), Normalizer.Form.NFC)))
{
context.report.message(MessageId.OPF_061, EPUBLocation.of(context), resource.getPath());
}
// Check duplicate entries
if (resources.containsKey(resource.getPath().toLowerCase(Locale.ROOT)))
{
context.report.message(MessageId.OPF_060, EPUBLocation.of(context), resource.getPath());
}
// Check duplicate entries after NFC normalization
else if (resources.containsKey(
Normalizer.normalize(resource.getPath().toLowerCase(Locale.ROOT), Normalizer.Form.NFC)))
{
context.report.message(MessageId.OPF_061, EPUBLocation.of(context), resource.getPath());
}

// Store the resource in the data structure
if (resource.isDirectory())
{
// the container resource is a directory,
// store it for later checking of empty directories
directories.add(resource.getPath());
}
else
{
// Check file name requirements
filenameChecker.checkCompatiblyEscaped(resource.getPath());

// report entry metadata
reportFeatures(resource.getProperties());
// the container resource is a file,
// add the resource to the container model
resources.put(resource.getPath().toLowerCase(Locale.ROOT), resource);
state.addResource(resource);
// Store the resource in the data structure
if (resource.isDirectory())
{
// the container resource is a directory,
// store it for later checking of empty directories
directories.add(resource.getPath());
}
else
{
// Check file name requirements
filenameChecker.checkCompatiblyEscaped(resource.getPath());

// report entry metadata
reportFeatures(resource.getProperties());
// the container resource is a file,
// add the resource to the container model
resources.put(resource.getPath().toLowerCase(Locale.ROOT), resource);
state.addResource(resource);
}
}
}

// Report empty directories
for (String directory : directories)
{
boolean hasContents = false;
for (OCFResource resource : resources.values())
// Report empty directories
for (String directory : directories)
{
if (resource.getPath().startsWith(directory))
boolean hasContents = false;
for (OCFResource resource : resources.values())
{
if (resource.getPath().startsWith(directory))
{
hasContents = true;
break;
}
}
if (!hasContents)
{
hasContents = true;
break;
report.message(MessageId.PKG_014, EPUBLocation.of(context), directory);
}
}
if (!hasContents)
return true;
} catch (Exception e)
{
switch (e.getMessage())
{
report.message(MessageId.PKG_014, EPUBLocation.of(context), directory);
case "invalid CEN header (bad entry name)": // reported by OpenJDK
case "MALFORMED": // reported by Oracle JDK 1.8
report.message(MessageId.PKG_027, EPUBLocation.of(context), e.getLocalizedMessage());
break;
default:
report.message(MessageId.PKG_008, EPUBLocation.of(context), e.getLocalizedMessage());
break;
}
return false;
}
}

Expand Down
3 changes: 2 additions & 1 deletion src/main/java/com/adobe/epubcheck/ocf/OCFZipResources.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.util.Enumeration;
import java.util.Iterator;
Expand Down Expand Up @@ -32,7 +33,7 @@ public OCFZipResources(URL url) throws IOException
{
new IllegalArgumentException("Not a file URL: " + url);
}
this.zip = new ZipFile(file);
this.zip = new ZipFile(file, StandardCharsets.UTF_8);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,8 @@ PKG_023=Validating the EPUB against version 2.0, default validation profile will
PKG_024=Uncommon EPUB file extension.
PKG_024_SUG=For maximum compatibility, use ".epub".
PKG_025=Publication resource must not be located in the META-INF directory
PKG_026=Obfuscated resource must be a Font Core Media Type (was declared as "%1$s" in "%2$s").
PKG_026=Obfuscated resource must be a Font Core Media Type (was declared as "%1$s" in "%2$s").
PKG_027=Could not extract EPUB ZIP content, probably due to file names not encoded in UTF-8.

#Resources
RSC_001=File "%1$s" could not be found.
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
22 changes: 22 additions & 0 deletions src/test/resources/epub3/04-ocf/ocf.feature
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,28 @@ Feature: EPUB 3 — Open Container Format
Then error OPF-060 is reported
And no other errors or warnings are reported

@spec @xref:sec-zip-container-zipreqs
Scenario: Verify file names with non-ASCII UTF-8-encoded characters are allowed
When checking EPUB 'ocf-filename-utf8-valid.epub'
Then no errors or warnings are reported

@spec @xref:sec-zip-container-zipreqs
Scenario: Report file names that are not encoded as UTF-8
When checking EPUB 'ocf-filename-not-utf8-error.epub'
Then fatal error PKG-027 is reported
Then no errors or warnings are reported

@spec @xref:sec-zip-container-zipreqs
Scenario: Verify path names with non-ASCII UTF-8-encoded characters are allowed
When checking EPUB 'ocf-filepath-utf8-valid.epub'
Then no errors or warnings are reported

@spec @xref:sec-zip-container-zipreqs
Scenario: Report path names that are not encoded as UTF-8
When checking EPUB 'ocf-filepath-not-utf8-error.epub'
Then fatal error PKG-027 is reported
Then no errors or warnings are reported


### 4.2.3 OCF ZIP container media type identification

Expand Down