puidFormatMap) {
-
- synchronized (this) {
- setBinarySignatureIdentifier(binarySignatureIdentifier);
- setContainerSignatureDefinitions(containerSignatureDefinitions);
- setPath(path);
- setSlash(slash);
- setSlash1(slash1);
- setExpandWebArchives(expandWebArchives);
- if (getTmpDir() == null) {
- setTmpDir(new File(System.getProperty("java.io.tmpdir")));
- }
- this.puidFormatMap = puidFormatMap;
- }
- }
- /**
- * @return local path element delimiter
- */
- protected String getSlash() {
- return slash;
- }
- /**
- * @param newSlash path element delimiter
- */
- protected void setSlash(String newSlash) {
- this.slash = newSlash;
- }
- /**
- * @return container element delimiter
- */
- protected String getSlash1() {
- return slash1;
- }
- /**
- * @param newSlash1 container element delimiter
- */
- protected void setSlash1(String newSlash1) {
- this.slash1 = newSlash1;
- }
- /**
- * @return binary signature identifier
- */
- protected BinarySignatureIdentifier getBinarySignatureIdentifier() {
- return binarySignatureIdentifier;
- }
- /**
- * @param bis binary signature identifier
- */
- protected void setBinarySignatureIdentifier(BinarySignatureIdentifier bis) {
- this.binarySignatureIdentifier = bis;
- }
- /**
- * @return container signatures
- */
- protected ContainerSignatureDefinitions getContainerSignatureDefinitions() {
- return containerSignatureDefinitions;
- }
- /**
- * @param csd container signatures
- */
- protected void setContainerSignatureDefinitions(ContainerSignatureDefinitions csd) {
- this.containerSignatureDefinitions = csd;
- }
-
- /**
- * @return temporary file directory
- */
- protected File getTmpDir() {
- return tmpDir;
- }
- /**
- * @param tmpDir temporary file directory
- */
- protected void setTmpDir(File tmpDir) {
- this.tmpDir = tmpDir;
- }
- /**
- * @return archive path
- */
- protected String getPath() {
- return path;
- }
- /**
- * @param path archive path
- */
- protected void setPath(String path) {
- this.path = path;
- }
- /**
- * @return whether to expand (W)ARCs
- */
- protected Boolean getExpandWebArchives() {
- return expandWebArchives;
- }
- /**
- * @param ewa whether to expand (W)ARCs
- */
- protected void setExpandWebArchives(Boolean ewa) {
- this.expandWebArchives = ewa;
- }
-
- /**
- *
- * @param prefix String describing container-type
- * @param filename Name of file
- * @return URI for container
- */
- protected String makeContainerURI(String prefix, String filename) {
- return prefix + ":" + getSlash1() + getPath() + filename + "!" + getSlash();
- }
-
- /**
- * @param request The request
- * @param in The container input stream
- * @param newPath Path for the Container file
- * @param aggregator Aggregates ZIP file container information
- * @throws CommandExecutionException When an exception happens during execution
- */
- protected void expandContainer(
- IdentificationRequest request, InputStream in, String newPath, ContainerAggregator aggregator)
- throws CommandExecutionException, UnsupportedZipFeatureException {
-
- try {
- request.open(in);
- IdentificationResultCollection results =
- getBinarySignatureIdentifier().matchBinarySignatures(request);
-
- if (results.getResults().isEmpty()) {
- results = binarySignatureIdentifier.matchExtensions(request, true);
- }
-
- final ResultPrinter resultPrinter = new ResultPrinter(
- getBinarySignatureIdentifier(),
- getContainerSignatureDefinitions(),
- newPath,
- getSlash(),
- getSlash1(),
- true,
- getExpandWebArchives(),
- aggregator,
- puidFormatMap);
-
- resultPrinter.print(results, request);
- request.close();
- } catch (UnsupportedZipFeatureException e) {
- // output: org.apache.commons.compress.archivers.zip.UnsupportedZipFeatureException: unsupported feature
- // encryption used in entry Book_pdfx1a.pdf
- throw e;
- } catch (IOException ioe) {
- logger.warn(ioe + " " + newPath);
- } finally {
- try {
- // make sure no temp files are left behind
- request.close();
- } catch (IOException ioe) {
- logger.warn("Failed to close temp file for Container request:" + ioe);
- // not a lot we can do here - warning msg already given and deleteOnExit set
- }
- }
- }
-}
diff --git a/src/main/java/edu/harvard/hul/ois/fits/tools/droid/CollectingResultHandler.java b/src/main/java/edu/harvard/hul/ois/fits/tools/droid/CollectingResultHandler.java
new file mode 100644
index 00000000..58a90489
--- /dev/null
+++ b/src/main/java/edu/harvard/hul/ois/fits/tools/droid/CollectingResultHandler.java
@@ -0,0 +1,79 @@
+//
+// Copyright (c) 2023 by The President and Fellows of Harvard College
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License. You may obtain a copy of the License at:
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software distributed under the License is
+// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permission and limitations under the License.
+//
+
+package edu.harvard.hul.ois.fits.tools.droid;
+
+import java.util.ArrayList;
+import java.util.List;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import uk.gov.nationalarchives.droid.core.interfaces.IdentificationException;
+import uk.gov.nationalarchives.droid.core.interfaces.IdentificationResult;
+import uk.gov.nationalarchives.droid.core.interfaces.IdentificationResultCollection;
+import uk.gov.nationalarchives.droid.core.interfaces.ResourceId;
+import uk.gov.nationalarchives.droid.core.interfaces.ResultHandler;
+import uk.gov.nationalarchives.droid.core.interfaces.filter.Filter;
+
+/**
+ * Droid calls this class whenever it identifies a file, and we collect the results. This is necessary because it's
+ * the only way to get the results from archive contents. Droid only returns the result for the specified file directly,
+ * and will not return the results for any files that are contained within the specified file.
+ *
+ * This class is NOT THREAD SAFE. You must use a different instance per thread, and you must call {@link #reset()}
+ * between files.
+ */
+class CollectingResultHandler implements ResultHandler {
+
+    private static final Logger log = LoggerFactory.getLogger(CollectingResultHandler.class);
+
+    private final List<IdentificationResultCollection> results = new ArrayList<>();
+
+    /**
+     * Clears the accumulated results in preparation for processing a new file.
+     */
+    public void reset() {
+        results.clear();
+    }
+
+    /**
+     * @return an unmodifiable snapshot of the identification results accumulated since the last reset
+     */
+    public List<IdentificationResultCollection> getResults() {
+        return List.copyOf(results);
+    }
+
+    @Override
+    public ResourceId handle(IdentificationResultCollection identificationResultCollection) {
+        results.add(identificationResultCollection); // keep every result handed to this handler
+        return new ResourceId(DroidId.nextId(), "");
+    }
+
+    @Override
+    public ResourceId handleDirectory(IdentificationResult identificationResult, ResourceId resourceId, boolean b) {
+        return new ResourceId(DroidId.nextId(), ""); // directories are not collected, only given an id
+    }
+
+    @Override
+    public void handleError(IdentificationException e) {
+        log.warn("DROID identification error", e);
+    }
+
+    @Override
+    public void deleteCascade(Long aLong) {} // no-op
+
+    @Override
+    public void commit() {} // no-op
+
+    @Override
+    public void init() {} // no-op
+
+    @Override
+    public void setResultsFilter(Filter filter) {} // no-op: the filter is ignored
+}
diff --git a/src/main/java/edu/harvard/hul/ois/fits/tools/droid/ContainerAggregator.java b/src/main/java/edu/harvard/hul/ois/fits/tools/droid/ContainerAggregator.java
deleted file mode 100644
index 98ff538f..00000000
--- a/src/main/java/edu/harvard/hul/ois/fits/tools/droid/ContainerAggregator.java
+++ /dev/null
@@ -1,163 +0,0 @@
-//
-// Copyright (c) 2017 by The President and Fellows of Harvard College
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License. You may obtain a copy of the License at:
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software distributed under the License is
-// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permission and limitations under the License.
-//
-
-package edu.harvard.hul.ois.fits.tools.droid;
-
-import java.util.Collections;
-import java.util.Map;
-import java.util.TreeMap;
-import java.util.zip.ZipEntry;
-
-/**
- * This class aggregates data from Droid about container (ZIP) type files.
- *
- * @author dan179
- */
-public class ContainerAggregator {
-
- // Maps format type to number of these types of files with a ZIP file
- private final Map<String, Integer> formatToCount;
-
- // Aggregated original size of all files contained within the ZIP file.
- private long originalSize;
-
- // Aggregated compressed size of all files contained within the ZIP file.
- // If ZIP is not compressed then should equal the original size.
- private long compressedSize;
-
- private boolean isEncrypted = false;
-
- private static final String UNKNOWN_FORMAT = "Unknown";
-
- public ContainerAggregator() {
- formatToCount = new TreeMap<>(); // order entries by key for the sake of XMLUnit tests
- }
-
- /**
- * Aggregated original size of files with a ZIP file.
- *
- * @return Original size of all files within ZIP file in bytes.
- */
- public long getOriginalSize() {
- return originalSize;
- }
-
- /**
- * Increment the calculated original size of the examined ZIP file by the original size of a contained file.
- */
- public void incrementOriginalSize(long originalSize) {
- this.originalSize += originalSize;
- }
-
- /**
- * Aggregated compressed size of files with a ZIP file.
- *
- * @return Compressed size of all files within ZIP file in bytes.
- */
- public long getCompressedSize() {
- return compressedSize;
- }
-
- /**
- * Increment the compressed size of the examined ZIP file by the original size of a contained file.
- */
- public void incrementCompressedSize(long compressedSize) {
- this.compressedSize += compressedSize;
- }
-
- /**
- * Add a format type to this collection and increment count for this type.
- */
- public void addFormat(String format) {
- if (format != null) {
- Integer cnt = formatToCount.get(format);
- if (cnt == null) {
- formatToCount.put(format, 1);
- } else {
- cnt++;
- formatToCount.put(format, cnt);
- }
- }
- }
-
- public void incrementUnknownFormat() {
- Integer cnt = formatToCount.get(UNKNOWN_FORMAT);
- if (cnt == null) {
- formatToCount.put(UNKNOWN_FORMAT, 1);
- } else {
- cnt++;
- formatToCount.put(UNKNOWN_FORMAT, cnt);
- }
- }
-
- /**
- * A Map of format type to number of each format type.
- *
- * @return Format to count mapping
- */
- public Map<String, Integer> getFormatCounts() {
- return Collections.unmodifiableMap(formatToCount);
- }
-
- /**
- * Total number of all format types added to this collection.
- *
- * @return Total number for formats added to this collection.
- */
- public int getTotalEntriesCount() {
- int total = 0;
- for (Integer val : formatToCount.values()) {
- total += val;
- }
- return total;
- }
-
- /**
- * The compression method as defined the Java ZipEntry. Currently only values for 'stored' (uncompressed)
- * and 'deflate' (compressed) are used.
- *
- * @return The value corresponding to
- * @see java.util.zip.ZipEntry
- */
- public int getCompressionMethod() {
- return getCompressedSize() < getOriginalSize() ? ZipEntry.DEFLATED : ZipEntry.STORED;
- }
-
- /**
- * Whether the container being examined is encrypted.
- */
- public boolean isEncrypted() {
- return isEncrypted;
- }
-
- /**
- * Sets whether this container being examined is encrypted.
- */
- public void setEncrypted(boolean isEncrypted) {
- this.isEncrypted = isEncrypted;
- }
-
- @Override
- public String toString() {
- StringBuilder builder = new StringBuilder();
- builder.append("ContainerAggregator [formatToCount=");
- builder.append(formatToCount);
- builder.append("]");
- builder.append(" total count: ");
- builder.append(getTotalEntriesCount());
- builder.append(", originalSize: ");
- builder.append(originalSize);
- builder.append(", compressedSize: ");
- builder.append(compressedSize);
- builder.append(", isEncrypted: ");
- builder.append(isEncrypted);
- return builder.toString();
- }
-}
diff --git a/src/main/java/edu/harvard/hul/ois/fits/tools/droid/Droid.java b/src/main/java/edu/harvard/hul/ois/fits/tools/droid/Droid.java
index a6733e0f..15537745 100644
--- a/src/main/java/edu/harvard/hul/ois/fits/tools/droid/Droid.java
+++ b/src/main/java/edu/harvard/hul/ois/fits/tools/droid/Droid.java
@@ -11,41 +11,19 @@
package edu.harvard.hul.ois.fits.tools.droid;
import edu.harvard.hul.ois.fits.Fits;
-import edu.harvard.hul.ois.fits.FitsMetadataValues;
import edu.harvard.hul.ois.fits.exceptions.FitsToolException;
import edu.harvard.hul.ois.fits.tools.ToolBase;
import edu.harvard.hul.ois.fits.tools.ToolInfo;
import edu.harvard.hul.ois.fits.tools.ToolOutput;
import java.io.File;
-import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
+import java.time.Duration;
+import java.time.Instant;
import org.apache.commons.configuration.XMLConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import uk.gov.nationalarchives.droid.command.action.VersionCommand;
-import uk.gov.nationalarchives.droid.container.ContainerFileIdentificationRequestFactory;
-import uk.gov.nationalarchives.droid.container.ContainerSignatureDefinitions;
-import uk.gov.nationalarchives.droid.container.ContainerSignatureFileReader;
-import uk.gov.nationalarchives.droid.container.ole2.Ole2Identifier;
-import uk.gov.nationalarchives.droid.container.ole2.Ole2IdentifierEngine;
-import uk.gov.nationalarchives.droid.container.zip.ZipIdentifier;
-import uk.gov.nationalarchives.droid.container.zip.ZipIdentifierEngine;
-import uk.gov.nationalarchives.droid.core.BinarySignatureIdentifier;
-import uk.gov.nationalarchives.droid.core.SignatureParseException;
-import uk.gov.nationalarchives.droid.core.interfaces.IdentificationResult;
-import uk.gov.nationalarchives.droid.core.interfaces.IdentificationResultCollection;
-import uk.gov.nationalarchives.droid.core.interfaces.archive.ArchiveFormatResolver;
-import uk.gov.nationalarchives.droid.core.interfaces.archive.ArchiveFormatResolverImpl;
-import uk.gov.nationalarchives.droid.core.interfaces.archive.ContainerIdentifierFactory;
-import uk.gov.nationalarchives.droid.core.interfaces.archive.ContainerIdentifierFactoryImpl;
-import uk.gov.nationalarchives.droid.profile.referencedata.Format;
-import uk.gov.nationalarchives.droid.signature.SaxSignatureFileParser;
-import uk.gov.nationalarchives.droid.signature.SignatureParser;
/** The principal glue class for invoking DROID under FITS.
*/
@@ -53,17 +31,8 @@ public class Droid extends ToolBase {
private boolean enabled = true;
private final Fits fits;
- private final List includeExts;
- private long kbReadLimit;
- private static File sigFile;
- private static final BinarySignatureIdentifier sigIdentifier = new BinarySignatureIdentifier();
- private static final ContainerIdentifierFactory containerIdentifierFactory = new ContainerIdentifierFactoryImpl();
- private static final ArchiveFormatResolver containerFormatResolver = new ArchiveFormatResolverImpl();
- private static ContainerSignatureDefinitions containerSignatureDefinitions;
- private static final Map puidFormatMap = new HashMap<>(2500);
-
- private static final List CONTAINER_TYPE_MIMETYPES = Arrays.asList("application/zip");
+ private final DroidWrapper droidWrapper;
private static final Logger logger = LoggerFactory.getLogger(Droid.class);
@@ -74,118 +43,33 @@ public Droid(Fits fits) throws FitsToolException {
info = new ToolInfo("Droid", getDroidVersion(), null);
try {
- String droid_conf = Fits.FITS_TOOLS_DIR + "droid" + File.separator;
XMLConfiguration config = fits.getConfig();
- // only need a single Droid signature file.
- if (sigFile == null) {
- synchronized (this) {
- if (sigFile == null) {
- sigFile = new File(droid_conf + config.getString("droid_sigfile"));
- sigIdentifier.setSignatureFile(sigFile.getAbsolutePath());
- sigIdentifier.init();
-
- // The following is necessary to init the code that identifies formats like docx, xlsx, etc
- SignatureParser sigParser = new SaxSignatureFileParser(sigFile.toURI());
- sigParser.formats(format -> {
- puidFormatMap.put(format.getPuid(), format);
- });
-
- String containerSigFile = droid_conf + config.getString("droid_container_sigfile");
- ContainerSignatureFileReader signatureReader =
- new ContainerSignatureFileReader(containerSigFile);
-
- containerSignatureDefinitions = signatureReader.getDefinitions();
-
- ZipIdentifierEngine zipIdentifierEngine = new ZipIdentifierEngine();
- zipIdentifierEngine.setRequestFactory(new ContainerFileIdentificationRequestFactory());
-
- ZipIdentifier zipIdentifier = new ZipIdentifier();
- zipIdentifier.setContainerType("ZIP");
- zipIdentifier.setContainerIdentifierFactory(containerIdentifierFactory);
- zipIdentifier.setContainerFormatResolver(containerFormatResolver);
- zipIdentifier.setDroidCore(sigIdentifier);
- zipIdentifier.setIdentifierEngine(zipIdentifierEngine);
- zipIdentifier.setSignatureReader(signatureReader);
- zipIdentifier.init();
-
- Ole2IdentifierEngine ole2IdentifierEngine = new Ole2IdentifierEngine();
- ole2IdentifierEngine.setRequestFactory(new ContainerFileIdentificationRequestFactory());
-
- Ole2Identifier ole2Identifier = new Ole2Identifier();
- ole2Identifier.setContainerType("OLE2");
- ole2Identifier.setContainerIdentifierFactory(containerIdentifierFactory);
- ole2Identifier.setContainerFormatResolver(containerFormatResolver);
- ole2Identifier.setDroidCore(sigIdentifier);
- ole2Identifier.setIdentifierEngine(ole2IdentifierEngine);
- ole2Identifier.setSignatureReader(signatureReader);
- ole2Identifier.init();
- }
- }
- }
- includeExts = (List<String>) (List<?>) config.getList("droid_read_limit[@include-exts]");
- String limit = config.getString("droid_read_limit[@read-limit-kb]");
- kbReadLimit = -1L;
- if (limit != null) {
- try {
- kbReadLimit = Long.parseLong(limit);
- } catch (NumberFormatException nfe) {
- throw new FitsToolException(
- "Invalid long value in fits.xml droid_read_limit[@read-limit-kb]: " + limit, nfe);
- }
- }
+ droidWrapper = DroidWrapperFactory.getOrCreateFactory(DroidConfig.fromFitsConfig(config))
+ .createInstance();
} catch (Throwable e) {
- throw new FitsToolException("Error initilizing DROID", e);
+ throw new FitsToolException("Error initializing DROID", e);
}
}
@Override
public ToolOutput extractInfo(File file) throws FitsToolException {
- logger.debug("Droid.extractInfo starting on " + file.getName());
- long startTime = System.currentTimeMillis();
- IdentificationResultCollection results;
- ContainerAggregator aggregator = null;
+ logger.debug("Droid.extractInfo starting on {}", file.getName());
+ Instant startTime = Instant.now();
+
+ DroidResult result;
+
try {
- DroidQuery droidQuery = new DroidQuery(
- sigIdentifier,
- containerIdentifierFactory,
- containerFormatResolver,
- puidFormatMap,
- containerSignatureDefinitions,
- includeExts,
- kbReadLimit,
- file);
- // the following will almost always return a single result
- results = droidQuery.queryFile();
- for (IdentificationResult res : results.getResults()) {
- String mimeType = res.getMimeType();
-
- if (FitsMetadataValues.getInstance().normalizeMimeType(mimeType) != null) {
- mimeType = FitsMetadataValues.getInstance().normalizeMimeType(mimeType);
- }
-
- String fileName = file.getName();
- int lastDot = fileName.lastIndexOf('.');
- String extension = "";
- if (lastDot > -1) {
- extension = fileName.substring(lastDot + 1);
- }
-
- if (CONTAINER_TYPE_MIMETYPES.contains(mimeType) && "zip".equals(extension)) {
- aggregator = droidQuery.queryContainerData(results);
- }
- }
-
- } catch (IOException e) {
+ result = droidWrapper.analyze(file.toPath());
+ } catch (Exception e) {
throw new FitsToolException("DROID can't query file " + file.getAbsolutePath(), e);
- } catch (SignatureParseException e) {
- throw new FitsToolException("Problem with DROID signature file");
}
- DroidToolOutputter outputter = new DroidToolOutputter(this, results, fits, aggregator);
+
+ DroidToolOutputter outputter = new DroidToolOutputter(this, fits, result);
ToolOutput output = outputter.toToolOutput();
- duration = System.currentTimeMillis() - startTime;
+ duration = Duration.between(startTime, Instant.now()).toMillis();
runStatus = RunStatus.SUCCESSFUL;
- logger.debug("Droid.extractInfo finished on " + file.getName());
+ logger.debug("Droid.extractInfo finished on {}", file.getName());
return output;
}
diff --git a/src/main/java/edu/harvard/hul/ois/fits/tools/droid/DroidConfig.java b/src/main/java/edu/harvard/hul/ois/fits/tools/droid/DroidConfig.java
new file mode 100644
index 00000000..7a52b09c
--- /dev/null
+++ b/src/main/java/edu/harvard/hul/ois/fits/tools/droid/DroidConfig.java
@@ -0,0 +1,203 @@
+//
+// Copyright (c) 2023 by The President and Fellows of Harvard College
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License. You may obtain a copy of the License at:
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software distributed under the License is
+// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permission and limitations under the License.
+//
+
+package edu.harvard.hul.ois.fits.tools.droid;
+
+import edu.harvard.hul.ois.fits.Fits;
+import edu.harvard.hul.ois.fits.exceptions.FitsToolException;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import org.apache.commons.configuration.XMLConfiguration;
+
+class DroidConfig {
+
+    private Path sigFile;
+    private Path containerSigFile;
+    private Path tempDir;
+    private Set<String> extsToLimitBytesRead = Collections.emptySet();
+    private long byteReadLimit = -1;
+    private boolean processZip = true;
+    private boolean processTar = true;
+    private boolean processGzip = true;
+    private boolean processRar = true;
+    private boolean process7zip = true;
+    private boolean processIso = true;
+    private boolean processBzip2 = true;
+    private boolean processArc = true;
+    private boolean processWarc = true;
+
+    /** Builds the DROID settings from the FITS configuration; a non-numeric read limit raises FitsToolException. */
+    public static DroidConfig fromFitsConfig(XMLConfiguration fitsConfig) throws FitsToolException {
+        var droidConfig = new DroidConfig();
+        Path droidConfigDir = Paths.get(Fits.FITS_TOOLS_DIR + "droid");
+        droidConfig.setSigFile(droidConfigDir.resolve(fitsConfig.getString("droid_sigfile")));
+        droidConfig.setContainerSigFile(droidConfigDir.resolve(fitsConfig.getString("droid_container_sigfile")));
+        // fall back to the JVM's java.io.tmpdir when process.tmpdir is not configured
+        String tempStr = fitsConfig.getString("process.tmpdir", System.getProperty("java.io.tmpdir"));
+        droidConfig.setTempDir(tempStr == null ? null : Paths.get(tempStr));
+
+        droidConfig.setExtsToLimitBytesRead(
+                new HashSet<>((List<String>) (List<?>) fitsConfig.getList("droid_read_limit[@include-exts]")));
+        String limit = fitsConfig.getString("droid_read_limit[@read-limit-kb]");
+        long kbReadLimit = -1L;
+        if (limit != null) {
+            try {
+                kbReadLimit = Long.parseLong(limit);
+            } catch (NumberFormatException nfe) {
+                throw new FitsToolException(
+                        "Invalid long value in fits.xml droid_read_limit[@read-limit-kb]: " + limit, nfe);
+            }
+        }
+        // the limit is configured in KB but stored in bytes; -1 (the default when unset) is passed through unchanged
+        droidConfig.setByteReadLimit(kbReadLimit == -1 ? -1 : 1024 * kbReadLimit);
+
+        droidConfig.setProcessZip(fitsConfig.getBoolean("droid.process.zip", true));
+        droidConfig.setProcessTar(fitsConfig.getBoolean("droid.process.tar", true));
+        droidConfig.setProcessGzip(fitsConfig.getBoolean("droid.process.gzip", true));
+        droidConfig.setProcessArc(fitsConfig.getBoolean("droid.process.arc", true));
+        droidConfig.setProcessWarc(fitsConfig.getBoolean("droid.process.warc", true));
+        droidConfig.setProcessBzip2(fitsConfig.getBoolean("droid.process.bzip2", true));
+        droidConfig.setProcess7zip(fitsConfig.getBoolean("droid.process.seven-zip", true));
+        droidConfig.setProcessIso(fitsConfig.getBoolean("droid.process.iso", true));
+        droidConfig.setProcessRar(fitsConfig.getBoolean("droid.process.rar", true));
+
+        return droidConfig;
+    }
+
+    public Path getSigFile() {
+        return sigFile;
+    }
+
+    public DroidConfig setSigFile(Path sigFile) {
+        this.sigFile = sigFile;
+        return this;
+    }
+
+    public Path getContainerSigFile() {
+        return containerSigFile;
+    }
+
+    public DroidConfig setContainerSigFile(Path containerSigFile) {
+        this.containerSigFile = containerSigFile;
+        return this;
+    }
+
+    public Path getTempDir() {
+        return tempDir;
+    }
+
+    public DroidConfig setTempDir(Path tempDir) {
+        this.tempDir = tempDir;
+        return this;
+    }
+
+    public Set<String> getExtsToLimitBytesRead() {
+        return extsToLimitBytesRead;
+    }
+
+    public DroidConfig setExtsToLimitBytesRead(Set<String> extsToLimitBytesRead) {
+        this.extsToLimitBytesRead = extsToLimitBytesRead;
+        return this;
+    }
+
+    public long getByteReadLimit() {
+        return byteReadLimit;
+    }
+
+    public DroidConfig setByteReadLimit(long byteReadLimit) {
+        this.byteReadLimit = byteReadLimit;
+        return this;
+    }
+
+    public boolean isProcessZip() {
+        return processZip;
+    }
+
+    public DroidConfig setProcessZip(boolean processZip) {
+        this.processZip = processZip;
+        return this;
+    }
+
+    public boolean isProcessTar() {
+        return processTar;
+    }
+
+    public DroidConfig setProcessTar(boolean processTar) {
+        this.processTar = processTar;
+        return this;
+    }
+
+    public boolean isProcessGzip() {
+        return processGzip;
+    }
+
+    public DroidConfig setProcessGzip(boolean processGzip) {
+        this.processGzip = processGzip;
+        return this;
+    }
+
+    public boolean isProcessRar() {
+        return processRar;
+    }
+
+    public DroidConfig setProcessRar(boolean processRar) {
+        this.processRar = processRar;
+        return this;
+    }
+
+    public boolean isProcess7zip() {
+        return process7zip;
+    }
+
+    public DroidConfig setProcess7zip(boolean process7zip) {
+        this.process7zip = process7zip;
+        return this;
+    }
+
+    public boolean isProcessIso() {
+        return processIso;
+    }
+
+    public DroidConfig setProcessIso(boolean processIso) {
+        this.processIso = processIso;
+        return this;
+    }
+
+    public boolean isProcessBzip2() {
+        return processBzip2;
+    }
+
+    public DroidConfig setProcessBzip2(boolean processBzip2) {
+        this.processBzip2 = processBzip2;
+        return this;
+    }
+
+    public boolean isProcessArc() {
+        return processArc;
+    }
+
+    public DroidConfig setProcessArc(boolean processArc) {
+        this.processArc = processArc;
+        return this;
+    }
+
+    public boolean isProcessWarc() {
+        return processWarc;
+    }
+
+    public DroidConfig setProcessWarc(boolean processWarc) {
+        this.processWarc = processWarc;
+        return this;
+    }
+}
diff --git a/src/main/java/edu/harvard/hul/ois/fits/tools/droid/DroidId.java b/src/main/java/edu/harvard/hul/ois/fits/tools/droid/DroidId.java
new file mode 100644
index 00000000..3146c073
--- /dev/null
+++ b/src/main/java/edu/harvard/hul/ois/fits/tools/droid/DroidId.java
@@ -0,0 +1,33 @@
+//
+// Copyright (c) 2023 by The President and Fellows of Harvard College
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License. You may obtain a copy of the License at:
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software distributed under the License is
+// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permission and limitations under the License.
+//
+
+package edu.harvard.hul.ois.fits.tools.droid;
+
+import java.util.concurrent.atomic.AtomicLong;
+
+/**
+ * Every file Droid processes needs an ID. These ids are normally assigned by adding the resource to a db, but FITS
+ * doesn't use a DB. Instead, we just generate the id here.
+ */
+final class DroidId {
+
+ private static final AtomicLong ID = new AtomicLong(1);
+
+ private DroidId() {
+ // private: this class only exposes static members
+ }
+
+ /**
+ * @return the next id; the counter starts at 1 and advances by one on every call
+ */
+ public static long nextId() {
+ return ID.getAndIncrement();
+ }
+}
diff --git a/src/main/java/edu/harvard/hul/ois/fits/tools/droid/DroidQuery.java b/src/main/java/edu/harvard/hul/ois/fits/tools/droid/DroidQuery.java
deleted file mode 100644
index 0e7d26bf..00000000
--- a/src/main/java/edu/harvard/hul/ois/fits/tools/droid/DroidQuery.java
+++ /dev/null
@@ -1,203 +0,0 @@
-//
-// Copyright (c) 2016 by The President and Fellows of Harvard College
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License. You may obtain a copy of the License at:
-// http://www.apache.org/licenses/LICENSE-2.0
-// Unless required by applicable law or agreed to in writing, software distributed under the License is
-// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permission and limitations under the License.
-//
-
-/* Droid 6.1 has no nicely packaged way to make simple queries. This
- * class attempts to fill that gap for FITS, in a way that will let it
- * be lifted for other uses and perhaps incorporated into Droid itself.
- */
-package edu.harvard.hul.ois.fits.tools.droid;
-
-import edu.harvard.hul.ois.fits.exceptions.FitsToolException;
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-import uk.gov.nationalarchives.droid.command.action.CommandExecutionException;
-import uk.gov.nationalarchives.droid.container.ContainerSignatureDefinitions;
-import uk.gov.nationalarchives.droid.core.BinarySignatureIdentifier;
-import uk.gov.nationalarchives.droid.core.SignatureParseException;
-import uk.gov.nationalarchives.droid.core.interfaces.IdentificationRequest;
-import uk.gov.nationalarchives.droid.core.interfaces.IdentificationResult;
-import uk.gov.nationalarchives.droid.core.interfaces.IdentificationResultCollection;
-import uk.gov.nationalarchives.droid.core.interfaces.IdentificationResultImpl;
-import uk.gov.nationalarchives.droid.core.interfaces.RequestIdentifier;
-import uk.gov.nationalarchives.droid.core.interfaces.archive.ArchiveFormatResolver;
-import uk.gov.nationalarchives.droid.core.interfaces.archive.ContainerIdentifier;
-import uk.gov.nationalarchives.droid.core.interfaces.archive.ContainerIdentifierFactory;
-import uk.gov.nationalarchives.droid.core.interfaces.resource.FileSystemIdentificationRequest;
-import uk.gov.nationalarchives.droid.core.interfaces.resource.RequestMetaData;
-import uk.gov.nationalarchives.droid.profile.referencedata.Format;
-
-public class DroidQuery {
-
- private final BinarySignatureIdentifier sigIdentifier;
- private final ContainerIdentifierFactory containerIdentifierFactory;
- private final ArchiveFormatResolver containerFormatResolver;
- private final Map puidFormatMap;
- private final ContainerSignatureDefinitions containerSignatureDefinitions;
- // Certain file types (possibly really large file), we only want to examine the beginning of the file.
- private long bytesToRead = -1;
- private final List fileExtensions; // file extensions for files on which to apply file read limit
- private final File file; // input file that is being processed
-
- /**
- * Create a DroidQuery object. This can be retained for any number of
- * different queries.
- *
- * @param sigIdentifier BinarySignatureIdentifier for a Droid signature file
- * @param containerIdentifierFactory container identifier
- * @param containerFormatResolver container format resolver
- * @param puidFormatMap map of puids to formats
- * @param containerSignatureDefinitions container sig definitions
- * @param includeExts File extensions to include for possibly limiting number of bytes to read of file to process.
- * @param kbReadLimit Number of bytes to process in KB from the beginning of the file. -1 indicates read entire file.
- * @param file The file to be processed by DROID.
- * @throws SignatureParseException If there is a problem processing the DROID signature file.
- */
- public DroidQuery(
- BinarySignatureIdentifier sigIdentifier,
- ContainerIdentifierFactory containerIdentifierFactory,
- ArchiveFormatResolver containerFormatResolver,
- Map puidFormatMap,
- ContainerSignatureDefinitions containerSignatureDefinitions,
- List includeExts,
- long kbReadLimit,
- File file)
- throws SignatureParseException, FileNotFoundException {
- this.sigIdentifier = sigIdentifier;
- this.containerIdentifierFactory = containerIdentifierFactory;
- this.containerFormatResolver = containerFormatResolver;
- this.puidFormatMap = puidFormatMap;
- this.containerSignatureDefinitions = containerSignatureDefinitions;
- this.fileExtensions = includeExts;
- if (kbReadLimit > 0) {
- this.bytesToRead = (kbReadLimit * 1024) - 1;
- }
- this.file = file;
- }
-
- /**
- * Process the file by DROID.
- * @return A collection of results from DROID. Usually a single result.
- * @throws IOException
- */
- IdentificationResultCollection queryFile() throws IOException {
-
- // For certain file types, set max. number of bytes at beginning of file to process.
- // See https://groups.google.com/forum/#!msg/droid-list/HqN6lKOATJk/i-qTEI-XEwAJ;context-place=forum/droid-list
- // which indicates minimum number of bytes required to identify certain input file types.
- long bytesToExamine = file.length();
- String filename = file.getName();
- int lastDot = filename.lastIndexOf('.');
- if (lastDot > 0 && filename.length() > lastDot) {
- String fileExtension = filename.substring(++lastDot).toLowerCase(); // examine extension past the last dot
- if (fileExtensions != null && fileExtensions.contains(fileExtension) && bytesToRead > 0) {
- bytesToExamine = Math.min(file.length(), bytesToRead);
- }
- }
- RequestMetaData metadata = new RequestMetaData(bytesToExamine, file.lastModified(), file.getName());
- RequestIdentifier identifier = new RequestIdentifier(file.toURI());
- FileSystemIdentificationRequest req = null;
- try {
- req = new FileSystemIdentificationRequest(metadata, identifier);
- req.open(file.toPath());
-
- // This logic is based on
- // https://github.com/digital-preservation/droid/blob/master/droid-results/src/main/java/uk/gov/nationalarchives/droid/submitter/SubmissionGateway.java
-
- IdentificationResultCollection results = sigIdentifier.matchBinarySignatures(req);
- IdentificationResultCollection containerResults = handleContainer(req, results);
-
- if (containerResults != null) {
- results = containerResults;
- }
-
- sigIdentifier.removeLowerPriorityHits(results);
- if (results.getResults().isEmpty()) {
- results = sigIdentifier.matchExtensions(req, false);
- }
-
- return results;
- } finally {
- if (req != null) {
- req.close();
- }
- }
- }
-
- private IdentificationResultCollection handleContainer(
- IdentificationRequest request, IdentificationResultCollection results) throws IOException {
- String containerFormat = getContainerFormat(results);
-
- if (containerFormat != null) {
- ContainerIdentifier containerIdentifier = containerIdentifierFactory.getIdentifier(containerFormat);
- IdentificationResultCollection containerResults = containerIdentifier.submit(request);
- sigIdentifier.removeLowerPriorityHits(containerResults);
-
- // container results only have the PUID filled in
- for (IdentificationResult result : containerResults.getResults()) {
- IdentificationResultImpl impl = (IdentificationResultImpl) result;
- Format format = puidFormatMap.get(result.getPuid());
- if (format != null) {
- impl.setName(format.getName());
- impl.setMimeType(format.getMimeType());
- impl.setVersion(format.getVersion());
- }
- }
-
- return containerResults.getResults().isEmpty() ? null : containerResults;
- }
-
- return null;
- }
-
- private String getContainerFormat(IdentificationResultCollection results) {
- for (IdentificationResult result : results.getResults()) {
- final String format = containerFormatResolver.forPuid(result.getPuid());
- if (format != null) {
- return format;
- }
- }
-
- return null;
- }
-
- /**
- * Provides additional results from DROID for processing ZIP files.
- *
- * @param results This is the same value returned from the call to queryFile().
- * @return Aggregated data of all files contained within the ZIP file.
- * @throws IOException If the file cannot be read.
- * @throws FitsToolException If the file is not a ZIP file.
- */
- ContainerAggregator queryContainerData(IdentificationResultCollection results)
- throws IOException, FitsToolException {
-
- RequestMetaData metadata = new RequestMetaData(bytesToRead, file.lastModified(), file.getName());
- RequestIdentifier identifier = new RequestIdentifier(file.toURI());
- FileSystemIdentificationRequest request = null;
- request = new FileSystemIdentificationRequest(metadata, identifier);
- request.open(file.toPath());
-
- ZipArchiveContentIdentifier zipArchiveIdentifier = new ZipArchiveContentIdentifier(
- this.sigIdentifier, containerSignatureDefinitions, "", File.separator, File.separator, puidFormatMap);
- try {
- ContainerAggregator aggregator = zipArchiveIdentifier.identify(results.getUri(), request);
- return aggregator;
- } catch (CommandExecutionException e) {
- throw new FitsToolException("DROID can't execute zipArchiveIdentifier", e);
- } finally {
- if (request != null) {
- request.close();
- }
- }
- }
-}
diff --git a/src/main/java/edu/harvard/hul/ois/fits/tools/droid/DroidResult.java b/src/main/java/edu/harvard/hul/ois/fits/tools/droid/DroidResult.java
new file mode 100644
index 00000000..0c6b30a6
--- /dev/null
+++ b/src/main/java/edu/harvard/hul/ois/fits/tools/droid/DroidResult.java
@@ -0,0 +1,61 @@
+//
+// Copyright (c) 2023 by The President and Fellows of Harvard College
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License. You may obtain a copy of the License at:
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software distributed under the License is
+// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permission and limitations under the License.
+//
+
+package edu.harvard.hul.ois.fits.tools.droid;
+
+import java.nio.file.Path;
+import java.util.List;
+import java.util.Objects;
+import uk.gov.nationalarchives.droid.core.interfaces.IdentificationResultCollection;
+
+/**
+ * Encapsulates the identification results of a file and any files that the file contains.
+ */
+class DroidResult {
+
+    private final Path file;
+    private final IdentificationResultCollection primaryResult;
+    private final List<IdentificationResultCollection> innerResults;
+
+    /**
+     * @param file the file that was analyzed, must not be null
+     * @param primaryResult the primary identification result, must not be null
+     * @param innerResults the results of any files the primary file contained; stored as an unmodifiable copy
+     */
+    public DroidResult(
+            Path file,
+            IdentificationResultCollection primaryResult,
+            List<IdentificationResultCollection> innerResults) {
+        this.file = Objects.requireNonNull(file, "file cannot be null");
+        this.primaryResult = Objects.requireNonNull(primaryResult, "primaryResult cannot be null");
+        this.innerResults = List.copyOf(Objects.requireNonNull(innerResults, "innerResults cannot be null"));
+    }
+
+    /**
+     * @return the file that was analyzed, never null
+     */
+    public Path getFile() {
+        return file;
+    }
+
+    /**
+     * @return the primary identification result, never null
+     */
+    public IdentificationResultCollection getPrimaryResult() {
+        return primaryResult;
+    }
+
+    /**
+     * @return the unmodifiable identification results for any files within the primary file, or an empty list
+     */
+    public List<IdentificationResultCollection> getInnerResults() {
+        return innerResults;
+    }
+}
diff --git a/src/main/java/edu/harvard/hul/ois/fits/tools/droid/DroidToolOutputter.java b/src/main/java/edu/harvard/hul/ois/fits/tools/droid/DroidToolOutputter.java
index cf09b6d0..6bd0a206 100644
--- a/src/main/java/edu/harvard/hul/ois/fits/tools/droid/DroidToolOutputter.java
+++ b/src/main/java/edu/harvard/hul/ois/fits/tools/droid/DroidToolOutputter.java
@@ -18,17 +18,20 @@
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
-import java.util.HashMap;
+import java.io.UncheckedIOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Collections;
import java.util.List;
import java.util.Map;
-import java.util.zip.ZipEntry;
+import java.util.TreeMap;
+import java.util.function.Function;
+import java.util.stream.Collectors;
import org.jdom2.Attribute;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.Namespace;
import org.jdom2.input.SAXBuilder;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;
import uk.gov.nationalarchives.droid.core.interfaces.IdentificationResult;
import uk.gov.nationalarchives.droid.core.interfaces.IdentificationResultCollection;
@@ -38,47 +41,62 @@
*
* @author Gary McGath
*/
-public class DroidToolOutputter {
+class DroidToolOutputter {
private static final Namespace fitsNS = Namespace.getNamespace(Fits.XML_NAMESPACE);
- private static final Map COMPRESSION_METHOD_TO_STRING_VALUE;
- private static final Logger logger = LoggerFactory.getLogger(DroidToolOutputter.class);
+ private static final String UNKNOWN_FORMAT = "Unknown";
+ private static final String ZIP_PUID = "x-fmt/263";
- private final IdentificationResultCollection results;
private final ToolBase toolBase;
private final Fits fits;
- private final ContainerAggregator aggregator; // could be null!!!
+ private final DroidResult result;
- static {
- COMPRESSION_METHOD_TO_STRING_VALUE = new HashMap<>();
- COMPRESSION_METHOD_TO_STRING_VALUE.put(ZipEntry.STORED, "stored");
- COMPRESSION_METHOD_TO_STRING_VALUE.put(ZipEntry.DEFLATED, "deflate");
- }
+ // The following fields are only relevant when the file was an archive containing other files. They're stored at
+ // the class level because we need the values for both the fits output and the raw output and this avoids computing
+ // them twice.
+ private final Map countsByFormat;
+ private final long originalSize;
+ private final long fileSize;
+ private final String compressionMethod;
- public DroidToolOutputter(
- ToolBase toolBase, IdentificationResultCollection results, Fits fits, ContainerAggregator aggregator) {
+ public DroidToolOutputter(ToolBase toolBase, Fits fits, DroidResult result) {
this.toolBase = toolBase;
- this.results = results;
this.fits = fits;
- this.aggregator = aggregator;
+ this.result = result;
+
+ if (!result.getInnerResults().isEmpty()) {
+ countsByFormat = countByFormat(result.getInnerResults());
+ originalSize = calculateTotalSize(result.getInnerResults());
+ fileSize = fileSize(result.getFile());
+ if (isZip(result.getPrimaryResult())) {
+ compressionMethod = zipCompressionMethod(fileSize, originalSize);
+ } else {
+ compressionMethod = null;
+ }
+ } else {
+ countsByFormat = Collections.emptyMap();
+ originalSize = -1;
+ fileSize = -1;
+ compressionMethod = null;
+ }
}
/** Produce a JDOM document with fits as its root element. This
* will contain just identification, not metadata elements.
*/
public ToolOutput toToolOutput() throws FitsToolException {
- List resList = results.getResults();
+ List resList = result.getPrimaryResult().getResults();
+
Document fitsXml = createToolData();
Document rawOut = buildRawData(resList);
- ToolOutput output = new ToolOutput(toolBase, fitsXml, rawOut, fits);
- return output;
+ return new ToolOutput(toolBase, fitsXml, rawOut, fits);
}
/** Create a base tool data document and add elements
* for each format. */
- private Document createToolData() throws FitsToolException {
- List resList = results.getResults();
+ private Document createToolData() {
+ List resList = result.getPrimaryResult().getResults();
Element fitsElem = new Element("fits", fitsNS);
Document toolDoc = new Document(fitsElem);
Element idElem = new Element("identification", fitsNS);
@@ -93,7 +111,6 @@ private Document createToolData() throws FitsToolException {
mimeType = FitsMetadataValues.getInstance().normalizeMimeType(mimeType);
}
- // maybe this block should be moved to mapFormatName() ???
if (formatName.equals("Digital Negative (DNG)")) {
mimeType = "image/x-adobe-dng";
}
@@ -102,11 +119,9 @@ private Document createToolData() throws FitsToolException {
version = mapVersion(version);
Element identityElem = new Element("identity", fitsNS);
- Attribute attr = null;
- if (formatName != null) {
- attr = new Attribute("format", formatName);
- identityElem.setAttribute(attr);
- }
+ Attribute attr = new Attribute("format", formatName);
+ identityElem.setAttribute(attr);
+
if (mimeType != null) {
attr = new Attribute("mimetype", mimeType);
identityElem.setAttribute(attr);
@@ -133,46 +148,106 @@ private Document createToolData() throws FitsToolException {
}
}
+ List innerResults = result.getInnerResults();
+
// The only time there will be a metadata section from DROID is when
// there is an aggregator for ZIP files and there are file entries.
- if (aggregator != null && aggregator.getTotalEntriesCount() > 0) {
+ if (!innerResults.isEmpty()) {
Element metadataElem = new Element("metadata", fitsNS);
fitsElem.addContent(metadataElem);
Element containerElem = new Element("container", fitsNS);
metadataElem.addContent(containerElem);
Element origSizeElem = new Element("originalSize", fitsNS);
- origSizeElem.addContent(String.valueOf(aggregator.getOriginalSize()));
+ origSizeElem.addContent(String.valueOf(originalSize));
containerElem.addContent(origSizeElem);
- Element compressionMethodElem = new Element("compressionMethod", fitsNS);
- compressionMethodElem.addContent(COMPRESSION_METHOD_TO_STRING_VALUE.get(aggregator.getCompressionMethod()));
- containerElem.addContent(compressionMethodElem);
+ if (compressionMethod != null) {
+ Element compressionMethodElem = new Element("compressionMethod", fitsNS);
+ compressionMethodElem.addContent(compressionMethod);
+ containerElem.addContent(compressionMethodElem);
+ }
Element entriesElem = new Element("entries", fitsNS);
- Attribute totalEntriesCountAttr =
- new Attribute("totalEntries", String.valueOf(aggregator.getTotalEntriesCount()));
+ Attribute totalEntriesCountAttr = new Attribute("totalEntries", String.valueOf(innerResults.size()));
entriesElem.setAttribute(totalEntriesCountAttr);
containerElem.addContent(entriesElem);
- for (Map.Entry formatEntry :
- aggregator.getFormatCounts().entrySet()) {
+ countsByFormat.forEach((format, count) -> {
Element entryElem = new Element("format", fitsNS);
- Attribute nameAttr = new Attribute("name", formatEntry.getKey());
+ Attribute nameAttr = new Attribute("name", format);
entryElem.setAttribute(nameAttr);
- Attribute numberAttr = new Attribute("number", String.valueOf(formatEntry.getValue()));
+ Attribute numberAttr = new Attribute("number", String.valueOf(count));
entryElem.setAttribute(numberAttr);
entriesElem.addContent(entryElem);
- }
+ });
}
return toolDoc;
}
- public static String mapFormatName(String formatName) {
+    /**
+     * Groups and counts the results by format name.
+     *
+     * Only the first identification hit of each entry is considered, and its name is normalized with
+     * {@code mapFormatName}. Entries without any hit are counted under {@code UNKNOWN_FORMAT}. The returned map is
+     * a {@link TreeMap}, so the formats iterate in the natural order of their names.
+     *
+     * @param innerResults the identification results of files within an archive
+     * @return a map of format names to the number of occurrences of that format
+     */
+    private Map<String, Long> countByFormat(List<IdentificationResultCollection> innerResults) {
+        return innerResults.stream()
+                .map(IdentificationResultCollection::getResults)
+                .map(hits -> hits.isEmpty() ? UNKNOWN_FORMAT : mapFormatName(hits.get(0).getName()))
+                .collect(Collectors.groupingBy(Function.identity(), TreeMap::new, Collectors.counting()));
+    }
+
+    /**
+     * Sums the file lengths reported by the identification results, using a primitive stream to avoid boxing.
+     *
+     * @param innerResults the identification results of files within an archive
+     * @return the combined reported file length of all entries, or 0 for an empty list
+     */
+    private long calculateTotalSize(List<IdentificationResultCollection> innerResults) {
+        return innerResults.stream()
+                .mapToLong(IdentificationResultCollection::getFileLength)
+                .sum();
+    }
+
+    /**
+     * @return the size in bytes of {@code file} on disk; an I/O failure surfaces as an UncheckedIOException
+     */
+    private long fileSize(Path file) {
+        try {
+            return Files.size(file);
+        } catch (IOException e) {
+            throw new UncheckedIOException("Unable to determine the size of " + file, e);
+        }
+    }
+
+    /**
+     * @param identificationResult the identification hits of the target file
+     * @return true if any of the hits carries the zip PUID held in {@code ZIP_PUID}
+     */
+    private boolean isZip(IdentificationResultCollection identificationResult) {
+        var hits = identificationResult.getResults();
+        return hits.stream().anyMatch(hit -> ZIP_PUID.equals(hit.getPuid()));
+    }
+
+    /**
+     * Infers the zip compression method from sizes alone; the method recorded in the zip itself is not consulted.
+     * @param fileSize the size of the file on disk
+     * @param originalSize the reported size of all of the components of the file
+     * @return "deflate" if the file on disk is smaller than its components, otherwise "stored"
+     */
+    private String zipCompressionMethod(long fileSize, long originalSize) {
+        return originalSize > fileSize ? "deflate" : "stored";
+    }
+ private static String mapFormatName(String formatName) {
if (formatName == null || formatName.length() == 0) {
return FitsMetadataValues.DEFAULT_FORMAT;
} else if (formatName.startsWith("JPEG2000") || formatName.startsWith("JP2 (JPEG 2000")) {
@@ -199,7 +274,6 @@ public static String mapFormatName(String formatName) {
}
private String mapVersion(String version) {
-
if (version == null || version.length() == 0) {
return version;
} else if (version.equals("1987a")) {
@@ -217,7 +291,6 @@ private String mapVersion(String version) {
* @throws SAXException
*/
private Document buildRawData(List resList) throws FitsToolException {
-
StringWriter out = new StringWriter();
out.write("");
@@ -241,30 +314,33 @@ private Document buildRawData(List resList) throws FitsToo
out.write("");
}
- if (aggregator != null && aggregator.getTotalEntriesCount() > 0) {
+ var innerResults = result.getInnerResults();
+
+ if (!innerResults.isEmpty()) {
out.write("");
out.write("\n");
out.write("");
out.write("\n");
- for (Map.Entry entry : aggregator.getFormatCounts().entrySet()) {
+ countsByFormat.forEach((format, count) -> {
out.write("");
out.write("\n");
- }
+ });
out.write("");
out.write("\n");
diff --git a/src/main/java/edu/harvard/hul/ois/fits/tools/droid/DroidWrapper.java b/src/main/java/edu/harvard/hul/ois/fits/tools/droid/DroidWrapper.java
new file mode 100644
index 00000000..aed00412
--- /dev/null
+++ b/src/main/java/edu/harvard/hul/ois/fits/tools/droid/DroidWrapper.java
@@ -0,0 +1,142 @@
+//
+// Copyright (c) 2023 by The President and Fellows of Harvard College
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License. You may obtain a copy of the License at:
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software distributed under the License is
+// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permission and limitations under the License.
+//
+
+package edu.harvard.hul.ois.fits.tools.droid;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Set;
+import java.util.concurrent.ExecutionException;
+import org.apache.commons.io.FilenameUtils;
+import uk.gov.nationalarchives.droid.core.interfaces.IdentificationResultCollection;
+import uk.gov.nationalarchives.droid.core.interfaces.IdentificationResultImpl;
+import uk.gov.nationalarchives.droid.core.interfaces.RequestIdentifier;
+import uk.gov.nationalarchives.droid.core.interfaces.resource.FileSystemIdentificationRequest;
+import uk.gov.nationalarchives.droid.core.interfaces.resource.RequestMetaData;
+import uk.gov.nationalarchives.droid.profile.referencedata.Format;
+import uk.gov.nationalarchives.droid.submitter.SubmissionGateway;
+
+/**
+ * Submits a file to Droid to identify and returns the identification results for the submitted file and any files that
+ * it contains.
+ *
+ * The intended use is to construct one instance of this class per thread using the {@link DroidWrapperFactory}. This
+ * allows the reuse of expensive, thread-safe components.
+ *
+ * This class is NOT THREAD SAFE.
+ */
+class DroidWrapper {
+
+    private final SubmissionGateway submissionGateway;
+    private final CollectingResultHandler resultHandler;
+    private final Map<String, Format> puidFormatMap;
+    private final Set<String> extsToLimitBytesRead;
+    private final long byteReadLimit;
+
+    /**
+     * @param submissionGateway the Droid entry point
+     * @param resultHandler the handler for collecting identification results
+     * @param puidFormatMap the map of puids to formats
+     * @param extsToLimitBytesRead set of file extensions where the number of bytes read should be restricted
+     * @param byteReadLimit the max number of bytes to read of files with byte restrictions; ignored unless positive
+     */
+    public DroidWrapper(
+            SubmissionGateway submissionGateway,
+            CollectingResultHandler resultHandler,
+            Map<String, Format> puidFormatMap,
+            Set<String> extsToLimitBytesRead,
+            long byteReadLimit) {
+        this.submissionGateway = Objects.requireNonNull(submissionGateway, "submissionGateway cannot be null");
+        this.resultHandler = Objects.requireNonNull(resultHandler, "resultHandler cannot be null");
+        this.puidFormatMap = Objects.requireNonNull(puidFormatMap, "puidFormatMap cannot be null");
+        this.extsToLimitBytesRead = Objects.requireNonNull(extsToLimitBytesRead, "extsToLimitBytesRead cannot be null");
+        this.byteReadLimit = byteReadLimit;
+    }
+
+    /**
+     * Submits a file to be analyzed by Droid, and returns the identification results of the file and any files that
+     * it contains. The first collected result is taken as the file's own; it is an error if Droid yields none.
+     *
+     * Recursion of archive formats is restricted to a depth of 1.
+     *
+     * @param file the file to analyze
+     * @return the identification results
+     * @throws IOException if the file cannot be read
+     * @throws InterruptedException if interrupted while waiting for Droid to finish
+     * @throws ExecutionException if the Droid submission fails
+     */
+    public DroidResult analyze(Path file) throws IOException, InterruptedException, ExecutionException {
+        var bytesToRead = Files.size(file);
+        var filename = file.getFileName().toString();
+        // Lower-case the extension before the lookup (assumes the configured extensions are lower case).
+        var ext = FilenameUtils.getExtension(filename).toLowerCase(java.util.Locale.ROOT);
+
+        if (byteReadLimit > 0 && extsToLimitBytesRead.contains(ext)) {
+            bytesToRead = Math.min(byteReadLimit, bytesToRead);
+        }
+
+        var meta = new RequestMetaData(bytesToRead, Files.getLastModifiedTime(file).toMillis(), filename);
+        var id = new RequestIdentifier(file.toUri());
+        id.setParentId(DroidId.nextId());
+        id.setParentPrefix("");
+        var request = new FileSystemIdentificationRequest(meta, id);
+
+        try {
+            request.open(file);
+            resultHandler.reset();
+            submissionGateway.submit(request).get();
+            submissionGateway.awaitFinished();
+
+            var results = resultHandler.getResults();
+            if (results.isEmpty()) {
+                throw new IllegalStateException("Droid returned no identification results for " + file);
+            }
+            results.forEach(this::augmentContainerResults);
+
+            List<IdentificationResultCollection> innerResults =
+                    results.size() == 1 ? Collections.emptyList() : results.subList(1, results.size());
+            return new DroidResult(file, results.get(0), innerResults);
+        } finally {
+            request.close();
+        }
+    }
+
+    /**
+     * Closes the object and any underlying resources
+     *
+     * @throws IOException if the underlying submission gateway fails to close
+     */
+    public void close() throws IOException {
+        submissionGateway.close();
+    }
+
+    /**
+     * Modifies the result objects to include mime type and version. This is necessary because, for some reason Droid
+     * does not include this information for files that were identified by container signature.
+     *
+     * @param result the result to modify; only hits without a mime type and with a known puid are touched
+     */
+    private void augmentContainerResults(IdentificationResultCollection result) {
+        result.getResults().stream().filter(r -> r.getMimeType() == null).forEach(r -> {
+            var format = puidFormatMap.get(r.getPuid());
+            if (format != null) {
+                var ri = (IdentificationResultImpl) r;
+                ri.setName(format.getName());
+                ri.setMimeType(format.getMimeType());
+                ri.setVersion(format.getVersion());
+            }
+        });
+    }
+}
diff --git a/src/main/java/edu/harvard/hul/ois/fits/tools/droid/DroidWrapperFactory.java b/src/main/java/edu/harvard/hul/ois/fits/tools/droid/DroidWrapperFactory.java
new file mode 100644
index 00000000..5f76d0e7
--- /dev/null
+++ b/src/main/java/edu/harvard/hul/ois/fits/tools/droid/DroidWrapperFactory.java
@@ -0,0 +1,319 @@
+//
+// Copyright (c) 2023 by The President and Fellows of Harvard College
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License. You may obtain a copy of the License at:
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software distributed under the License is
+// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permission and limitations under the License.
+//
+
+package edu.harvard.hul.ois.fits.tools.droid;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Objects;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ThreadFactory;
+import uk.gov.nationalarchives.droid.container.ContainerFileIdentificationRequestFactory;
+import uk.gov.nationalarchives.droid.container.ContainerSignatureFileReader;
+import uk.gov.nationalarchives.droid.container.ole2.Ole2Identifier;
+import uk.gov.nationalarchives.droid.container.ole2.Ole2IdentifierEngine;
+import uk.gov.nationalarchives.droid.container.zip.ZipIdentifier;
+import uk.gov.nationalarchives.droid.container.zip.ZipIdentifierEngine;
+import uk.gov.nationalarchives.droid.core.BinarySignatureIdentifier;
+import uk.gov.nationalarchives.droid.core.SignatureParseException;
+import uk.gov.nationalarchives.droid.core.interfaces.RequestIdentifier;
+import uk.gov.nationalarchives.droid.core.interfaces.archive.ArcArchiveHandler;
+import uk.gov.nationalarchives.droid.core.interfaces.archive.ArchiveFormatResolverImpl;
+import uk.gov.nationalarchives.droid.core.interfaces.archive.ArchiveHandlerFactoryImpl;
+import uk.gov.nationalarchives.droid.core.interfaces.archive.BZipArchiveHandler;
+import uk.gov.nationalarchives.droid.core.interfaces.archive.BZipRequestFactory;
+import uk.gov.nationalarchives.droid.core.interfaces.archive.ContainerIdentifierFactoryImpl;
+import uk.gov.nationalarchives.droid.core.interfaces.archive.FatArchiveHandler;
+import uk.gov.nationalarchives.droid.core.interfaces.archive.FatEntryRequestFactory;
+import uk.gov.nationalarchives.droid.core.interfaces.archive.GZipArchiveHandler;
+import uk.gov.nationalarchives.droid.core.interfaces.archive.GZipRequestFactory;
+import uk.gov.nationalarchives.droid.core.interfaces.archive.ISOEntryRequestFactory;
+import uk.gov.nationalarchives.droid.core.interfaces.archive.ISOImageArchiveHandler;
+import uk.gov.nationalarchives.droid.core.interfaces.archive.RarArchiveHandler;
+import uk.gov.nationalarchives.droid.core.interfaces.archive.RarEntryRequestFactory;
+import uk.gov.nationalarchives.droid.core.interfaces.archive.SevenZipArchiveHandler;
+import uk.gov.nationalarchives.droid.core.interfaces.archive.SevenZipRequestFactory;
+import uk.gov.nationalarchives.droid.core.interfaces.archive.TarArchiveHandler;
+import uk.gov.nationalarchives.droid.core.interfaces.archive.TarEntryRequestFactory;
+import uk.gov.nationalarchives.droid.core.interfaces.archive.TrueVfsArchiveHandler;
+import uk.gov.nationalarchives.droid.core.interfaces.archive.WarcArchiveHandler;
+import uk.gov.nationalarchives.droid.core.interfaces.archive.WebArchiveEntryRequestFactory;
+import uk.gov.nationalarchives.droid.core.interfaces.archive.ZipEntryRequestFactory;
+import uk.gov.nationalarchives.droid.core.interfaces.control.PauseAspect;
+import uk.gov.nationalarchives.droid.core.interfaces.signature.SignatureFileException;
+import uk.gov.nationalarchives.droid.profile.referencedata.Format;
+import uk.gov.nationalarchives.droid.signature.SaxSignatureFileParser;
+import uk.gov.nationalarchives.droid.signature.SignatureParser;
+import uk.gov.nationalarchives.droid.submitter.SubmissionQueue;
+import uk.gov.nationalarchives.droid.submitter.SubmissionQueueData;
+
+/**
+ * Factory for generating {@link DroidWrapper} instances. This is necessary because {@link DroidWrapper} is not thread
+ * safe, but many of the components that it uses are expensive and can be shared between instances. The setup in this
+ * class is based on Droid's spring-result.xml.
+ */
+class DroidWrapperFactory {
+
+ private static DroidWrapperFactory instance;
+
+ /**
+ * Creates a new DroidWrapperFactory instance if one does not exist, or returns the existing instance if one does.
+ *
+ * @param config the droid config; only used (and null-checked) on the call that creates the instance
+ * @return the DroidWrapperFactory
+ * @throws SignatureParseException if thrown while the factory is being constructed
+ * @throws SignatureFileException if thrown while the factory is being constructed
+ */
+ public static synchronized DroidWrapperFactory getOrCreateFactory(DroidConfig config)
+ throws SignatureParseException, SignatureFileException {
+ if (instance == null) {
+ instance = new DroidWrapperFactory(Objects.requireNonNull(config, "config cannot be null"));
+ }
+ return instance;
+ }
+
+ private final DroidConfig config; // read again by createInstance() for the per-wrapper settings
+ private final Map<String, Format> puidFormatMap; // PUID -> format, filled from config.getSigFile()
+ private final BinarySignatureIdentifier droid;
+ private final ContainerSignatureFileReader signatureFileReader;
+ private final ContainerIdentifierFactoryImpl containerIdentifierFactory;
+ private final ArchiveFormatResolverImpl containerPuidResolver;
+ private final ZipIdentifier zipContainerHandler;
+ private final Ole2Identifier ole2ContainerHandler;
+ private final ArchiveFormatResolverImpl archivePuidResolver;
+ private final ZipEntryRequestFactory zipFactory;
+ private final TarEntryRequestFactory tarFactory;
+ private final SevenZipRequestFactory sevenZipFactory;
+ private final BZipRequestFactory bzipFactory;
+ private final GZipRequestFactory gzipFactory;
+ private final WebArchiveEntryRequestFactory arcFactory;
+ private final WebArchiveEntryRequestFactory warcFactory;
+ private final ISOEntryRequestFactory isoFactory;
+ private final RarEntryRequestFactory rarFactory;
+ private final FatEntryRequestFactory fatFactory;
+
+ private DroidWrapperFactory(DroidConfig config) throws SignatureParseException, SignatureFileException {
+ this.config = config;
+
+ // Index the signature file's formats by PUID so container hits (docx, xlsx, etc) can be given name/mime/version
+ puidFormatMap = new HashMap<>();
+ SignatureParser sigParser =
+ new SaxSignatureFileParser(config.getSigFile().toUri());
+ sigParser.formats(format -> {
+ puidFormatMap.put(format.getPuid(), format);
+ });
+
+ droid = new BinarySignatureIdentifier();
+ droid.setSignatureFile(config.getSigFile().toAbsolutePath().toString());
+ droid.init();
+
+ signatureFileReader = new ContainerSignatureFileReader(); // shared by the ZIP and OLE2 identifiers below
+ signatureFileReader.setFilePath(
+ config.getContainerSigFile().toAbsolutePath().toString());
+
+ containerIdentifierFactory = new ContainerIdentifierFactoryImpl();
+ containerPuidResolver = new ArchiveFormatResolverImpl();
+ var containerFileIdentificationRequestFactory = new ContainerFileIdentificationRequestFactory();
+
+ var zipIdentifierEngine = new ZipIdentifierEngine();
+ zipIdentifierEngine.setRequestFactory(containerFileIdentificationRequestFactory);
+
+ var ole2IdentifierEngine = new Ole2IdentifierEngine();
+ ole2IdentifierEngine.setRequestFactory(containerFileIdentificationRequestFactory);
+
+ zipContainerHandler = new ZipIdentifier();
+ zipContainerHandler.setContainerType("ZIP");
+ zipContainerHandler.setContainerIdentifierFactory(containerIdentifierFactory);
+ zipContainerHandler.setContainerFormatResolver(containerPuidResolver);
+ zipContainerHandler.setDroidCore(droid);
+ zipContainerHandler.setIdentifierEngine(zipIdentifierEngine);
+ zipContainerHandler.setSignatureReader(signatureFileReader);
+ zipContainerHandler.init();
+
+ ole2ContainerHandler = new Ole2Identifier();
+ ole2ContainerHandler.setContainerType("OLE2");
+ ole2ContainerHandler.setContainerIdentifierFactory(containerIdentifierFactory);
+ ole2ContainerHandler.setContainerFormatResolver(containerPuidResolver);
+ ole2ContainerHandler.setDroidCore(droid);
+ ole2ContainerHandler.setIdentifierEngine(ole2IdentifierEngine);
+ ole2ContainerHandler.setSignatureReader(signatureFileReader);
+ ole2ContainerHandler.init();
+
+ archivePuidResolver = new ArchiveFormatResolverImpl();
+ archivePuidResolver.setPuids(Map.of( // archive type key -> comma-separated PUIDs
+ "ZIP", "x-fmt/263",
+ "TAR", "x-fmt/265",
+ "GZ", "x-fmt/266",
+ "ARC", "x-fmt/219, fmt/410",
+ "WARC", "fmt/289, fmt/1281, fmt/1355",
+ "BZ", "x-fmt/267, x-fmt/268",
+ "7Z", "fmt/484",
+ "ISO", "fmt/468, fmt/1739",
+ "RAR", "x-fmt/264, fmt/411",
+ "FAT", "fmt/1087"));
+
+ zipFactory = new ZipEntryRequestFactory();
+ zipFactory.setTempDirLocation(config.getTempDir());
+ tarFactory = new TarEntryRequestFactory();
+ tarFactory.setTempDirLocation(config.getTempDir());
+ sevenZipFactory = new SevenZipRequestFactory();
+ sevenZipFactory.setTempDirLocation(config.getTempDir());
+ bzipFactory = new BZipRequestFactory();
+ bzipFactory.setTempDirLocation(config.getTempDir());
+ gzipFactory = new GZipRequestFactory();
+ gzipFactory.setTempDirLocation(config.getTempDir());
+ arcFactory = new WebArchiveEntryRequestFactory();
+ arcFactory.setTempDirLocation(config.getTempDir());
+ warcFactory = new WebArchiveEntryRequestFactory(); // same factory class as ARC, separate instance
+ warcFactory.setTempDirLocation(config.getTempDir());
+ isoFactory = new ISOEntryRequestFactory();
+ isoFactory.setTempDirLocation(config.getTempDir());
+ rarFactory = new RarEntryRequestFactory();
+ rarFactory.setTempDirLocation(config.getTempDir());
+ fatFactory = new FatEntryRequestFactory();
+ fatFactory.setTempDirLocation(config.getTempDir());
+ }
+
+ /**
+ * Creates a new {@link DroidWrapper} instance. {@link DroidWrapper} is NOT THREAD SAFE.
+ *
+ * @return a new {@link DroidWrapper} with its own submission gateway, executor, and result handler
+ */
+ public DroidWrapper createInstance() {
+ var submissionGateway = new RecursionRestrictedSubmissionGateway();
+ submissionGateway.setDroidCore(droid);
+ submissionGateway.setContainerFormatResolver(containerPuidResolver);
+ submissionGateway.setContainerIdentifierFactory(containerIdentifierFactory);
+ submissionGateway.setArchiveFormatResolver(archivePuidResolver);
+ submissionGateway.setPauseAspect(new PauseAspect());
+ submissionGateway.setSubmissionQueue(new NoOpSubmissionQueue());
+
+ // We need these threads to be daemon threads so that an application that uses FITS can exit. FITS has not
+ // historically required that users shut it down, so without this, user applications would hang.
+ submissionGateway.setExecutorService(Executors.newSingleThreadExecutor(new ThreadFactory() {
+ private final ThreadFactory delegate = Executors.defaultThreadFactory();
+
+ @Override
+ public Thread newThread(Runnable runnable) {
+ var thread = delegate.newThread(runnable);
+ thread.setDaemon(true);
+ return thread;
+ }
+ }));
+
+ submissionGateway.setProcessZip(config.isProcessZip());
+ submissionGateway.setProcessTar(config.isProcessTar());
+ submissionGateway.setProcessGzip(config.isProcessGzip());
+ submissionGateway.setProcessArc(config.isProcessArc());
+ submissionGateway.setProcessWarc(config.isProcessWarc());
+ submissionGateway.setProcessBzip2(config.isProcessBzip2());
+ submissionGateway.setProcess7zip(config.isProcess7zip());
+ submissionGateway.setProcessIso(config.isProcessIso());
+ submissionGateway.setProcessRar(config.isProcessRar()); // NOTE(review): no FAT flag is set; confirm intended
+
+ var resultHandler = new CollectingResultHandler();
+
+ submissionGateway.setResultHandler(resultHandler);
+
+ var zipHandler = new TrueVfsArchiveHandler();
+ zipHandler.setDroidCore(submissionGateway);
+ zipHandler.setResultHandler(resultHandler);
+ zipHandler.setFactory(zipFactory);
+
+ var tarHandler = new TarArchiveHandler();
+ tarHandler.setDroidCore(submissionGateway);
+ tarHandler.setResultHandler(resultHandler);
+ tarHandler.setFactory(tarFactory);
+
+ var sevenZipHandler = new SevenZipArchiveHandler();
+ sevenZipHandler.setDroid(submissionGateway);
+ sevenZipHandler.setResultHandler(resultHandler);
+ sevenZipHandler.setFactory(sevenZipFactory);
+
+ var bzipHandler = new BZipArchiveHandler();
+ bzipHandler.setDroidCore(submissionGateway);
+ bzipHandler.setResultHandler(resultHandler);
+ bzipHandler.setFactory(bzipFactory);
+
+ var gzHandler = new GZipArchiveHandler();
+ gzHandler.setDroidCore(submissionGateway);
+ gzHandler.setFactory(gzipFactory); // NOTE(review): no setResultHandler call, unlike the other handlers -- confirm
+
+ var arcHandler = new ArcArchiveHandler();
+ arcHandler.setDroidCore(submissionGateway);
+ arcHandler.setResultHandler(resultHandler);
+ arcHandler.setFactory(arcFactory);
+
+ var warcHandler = new WarcArchiveHandler();
+ warcHandler.setDroidCore(submissionGateway);
+ warcHandler.setResultHandler(resultHandler);
+ warcHandler.setFactory(warcFactory);
+
+ var isoHandler = new ISOImageArchiveHandler();
+ isoHandler.setDroid(submissionGateway);
+ isoHandler.setResultHandler(resultHandler);
+ isoHandler.setFactory(isoFactory);
+
+ var rarHandler = new RarArchiveHandler();
+ rarHandler.setDroid(submissionGateway);
+ rarHandler.setResultHandler(resultHandler);
+ rarHandler.setIdentificationRequestFactory(rarFactory);
+
+ var fatHandler = new FatArchiveHandler();
+ fatHandler.setDroid(submissionGateway);
+ fatHandler.setResultHandler(resultHandler);
+ fatHandler.setFactory(fatFactory);
+
+ var archiveHandlerLocator = new ArchiveHandlerFactoryImpl();
+ archiveHandlerLocator.setHandlers(Map.of( // same keys as the archive PUID resolver set up in the constructor
+ "ZIP", zipHandler,
+ "TAR", tarHandler,
+ "GZ", gzHandler,
+ "ARC", arcHandler,
+ "WARC", warcHandler,
+ "BZ", bzipHandler,
+ "7Z", sevenZipHandler,
+ "ISO", isoHandler,
+ "RAR", rarHandler,
+ "FAT", fatHandler));
+
+ submissionGateway.setArchiveHandlerFactory(archiveHandlerLocator);
+
+ return new DroidWrapper(
+ submissionGateway,
+ resultHandler,
+ puidFormatMap,
+ config.getExtsToLimitBytesRead(),
+ config.getByteReadLimit());
+ }
+
+ private static class NoOpSubmissionQueue implements SubmissionQueue { // discards every call; keeps no state
+ @Override
+ public void add(RequestIdentifier request) {
+ // noop: the request is not recorded
+ }
+
+ @Override
+ public void remove(RequestIdentifier request) {
+ // noop: nothing was recorded, so there is nothing to remove
+ }
+
+ @Override
+ public void save() {
+ // noop: nothing is persisted
+ }
+
+ @Override
+ public SubmissionQueueData list() {
+ // noop: there is no queue data; NOTE(review): callers receive null, not an empty SubmissionQueueData
+ return null;
+ }
+ }
+}
diff --git a/src/main/java/edu/harvard/hul/ois/fits/tools/droid/RecursionRestrictedSubmissionGateway.java b/src/main/java/edu/harvard/hul/ois/fits/tools/droid/RecursionRestrictedSubmissionGateway.java
new file mode 100644
index 00000000..126cce8f
--- /dev/null
+++ b/src/main/java/edu/harvard/hul/ois/fits/tools/droid/RecursionRestrictedSubmissionGateway.java
@@ -0,0 +1,48 @@
+//
+// Copyright (c) 2023 by The President and Fellows of Harvard College
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License. You may obtain a copy of the License at:
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software distributed under the License is
+// distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permission and limitations under the License.
+//
+
+package edu.harvard.hul.ois.fits.tools.droid;
+
+import java.net.URI;
+import java.util.Optional;
+import java.util.concurrent.Future;
+import org.apache.commons.lang.StringUtils;
+import uk.gov.nationalarchives.droid.core.interfaces.AsynchDroid;
+import uk.gov.nationalarchives.droid.core.interfaces.IdentificationRequest;
+import uk.gov.nationalarchives.droid.core.interfaces.IdentificationResultCollection;
+import uk.gov.nationalarchives.droid.core.interfaces.RequestIdentifier;
+import uk.gov.nationalarchives.droid.submitter.SubmissionGateway;
+
+/**
+ * When identifying the contents of an archive, Droid will recurse into nested archives. FITS prefers not to, so
+ * {@link #submit} returns {@code null} without submitting any request whose URI contains "!/" more than once.
+ *
+ * Unfortunately, I had to implement this as a subclass of {@link SubmissionGateway} because {@link AsynchDroid} does
+ * not define a close() method.
+ */
+class RecursionRestrictedSubmissionGateway extends SubmissionGateway {
+
+ @Override
+ public Future<IdentificationResultCollection> submit(IdentificationRequest identificationRequest) {
+ // Droid inserts "!/" every time it enters an archive, so by counting the occurrences of that string we can
+ // limit the recursion.
+ var url = Optional.ofNullable(identificationRequest.getIdentifier())
+ .map(RequestIdentifier::getUri)
+ .map(URI::toString)
+ .orElse(""); // a missing identifier or URI counts as depth 0 and is submitted normally
+ var depth = StringUtils.countMatches(url, "!/"); // per the note above: 1 = archive entry, 2+ = nested deeper
+
+ if (depth > 1) {
+ return null; // skipped, never submitted; NOTE(review): callers must tolerate a null Future
+ }
+
+ return super.submit(identificationRequest);
+ }
+}
diff --git a/src/main/java/edu/harvard/hul/ois/fits/tools/droid/ResultPrinter.java b/src/main/java/edu/harvard/hul/ois/fits/tools/droid/ResultPrinter.java
deleted file mode 100644
index 413edd41..00000000
--- a/src/main/java/edu/harvard/hul/ois/fits/tools/droid/ResultPrinter.java
+++ /dev/null
@@ -1,261 +0,0 @@
-/**
- * This file has been modified by Harvard University, June, 2017, for the purposes of incorporating
- * into the FITS application. The original can be found here: https://github.com/digital-preservation/droid
- *
- * Copyright (c) 2016, The National Archives
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following
- * conditions are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * * Neither the name of the The National Archives nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-package edu.harvard.hul.ois.fits.tools.droid;
-
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import uk.gov.nationalarchives.droid.command.action.CommandExecutionException;
-import uk.gov.nationalarchives.droid.command.container.Ole2ContainerContentIdentifier;
-import uk.gov.nationalarchives.droid.command.container.ZipContainerContentIdentifier;
-import uk.gov.nationalarchives.droid.container.ContainerFileIdentificationRequestFactory;
-import uk.gov.nationalarchives.droid.container.ContainerSignatureDefinitions;
-import uk.gov.nationalarchives.droid.container.TriggerPuid;
-import uk.gov.nationalarchives.droid.container.ole2.Ole2IdentifierEngine;
-import uk.gov.nationalarchives.droid.container.zip.ZipIdentifierEngine;
-import uk.gov.nationalarchives.droid.core.BinarySignatureIdentifier;
-import uk.gov.nationalarchives.droid.core.interfaces.IdentificationRequest;
-import uk.gov.nationalarchives.droid.core.interfaces.IdentificationResult;
-import uk.gov.nationalarchives.droid.core.interfaces.IdentificationResultCollection;
-import uk.gov.nationalarchives.droid.core.interfaces.IdentificationResultImpl;
-import uk.gov.nationalarchives.droid.core.interfaces.archive.IdentificationRequestFactory;
-import uk.gov.nationalarchives.droid.profile.referencedata.Format;
-
-/**
- * File identification results printer.
- *
- * NB: This class is called recursively when archive files are opened
- *
- * @author rbrennan
- */
-public class ResultPrinter {
-
- private static final String R_SLASH = "/";
- private static final String L_BRACKET = "(";
- private static final String R_BRACKET = ")";
- private static final String SPACE = " ";
-
- private final BinarySignatureIdentifier binarySignatureIdentifier;
- private final ContainerSignatureDefinitions containerSignatureDefinitions;
- private List triggerPuids;
- private IdentificationRequestFactory requestFactory;
- private final String path;
- private final String slash;
- private final String slash1;
- private final String wrongSlash;
- private final boolean archives;
- private final boolean webArchives;
- private final String OLE2_CONTAINER = "OLE2";
- private final String ZIP_CONTAINER = "ZIP";
- private final String ZIP_ARCHIVE = "x-fmt/263";
- private final String JIP_ARCHIVE = "x-fmt/412";
- private final String TAR_ARCHIVE = "x-fmt/265";
- private final String GZIP_ARCHIVE = "x-fmt/266";
- private final String ARC_ARCHIVE = "x-fmt/219";
- private final String OTHERARC_ARCHIVE = "fmt/410";
- private final String WARC_ARCHIVE = "fmt/289";
-
- private final ContainerAggregator aggregator;
- private final Map puidFormatMap;
-
- private static final Logger logger = LoggerFactory.getLogger(ResultPrinter.class);
-
- /**
- * Store signature files.
- *
- * @param binarySignatureIdentifier binary signature identifier
- * @param containerSignatureDefinitions container signatures
- * @param path current file/container path
- * @param slash local path element delimiter
- * @param slash1 local first container prefix delimiter
- * @param archives Should archives be examined?
- * @param webArchives Should web archives be examined?
- * @param aggregator
- * @param puidFormatMap map of puids to formats
- */
- public ResultPrinter(
- final BinarySignatureIdentifier binarySignatureIdentifier,
- final ContainerSignatureDefinitions containerSignatureDefinitions,
- final String path,
- final String slash,
- final String slash1,
- boolean archives,
- boolean webArchives,
- final ContainerAggregator aggregator,
- final Map puidFormatMap) {
-
- this.binarySignatureIdentifier = binarySignatureIdentifier;
- this.containerSignatureDefinitions = containerSignatureDefinitions;
- this.path = path;
- this.slash = slash;
- this.slash1 = slash1;
- this.wrongSlash = this.slash.equals(R_SLASH) ? "\\" : R_SLASH;
- this.archives = archives;
- this.webArchives = webArchives;
- if (containerSignatureDefinitions != null) {
- triggerPuids = containerSignatureDefinitions.getTiggerPuids();
- }
- this.aggregator = aggregator;
- this.puidFormatMap = puidFormatMap;
- }
-
- /**
- * Output identification for this file.
- *
- * @param results identification Results
- * @param request identification Request
- *
- * @throws CommandExecutionException if unexpected container type encountered
- */
- public void print(final IdentificationResultCollection results, final IdentificationRequest request)
- throws CommandExecutionException {
-
- final String fileName = (path + request.getFileName()).replace(wrongSlash, slash);
- final IdentificationResultCollection containerResults = getContainerResults(results, request, fileName);
-
- IdentificationResultCollection finalResults = new IdentificationResultCollection(request);
- boolean container = false;
- if (containerResults.getResults().size() > 0) {
- container = true;
- finalResults = containerResults;
- } else if (results.getResults().size() > 0) {
- finalResults = results;
- }
- if (finalResults.getResults().size() > 0) {
- binarySignatureIdentifier.removeLowerPriorityHits(finalResults);
- }
- if (finalResults.getResults().size() > 0) {
- int cnt = 0;
- for (IdentificationResult identResult : finalResults.getResults()) {
- if (+cnt > 1) {
- logger.warn("Count: " + cnt);
- }
- String formatName = identResult.getName();
- String puid = identResult.getPuid();
- if (!container && JIP_ARCHIVE.equals(puid)) {
- puid = ZIP_ARCHIVE;
- }
-
- String normalizedFormat = DroidToolOutputter.mapFormatName(formatName);
- String output = String.format(
- "fileName: %s,\n mimeType: %s,\n formatName: %s,\n normalizedFormat: %s,\n puid: %s",
- fileName, identResult.getMimeType(), formatName, normalizedFormat, puid);
- logger.debug(output);
- // add a single format type
- aggregator.addFormat(normalizedFormat);
- }
- } else {
- aggregator.incrementUnknownFormat();
- logger.debug(fileName + " -- Unknown filetype");
- }
- }
-
- private IdentificationResultCollection getContainerResults(
- final IdentificationResultCollection results, final IdentificationRequest request, final String fileName)
- throws CommandExecutionException {
-
- IdentificationResultCollection containerResults = new IdentificationResultCollection(request);
-
- if (results.getResults().size() > 0 && containerSignatureDefinitions != null) {
- int cnt = 0;
- for (IdentificationResult identResult : results.getResults()) {
- if (+cnt > 1) {
- logger.info("IdentificationResult count: " + cnt);
- }
- String filePuid = identResult.getPuid();
- if (filePuid != null) {
- TriggerPuid containerPuid = getTriggerPuidByPuid(filePuid);
- if (containerPuid != null) {
-
- requestFactory = new ContainerFileIdentificationRequestFactory();
- String containerType = containerPuid.getContainerType();
-
- if (OLE2_CONTAINER.equals(containerType)) {
- try {
- Ole2ContainerContentIdentifier ole2Identifier = new Ole2ContainerContentIdentifier();
- ole2Identifier.init(containerSignatureDefinitions, containerType);
- Ole2IdentifierEngine ole2IdentifierEngine = new Ole2IdentifierEngine();
- ole2IdentifierEngine.setRequestFactory(requestFactory);
- ole2Identifier.setIdentifierEngine(ole2IdentifierEngine);
- containerResults =
- ole2Identifier.process(request.getSourceInputStream(), containerResults);
- } catch (IOException e) { // carry on after container i/o problems
- logger.warn(e + SPACE + L_BRACKET + fileName + R_BRACKET);
- }
- } else if (ZIP_CONTAINER.equals(containerType)) {
- try {
- ZipContainerContentIdentifier zipIdentifier = new ZipContainerContentIdentifier();
- zipIdentifier.init(containerSignatureDefinitions, containerType);
- ZipIdentifierEngine zipIdentifierEngine = new ZipIdentifierEngine();
- zipIdentifierEngine.setRequestFactory(requestFactory);
- zipIdentifier.setIdentifierEngine(zipIdentifierEngine);
- containerResults =
- zipIdentifier.process(request.getSourceInputStream(), containerResults);
- } catch (IOException e) { // carry on after container i/o problems
- logger.warn(e + SPACE + L_BRACKET + fileName + R_BRACKET);
- }
- } else {
- throw new CommandExecutionException("Unknown container type: " + containerPuid);
- }
- }
- }
- }
- }
-
- // container results only have the PUID filled in
- for (IdentificationResult result : containerResults.getResults()) {
- IdentificationResultImpl impl = (IdentificationResultImpl) result;
- Format format = puidFormatMap.get(result.getPuid());
- if (format != null) {
- impl.setName(format.getName());
- impl.setMimeType(format.getMimeType());
- impl.setVersion(format.getVersion());
- }
- }
-
- return containerResults;
- }
-
- private TriggerPuid getTriggerPuidByPuid(final String puid) {
- for (final TriggerPuid tp : triggerPuids) {
- if (tp.getPuid().equals(puid)) {
- return tp;
- }
- }
- return null;
- }
-}
diff --git a/src/main/java/edu/harvard/hul/ois/fits/tools/droid/ZipArchiveContentIdentifier.java b/src/main/java/edu/harvard/hul/ois/fits/tools/droid/ZipArchiveContentIdentifier.java
deleted file mode 100644
index adad010a..00000000
--- a/src/main/java/edu/harvard/hul/ois/fits/tools/droid/ZipArchiveContentIdentifier.java
+++ /dev/null
@@ -1,157 +0,0 @@
-/**
- * This file has been modified by Harvard University, June, 2017, for the purposes of incorporating
- * into the FITS application. The original can be found here: https://github.com/digital-preservation/droid
- *
- * Copyright (c) 2016, The National Archives
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following
- * conditions are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * * Neither the name of the The National Archives nor the
- * names of its contributors may be used to endorse or promote products
- * derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-package edu.harvard.hul.ois.fits.tools.droid;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.net.URI;
-import java.util.Map;
-import org.apache.commons.compress.archivers.zip.UnsupportedZipFeatureException;
-import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
-import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import uk.gov.nationalarchives.droid.command.action.CommandExecutionException;
-import uk.gov.nationalarchives.droid.container.ContainerSignatureDefinitions;
-import uk.gov.nationalarchives.droid.core.BinarySignatureIdentifier;
-import uk.gov.nationalarchives.droid.core.interfaces.IdentificationRequest;
-import uk.gov.nationalarchives.droid.core.interfaces.RequestIdentifier;
-import uk.gov.nationalarchives.droid.core.interfaces.resource.RequestMetaData;
-import uk.gov.nationalarchives.droid.core.interfaces.resource.ZipEntryIdentificationRequest;
-import uk.gov.nationalarchives.droid.profile.referencedata.Format;
-
-/**
- * Identifier for files held in a ZIP archive.
- *
- * @author rbrennan
- */
-public class ZipArchiveContentIdentifier extends ArchiveContentIdentifier {
-
- private static final Logger logger = LoggerFactory.getLogger(ZipArchiveContentIdentifier.class);
-
- /**
- *
- * @param binarySignatureIdentifier binary signature identifier
- * @param containerSignatureDefinitions container signatures
- * @param path current archive path
- * @param slash local path element delimiter
- * @param slash1 local first container prefix delimiter
- * @param puidFormatMap map of puids to formats
- */
- public ZipArchiveContentIdentifier(
- final BinarySignatureIdentifier binarySignatureIdentifier,
- final ContainerSignatureDefinitions containerSignatureDefinitions,
- final String path,
- final String slash,
- final String slash1,
- final Map puidFormatMap) {
-
- super(binarySignatureIdentifier, containerSignatureDefinitions, path, slash, slash1, false, puidFormatMap);
- }
-
- /**
- * @param uri The URI of the file to identify
- * @param request The Identification Request
- * @return The aggregated data of the examined ZIP file
- * @throws CommandExecutionException When an exception happens during execution
- * @throws CommandExecutionException When an exception happens during archive file access
- */
- public ContainerAggregator identify(final URI uri, final IdentificationRequest request)
- throws CommandExecutionException {
-
- final String newPath = makeContainerURI("zip", request.getFileName());
- setSlash1("");
- InputStream zipIn = null;
- ContainerAggregator aggregator = new ContainerAggregator();
- try {
- zipIn = request.getSourceInputStream();
- final ZipArchiveInputStream in = new ZipArchiveInputStream(zipIn);
- try {
- ZipArchiveEntry entry = null;
- Integer compressionMethod = null;
- while ((entry = in.getNextZipEntry()) != null) {
- final String name = entry.getName();
- if (!entry.isDirectory()) {
- final RequestMetaData metaData = new RequestMetaData(entry.getSize(), 2L, name);
- final RequestIdentifier identifier = new RequestIdentifier(uri);
- final ZipEntryIdentificationRequest zipRequest = new ZipEntryIdentificationRequest(
- metaData, identifier, getTmpDir().toPath(), false);
-
- if (compressionMethod != null && !compressionMethod.equals(entry.getMethod())) {
- logger.warn("Different compression method: " + compressionMethod + ", entry method: "
- + entry.getMethod());
- }
-
- compressionMethod = entry.getMethod(); // throws UnsupportedZipFeatureException
- expandContainer(zipRequest, in, newPath, aggregator); // zipRequest.size() is uncompressed
- logger.debug("zipRequest size(): " + zipRequest.size() + " -- entry.getCompressedSize(): "
- + entry.getCompressedSize() + " -- entry.getSize(): " + entry.getSize());
- if (entry.getCompressedSize() > 0) {
- aggregator.incrementCompressedSize(entry.getCompressedSize());
- }
- // in some situations the value returned is -1
- if (entry.getSize() > 0) {
- aggregator.incrementOriginalSize(entry.getSize());
- } else if (zipRequest.size() > 0) {
- aggregator.incrementOriginalSize(zipRequest.size());
- }
- }
- }
- } catch (UnsupportedZipFeatureException e) {
- // For now this indicates that we're attempting (and failing) to read from an encrypted ZIP file.
- aggregator.setEncrypted(true);
- } finally {
- if (in != null) {
- in.close();
- }
- // shows collection of files within ZIP file
- logger.debug("--------------");
- logger.debug("{}", aggregator);
- logger.debug("--------------");
- }
- } catch (IOException ioe) {
- logger.warn(ioe + " (" + newPath + ")"); // continue after corrupt archive
- } finally {
- if (zipIn != null) {
- try {
- zipIn.close();
- } catch (IOException ioe) {
- throw new CommandExecutionException(ioe.getMessage(), ioe);
- }
- }
- }
- return aggregator;
- }
-}
diff --git a/src/test/java/edu/harvard/hul/ois/fits/junit/DocMDXmlUnitTest.java b/src/test/java/edu/harvard/hul/ois/fits/junit/DocMDXmlUnitTest.java
index b4c97f30..7d598003 100644
--- a/src/test/java/edu/harvard/hul/ois/fits/junit/DocMDXmlUnitTest.java
+++ b/src/test/java/edu/harvard/hul/ois/fits/junit/DocMDXmlUnitTest.java
@@ -137,9 +137,9 @@ public void testEpubOutput() throws Exception {
// process multiple files to examine different types of output
String[] inputFilenames = {
- "Winnie-the-Pooh-protected.epub", // not properly identified as epub mimetype
+ "Winnie-the-Pooh-protected.epub",
"GeographyofBliss_oneChapter.epub",
- "aliceDynamic_images_metadata_tableOfContents.epub", // not properly identified as epub mimetype
+ "aliceDynamic_images_metadata_tableOfContents.epub", // Missing mimetype file; DROID cannot id it
"epub30-test-font-embedding-obfuscation.epub",
"Calibre_hasTable_of_Contents.epub"
};
diff --git a/src/test/java/edu/harvard/hul/ois/fits/junit/ZipDisabledXmlUnitTest.java b/src/test/java/edu/harvard/hul/ois/fits/junit/ZipDisabledXmlUnitTest.java
new file mode 100644
index 00000000..942a1ea1
--- /dev/null
+++ b/src/test/java/edu/harvard/hul/ois/fits/junit/ZipDisabledXmlUnitTest.java
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2016 Harvard University Library
+ *
+ * This file is part of FITS (File Information Tool Set).
+ *
+ * FITS is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * FITS is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with FITS. If not, see <http://www.gnu.org/licenses/>.
+ */
+package edu.harvard.hul.ois.fits.junit;
+
+import edu.harvard.hul.ois.fits.tests.AbstractXmlUnitTest;
+import org.junit.Test;
+
+/**
+ * These tests compare actual FITS output with expected output on ZIP files; fitsConfigFile() returns fits_archives_disabled.xml.
+ * These tests should be run with <display-tool-output>false</display-tool-output> in fits.xml.
+ *
+ * @author dan179
+ */
+public class ZipDisabledXmlUnitTest extends AbstractXmlUnitTest {
+
+ @Override
+ protected String fitsConfigFile() {
+ return "fits_archives_disabled.xml";
+ }
+
+ @Test
+ public void testCompressedZipFile() throws Exception {
+ testFile("assorted-files.zip", fits, OutputType.DEFAULT);
+ }
+}
diff --git a/src/test/java/edu/harvard/hul/ois/fits/tests/AbstractXmlUnitTest.java b/src/test/java/edu/harvard/hul/ois/fits/tests/AbstractXmlUnitTest.java
index 8cda3b9c..f59070d3 100644
--- a/src/test/java/edu/harvard/hul/ois/fits/tests/AbstractXmlUnitTest.java
+++ b/src/test/java/edu/harvard/hul/ois/fits/tests/AbstractXmlUnitTest.java
@@ -201,7 +201,11 @@ protected void writeAndValidate(FitsOutput fitsOut, String inputFilename, Output
}
// Read in the expected XML file
- String expectedFile = OUTPUT_DIR + inputFilename + namePart + EXPECTED_OUTPUT_FILE_SUFFIX;
+ // Prefer an expected file whose name includes className; fall back to the shared expected file if absent.
+ String expectedFile = OUTPUT_DIR + inputFilename + namePart + "_" + className + EXPECTED_OUTPUT_FILE_SUFFIX;
+ if (Files.notExists(Paths.get(expectedFile))) {
+ expectedFile = OUTPUT_DIR + inputFilename + namePart + EXPECTED_OUTPUT_FILE_SUFFIX;
+ }
String expectedXmlStr = FileUtils.readFileToString(new File(expectedFile), StandardCharsets.UTF_8);
if (overwrite) {
diff --git a/testfiles/output/aliceDynamic_images_metadata_tableOfContents.epub_XmlUnitExpectedOutput.xml b/testfiles/output/aliceDynamic_images_metadata_tableOfContents.epub_XmlUnitExpectedOutput.xml
index d82ecca6..ae4e233f 100644
--- a/testfiles/output/aliceDynamic_images_metadata_tableOfContents.epub_XmlUnitExpectedOutput.xml
+++ b/testfiles/output/aliceDynamic_images_metadata_tableOfContents.epub_XmlUnitExpectedOutput.xml
@@ -16,18 +16,31 @@
-
- yes
- fiction
- urn:uuid:1a16ce38-82bd-4e9b-861e-773c2e787a50
- en-GB
- Lewis Carroll
- Alice's Adventures in Wonderland
+
+ 1409497
+ deflate
+
+
+
+
+
+
+
+
+
en-GB
+
+
+ yes
+ fiction
+ urn:uuid:1a16ce38-82bd-4e9b-861e-773c2e787a50
+ en-GB
+ Lewis Carroll
+ Alice's Adventures in Wonderland
diff --git a/testfiles/output/assorted-files.zip-default_ZipDisabledXmlUnitTest_XmlUnitExpectedOutput.xml b/testfiles/output/assorted-files.zip-default_ZipDisabledXmlUnitTest_XmlUnitExpectedOutput.xml
new file mode 100644
index 00000000..3deddbfd
--- /dev/null
+++ b/testfiles/output/assorted-files.zip-default_ZipDisabledXmlUnitTest_XmlUnitExpectedOutput.xml
@@ -0,0 +1,40 @@
+
+
+
+
+
+
+
+
+
+ 1.0
+ x-fmt/263
+
+
+
+ /fits/testfiles/input/assorted-files.zip
+ assorted-files.zip
+ 30400659
+ 381dd28336fef8e188ebec5c6c29c596
+ 1666562273086
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/testfiles/properties/fits_archives_disabled.xml b/testfiles/properties/fits_archives_disabled.xml
new file mode 100644
index 00000000..910c7579
--- /dev/null
+++ b/testfiles/properties/fits_archives_disabled.xml
@@ -0,0 +1,69 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 20
+
+
+
+ DROID_SignatureFile_V109_Alt.xml
+ container-signature-20221102.xml
+
+
+
+
+
+
+
+ false
+ false
+ false
+ false
+ false
+ false
+ false
+ false
+ false
+
+
+
+
diff --git a/xml/fits.xml b/xml/fits.xml
index 8f5a9397..7a695b91 100644
--- a/xml/fits.xml
+++ b/xml/fits.xml
@@ -50,5 +50,20 @@
-
+
+
+
+
+ true
+ true
+ true
+ true
+ true
+ true
+ true
+ true
+ true
+
+
+