Skip to content

Commit

Permalink
Merge pull request #8475 from QualitativeDataRepository/IQSS/8474-poi_update
Browse files (browse the repository at this point in the history)

IQSS/8474 POI/Tika updates
  • Loading branch information
kcondon authored Apr 12, 2022
2 parents 95beeaa + e1a52cc commit a5f703e
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 20 deletions.
17 changes: 12 additions & 5 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
<flyway.version>5.2.4</flyway.version>
<jhove.version>1.20.1</jhove.version>
<jacoco.version>0.8.7</jacoco.version>
<poi.version>5.2.1</poi.version>
<tika.version>2.3.0</tika.version>
</properties>

<!-- Versions of dependencies used both directly and transitive are managed here.
Expand Down Expand Up @@ -293,17 +295,17 @@
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.1.1</version>
<version>${poi.version}</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.1</version>
<version>${poi.version}</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>4.1.1</version>
<version>${poi.version}</version>
</dependency>
<dependency>
<groupId>org.openpreservation.jhove</groupId>
Expand Down Expand Up @@ -495,8 +497,13 @@
<!-- Full text indexing -->
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parsers</artifactId>
<version>1.27</version>
<artifactId>tika-core</artifactId>
<version>${tika.version}</version>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parsers-standard-package</artifactId>
<version>${tika.version}</version>
</dependency>
<!-- Named Entity Recognition -->
<dependency>
Expand Down
11 changes: 6 additions & 5 deletions src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package edu.harvard.iq.dataverse.api;

import edu.harvard.iq.dataverse.*;
import edu.harvard.iq.dataverse.DatasetLock.Reason;
import edu.harvard.iq.dataverse.actionlogging.ActionLogRecord;
import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean;
import edu.harvard.iq.dataverse.authorization.DataverseRole;
Expand Down Expand Up @@ -146,9 +147,6 @@
import org.glassfish.jersey.media.multipart.FormDataContentDisposition;
import org.glassfish.jersey.media.multipart.FormDataParam;
import com.amazonaws.services.s3.model.PartETag;
import com.beust.jcommander.Strings;

import java.util.Map.Entry;

@Path("datasets")
public class Datasets extends AbstractApiBean {
Expand Down Expand Up @@ -2691,9 +2689,12 @@ public Response listLocks(@QueryParam("type") String lockType, @QueryParam("user
try {
lockTypeValue = DatasetLock.Reason.valueOf(lockType);
} catch (IllegalArgumentException iax) {
String validValues = Strings.join(",", DatasetLock.Reason.values());
StringJoiner reasonJoiner = new StringJoiner(", ");
for (Reason r: Reason.values()) {
reasonJoiner.add(r.name());
};
String errorMessage = "Invalid lock type value: " + lockType +
"; valid lock types: " + validValues;
"; valid lock types: " + reasonJoiner.toString();
return error(Response.Status.BAD_REQUEST, errorMessage);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
import java.nio.channels.ReadableByteChannel;
import java.util.logging.Logger;

import org.apache.tika.io.IOUtils;
import org.apache.commons.io.IOUtils;

/**
*
* @author Leonid Andreev
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@

import org.apache.solr.client.solrj.SolrServerException;

import com.beust.jcommander.Strings;
import com.google.api.LabelDescriptor;

@RequiredPermissions(Permission.PublishDataset)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@

import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.SharedStrings;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
Expand Down Expand Up @@ -229,7 +229,7 @@ public void processSheet(InputStream inputStream, DataTable dataTable, PrintWrit
dbglog.info("entering processSheet");
OPCPackage pkg = OPCPackage.open(inputStream);
XSSFReader r = new XSSFReader(pkg);
SharedStringsTable sst = r.getSharedStringsTable();
SharedStrings sst = r.getSharedStringsTable();

XMLReader parser = fetchSheetParser(sst, dataTable, tempOut);

Expand All @@ -241,7 +241,7 @@ public void processSheet(InputStream inputStream, DataTable dataTable, PrintWrit
sheet1.close();
}

public XMLReader fetchSheetParser(SharedStringsTable sst, DataTable dataTable, PrintWriter tempOut) throws SAXException {
public XMLReader fetchSheetParser(SharedStrings sst, DataTable dataTable, PrintWriter tempOut) throws SAXException {
// An attempt to use org.apache.xerces.parsers.SAXParser resulted
// in some weird conflict in the app; the default XMLReader obtained
// from the XMLReaderFactory (from xml-apis.jar) appears to be working
Expand All @@ -265,7 +265,7 @@ public XMLReader fetchSheetParser(SharedStringsTable sst, DataTable dataTable, P
private static class SheetHandler extends DefaultHandler {

private DataTable dataTable;
private SharedStringsTable sst;
private SharedStrings sst;
private String cellContents;
private boolean nextIsString;
private boolean variableHeader;
Expand All @@ -277,11 +277,11 @@ private static class SheetHandler extends DefaultHandler {
String[] dataRow;
PrintWriter tempOut;

private SheetHandler(SharedStringsTable sst) {
private SheetHandler(SharedStrings sst) {
this(sst, null, null);
}

private SheetHandler(SharedStringsTable sst, DataTable dataTable, PrintWriter tempOut) {
private SheetHandler(SharedStrings sst, DataTable dataTable, PrintWriter tempOut) {
this.sst = sst;
this.dataTable = dataTable;
this.tempOut = tempOut;
Expand Down Expand Up @@ -410,7 +410,7 @@ public void endElement(String uri, String localName, String name)
// Do it now, as characters() may be called more than once
if (nextIsString) {
int idx = Integer.parseInt(cellContents);
cellContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
cellContents = sst.getItemAt(idx).getString();
nextIsString = false;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@
import javax.json.JsonObject;
import javax.persistence.EntityManager;
import javax.persistence.PersistenceContext;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
Expand All @@ -80,7 +82,6 @@
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.CursorMarkParams;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.io.IOUtils;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.BodyContentHandler;
Expand Down

0 comments on commit a5f703e

Please sign in to comment.