Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add option to disable OCR #768

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
package com.sismics.docs.core.constant;

/**
* Configuration parameters.
* Configuration parameters.
*
* @author jtremeaux
* @author jtremeaux
*/
public enum ConfigType {
/**
Expand All @@ -20,6 +20,11 @@ public enum ConfigType {
*/
GUEST_LOGIN,

/**
* OCR enabled.
*/
OCR_ENABLED,

/**
* Default language.
*/
Expand Down
29 changes: 21 additions & 8 deletions docs-core/src/main/java/com/sismics/docs/core/util/ConfigUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,12 @@

/**
* Configuration parameter utilities.
*
* @author jtremeaux
*
*/
public class ConfigUtil {
/**
* Returns the textual value of a configuration parameter.
*
*
* @param configType Type of the configuration parameter
* @return Textual value of the configuration parameter
* @throws IllegalStateException Configuration parameter undefined
Expand All @@ -30,7 +29,7 @@ public static String getConfigStringValue(ConfigType configType) {

/**
* Returns the configuration resource bundle.
*
*
* @return Resource bundle
*/
public static ResourceBundle getConfigBundle() {
Expand All @@ -39,14 +38,14 @@ public static ResourceBundle getConfigBundle() {

/**
* Returns the integer value of a configuration parameter.
*
*
* @param configType Type of the configuration parameter
* @return Integer value of the configuration parameter
* @throws IllegalStateException Configuration parameter undefined
*/
public static int getConfigIntegerValue(ConfigType configType) {
    // Resolve the raw text first, then let Integer.parseInt do the conversion;
    // a non-numeric value surfaces as the NumberFormatException it throws.
    return Integer.parseInt(getConfigStringValue(configType));
}

Expand All @@ -65,14 +64,28 @@ public static long getConfigLongValue(ConfigType configType) {

/**
* Returns the boolean value of a configuration parameter.
*
*
* @param configType Type of the configuration parameter
* @return Boolean value of the configuration parameter
* @throws IllegalStateException Configuration parameter undefined
*/
public static boolean getConfigBooleanValue(ConfigType configType) {
    // Boolean.parseBoolean yields true only for the text "true" (case-insensitive);
    // every other stored value is read as false.
    return Boolean.parseBoolean(getConfigStringValue(configType));
}

/**
* Returns the boolean value of a configuration parameter with a default value.
*
* @param configType Type of the configuration parameter
* @param defaultValue Default value to return if the configuration parameter is undefined
* @return Boolean value of the configuration parameter
*/
public static boolean getConfigBooleanValue(ConfigType configType, boolean defaultValue) {
    boolean resolved;
    try {
        resolved = getConfigBooleanValue(configType);
    } catch (IllegalStateException ignored) {
        // The single-argument overload is documented to throw IllegalStateException
        // when the parameter is undefined; that case is expected here, so the
        // exception is intentionally dropped and the caller's default is used.
        resolved = defaultValue;
    }
    return resolved;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import com.google.common.io.Closer;
import com.sismics.docs.core.constant.Constants;
import com.sismics.docs.core.util.FileUtil;
import com.sismics.docs.core.util.ConfigUtil;
import com.sismics.docs.core.constant.ConfigType;
import com.sismics.util.mime.MimeType;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.pdmodel.PDDocument;
Expand All @@ -22,7 +24,6 @@
/**
* Image format handler.
*
* @author bgamard
*/
public class ImageFormatHandler implements FormatHandler {
/**
Expand All @@ -45,7 +46,7 @@ public BufferedImage generateThumbnail(Path file) throws Exception {

@Override
public String extractContent(String language, Path file) throws Exception {
if (language == null) {
if (language == null || !ConfigUtil.getConfigBooleanValue(ConfigType.OCR_ENABLED, true)) {
return null;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import com.google.common.io.Closer;
import com.sismics.docs.core.util.FileUtil;
import com.sismics.docs.core.util.ConfigUtil;
import com.sismics.docs.core.constant.ConfigType;
import com.sismics.util.mime.MimeType;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.multipdf.PDFMergerUtility;
Expand Down Expand Up @@ -53,7 +55,7 @@ public String extractContent(String language, Path file) {
}

// No text content, try to OCR it
if (language != null && content != null && content.trim().isEmpty()) {
if (language != null && content != null && content.trim().isEmpty() && ConfigUtil.getConfigBooleanValue(ConfigType.OCR_ENABLED, true)) {
StringBuilder sb = new StringBuilder();
try (InputStream inputStream = Files.newInputStream(file);
PDDocument pdfDocument = PDDocument.load(inputStream)) {
Expand Down
24 changes: 12 additions & 12 deletions docs-core/src/main/java/com/sismics/util/jpa/DbOpenHelper.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ abstract class DbOpenHelper {
private static final Logger log = LoggerFactory.getLogger(DbOpenHelper.class);

private final JdbcConnectionAccess jdbcConnectionAccess;

private final List<Exception> exceptions = new ArrayList<>();

private Formatter formatter;
Expand Down Expand Up @@ -99,7 +99,7 @@ public void open() {
onCreate();
oldVersion = 0;
}

// Execute update script
ResourceBundle configBundle = ConfigUtil.getConfigBundle();
Integer currentVersion = Integer.parseInt(configBundle.getString("db.version"));
Expand All @@ -126,7 +126,7 @@ public void open() {

/**
* Execute all upgrade scripts in ascending order for a given version.
*
*
* @param version Version number
* @throws Exception e
*/
Expand All @@ -136,7 +136,7 @@ void executeAllScript(final int version) throws Exception {
return name.matches("dbupdate-" + versionString + "-\\d+\\.sql");
});
Collections.sort(fileNameList);

for (String fileName : fileNameList) {
if (log.isInfoEnabled()) {
log.info(MessageFormat.format("Executing script: {0}", fileName));
Expand All @@ -145,16 +145,16 @@ void executeAllScript(final int version) throws Exception {
executeScript(is);
}
}

/**
* Execute a SQL script. All statements must be one line only.
*
*
* @param inputScript Script to execute
* @throws IOException e
*/
private void executeScript(InputStream inputScript) throws IOException {
List<String> lines = CharStreams.readLines(new InputStreamReader(inputScript));

for (String sql : lines) {
if (Strings.isNullOrEmpty(sql) || sql.startsWith("--")) {
continue;
Expand All @@ -178,21 +178,21 @@ private void executeScript(InputStream inputScript) throws IOException {
}

public abstract void onCreate() throws Exception;

public abstract void onUpgrade(int oldVersion, int newVersion) throws Exception;

/**
* Returns a List of all Exceptions which occured during the export.
* Returns a List of all Exceptions which occurred during the export.
*
* @return A List containig the Exceptions occured during the export
* @return A List containing the Exceptions occurred during the export
*/
public List<?> getExceptions() {
    // Hands back the helper's own list instance, not a copy.
    return this.exceptions;
}

/**
* Format the output SQL statements.
*
*
* @param format True to format
*/
public void setFormat(boolean format) {
Expand Down
2 changes: 1 addition & 1 deletion docs-core/src/main/resources/config.properties
Original file line number Diff line number Diff line change
@@ -1 +1 @@
db.version=30
db.version=31
7 changes: 7 additions & 0 deletions docs-core/src/main/resources/db/update/dbupdate-031-0.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
-- DBUPDATE-031-0.SQL
-- Database update script for schema version 31: adds the OCR_ENABLED configuration row.
-- NOTE: each statement below must stay on a single line; comment lines must start with "--".

-- Insert a new setting for OCR recognition, seeded with 'true' (OCR turned on)
insert into T_CONFIG (CFG_ID_C, CFG_VALUE_C) values ('OCR_ENABLED', 'true');

-- Update the database version (keep in step with db.version in config.properties)
update T_CONFIG set CFG_VALUE_C = '31' where CFG_ID_C = 'DB_VERSION';
2 changes: 1 addition & 1 deletion docs-web/src/dev/resources/config.properties
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
api.current_version=${project.version}
api.min_version=1.0
db.version=30
db.version=31
Loading