Skip to content

Commit

Permalink
feat: add option to disable OCR (#768)
Browse files Browse the repository at this point in the history
fixes #344
refs #767
  • Loading branch information
alexanderadam authored Sep 7, 2024
1 parent 8f1ff56 commit c2d7f3e
Show file tree
Hide file tree
Showing 16 changed files with 221 additions and 62 deletions.
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
package com.sismics.docs.core.constant;

/**
* Configuration parameters.
* Configuration parameters.
*
* @author jtremeaux
* @author jtremeaux
*/
public enum ConfigType {
/**
Expand All @@ -20,6 +20,11 @@ public enum ConfigType {
*/
GUEST_LOGIN,

/**
* OCR enabled.
*/
OCR_ENABLED,

/**
* Default language.
*/
Expand Down
29 changes: 21 additions & 8 deletions docs-core/src/main/java/com/sismics/docs/core/util/ConfigUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,12 @@

/**
* Configuration parameter utilities.
*
* @author jtremeaux
*
*/
public class ConfigUtil {
/**
* Returns the textual value of a configuration parameter.
*
*
* @param configType Type of the configuration parameter
* @return Textual value of the configuration parameter
* @throws IllegalStateException Configuration parameter undefined
Expand All @@ -30,7 +29,7 @@ public static String getConfigStringValue(ConfigType configType) {

/**
* Returns the configuration resource bundle.
*
*
* @return Resource bundle
*/
public static ResourceBundle getConfigBundle() {
Expand All @@ -39,14 +38,14 @@ public static ResourceBundle getConfigBundle() {

/**
* Returns the integer value of a configuration parameter.
*
*
* @param configType Type of the configuration parameter
* @return Integer value of the configuration parameter
* @throws IllegalStateException Configuration parameter undefined
*/
public static int getConfigIntegerValue(ConfigType configType) {
    // Look up the raw text and parse it in one step; per the documented contract
    // an undefined parameter is reported by the lookup as IllegalStateException.
    return Integer.parseInt(getConfigStringValue(configType));
}

Expand All @@ -65,14 +64,28 @@ public static long getConfigLongValue(ConfigType configType) {

/**
* Returns the boolean value of a configuration parameter.
*
*
* @param configType Type of the configuration parameter
* @return Boolean value of the configuration parameter
* @throws IllegalStateException Configuration parameter undefined
*/
public static boolean getConfigBooleanValue(ConfigType configType) {
    // Boolean.parseBoolean yields true only for the text "true" (case-insensitive);
    // any other stored value is read as false.
    return Boolean.parseBoolean(getConfigStringValue(configType));
}

/**
* Returns the boolean value of a configuration parameter with a default value.
*
* @param configType Type of the configuration parameter
* @param defaultValue Default value to return if the configuration parameter is undefined
* @return Boolean value of the configuration parameter
*/
public static boolean getConfigBooleanValue(ConfigType configType, boolean defaultValue) {
    boolean resolved;
    try {
        resolved = getConfigBooleanValue(configType);
    } catch (IllegalStateException ignored) {
        // The single-argument lookup signals an undefined parameter this way;
        // fall back to the value supplied by the caller.
        resolved = defaultValue;
    }
    return resolved;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import com.google.common.io.Closer;
import com.sismics.docs.core.constant.Constants;
import com.sismics.docs.core.util.FileUtil;
import com.sismics.docs.core.util.ConfigUtil;
import com.sismics.docs.core.constant.ConfigType;
import com.sismics.util.mime.MimeType;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.pdmodel.PDDocument;
Expand All @@ -22,7 +24,6 @@
/**
* Image format handler.
*
* @author bgamard
*/
public class ImageFormatHandler implements FormatHandler {
/**
Expand All @@ -45,7 +46,7 @@ public BufferedImage generateThumbnail(Path file) throws Exception {

@Override
public String extractContent(String language, Path file) throws Exception {
if (language == null) {
if (language == null || !ConfigUtil.getConfigBooleanValue(ConfigType.OCR_ENABLED, true)) {
return null;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import com.google.common.io.Closer;
import com.sismics.docs.core.util.FileUtil;
import com.sismics.docs.core.util.ConfigUtil;
import com.sismics.docs.core.constant.ConfigType;
import com.sismics.util.mime.MimeType;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.multipdf.PDFMergerUtility;
Expand Down Expand Up @@ -53,7 +55,7 @@ public String extractContent(String language, Path file) {
}

// No text content, try to OCR it
if (language != null && content != null && content.trim().isEmpty()) {
if (language != null && content != null && content.trim().isEmpty() && ConfigUtil.getConfigBooleanValue(ConfigType.OCR_ENABLED, true)) {
StringBuilder sb = new StringBuilder();
try (InputStream inputStream = Files.newInputStream(file);
PDDocument pdfDocument = PDDocument.load(inputStream)) {
Expand Down
24 changes: 12 additions & 12 deletions docs-core/src/main/java/com/sismics/util/jpa/DbOpenHelper.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ abstract class DbOpenHelper {
private static final Logger log = LoggerFactory.getLogger(DbOpenHelper.class);

private final JdbcConnectionAccess jdbcConnectionAccess;

private final List<Exception> exceptions = new ArrayList<>();

private Formatter formatter;
Expand Down Expand Up @@ -99,7 +99,7 @@ public void open() {
onCreate();
oldVersion = 0;
}

// Execute update script
ResourceBundle configBundle = ConfigUtil.getConfigBundle();
Integer currentVersion = Integer.parseInt(configBundle.getString("db.version"));
Expand All @@ -126,7 +126,7 @@ public void open() {

/**
* Execute all upgrade scripts in ascending order for a given version.
*
*
* @param version Version number
* @throws Exception e
*/
Expand All @@ -136,7 +136,7 @@ void executeAllScript(final int version) throws Exception {
return name.matches("dbupdate-" + versionString + "-\\d+\\.sql");
});
Collections.sort(fileNameList);

for (String fileName : fileNameList) {
if (log.isInfoEnabled()) {
log.info(MessageFormat.format("Executing script: {0}", fileName));
Expand All @@ -145,16 +145,16 @@ void executeAllScript(final int version) throws Exception {
executeScript(is);
}
}

/**
* Execute a SQL script. All statements must be one line only.
*
*
* @param inputScript Script to execute
* @throws IOException e
*/
private void executeScript(InputStream inputScript) throws IOException {
List<String> lines = CharStreams.readLines(new InputStreamReader(inputScript));

for (String sql : lines) {
if (Strings.isNullOrEmpty(sql) || sql.startsWith("--")) {
continue;
Expand All @@ -178,21 +178,21 @@ private void executeScript(InputStream inputScript) throws IOException {
}

public abstract void onCreate() throws Exception;

public abstract void onUpgrade(int oldVersion, int newVersion) throws Exception;

/**
* Returns a List of all Exceptions which occured during the export.
* Returns a List of all Exceptions which occurred during the export.
*
* @return A List containig the Exceptions occured during the export
* @return A List containing the Exceptions occurred during the export
*/
public List<?> getExceptions() {
    // Hands back the helper's own list instance, not a copy.
    return this.exceptions;
}

/**
* Format the output SQL statements.
*
*
* @param format True to format
*/
public void setFormat(boolean format) {
Expand Down
2 changes: 1 addition & 1 deletion docs-core/src/main/resources/config.properties
Original file line number Diff line number Diff line change
@@ -1 +1 @@
db.version=30
db.version=31
7 changes: 7 additions & 0 deletions docs-core/src/main/resources/db/update/dbupdate-031-0.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
-- DBUPDATE-031-0.SQL

-- Add the OCR_ENABLED configuration entry with the value 'true' (OCR switched on)
insert into T_CONFIG (CFG_ID_C, CFG_VALUE_C) values ('OCR_ENABLED', 'true');

-- Record that the database schema is now at version 31
update T_CONFIG set CFG_VALUE_C = '31' where CFG_ID_C = 'DB_VERSION';
2 changes: 1 addition & 1 deletion docs-web/src/dev/resources/config.properties
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
api.current_version=${project.version}
api.min_version=1.0
db.version=30
db.version=31
Loading

0 comments on commit c2d7f3e

Please sign in to comment.