Skip to content

Commit

Permalink
migrate to logback instead of log4j2; allow logs in json format; fix …
Browse files Browse the repository at this point in the history
…long due problem with logs in trainer command
  • Loading branch information
kermitt2 committed Nov 11, 2023
1 parent 113298d commit f3b0ed1
Show file tree
Hide file tree
Showing 14 changed files with 138 additions and 106 deletions.
67 changes: 37 additions & 30 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,11 @@ subprojects {
maven { url "https://jitpack.io" }
}

/*configurations {
configurations {
all*.exclude group: 'org.slf4j', module: "slf4j-log4j12"
//all*.exclude group: 'log4j', module: "log4j"
// implementation.setCanBeResolved(true)
}*/
all*.exclude group: 'log4j', module: "log4j"
implementation.setCanBeResolved(true)
}

ext {
// treating them separately, these jars will be flattened into grobid-core.jar on installing,
Expand Down Expand Up @@ -107,8 +107,6 @@ subprojects {
implementation "com.fasterxml.jackson.core:jackson-databind:2.10.1"
implementation "com.fasterxml.jackson.module:jackson-module-afterburner:2.10.1"
implementation "com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.10.1"

implementation "org.apache.logging.log4j:log4j-layout-template-json:2.21.1"
}

task sourceJar(type: Jar) {
Expand Down Expand Up @@ -172,7 +170,7 @@ subprojects {

if (JavaVersion.current().compareTo(JavaVersion.VERSION_1_8) > 0) {
jvmArgs "--add-opens", "java.base/java.util.stream=ALL-UNNAMED",
"--add-opens", "java.base/java.io=ALL-UNNAMED"
"--add-opens", "java.base/java.io=ALL-UNNAMED", "--add-opens", "java.xml/jdk.xml.internal=ALL-UNNAMED"
}
systemProperty "java.library.path","${System.getProperty('java.library.path')}:" + libraries
}
Expand All @@ -199,9 +197,8 @@ project("grobid-core") {
}

// Logs
api 'org.slf4j:slf4j-api:1.7.25'
//api 'org.slf4j:slf4j-log4j12:1.7.25'
runtimeOnly 'org.slf4j:slf4j-jdk14:1.7.25'
implementation 'org.slf4j:slf4j-api:1.7.30'
implementation 'ch.qos.logback:logback-classic:1.2.3'

implementation "org.apache.pdfbox:pdfbox:2.0.18"

Expand Down Expand Up @@ -333,9 +330,6 @@ project(":grobid-service") {
}

configurations {
all*.exclude group: 'org.slf4j', module: "slf4j-jdk14"
all*.exclude group: 'org.slf4j', module: "slf4j-log4j12"
all*.exclude group: 'log4j', module: "log4j"
}

tasks.distZip.enabled = false
Expand All @@ -350,19 +344,31 @@ project(":grobid-service") {
dependencies {
implementation project(':grobid-core')
implementation project(':grobid-trainer')
implementation "io.dropwizard:dropwizard-core:1.3.23"
implementation "io.dropwizard:dropwizard-assets:1.3.23"
implementation "com.hubspot.dropwizard:dropwizard-guicier:1.3.5.0"
implementation "io.dropwizard:dropwizard-testing:1.3.23"
implementation "io.dropwizard:dropwizard-forms:1.3.23"
implementation "io.dropwizard:dropwizard-client:1.3.23"
implementation "io.dropwizard:dropwizard-auth:1.3.23"
implementation "io.dropwizard:dropwizard-core:1.3.29"
implementation "io.dropwizard:dropwizard-assets:1.3.29"
implementation "com.hubspot.dropwizard:dropwizard-guicier:1.3.5.2"
implementation "io.dropwizard:dropwizard-forms:1.3.29"
implementation "io.dropwizard:dropwizard-client:1.3.29"
implementation "io.dropwizard:dropwizard-auth:1.3.29"
implementation "io.dropwizard:dropwizard-json-logging:1.3.29"
testImplementation "io.dropwizard:dropwizard-testing:1.3.29"

// note: moving to dropwizard 2.* breaks the support of JDK 1.8
// the Guice dependency then needs to be changed to the more modern package ru.vyarus.dropwizard-guicey,
// which also requires a few code updates
/*implementation "io.dropwizard:dropwizard-core:2.1.10"
implementation "io.dropwizard:dropwizard-assets:2.1.10"
implementation "ru.vyarus:dropwizard-guicey:5.2.0"
implementation "io.dropwizard:dropwizard-forms:2.1.10"
implementation "io.dropwizard:dropwizard-client:2.1.10"
implementation "io.dropwizard:dropwizard-auth:2.1.10"
implementation "io.dropwizard:dropwizard-json-logging:2.1.10"
testImplementation "io.dropwizard:dropwizard-testing:2.1.10"*/

implementation "org.apache.pdfbox:pdfbox:2.0.3"
implementation "javax.activation:activation:1.1.1"
implementation "io.prometheus:simpleclient_dropwizard:0.11.0"
implementation "io.prometheus:simpleclient_servlet:0.11.0"

testImplementation "io.dropwizard:dropwizard-testing:1.3.17"
implementation "io.prometheus:simpleclient_servlet:0.11.0"
}

shadowJar {
Expand Down Expand Up @@ -413,15 +419,13 @@ project(":grobid-trainer") {
implementation project(':grobid-core')
implementation "com.rockymadden.stringmetric:stringmetric-core_2.10:0.27.3"
implementation "me.tongfei:progressbar:0.9.0"
//implementation 'org.slf4j:slf4j-log4j12:1.7.25'
implementation 'org.slf4j:slf4j-api:1.7.25'
//implementation 'org.slf4j:slf4j-jdk14:1.7.25'

// logs
implementation 'org.slf4j:slf4j-api:1.7.30'
implementation 'ch.qos.logback:logback-classic:1.2.3'
}

configurations {
//all*.exclude group: 'org.slf4j', module: "slf4j-jdk14"
//all*.exclude group: 'org.slf4j', module: "slf4j-log4j12"
//all*.exclude group: 'log4j', module: "log4j"
}

jar {
Expand All @@ -442,6 +446,10 @@ project(":grobid-trainer") {
attributes 'Main-Class': 'org.grobid.trainer.TrainerRunner'
}

from('src/main/resources') {
include '*.xml'
}

duplicatesStrategy = DuplicatesStrategy.EXCLUDE
}

Expand Down Expand Up @@ -593,7 +601,6 @@ coveralls {
sourceDirs = files(subprojects.sourceSets.main.allSource.srcDirs).files.absolutePath
}


tasks.coveralls {
dependsOn codeCoverageReport
}
Expand Down
10 changes: 5 additions & 5 deletions doc/Configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -114,11 +114,11 @@ When executing the service, models can be loaded in a lazy manner (if you plan t

```yml
# for **service only**: how to load the models,
# false -> models are loaded when needed (default), avoiding putting in memory useless models but slow down significantly
# the service at first call
# true -> all the models are loaded into memory at the server startup, slow the start of the services and models not
# used will take some memory, but server is immediatly warm and ready
modelPreload: false
# false -> models are loaded when needed, which avoids keeping unused models in memory (only in case of CRF) but
# significantly slows down the service at first call
# true -> all the models are loaded into memory at server startup (default), which slows down the start of the service,
# and unused models will take some more memory (only in case of CRF), but the server is immediately warm and ready
modelPreload: true
```

Finally the following part specifies the port to be used by the GROBID web service:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ private void processFullTextDirectory(File[] files,
for (final File currPdf : files) {
try {
if (currPdf.getName().toLowerCase().endsWith(".pdf")) {
System.out.println("Processing: " + currPdf.getPath());
LOGGER.info("Processing: " + currPdf.getPath());
GrobidAnalysisConfig config = null;
// path for saving assets
if (saveAssets) {
Expand Down Expand Up @@ -570,7 +570,7 @@ public void processPDFAnnotation(final GrobidMainArgs pGbdArgs) throws Exception
for (final File currPDF : pdfDirectory.listFiles()) {
try {
if (currPDF.getName().toLowerCase().endsWith(".pdf")) {
System.out.println("Processing: " + currPDF.getName());
LOGGER.info("Processing: " + currPDF.getName());
List<String> elementWithCoords = new ArrayList();
elementWithCoords.add("ref");
elementWithCoords.add("biblStruct");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,14 @@
import org.grobid.core.GrobidModel;
import org.grobid.core.GrobidModels;
import org.grobid.core.exceptions.GrobidException;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;

public class WapitiModel {
public static final Logger LOGGER = LoggerFactory.getLogger(WapitiModel.class);
private static final Logger LOGGER = LoggerFactory.getLogger(WapitiModel.class);

private SWIGTYPE_p_mdl_t model;
private File modelFile;
Expand All @@ -33,7 +34,7 @@ private synchronized void init() {
if (!modelFile.exists() || modelFile.isDirectory()) {
throw new GrobidException("Model file does not exists or is a directory: " + modelFile.getAbsolutePath());
}
LOGGER.info("Loading model: " + modelFile + " (size: " + modelFile.length() + ")");
//LOGGER.info("Loading model: " + modelFile + " (size: " + modelFile.length() + ")");
model = WapitiWrapper.getModel(modelFile);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,7 @@ public GrobidHomeFinder(List<String> grobidHomePossibleLocations) {

public File findGrobidHomeOrFail() {
File gh = getGrobidHomePathOrLoadFromClasspath();

LOGGER.info("***************************************************************");
LOGGER.info("*** USING GROBID HOME: " + gh.getAbsolutePath());
LOGGER.info("***************************************************************");

if (!gh.exists() || !gh.isDirectory()) {
fail("Grobid home folder '" + gh.getAbsolutePath() + "' was detected for usage, but does not exist");
}
Expand Down
16 changes: 0 additions & 16 deletions grobid-core/src/main/resources/log4j.xml

This file was deleted.

30 changes: 30 additions & 0 deletions grobid-core/src/main/resources/logback.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
<!--
    Default logback configuration.
    Every event at level INFO or above is written both to the console and to a rolling log file
    under ./logs (path relative to the working directory of the JVM).
-->
<configuration>

    <!-- logs in console -->
    <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
        <encoder>
            <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{50} - %msg%n</pattern>
        </encoder>
    </appender>

    <!--
        logs in rolling files

        TimeBasedRollingPolicy is its own triggering policy, so it must not be paired with a separate
        SizeBasedTriggeringPolicy. SizeAndTimeBasedRollingPolicy is the supported way to roll over both
        daily and when the active file exceeds maxFileSize. It requires the %i index token in
        fileNamePattern so that several archives of the same day get distinct names.
    -->
    <appender name="rollingFile" class="ch.qos.logback.core.rolling.RollingFileAppender">
        <file>./logs/grobid.log</file>
        <rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
            <!-- one archive per day and per size-triggered rollover within that day -->
            <fileNamePattern>./logs/grobid-%d{yyyy-MM-dd}.%i.log</fileNamePattern>
            <!-- roll over within the same day once the active file reaches this size -->
            <maxFileSize>10MB</maxFileSize>
            <!-- keep at most 5 days of archives, and never more than 100MB of archives in total -->
            <maxHistory>5</maxHistory>
            <totalSizeCap>100MB</totalSizeCap>
        </rollingPolicy>
        <encoder>
            <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
        </encoder>
    </appender>

    <root level="info">
        <appender-ref ref="STDOUT" />
        <appender-ref ref="rollingFile" />
    </root>
</configuration>
14 changes: 0 additions & 14 deletions grobid-core/src/test/resources/log4j-test.xml

This file was deleted.

29 changes: 16 additions & 13 deletions grobid-home/config/grobid.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -261,11 +261,11 @@ grobid:
batch_size: 40

# for **service only**: how to load the models,
# false -> models are loaded when needed (default), avoiding putting in memory useless models but slow down significantly
# the service at first call
# true -> all the models are loaded into memory at the server startup, slow the start of the services and models not
# used will take some memory, but server is immediatly warm and ready
modelPreload: false
# false -> models are loaded when needed, which avoids keeping unused models in memory (only in case of CRF) but
# significantly slows down the service at first call
# true -> all the models are loaded into memory at server startup (default), which slows down the start of the service,
# and unused models will take some more memory (only in case of CRF), but the server is immediately warm and ready
modelPreload: true

server:
type: custom
Expand All @@ -276,28 +276,31 @@ server:
- type: http
port: 8071
registerDefaultExceptionMappers: false
# change the following to have all http requests logged
requestLog:
appenders: []

# these logging settings apply to the Grobid service usage mode
logging:
loggers:
org.apache.pdfbox.pdmodel.font.PDSimpleFont: "OFF"
org.glassfish.jersey.internal: "OFF"
com.squarespace.jersey2.guice.JerseyGuiceUtils: "OFF"
appenders:
- type: console
level: INFO
threshold: WARN
timeZone: UTC
# uncomment to have the logs in json format
#layout:
# type: json
- type: file
currentLogFilename: logs/grobid-service.log
threshold: ALL
archive: true
archivedLogFilenamePattern: logs/grobid-service-%d.log
archivedFileCount: 5
timeZone: UTC
# - type: console
# name: ConsoleJSONAppender
# level: INFO
# threshold: WARN
# timeZone: UTC
# encoding: UTF-8
# layout: org.apache.log4j.PatternLayout
# layout.ConversionPattern: {"debug_level":"%p","debug_timestamp":"%d{ISO8601}","debug_thread":"%t","debug_file":"%F", "debug_line":"%L","debug_message":"%m"}%n
# uncomment to have the logs in json format
#layout:
# type: json
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
package org.grobid.service;

import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import org.hibernate.validator.constraints.NotEmpty;

@JsonIgnoreProperties(ignoreUnknown=true)
public class GrobidServicePropConfiguration {
@NotEmpty
@JsonProperty
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import com.google.common.collect.Lists;
import com.google.inject.Module;
import com.hubspot.dropwizard.guicier.GuiceBundle;
//import ru.vyarus.dropwizard.guice.GuiceBundle;
import io.dropwizard.Application;
import io.dropwizard.assets.AssetsBundle;
import io.dropwizard.forms.MultiPartBundle;
Expand Down Expand Up @@ -47,6 +48,11 @@ public void initialize(Bootstrap<GrobidServiceConfiguration> bootstrap) {
.modules(getGuiceModules())
.build();
bootstrap.addBundle(guiceBundle);

/*bootstrap.addBundle(GuiceBundle.builder()
.enableAutoConfig(getClass().getPackage().getName())
.build());*/

bootstrap.addBundle(new MultiPartBundle());
bootstrap.addBundle(new AssetsBundle("/web", "/", "index.html", "grobidAssets"));
}
Expand All @@ -57,6 +63,7 @@ private List<? extends Module> getGuiceModules() {

@Override
public void run(GrobidServiceConfiguration configuration, Environment environment) {

LOGGER.info("Service config={}", configuration);
new DropwizardExports(environment.metrics()).register();
ServletRegistration.Dynamic registration = environment.admin().addServlet("Prometheus", new MetricsServlet());
Expand Down Expand Up @@ -95,21 +102,21 @@ public static void main(String... args) throws Exception {
File confLocation = new File(p).getAbsoluteFile();
if (confLocation.exists()) {
foundConf = confLocation.getAbsolutePath();
LOGGER.info("Found conf path: {}", foundConf);
//LOGGER.info("Found conf path: {}", foundConf);
break;
}
}

if (foundConf != null) {
LOGGER.warn("Running with default arguments: \"server\" \"{}\"", foundConf);
//LOGGER.info("Running with default arguments: \"server\" \"{}\"", foundConf);
args = new String[]{"server", foundConf};
} else {
throw new RuntimeException("No explicit config provided and cannot find in one of the default locations: "
+ Arrays.toString(DEFAULT_CONF_LOCATIONS));
}
}

LOGGER.info("Configuration file: {}", new File(args[1]).getAbsolutePath());
//LOGGER.info("Configuration file: {}", new File(args[1]).getAbsolutePath());
new GrobidServiceApplication().run(args);
}
}
Loading

0 comments on commit f3b0ed1

Please sign in to comment.