Skip to content

Commit

Permalink
add funder consolidation service parameter
Browse files: browse the repository at this point in the history
  • Loading branch information
kermitt2 committed Aug 19, 2023
1 parent d2d0aea commit 55216df
Show file tree
Hide file tree
Showing 7 changed files with 48 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -185,12 +185,17 @@ public StringBuilder toTEIHeader(BiblioItem biblio,
localFunders.add(entry.getKey());
}

Map<Integer,Funder> consolidatedFunders = Consolidation.getInstance().consolidateFunders(localFunders);
Map<Integer,Funder> consolidatedFunders = null;
if (config.getConsolidateFunders() != 0) {
consolidatedFunders = Consolidation.getInstance().consolidateFunders(localFunders);
}

int n =0;
for (Map.Entry<Funder, List<Funding>> entry : fundingRelation.entrySet()) {
String funderPiece = null;
Funder consolidatedFunder = consolidatedFunders.get(n);
Funder consolidatedFunder = null;
if (consolidatedFunders != null)
consolidatedFunder = consolidatedFunders.get(n);
if (consolidatedFunder != null)
funderPiece = consolidatedFunder.toTEI(4);
else
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2476,11 +2476,9 @@ private void toTEI(Document doc,
if (localResult != null && localResult.getLeft() != null) {
String local_tei = localResult.getLeft().toXML();
local_tei = local_tei.replace(" xmlns=\"http://www.tei-c.org/ns/1.0\"", "");
//tei.append(local_tei);
annexStatements.add(local_tei);
}
else {
//tei.append(acknowledgmentStmt);
annexStatements.add(acknowledgmentStmt.toString());
}

Expand All @@ -2497,11 +2495,6 @@ private void toTEI(Document doc,
if (StringUtils.isNotBlank(resHeader.getFunding())) {
List<LayoutToken> headerFundingTokens = resHeader.getLayoutTokens(TaggingLabels.HEADER_FUNDING);

/*List<Funding> fundings = this.parsers.getFundingAcknowledgementParser().processing(headerFundingTokens);
for (Funding funding : fundings) {
System.out.println(funding.toString());
}*/

Pair<String, List<LayoutToken>> headerFundingProcessed = processShort(headerFundingTokens, doc);
if (headerFundingProcessed != null) {
fundingStmt = teiFormatter.processTEIDivSection("funding",
Expand All @@ -2518,10 +2511,8 @@ private void toTEI(Document doc,
if (localResult != null && localResult.getLeft() != null) {
String local_tei = localResult.getLeft().toXML();
local_tei = local_tei.replace(" xmlns=\"http://www.tei-c.org/ns/1.0\"", "");
//tei.append(local_tei);
annexStatements.add(local_tei);
} else {
//tei.append(fundingStmt);
annexStatements.add(fundingStmt.toString());
}

Expand Down Expand Up @@ -2549,10 +2540,8 @@ private void toTEI(Document doc,
if (localResult != null && localResult.getLeft() != null){
String local_tei = localResult.getLeft().toXML();
local_tei = local_tei.replace(" xmlns=\"http://www.tei-c.org/ns/1.0\"", "");
//tei.append(local_tei);
annexStatements.add(local_tei);
} else {
//tei.append(fundingStmt);
annexStatements.add(fundingStmt.toString());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,25 @@ public MutablePair<Element, MutableTriple<List<Funding>,List<Person>,List<Affili
globalResult = MutablePair.of(root, localResult.getRight());
} else {
// concatenate members of the local results to the global ones
MutableTriple<List<Funding>,List<Person>,List<Affiliation>> localEntities = localResult.getRight();
MutableTriple<List<Funding>,List<Person>,List<Affiliation>> globalEntities = globalResult.getRight();

List<Funding> localFundings = localEntities.getLeft();
List<Funding> globalFundings = globalEntities.getLeft();
globalFundings.addAll(localFundings);
globalEntities.setLeft(globalFundings);

List<Person> localPersons = localEntities.getMiddle();
List<Person> globalPersons = globalEntities.getMiddle();
globalPersons.addAll(localPersons);
globalEntities.setMiddle(globalPersons);

List<Affiliation> localAffiliation = localEntities.getRight();
List<Affiliation> globalAffiliations = globalEntities.getRight();
globalAffiliations.addAll(localAffiliation);
globalEntities.setRight(globalAffiliations);

globalResult.setRight(globalEntities);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ public class GrobidRestService implements GrobidPaths {
public static final String INPUT = "input";
public static final String CONSOLIDATE_CITATIONS = "consolidateCitations";
public static final String CONSOLIDATE_HEADER = "consolidateHeader";
// Form-field name bound via @FormDataParam to the funder consolidation option of the fulltext endpoints.
// NOTE(review): the value is singular ("consolidateFunder") while this constant's name and the
// consolidateFunders method parameters bound to it are plural — confirm this is the field name
// clients are expected to send; a request using a different spelling would fall back to the "0" default.
public static final String CONSOLIDATE_FUNDERS = "consolidateFunder";
public static final String INCLUDE_RAW_AFFILIATIONS = "includeRawAffiliations";
public static final String INCLUDE_RAW_CITATIONS = "includeRawCitations";
public static final String INCLUDE_FIGURES_TABLES = "includeFiguresTables";
Expand Down Expand Up @@ -210,6 +211,7 @@ public Response processFulltextDocument_post(
@FormDataParam(INPUT) InputStream inputStream,
@DefaultValue("0") @FormDataParam(CONSOLIDATE_HEADER) String consolidateHeader,
@DefaultValue("0") @FormDataParam(CONSOLIDATE_CITATIONS) String consolidateCitations,
@DefaultValue("0") @FormDataParam(CONSOLIDATE_FUNDERS) String consolidateFunders,
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_AFFILIATIONS) String includeRawAffiliations,
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_CITATIONS) String includeRawCitations,
@DefaultValue("-1") @FormDataParam("start") int startPage,
Expand All @@ -218,7 +220,7 @@ public Response processFulltextDocument_post(
@FormDataParam("segmentSentences") String segmentSentences,
@FormDataParam("teiCoordinates") List<FormDataBodyPart> coordinates) throws Exception {
return processFulltext(
inputStream, consolidateHeader, consolidateCitations,
inputStream, consolidateHeader, consolidateCitations, consolidateFunders,
includeRawAffiliations, includeRawCitations,
startPage, endPage, generateIDs, segmentSentences, coordinates
);
Expand All @@ -232,6 +234,7 @@ public Response processFulltextDocument(
@FormDataParam(INPUT) InputStream inputStream,
@DefaultValue("0") @FormDataParam(CONSOLIDATE_HEADER) String consolidateHeader,
@DefaultValue("0") @FormDataParam(CONSOLIDATE_CITATIONS) String consolidateCitations,
@DefaultValue("0") @FormDataParam(CONSOLIDATE_FUNDERS) String consolidateFunders,
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_AFFILIATIONS) String includeRawAffiliations,
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_CITATIONS) String includeRawCitations,
@DefaultValue("-1") @FormDataParam("start") int startPage,
Expand All @@ -240,7 +243,7 @@ public Response processFulltextDocument(
@FormDataParam("segmentSentences") String segmentSentences,
@FormDataParam("teiCoordinates") List<FormDataBodyPart> coordinates) throws Exception {
return processFulltext(
inputStream, consolidateHeader, consolidateCitations,
inputStream, consolidateHeader, consolidateCitations, consolidateFunders,
includeRawAffiliations, includeRawCitations,
startPage, endPage, generateIDs, segmentSentences, coordinates
);
Expand All @@ -249,6 +252,7 @@ public Response processFulltextDocument(
private Response processFulltext(InputStream inputStream,
String consolidateHeader,
String consolidateCitations,
String consolidateFunders,
String includeRawAffiliations,
String includeRawCitations,
int startPage,
Expand All @@ -259,14 +263,15 @@ private Response processFulltext(InputStream inputStream,
) throws Exception {
int consolHeader = validateConsolidationParam(consolidateHeader);
int consolCitations = validateConsolidationParam(consolidateCitations);
int consolFunders = validateConsolidationParam(consolidateFunders);
boolean includeRaw = validateIncludeRawParam(includeRawCitations);
boolean generate = validateGenerateIdParam(generateIDs);
boolean segment = validateGenerateIdParam(segmentSentences);

List<String> teiCoordinates = collectCoordinates(coordinates);

return restProcessFiles.processFulltextDocument(
inputStream, consolHeader, consolCitations,
inputStream, consolHeader, consolCitations, consolFunders,
validateIncludeRawParam(includeRawAffiliations),
includeRaw,
startPage, endPage, generate, segment, teiCoordinates
Expand Down Expand Up @@ -316,6 +321,7 @@ public Response processFulltextAssetDocument_post(
@FormDataParam(INPUT) InputStream inputStream,
@DefaultValue("0") @FormDataParam(CONSOLIDATE_HEADER) String consolidateHeader,
@DefaultValue("0") @FormDataParam(CONSOLIDATE_CITATIONS) String consolidateCitations,
@DefaultValue("0") @FormDataParam(CONSOLIDATE_FUNDERS) String consolidateFunders,
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_AFFILIATIONS) String includeRawAffiliations,
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_CITATIONS) String includeRawCitations,
@DefaultValue("-1") @FormDataParam("start") int startPage,
Expand All @@ -324,7 +330,7 @@ public Response processFulltextAssetDocument_post(
@FormDataParam("segmentSentences") String segmentSentences,
@FormDataParam("teiCoordinates") List<FormDataBodyPart> coordinates) throws Exception {
return processStatelessFulltextAssetHelper(
inputStream, consolidateHeader, consolidateCitations,
inputStream, consolidateHeader, consolidateCitations, consolidateFunders,
includeRawAffiliations, includeRawCitations,
startPage, endPage, generateIDs, segmentSentences, coordinates
);
Expand All @@ -338,6 +344,7 @@ public Response processStatelessFulltextAssetDocument(
@FormDataParam(INPUT) InputStream inputStream,
@DefaultValue("0") @FormDataParam(CONSOLIDATE_HEADER) String consolidateHeader,
@DefaultValue("0") @FormDataParam(CONSOLIDATE_CITATIONS) String consolidateCitations,
@DefaultValue("0") @FormDataParam(CONSOLIDATE_FUNDERS) String consolidateFunders,
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_AFFILIATIONS) String includeRawAffiliations,
@DefaultValue("0") @FormDataParam(INCLUDE_RAW_CITATIONS) String includeRawCitations,
@DefaultValue("-1") @FormDataParam("start") int startPage,
Expand All @@ -346,7 +353,7 @@ public Response processStatelessFulltextAssetDocument(
@FormDataParam("segmentSentences") String segmentSentences,
@FormDataParam("teiCoordinates") List<FormDataBodyPart> coordinates) throws Exception {
return processStatelessFulltextAssetHelper(
inputStream, consolidateHeader, consolidateCitations,
inputStream, consolidateHeader, consolidateCitations, consolidateFunders,
includeRawAffiliations, includeRawCitations,
startPage, endPage, generateIDs, segmentSentences, coordinates
);
Expand All @@ -355,6 +362,7 @@ public Response processStatelessFulltextAssetDocument(
private Response processStatelessFulltextAssetHelper(InputStream inputStream,
String consolidateHeader,
String consolidateCitations,
String consolidateFunders,
String includeRawAffiliations,
String includeRawCitations,
int startPage,
Expand All @@ -364,14 +372,15 @@ private Response processStatelessFulltextAssetHelper(InputStream inputStream,
List<FormDataBodyPart> coordinates) throws Exception {
int consolHeader = validateConsolidationParam(consolidateHeader);
int consolCitations = validateConsolidationParam(consolidateCitations);
int consolFunders = validateConsolidationParam(consolidateFunders);
boolean includeRaw = validateIncludeRawParam(includeRawCitations);
boolean generate = validateGenerateIdParam(generateIDs);
boolean segment = validateGenerateIdParam(segmentSentences);

List<String> teiCoordinates = collectCoordinates(coordinates);

return restProcessFiles.processStatelessFulltextAssetDocument(
inputStream, consolHeader, consolCitations,
inputStream, consolHeader, consolCitations, consolFunders,
validateIncludeRawParam(includeRawAffiliations),
includeRaw,
startPage, endPage, generate, segment, teiCoordinates
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ public Response processStatelessHeaderDocument(
public Response processFulltextDocument(final InputStream inputStream,
final int consolidateHeader,
final int consolidateCitations,
final int consolidateFunders,
final boolean includeRawAffiliations,
final boolean includeRawCitations,
final int startPage,
Expand Down Expand Up @@ -196,6 +197,7 @@ public Response processFulltextDocument(final InputStream inputStream,
GrobidAnalysisConfig.builder()
.consolidateHeader(consolidateHeader)
.consolidateCitations(consolidateCitations)
.consolidateFunders(consolidateFunders)
.includeRawAffiliations(includeRawAffiliations)
.includeRawCitations(includeRawCitations)
.startPage(startPage)
Expand Down Expand Up @@ -256,6 +258,7 @@ public Response processFulltextDocument(final InputStream inputStream,
public Response processStatelessFulltextAssetDocument(final InputStream inputStream,
final int consolidateHeader,
final int consolidateCitations,
final int consolidateFunders,
final boolean includeRawAffiliations,
final boolean includeRawCitations,
final int startPage,
Expand Down Expand Up @@ -298,6 +301,7 @@ public Response processStatelessFulltextAssetDocument(final InputStream inputStr
GrobidAnalysisConfig.builder()
.consolidateHeader(consolidateHeader)
.consolidateCitations(consolidateCitations)
.consolidateFunders(consolidateFunders)
.includeRawAffiliations(includeRawAffiliations)
.includeRawCitations(includeRawCitations)
.startPage(startPage)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,8 @@ private static AbstractTrainer getTrainer(String model) {
trainer = new FigureTrainer();
} else if (model.equals("table")) {
trainer = new TableTrainer();
} else if (model.equals("funding-acknowledgement")) {
trainer = new FundingAcknowledgementTrainer();
} else {
throw new IllegalStateException("The model " + model + " is unknown.");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,9 @@ public Boolean call() {
GrobidAnalysisConfig.builder()
.consolidateHeader(1)
.consolidateCitations(0)
.consolidateFunders(0)
.withPreprocessImages(true)
// .withSentenceSegmentation(true)
.withSentenceSegmentation(false)
.build();
String tei = engine.fullTextToTEI(this.pdfFile, config);
// write the result in the same directory
Expand Down

0 comments on commit 55216df

Please sign in to comment.