Skip to content

Commit

Permalink
[kbss-cvut/termit-ui#449] Support importing multilingual Excel withou…
Browse files Browse the repository at this point in the history
…t term references.
  • Loading branch information
ledsoft committed Jun 25, 2024
1 parent df2800d commit 839b850
Show file tree
Hide file tree
Showing 4 changed files with 125 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -53,12 +54,15 @@ public Vocabulary importVocabulary(ImportConfiguration config, ImportInput data)
}
final Vocabulary targetVocabulary = vocabularyDao.find(config.vocabularyIri()).orElseThrow(() -> NotFoundException.create(Vocabulary.class, config.vocabularyIri()));
try {
List<Term> terms = Collections.emptyList();
for (InputStream input : data.data()) {
final Workbook workbook = new XSSFWorkbook(input);
assert workbook.getNumberOfSheets() > 0;
final Sheet sheet = workbook.getSheetAt(0);
// TODO Reuse terms between internationalized sheets
final List<Term> terms = new LocalizedSheetImporter().resolveTermsFromSheet(sheet);
for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
final Sheet sheet = workbook.getSheetAt(i);
// TODO Reuse terms between internationalized sheets
terms = new LocalizedSheetImporter(terms).resolveTermsFromSheet(sheet);
}
// TODO Parents vs children
terms.forEach(t -> termService.addRootTermToVocabulary(t, targetVocabulary));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import cz.cvut.kbss.jopa.model.MultilingualString;
import cz.cvut.kbss.jopa.vocabulary.DC;
import cz.cvut.kbss.jopa.vocabulary.SKOS;
import cz.cvut.kbss.termit.exception.importing.VocabularyImportException;
import cz.cvut.kbss.termit.model.Term;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
Expand All @@ -16,26 +15,39 @@
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Properties;
import java.util.Set;
import java.util.function.Consumer;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import java.util.stream.Stream;

class LocalizedSheetImporter {

private static final Logger LOG = LoggerFactory.getLogger(LocalizedSheetImporter.class);

private final List<Term> existingTerms;

private Map<String, Integer> attributeToColumn;
private String langTag;

/**
 * Creates an importer that builds on terms resolved earlier.
 *
 * @param existingTerms terms to extend with values from the processed sheet; they are matched to sheet rows by
 *                      position and returned unchanged when the sheet cannot be mapped to a language
 */
LocalizedSheetImporter(List<Term> existingTerms) {
this.existingTerms = existingTerms;
}

List<Term> resolveTermsFromSheet(Sheet sheet) {
LOG.debug("Importing terms from sheet '{}'.", sheet.getSheetName());
final LanguageCode lang = resolveLanguage(sheet);
final String langTag = lang.name();
final Optional<LanguageCode> lang = resolveLanguage(sheet);
if (lang.isEmpty()) {
return existingTerms;
}
this.langTag = lang.get().name();
LOG.trace("Sheet '{}' mapped to language tage '{}'.", sheet.getSheetName(), langTag);
final Properties attributeMapping = new Properties();
final Map<String, Term> labelToTerm = new LinkedHashMap<>();
Expand All @@ -50,41 +62,66 @@ List<Term> resolveTermsFromSheet(Sheet sheet) {
}
for (int i = 1; i < sheet.getLastRowNum(); i++) {
final Row termRow = sheet.getRow(i);
final Term term = new Term();
Term term = existingTerms.size() >= i ? existingTerms.get(i - 1) : new Term();
final Optional<String> label = getAttributeValue(termRow, SKOS.PREF_LABEL);
if (label.isEmpty()) {
LOG.trace("Reached empty label column cell at row {}. Finished processing sheet.", i);
break;
}
term.setLabel(MultilingualString.create(label.get(), langTag));
getAttributeValue(termRow, SKOS.DEFINITION).ifPresent(
d -> term.setDefinition(MultilingualString.create(d, langTag)));
getAttributeValue(termRow, SKOS.SCOPE_NOTE).ifPresent(
sn -> term.setDescription(MultilingualString.create(sn, langTag)));
getAttributeValue(termRow, SKOS.ALT_LABEL).ifPresent(al -> term.setAltLabels(
splitIntoMultipleValues(al).stream().map(s -> MultilingualString.create(s, langTag)).collect(
Collectors.toSet())));
getAttributeValue(termRow, SKOS.HIDDEN_LABEL).ifPresent(hl -> term.setHiddenLabels(
splitIntoMultipleValues(hl).stream().map(s -> MultilingualString.create(s, langTag)).collect(
Collectors.toSet())));
getAttributeValue(termRow, SKOS.EXAMPLE).ifPresent(ex -> term.setExamples(
splitIntoMultipleValues(ex).stream().map(s -> MultilingualString.create(s, langTag)).collect(
Collectors.toSet())));
getAttributeValue(termRow, DC.Terms.SOURCE).ifPresent(src -> term.setSources(splitIntoMultipleValues(src)));
getAttributeValue(termRow, SKOS.NOTATION).ifPresent(nt -> term.setNotations(splitIntoMultipleValues(nt)));
getAttributeValue(termRow, DC.Terms.REFERENCES).ifPresent(
nt -> term.setProperties(Map.of(DC.Terms.REFERENCES, splitIntoMultipleValues(nt))));
mapRowToTerm(term, label.get(), termRow);
labelToTerm.put(label.get(), term);
}
return new ArrayList<>(labelToTerm.values());
}

private static LanguageCode resolveLanguage(Sheet sheet) {
/**
 * Copies attribute values from a sheet row into the specified term.
 * <p>
 * Language-dependent attributes (label, definition, scope note, alternative labels, hidden labels, examples) are
 * stored under the language tag of the sheet being processed. Sources, notations and references are not localized
 * and are assigned through the corresponding term setters.
 *
 * @param term    term to populate
 * @param label   preferred label read from the row
 * @param termRow sheet row holding the term attribute values
 */
private void mapRowToTerm(Term term, String label, Row termRow) {
    final MultilingualString prefLabel = initSingularMultilingualString(term::getLabel, term::setLabel);
    prefLabel.set(langTag, label);

    // Singular localized attributes
    final Optional<String> definition = getAttributeValue(termRow, SKOS.DEFINITION);
    definition.ifPresent(text -> initSingularMultilingualString(term::getDefinition, term::setDefinition)
            .set(langTag, text));
    final Optional<String> scopeNote = getAttributeValue(termRow, SKOS.SCOPE_NOTE);
    scopeNote.ifPresent(text -> initSingularMultilingualString(term::getDescription, term::setDescription)
            .set(langTag, text));

    // Plural localized attributes
    final Optional<String> altLabels = getAttributeValue(termRow, SKOS.ALT_LABEL);
    altLabels.ifPresent(cell -> populatePluralMultilingualString(term::getAltLabels, term::setAltLabels,
                                                                 splitIntoMultipleValues(cell)));
    final Optional<String> hiddenLabels = getAttributeValue(termRow, SKOS.HIDDEN_LABEL);
    hiddenLabels.ifPresent(cell -> populatePluralMultilingualString(term::getHiddenLabels, term::setHiddenLabels,
                                                                    splitIntoMultipleValues(cell)));
    final Optional<String> examples = getAttributeValue(termRow, SKOS.EXAMPLE);
    examples.ifPresent(cell -> populatePluralMultilingualString(term::getExamples, term::setExamples,
                                                                splitIntoMultipleValues(cell)));

    // Attributes without a language
    final Optional<String> sources = getAttributeValue(termRow, DC.Terms.SOURCE);
    sources.ifPresent(cell -> term.setSources(splitIntoMultipleValues(cell)));
    final Optional<String> notations = getAttributeValue(termRow, SKOS.NOTATION);
    notations.ifPresent(cell -> term.setNotations(splitIntoMultipleValues(cell)));
    final Optional<String> references = getAttributeValue(termRow, DC.Terms.REFERENCES);
    references.ifPresent(
            cell -> term.setProperties(Map.of(DC.Terms.REFERENCES, splitIntoMultipleValues(cell))));
}

/**
 * Returns the multilingual string exposed by the getter, creating it first when it is missing.
 *
 * @param getter reads the current attribute value
 * @param setter stores a new, empty multilingual string when the getter yields {@code null}
 * @return the value provided by the getter after the optional initialization
 */
private MultilingualString initSingularMultilingualString(Supplier<MultilingualString> getter,
                                                          Consumer<MultilingualString> setter) {
    final boolean missing = getter.get() == null;
    if (missing) {
        final MultilingualString empty = new MultilingualString();
        setter.accept(empty);
    }
    // Read through the getter again so that the caller gets whatever the target actually holds
    return getter.get();
}

/**
 * Adds the specified values, in the language of the current sheet, to a plural multilingual attribute.
 * <p>
 * Each value is attached to the first existing element that does not yet contain a translation in the current
 * language. When no such element exists, a new multilingual string is created for the value.
 *
 * @param getter reads the current attribute value
 * @param setter stores a new empty set when the getter yields {@code null}
 * @param values values in the current sheet language
 */
private void populatePluralMultilingualString(
        Supplier<Set<MultilingualString>> getter, Consumer<Set<MultilingualString>> setter, Set<String> values) {
    Set<MultilingualString> attValue = getter.get();
    if (attValue == null) {
        setter.accept(new HashSet<>());
        attValue = getter.get();
    }
    for (String s : values) {
        final Optional<MultilingualString> mls = attValue.stream().filter(m -> !m.contains(langTag)).findFirst();
        if (mls.isPresent()) {
            final MultilingualString target = mls.get();
            // Adding a translation changes the element's equals/hashCode. Mutating it while it sits in a
            // hash-based set would leave it in a stale bucket, so take it out, modify it and put it back.
            attValue.remove(target);
            target.set(langTag, s);
            attValue.add(target);
        } else {
            attValue.add(MultilingualString.create(s, langTag));
        }
    }
}

private static Optional<LanguageCode> resolveLanguage(Sheet sheet) {
final List<LanguageCode> codes = LanguageCode.findByName(sheet.getSheetName());
if (codes.isEmpty()) {
throw new VocabularyImportException("Unsupported sheet language " + sheet.getSheetName());
LOG.debug("No matching language found for sheet '{}'. Skipping it.", sheet.getSheetName());
return Optional.empty();
}
return codes.get(0);
return Optional.of(codes.get(0));
}

private Map<String, Integer> resolveAttributeColumns(Row attributes, Properties attributeMapping) {
Expand Down
4 changes: 2 additions & 2 deletions src/main/resources/attributes/cs.properties
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
http\://www.w3.org/2004/02/skos/core#prefLabel=Název
http\://www.w3.org/2004/02/skos/core#definition=Definice
http\://www.w3.org/2004/02/skos/core#scopeNote=Dopl?ující poznámka
http\://www.w3.org/2004/02/skos/core#scopeNote=Dopl\u0148ující poznámka
http\://www.w3.org/2004/02/skos/core#altLabel=Synonyma
http\://www.w3.org/2004/02/skos/core#hiddenLabel=Vyhledávací texty
http\://www.w3.org/2004/02/skos/core#example=P?íklady
http\://www.w3.org/2004/02/skos/core#example=P\u0159íklady
http\://www.w3.org/2004/02/skos/core#notation=Notace
http\://purl.org/dc/terms/source=Zdroj
http\://purl.org/dc/terms/references=Reference
Original file line number Diff line number Diff line change
Expand Up @@ -140,4 +140,58 @@ void importCreatesRootTermsWithPluralBasicAttributesFromEnglishSheet() {
assertEquals(Set.of("B"), building.getNotations());
assertEquals(Set.of("a56"), building.getProperties().get(DC.Terms.REFERENCES));
}

/**
 * Imports a workbook with English and Czech sheets and checks that singular attributes of both terms carry
 * values in both languages.
 */
@Test
void importCreatesRootTermsWithBasicAttributesFromMultipleTranslationSheets() {
    when(vocabularyDao.exists(vocabulary.getUri())).thenReturn(true);
    when(vocabularyDao.find(vocabulary.getUri())).thenReturn(Optional.of(vocabulary));

    final VocabularyImporter.ImportConfiguration config =
            new VocabularyImporter.ImportConfiguration(false, vocabulary.getUri(), prePersist);
    final VocabularyImporter.ImportInput input = new VocabularyImporter.ImportInput(
            Constants.MediaType.EXCEL, Environment.loadFile("data/import-simple-en-cs.xlsx"));
    final Vocabulary result = sut.importVocabulary(config, input);
    assertEquals(vocabulary, result);

    final ArgumentCaptor<Term> captor = ArgumentCaptor.forClass(Term.class);
    verify(termService, times(2)).addRootTermToVocabulary(captor.capture(), eq(vocabulary));
    assertEquals(2, captor.getAllValues().size());

    // Term "Building" has label, definition and scope note in both languages
    final Optional<Term> building = captor.getAllValues().stream()
                                          .filter(t -> "Building".equals(t.getLabel().get("en")))
                                          .findAny();
    assertTrue(building.isPresent());
    final Term buildingTerm = building.get();
    assertEquals("Budova", buildingTerm.getLabel().get("cs"));
    assertEquals("Definition of term Building", buildingTerm.getDefinition().get("en"));
    assertEquals("Definice pojmu budova", buildingTerm.getDefinition().get("cs"));
    assertEquals("Building scope note", buildingTerm.getDescription().get("en"));
    assertEquals("Doplňující poznámka pojmu budova", buildingTerm.getDescription().get("cs"));

    // Term "Construction" has label and definition in both languages
    final Optional<Term> construction = captor.getAllValues().stream()
                                              .filter(t -> "Construction".equals(t.getLabel().get("en")))
                                              .findAny();
    assertTrue(construction.isPresent());
    final Term constructionTerm = construction.get();
    assertEquals("Stavba", constructionTerm.getLabel().get("cs"));
    assertEquals("The process of building a building", constructionTerm.getDefinition().get("en"));
    assertEquals("Proces výstavby budovy", constructionTerm.getDefinition().get("cs"));
}

/**
 * Imports a workbook with English and Czech sheets and checks that plural attributes of the single imported term
 * contain the values from both languages.
 */
@Test
void importCreatesRootTermsWithPluralBasicAttributesFromMultipleTranslationSheets() {
    when(vocabularyDao.exists(vocabulary.getUri())).thenReturn(true);
    when(vocabularyDao.find(vocabulary.getUri())).thenReturn(Optional.of(vocabulary));

    final VocabularyImporter.ImportConfiguration config =
            new VocabularyImporter.ImportConfiguration(false, vocabulary.getUri(), prePersist);
    final VocabularyImporter.ImportInput input = new VocabularyImporter.ImportInput(
            Constants.MediaType.EXCEL, Environment.loadFile("data/import-with-plural-atts-en-cs.xlsx"));
    final Vocabulary result = sut.importVocabulary(config, input);
    assertEquals(vocabulary, result);

    final ArgumentCaptor<Term> captor = ArgumentCaptor.forClass(Term.class);
    verify(termService).addRootTermToVocabulary(captor.capture(), eq(vocabulary));
    assertEquals(1, captor.getAllValues().size());
    final Term building = captor.getValue();
    assertEquals("Budova", building.getLabel().get("cs"));

    // Alternative labels
    assertTrue(building.getAltLabels().stream().anyMatch(mls -> "Structure".equals(mls.get("en"))));
    assertTrue(building.getAltLabels().stream().anyMatch(mls -> "House".equals(mls.get("en"))));
    assertTrue(building.getAltLabels().stream().anyMatch(mls -> "dům".equals(mls.get("cs"))));
    assertTrue(building.getAltLabels().stream().anyMatch(mls -> "stavba".equals(mls.get("cs"))));
    // Hidden labels
    assertTrue(building.getHiddenLabels().stream().anyMatch(mls -> "bldng".equals(mls.get("en"))));
    assertTrue(building.getHiddenLabels().stream().anyMatch(mls -> "barák".equals(mls.get("cs"))));
    // Examples
    assertTrue(building.getExamples().stream().anyMatch(mls -> "Dancing house".equals(mls.get("en"))));
    assertTrue(building.getExamples().stream().anyMatch(mls -> "Tančící dům".equals(mls.get("cs"))));
    // Attributes without a language
    assertEquals(Set.of("B"), building.getNotations());
    assertEquals(Set.of("a56"), building.getProperties().get(DC.Terms.REFERENCES));
}
}

0 comments on commit 839b850

Please sign in to comment.