Skip to content

Commit

Permalink
[kbss-cvut/termit-ui#449] Resolve imported term identifier before persisting it so that the existing one can be removed.
Browse files Browse the repository at this point in the history

If the template is used, it does not contain an identifier column, so the id would be generated on persist. On repeated import, this leads to errors because the existing term is not removed before the new one is persisted.
  • Loading branch information
ledsoft committed Aug 29, 2024
1 parent 9087621 commit e65e256
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 87 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ public Vocabulary importVocabulary(ImportConfiguration config, ImportInput data)
}
final Vocabulary targetVocabulary = vocabularyDao.find(config.vocabularyIri()).orElseThrow(
() -> NotFoundException.create(Vocabulary.class, config.vocabularyIri()));
final String termNamespace = resolveVocabularyTermNamespace(targetVocabulary);
try {
List<Term> terms = Collections.emptyList();
Set<TermRelationship> rawDataToInsert = new HashSet<>();
Expand All @@ -114,17 +113,18 @@ public Vocabulary importVocabulary(ImportConfiguration config, ImportInput data)
continue;
}
final LocalizedSheetImporter sheetImporter = new LocalizedSheetImporter(
new LocalizedSheetImporter.Services(termService, languageService, idResolver),
prefixMap, terms, termNamespace);
new LocalizedSheetImporter.Services(termService, languageService),
prefixMap, terms);
terms = sheetImporter.resolveTermsFromSheet(sheet);
rawDataToInsert.addAll(sheetImporter.getRawDataToInsert());
}
terms.stream().filter(t -> t.getUri() != null && termService.exists(t.getUri())).forEach(t -> {
LOG.trace("Term {} already exists. Removing old version.", t);
termService.forceRemove(termService.findRequired(t.getUri()));
// Flush changes to prevent EntityExistsExceptions when term is already managed in PC as different type (Term vs TermInfo)
em.flush();
});
terms.stream().peek(t -> t.setUri(resolveTermIdentifier(targetVocabulary, t)))
.filter(t -> termService.exists(t.getUri())).forEach(t -> {
LOG.trace("Term {} already exists. Removing old version.", t);
termService.forceRemove(termService.findRequired(t.getUri()));
// Flush changes to prevent EntityExistsExceptions when term is already managed in PC as different type (Term vs TermInfo)
em.flush();
});
// Ensure all parents are saved before we start adding children
terms.stream().filter(t -> Utils.emptyIfNull(t.getParentTerms()).isEmpty())
.forEach(root -> {
Expand Down Expand Up @@ -173,6 +173,24 @@ private String resolveVocabularyTermNamespace(Vocabulary vocabulary) {
config.getNamespace().getTerm().getSeparator());
}

/**
 * Resolves the identifier under which the specified term will be persisted into the specified vocabulary.
 * <p>
 * If the term has no identifier (e.g., the imported sheet contained no identifier column), a new one is
 * derived from the vocabulary IRI and the term label in the configured persistence language. If the term
 * has an identifier but it does not start with the vocabulary's term namespace, it is replaced by a
 * derived one as well, so that the term ends up in the target vocabulary's namespace.
 *
 * @param vocabulary Vocabulary into which the term is being imported
 * @param term       The imported term
 * @return Identifier to use for the term
 */
private URI resolveTermIdentifier(Vocabulary vocabulary, Term term) {
    final String termNamespace = resolveVocabularyTermNamespace(vocabulary);
    if (term.getUri() != null && term.getUri().toString().startsWith(termNamespace)) {
        // Identifier already belongs to the expected term namespace - keep it as is
        return term.getUri();
    }
    if (term.getUri() != null) {
        LOG.trace(
                "Existing term identifier {} does not correspond to the expected vocabulary term namespace {}. Adjusting the term id.",
                Utils.uriToString(term.getUri()), termNamespace);
    }
    // Either no identifier was provided or it is in a foreign namespace - derive one from the term label
    return idResolver.generateDerivedIdentifier(vocabulary.getUri(),
            config.getNamespace().getTerm().getSeparator(),
            term.getLabel().get(config.getPersistence().getLanguage()));
}

/**
* Checks whether this importer supports the specified media type.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import cz.cvut.kbss.termit.dto.RdfsResource;
import cz.cvut.kbss.termit.exception.importing.VocabularyImportException;
import cz.cvut.kbss.termit.model.Term;
import cz.cvut.kbss.termit.service.IdentifierResolver;
import cz.cvut.kbss.termit.service.export.util.TabularTermExportUtils;
import cz.cvut.kbss.termit.service.language.LanguageService;
import cz.cvut.kbss.termit.service.repository.TermRepositoryService;
Expand Down Expand Up @@ -55,9 +54,6 @@ class LocalizedSheetImporter {
private final LanguageService languageService;
private final PrefixMap prefixMap;
private final List<Term> existingTerms;
private final IdentifierResolver idResolver;
// Namespace expected based on the vocabulary into which the terms will be imported
private final String expectedTermNamespace;

private Map<String, Integer> attributeToColumn;
private String langTag;
Expand All @@ -68,14 +64,11 @@ class LocalizedSheetImporter {
private final Set<URI> sheetIdentifiers = new HashSet<>();
private List<ExcelImporter.TermRelationship> rawDataToInsert;

LocalizedSheetImporter(Services services, PrefixMap prefixMap, List<Term> existingTerms,
String expectedTermNamespace) {
LocalizedSheetImporter(Services services, PrefixMap prefixMap, List<Term> existingTerms) {
this.termRepositoryService = services.termRepositoryService();
this.languageService = services.languageService();
this.prefixMap = prefixMap;
this.existingTerms = existingTerms;
this.idResolver = services.idResolver();
this.expectedTermNamespace = expectedTermNamespace;
existingTerms.stream().filter(t -> t.getUri() != null).forEach(t -> idToTerm.put(t.getUri(), t));
}

Expand Down Expand Up @@ -141,7 +134,7 @@ private void findTerms(Sheet sheet) {
final Row termRow = sheet.getRow(i);
Term term = existingTerms.size() >= i ? existingTerms.get(i - 1) : new Term();
getAttributeValue(termRow, JsonLd.ID).ifPresent(id -> {
term.setUri(resolveTermUri(id));
term.setUri(URI.create(prefixMap.resolvePrefixed(id)));
if (sheetIdentifiers.contains(term.getUri())) {
throw new VocabularyImportException(
"Sheet " + sheet.getSheetName() + " contains multiple terms with the same identifier: " + id,
Expand Down Expand Up @@ -171,31 +164,6 @@ private void findTerms(Sheet sheet) {
LOG.trace("Found {} term rows.", i - 1);
}

/**
 * If an identifier column is found in the sheet, attempt to use it for term ids.
 * <p>
 * This method first resolves possible prefixes and then ensures the identifier matches the expected namespace
 * provided by the vocabulary into which we are importing. If it does not match, a new identifier is generated based
 * on the expected namespace and the local name extracted from the identifier present in the sheet.
 *
 * @param id Identifier extracted from the sheet
 * @return Valid term identifier matching target vocabulary
 */
private URI resolveTermUri(String id) {
    // Resolve a possibly prefixed value (e.g., "ex:term") to a full identifier
    final URI uriId = URI.create(prefixMap.resolvePrefixed(id));
    if (expectedTermNamespace.equals(IdentifierResolver.extractIdentifierNamespace(uriId))) {
        return uriId;
    }
    LOG.trace(
            "Existing term identifier {} does not correspond to the expected vocabulary term namespace {}. Adjusting the term id.",
            Utils.uriToString(uriId), expectedTermNamespace);
    // Keep the local name from the sheet, but move the term into the expected namespace
    final String localName = IdentifierResolver.extractIdentifierFragment(uriId);
    return idResolver.generateIdentifier(expectedTermNamespace, localName);
}

private void mapRowToTermAttributes(Term term, Row termRow) {
getAttributeValue(termRow, SKOS.DEFINITION).ifPresent(
d -> initSingularMultilingualString(term::getDefinition, term::setDefinition).set(langTag, d));
Expand All @@ -221,7 +189,8 @@ private void mapRowToTermAttributes(Term term, Row termRow) {
rtm -> mapSkosMatchProperties(term, SKOS.RELATED_MATCH, splitIntoMultipleValues(rtm)));
getAttributeValue(termRow, SKOS.EXACT_MATCH).ifPresent(
exm -> mapSkosMatchProperties(term, SKOS.EXACT_MATCH, splitIntoMultipleValues(exm)));
getAttributeValue(termRow, JsonLd.TYPE).flatMap(t -> resolveTermType(t, term)).ifPresent(t -> term.setTypes(Set.of(t)));
getAttributeValue(termRow, JsonLd.TYPE).flatMap(t -> resolveTermType(t, term))
.ifPresent(t -> term.setTypes(Set.of(t)));
resolveTermState(getAttributeValue(termRow, Vocabulary.s_p_ma_stav_pojmu).orElse(null), term).ifPresent(
term::setState);

Expand Down Expand Up @@ -373,7 +342,6 @@ private static Set<String> splitIntoMultipleValues(String value) {
.collect(Collectors.toSet());
}

record Services(TermRepositoryService termRepositoryService, LanguageService languageService,
IdentifierResolver idResolver) {
record Services(TermRepositoryService termRepositoryService, LanguageService languageService) {
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -163,9 +163,9 @@ private void prepareTermForPersist(Term instance, URI vocabularyUri) {
pruneEmptyTranslations(instance);
}

private URI generateIdentifier(URI vocabularyUri, MultilingualString multilingualString) {
private URI generateIdentifier(URI vocabularyUri, MultilingualString termLabel) {
return idResolver.generateDerivedIdentifier(vocabularyUri, config.getNamespace().getTerm().getSeparator(),
multilingualString.get(config.getPersistence().getLanguage()));
termLabel.get(config.getPersistence().getLanguage()));
}

private void addTermAsRootToGlossary(Term instance, URI vocabularyIri) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.doAnswer;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
Expand Down Expand Up @@ -133,7 +132,6 @@ void importThrowsVocabularyDoesNotExistExceptionWhenVocabularyIsNotFound() {
void importCreatesRootTermsWithBasicAttributesFromEnglishSheet() {
when(vocabularyDao.exists(vocabulary.getUri())).thenReturn(true);
when(vocabularyDao.find(vocabulary.getUri())).thenReturn(Optional.of(vocabulary));
initIdentifierGenerator(Constants.DEFAULT_LANGUAGE, false);

final Vocabulary result = sut.importVocabulary(
new VocabularyImporter.ImportConfiguration(false, vocabulary.getUri(), prePersist),
Expand All @@ -155,34 +153,10 @@ void importCreatesRootTermsWithBasicAttributesFromEnglishSheet() {
assertEquals("The process of building a building", construction.get().getDefinition().get("en"));
}

/**
 * Stubs the mocked term service so that persisting a term assigns it an identifier derived from the
 * vocabulary IRI and the term label, mimicking the identifier generation done by the real service.
 *
 * @param lang     Language whose label value is used for identifier generation
 * @param forChild Whether to stub child term persistence in addition to root term persistence
 */
private void initIdentifierGenerator(String lang, boolean forChild) {
    doAnswer(inv -> assignGeneratedId(inv.getArgument(0), lang))
            .when(termService).addRootTermToVocabulary(any(Term.class), eq(vocabulary));
    if (forChild) {
        doAnswer(inv -> assignGeneratedId(inv.getArgument(0), lang))
                .when(termService).addChildTerm(any(Term.class), any(Term.class));
    }
}

// Assigns a generated identifier based on the vocabulary IRI and normalized term label,
// unless the term already has one. Returns null to satisfy the Answer contract.
private Object assignGeneratedId(Term t, String lang) {
    if (t.getUri() == null) {
        t.setUri(URI.create(vocabulary.getUri().toString() + "/" + IdentifierResolver.normalize(
                t.getLabel().get(lang))));
    }
    return null;
}

@Test
void importCreatesRootTermsWithPluralBasicAttributesFromEnglishSheet() {
when(vocabularyDao.exists(vocabulary.getUri())).thenReturn(true);
when(vocabularyDao.find(vocabulary.getUri())).thenReturn(Optional.of(vocabulary));
initIdentifierGenerator(Constants.DEFAULT_LANGUAGE, false);

final Vocabulary result = sut.importVocabulary(
new VocabularyImporter.ImportConfiguration(false, vocabulary.getUri(), prePersist),
Expand Down Expand Up @@ -217,7 +191,6 @@ void importCreatesRootTermsWithPluralBasicAttributesFromEnglishSheet() {
void importCreatesRootTermsWithBasicAttributesFromMultipleTranslationSheets() {
when(vocabularyDao.exists(vocabulary.getUri())).thenReturn(true);
when(vocabularyDao.find(vocabulary.getUri())).thenReturn(Optional.of(vocabulary));
initIdentifierGenerator(Constants.DEFAULT_LANGUAGE, false);

final Vocabulary result = sut.importVocabulary(
new VocabularyImporter.ImportConfiguration(false, vocabulary.getUri(), prePersist),
Expand Down Expand Up @@ -248,7 +221,6 @@ void importCreatesRootTermsWithBasicAttributesFromMultipleTranslationSheets() {
void importCreatesRootTermsWithPluralBasicAttributesFromMultipleTranslationSheets() {
when(vocabularyDao.exists(vocabulary.getUri())).thenReturn(true);
when(vocabularyDao.find(vocabulary.getUri())).thenReturn(Optional.of(vocabulary));
initIdentifierGenerator(Constants.DEFAULT_LANGUAGE, false);

final Vocabulary result = sut.importVocabulary(
new VocabularyImporter.ImportConfiguration(false, vocabulary.getUri(), prePersist),
Expand Down Expand Up @@ -285,7 +257,6 @@ void importCreatesRootTermsWithPluralBasicAttributesFromMultipleTranslationSheet
void importCreatesTermHierarchy() {
when(vocabularyDao.exists(vocabulary.getUri())).thenReturn(true);
when(vocabularyDao.find(vocabulary.getUri())).thenReturn(Optional.of(vocabulary));
initIdentifierGenerator(Constants.DEFAULT_LANGUAGE, true);

final Vocabulary result = sut.importVocabulary(
new VocabularyImporter.ImportConfiguration(false, vocabulary.getUri(), prePersist),
Expand All @@ -309,7 +280,6 @@ void importCreatesTermHierarchy() {
void importSavesRelationshipsBetweenTerms() {
when(vocabularyDao.exists(vocabulary.getUri())).thenReturn(true);
when(vocabularyDao.find(vocabulary.getUri())).thenReturn(Optional.of(vocabulary));
initIdentifierGenerator(Constants.DEFAULT_LANGUAGE, false);

final Vocabulary result = sut.importVocabulary(
new VocabularyImporter.ImportConfiguration(false, vocabulary.getUri(), prePersist),
Expand All @@ -331,7 +301,6 @@ void importSavesRelationshipsBetweenTerms() {
void importImportsTermsWhenAnotherLanguageSheetIsEmpty() {
when(vocabularyDao.exists(vocabulary.getUri())).thenReturn(true);
when(vocabularyDao.find(vocabulary.getUri())).thenReturn(Optional.of(vocabulary));
initIdentifierGenerator(Constants.DEFAULT_LANGUAGE, false);

final Vocabulary result = sut.importVocabulary(
new VocabularyImporter.ImportConfiguration(false, vocabulary.getUri(), prePersist),
Expand All @@ -357,7 +326,6 @@ void importImportsTermsWhenAnotherLanguageSheetIsEmpty() {
void importFallsBackToEnglishColumnLabelsForUnknownLanguages() {
when(vocabularyDao.exists(vocabulary.getUri())).thenReturn(true);
when(vocabularyDao.find(vocabulary.getUri())).thenReturn(Optional.of(vocabulary));
initIdentifierGenerator("de", false);

final Vocabulary result = sut.importVocabulary(
new VocabularyImporter.ImportConfiguration(false, vocabulary.getUri(), prePersist),
Expand All @@ -383,7 +351,6 @@ void importSupportsTermIdentifiers() {
vocabulary.setUri(URI.create("http://example.com"));
when(vocabularyDao.exists(vocabulary.getUri())).thenReturn(true);
when(vocabularyDao.find(vocabulary.getUri())).thenReturn(Optional.of(vocabulary));
initIdentifierGenerator(Constants.DEFAULT_LANGUAGE, false);

final Vocabulary result = sut.importVocabulary(
new VocabularyImporter.ImportConfiguration(false, vocabulary.getUri(), prePersist),
Expand Down Expand Up @@ -414,7 +381,6 @@ void importSupportsPrefixedTermIdentifiers() {
vocabulary.setUri(URI.create("http://example.com"));
when(vocabularyDao.exists(vocabulary.getUri())).thenReturn(true);
when(vocabularyDao.find(vocabulary.getUri())).thenReturn(Optional.of(vocabulary));
initIdentifierGenerator(Constants.DEFAULT_LANGUAGE, false);

final Vocabulary result = sut.importVocabulary(
new VocabularyImporter.ImportConfiguration(false, vocabulary.getUri(), prePersist),
Expand Down Expand Up @@ -444,7 +410,6 @@ void importSupportsPrefixedTermIdentifiers() {
void importAdjustsTermIdentifiersToUseExistingVocabularyIdentifierAndSeparatorAsNamespace() {
when(vocabularyDao.exists(vocabulary.getUri())).thenReturn(true);
when(vocabularyDao.find(vocabulary.getUri())).thenReturn(Optional.of(vocabulary));
initIdentifierGenerator(Constants.DEFAULT_LANGUAGE, false);

final Vocabulary result = sut.importVocabulary(
new VocabularyImporter.ImportConfiguration(false, vocabulary.getUri(), prePersist),
Expand All @@ -467,7 +432,6 @@ void importRemovesExistingInstanceWhenImportedTermAlreadyExists() {
vocabulary.setUri(URI.create("http://example.com"));
when(vocabularyDao.exists(vocabulary.getUri())).thenReturn(true);
when(vocabularyDao.find(vocabulary.getUri())).thenReturn(Optional.of(vocabulary));
initIdentifierGenerator(Constants.DEFAULT_LANGUAGE, false);
final Term existingBuilding = Generator.generateTermWithId();
existingBuilding.setUri(URI.create("http://example.com/terms/building"));
final Term existingConstruction = Generator.generateTermWithId();
Expand All @@ -494,7 +458,6 @@ void importSupportsReferencesToOtherVocabulariesViaTermIdentifiersWhenReferenced
vocabulary.setUri(URI.create("http://example.com"));
when(vocabularyDao.exists(vocabulary.getUri())).thenReturn(true);
when(vocabularyDao.find(vocabulary.getUri())).thenReturn(Optional.of(vocabulary));
initIdentifierGenerator(Constants.DEFAULT_LANGUAGE, false);
when(termService.exists(any())).thenReturn(false);
when(termService.exists(URI.create("http://example.com/another-vocabulary/terms/relatedMatch"))).thenReturn(
true);
Expand Down Expand Up @@ -524,7 +487,6 @@ void importSupportsReferencesToOtherVocabulariesViaTermIdentifiersWhenReferenced
void importResolvesTermStateAndTypesUsingLabels() {
when(vocabularyDao.exists(vocabulary.getUri())).thenReturn(true);
when(vocabularyDao.find(vocabulary.getUri())).thenReturn(Optional.of(vocabulary));
initIdentifierGenerator(Constants.DEFAULT_LANGUAGE, false);
final Term type = Generator.generateTermWithId();
type.setUri(URI.create("http://onto.fel.cvut.cz/ontologies/ufo/object-type"));
type.setLabel(MultilingualString.create("Object Type", Constants.DEFAULT_LANGUAGE));
Expand Down Expand Up @@ -558,7 +520,6 @@ void importSetsConfiguredInitialTermStateWhenSheetDoesNotSpecifyIt() {
MultilingualString.create("Proposed term", Constants.DEFAULT_LANGUAGE), null,
"http://onto.fel.cvut.cz/ontologies/slovník/agendový/popis-dat/pojem/stav-pojmu");
when(languageService.getInitialTermState()).thenReturn(Optional.of(state));
initIdentifierGenerator(Constants.DEFAULT_LANGUAGE, false);

final Vocabulary result = sut.importVocabulary(
new VocabularyImporter.ImportConfiguration(false, vocabulary.getUri(), prePersist),
Expand Down

0 comments on commit e65e256

Please sign in to comment.