From e65e25652fef3f462d6474eb7371ae01640a52af Mon Sep 17 00:00:00 2001 From: Martin Ledvinka Date: Thu, 29 Aug 2024 14:45:40 +0200 Subject: [PATCH] [kbss-cvut/termit-ui#449] Resolve imported term identifier before persisting it so that existing one can be removed. If the template is used, it does not contain identifier column, so the id would be generated on persist. On repeated import, this leads to errors because the existing term is not removed before the new one is persisted. --- .../service/importer/excel/ExcelImporter.java | 36 ++++++++++++---- .../excel/LocalizedSheetImporter.java | 42 +++---------------- .../repository/TermRepositoryService.java | 4 +- .../importer/excel/ExcelImporterTest.java | 39 ----------------- 4 files changed, 34 insertions(+), 87 deletions(-) diff --git a/src/main/java/cz/cvut/kbss/termit/service/importer/excel/ExcelImporter.java b/src/main/java/cz/cvut/kbss/termit/service/importer/excel/ExcelImporter.java index 75d299c5e..a23228b71 100644 --- a/src/main/java/cz/cvut/kbss/termit/service/importer/excel/ExcelImporter.java +++ b/src/main/java/cz/cvut/kbss/termit/service/importer/excel/ExcelImporter.java @@ -99,7 +99,6 @@ public Vocabulary importVocabulary(ImportConfiguration config, ImportInput data) } final Vocabulary targetVocabulary = vocabularyDao.find(config.vocabularyIri()).orElseThrow( () -> NotFoundException.create(Vocabulary.class, config.vocabularyIri())); - final String termNamespace = resolveVocabularyTermNamespace(targetVocabulary); try { List terms = Collections.emptyList(); Set rawDataToInsert = new HashSet<>(); @@ -114,17 +113,18 @@ public Vocabulary importVocabulary(ImportConfiguration config, ImportInput data) continue; } final LocalizedSheetImporter sheetImporter = new LocalizedSheetImporter( - new LocalizedSheetImporter.Services(termService, languageService, idResolver), - prefixMap, terms, termNamespace); + new LocalizedSheetImporter.Services(termService, languageService), + prefixMap, terms); terms = sheetImporter.resolveTermsFromSheet(sheet); rawDataToInsert.addAll(sheetImporter.getRawDataToInsert()); } - terms.stream().filter(t -> t.getUri() != null && termService.exists(t.getUri())).forEach(t -> { - LOG.trace("Term {} already exists. Removing old version.", t); - termService.forceRemove(termService.findRequired(t.getUri())); - // Flush changes to prevent EntityExistsExceptions when term is already managed in PC as different type (Term vs TermInfo) - em.flush(); - }); + terms.stream().peek(t -> t.setUri(resolveTermIdentifier(targetVocabulary, t))) + .filter(t -> termService.exists(t.getUri())).forEach(t -> { + LOG.trace("Term {} already exists. Removing old version.", t); + termService.forceRemove(termService.findRequired(t.getUri())); + // Flush changes to prevent EntityExistsExceptions when term is already managed in PC as different type (Term vs TermInfo) + em.flush(); + }); // Ensure all parents are saved before we start adding children terms.stream().filter(t -> Utils.emptyIfNull(t.getParentTerms()).isEmpty()) .forEach(root -> { @@ -173,6 +173,24 @@ private String resolveVocabularyTermNamespace(Vocabulary vocabulary) { config.getNamespace().getTerm().getSeparator()); } + private URI resolveTermIdentifier(Vocabulary vocabulary, Term term) { + final String termNamespace = resolveVocabularyTermNamespace(vocabulary); + if (term.getUri() == null) { + return idResolver.generateDerivedIdentifier(vocabulary.getUri(), + config.getNamespace().getTerm().getSeparator(), + term.getLabel().get(config.getPersistence().getLanguage())); + } + if (term.getUri() != null && !term.getUri().toString().startsWith(termNamespace)) { + LOG.trace( + "Existing term identifier {} does not correspond to the expected vocabulary term namespace {}. Adjusting the term id.", + Utils.uriToString(term.getUri()), termNamespace); + return idResolver.generateDerivedIdentifier(vocabulary.getUri(), + config.getNamespace().getTerm().getSeparator(), + term.getLabel().get(config.getPersistence().getLanguage())); + } + return term.getUri(); + } + /** * Checks whether this importer supports the specified media type. * diff --git a/src/main/java/cz/cvut/kbss/termit/service/importer/excel/LocalizedSheetImporter.java b/src/main/java/cz/cvut/kbss/termit/service/importer/excel/LocalizedSheetImporter.java index 325df13a6..6037978f4 100644 --- a/src/main/java/cz/cvut/kbss/termit/service/importer/excel/LocalizedSheetImporter.java +++ b/src/main/java/cz/cvut/kbss/termit/service/importer/excel/LocalizedSheetImporter.java @@ -8,7 +8,6 @@ import cz.cvut.kbss.termit.dto.RdfsResource; import cz.cvut.kbss.termit.exception.importing.VocabularyImportException; import cz.cvut.kbss.termit.model.Term; -import cz.cvut.kbss.termit.service.IdentifierResolver; import cz.cvut.kbss.termit.service.export.util.TabularTermExportUtils; import cz.cvut.kbss.termit.service.language.LanguageService; import cz.cvut.kbss.termit.service.repository.TermRepositoryService; @@ -55,9 +54,6 @@ class LocalizedSheetImporter { private final LanguageService languageService; private final PrefixMap prefixMap; private final List existingTerms; - private final IdentifierResolver idResolver; - // Namespace expected based on the vocabulary into which the terms will be imported - private final String expectedTermNamespace; private Map attributeToColumn; private String langTag; @@ -68,14 +64,11 @@ class LocalizedSheetImporter { private final Set sheetIdentifiers = new HashSet<>(); private List rawDataToInsert; - LocalizedSheetImporter(Services services, PrefixMap prefixMap, List existingTerms, - String expectedTermNamespace) { + LocalizedSheetImporter(Services services, PrefixMap prefixMap, List existingTerms) { this.termRepositoryService = services.termRepositoryService(); this.languageService = services.languageService(); this.prefixMap = prefixMap; this.existingTerms = existingTerms; - this.idResolver = services.idResolver(); - this.expectedTermNamespace = expectedTermNamespace; existingTerms.stream().filter(t -> t.getUri() != null).forEach(t -> idToTerm.put(t.getUri(), t)); } @@ -141,7 +134,7 @@ private void findTerms(Sheet sheet) { final Row termRow = sheet.getRow(i); Term term = existingTerms.size() >= i ? existingTerms.get(i - 1) : new Term(); getAttributeValue(termRow, JsonLd.ID).ifPresent(id -> { - term.setUri(resolveTermUri(id)); + term.setUri(URI.create(prefixMap.resolvePrefixed(id))); if (sheetIdentifiers.contains(term.getUri())) { throw new VocabularyImportException( "Sheet " + sheet.getSheetName() + " contains multiple terms with the same identifier: " + id, @@ -171,31 +164,6 @@ private void findTerms(Sheet sheet) { LOG.trace("Found {} term rows.", i - 1); } - /** - * If an identifier column is found in the sheet, attempt to use it for term ids. - *

- * This methods first resolves possible prefixes and then ensures the identifier matches the expected namespace - * provided by the vocabulary into which we are importing. If it does not match, a new identifier is generated based - * on the expected namespace and the local name extracted from the identifier present in the sheet. - * - * @param id Identifier extracted from the sheet - * @return Valid term identifier matching target vocabulary - */ - private URI resolveTermUri(String id) { - // Handle prefix if it is used - id = prefixMap.resolvePrefixed(id); - final URI uriId = URI.create(id); - if (expectedTermNamespace.equals(IdentifierResolver.extractIdentifierNamespace(uriId))) { - return URI.create(id); - } else { - LOG.trace( - "Existing term identifier {} does not correspond to the expected vocabulary term namespace {}. Adjusting the term id.", - Utils.uriToString(uriId), expectedTermNamespace); - final String localName = IdentifierResolver.extractIdentifierFragment(uriId); - return idResolver.generateIdentifier(expectedTermNamespace, localName); - } - } - private void mapRowToTermAttributes(Term term, Row termRow) { getAttributeValue(termRow, SKOS.DEFINITION).ifPresent( d -> initSingularMultilingualString(term::getDefinition, term::setDefinition).set(langTag, d)); @@ -221,7 +189,8 @@ private void mapRowToTermAttributes(Term term, Row termRow) { rtm -> mapSkosMatchProperties(term, SKOS.RELATED_MATCH, splitIntoMultipleValues(rtm))); getAttributeValue(termRow, SKOS.EXACT_MATCH).ifPresent( exm -> mapSkosMatchProperties(term, SKOS.EXACT_MATCH, splitIntoMultipleValues(exm))); - getAttributeValue(termRow, JsonLd.TYPE).flatMap(t -> resolveTermType(t, term)).ifPresent(t -> term.setTypes(Set.of(t))); + getAttributeValue(termRow, JsonLd.TYPE).flatMap(t -> resolveTermType(t, term)) + .ifPresent(t -> term.setTypes(Set.of(t))); resolveTermState(getAttributeValue(termRow, Vocabulary.s_p_ma_stav_pojmu).orElse(null), term).ifPresent( term::setState); @@ -373,7 +342,6 @@ private static Set splitIntoMultipleValues(String value) { .collect(Collectors.toSet()); } - record Services(TermRepositoryService termRepositoryService, LanguageService languageService, - IdentifierResolver idResolver) { + record Services(TermRepositoryService termRepositoryService, LanguageService languageService) { } } diff --git a/src/main/java/cz/cvut/kbss/termit/service/repository/TermRepositoryService.java b/src/main/java/cz/cvut/kbss/termit/service/repository/TermRepositoryService.java index ab1022aae..8acf9a004 100644 --- a/src/main/java/cz/cvut/kbss/termit/service/repository/TermRepositoryService.java +++ b/src/main/java/cz/cvut/kbss/termit/service/repository/TermRepositoryService.java @@ -163,9 +163,9 @@ private void prepareTermForPersist(Term instance, URI vocabularyUri) { pruneEmptyTranslations(instance); } - private URI generateIdentifier(URI vocabularyUri, MultilingualString multilingualString) { + private URI generateIdentifier(URI vocabularyUri, MultilingualString termLabel) { return idResolver.generateDerivedIdentifier(vocabularyUri, config.getNamespace().getTerm().getSeparator(), - multilingualString.get(config.getPersistence().getLanguage())); + termLabel.get(config.getPersistence().getLanguage())); } private void addTermAsRootToGlossary(Term instance, URI vocabularyIri) { diff --git a/src/test/java/cz/cvut/kbss/termit/service/importer/excel/ExcelImporterTest.java b/src/test/java/cz/cvut/kbss/termit/service/importer/excel/ExcelImporterTest.java index 74bb3d7f5..6457a0f62 100644 --- a/src/test/java/cz/cvut/kbss/termit/service/importer/excel/ExcelImporterTest.java +++ b/src/test/java/cz/cvut/kbss/termit/service/importer/excel/ExcelImporterTest.java @@ -52,7 +52,6 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.eq; -import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.never; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; @@ -133,7 +132,6 @@ void importThrowsVocabularyDoesNotExistExceptionWhenVocabularyIsNotFound() { void importCreatesRootTermsWithBasicAttributesFromEnglishSheet() { when(vocabularyDao.exists(vocabulary.getUri())).thenReturn(true); when(vocabularyDao.find(vocabulary.getUri())).thenReturn(Optional.of(vocabulary)); - initIdentifierGenerator(Constants.DEFAULT_LANGUAGE, false); final Vocabulary result = sut.importVocabulary( new VocabularyImporter.ImportConfiguration(false, vocabulary.getUri(), prePersist), @@ -155,34 +153,10 @@ void importCreatesRootTermsWithBasicAttributesFromEnglishSheet() { assertEquals("The process of building a building", construction.get().getDefinition().get("en")); } - private void initIdentifierGenerator(String lang, boolean forChild) { - doAnswer(inv -> { - final Term t = inv.getArgument(0); - if (t.getUri() != null) { - return null; - } - t.setUri(URI.create(vocabulary.getUri().toString() + "/" + IdentifierResolver.normalize( - t.getLabel().get(lang)))); - return null; - }).when(termService).addRootTermToVocabulary(any(Term.class), eq(vocabulary)); - if (forChild) { - doAnswer(inv -> { - final Term t = inv.getArgument(0); - if (t.getUri() != null) { - return null; - } - t.setUri(URI.create(vocabulary.getUri().toString() + "/" + IdentifierResolver.normalize( - t.getLabel().get(lang)))); - return null; - }).when(termService).addChildTerm(any(Term.class), any(Term.class)); - } - } - @Test void importCreatesRootTermsWithPluralBasicAttributesFromEnglishSheet() { when(vocabularyDao.exists(vocabulary.getUri())).thenReturn(true); when(vocabularyDao.find(vocabulary.getUri())).thenReturn(Optional.of(vocabulary)); - initIdentifierGenerator(Constants.DEFAULT_LANGUAGE, false); final Vocabulary result = sut.importVocabulary( new VocabularyImporter.ImportConfiguration(false, vocabulary.getUri(), prePersist), @@ -217,7 +191,6 @@ void importCreatesRootTermsWithPluralBasicAttributesFromEnglishSheet() { void importCreatesRootTermsWithBasicAttributesFromMultipleTranslationSheets() { when(vocabularyDao.exists(vocabulary.getUri())).thenReturn(true); when(vocabularyDao.find(vocabulary.getUri())).thenReturn(Optional.of(vocabulary)); - initIdentifierGenerator(Constants.DEFAULT_LANGUAGE, false); final Vocabulary result = sut.importVocabulary( new VocabularyImporter.ImportConfiguration(false, vocabulary.getUri(), prePersist), @@ -248,7 +221,6 @@ void importCreatesRootTermsWithBasicAttributesFromMultipleTranslationSheets() { void importCreatesRootTermsWithPluralBasicAttributesFromMultipleTranslationSheets() { when(vocabularyDao.exists(vocabulary.getUri())).thenReturn(true); when(vocabularyDao.find(vocabulary.getUri())).thenReturn(Optional.of(vocabulary)); - initIdentifierGenerator(Constants.DEFAULT_LANGUAGE, false); final Vocabulary result = sut.importVocabulary( new VocabularyImporter.ImportConfiguration(false, vocabulary.getUri(), prePersist), @@ -285,7 +257,6 @@ void importCreatesRootTermsWithPluralBasicAttributesFromMultipleTranslationSheet void importCreatesTermHierarchy() { when(vocabularyDao.exists(vocabulary.getUri())).thenReturn(true); when(vocabularyDao.find(vocabulary.getUri())).thenReturn(Optional.of(vocabulary)); - initIdentifierGenerator(Constants.DEFAULT_LANGUAGE, true); final Vocabulary result = sut.importVocabulary( new VocabularyImporter.ImportConfiguration(false, vocabulary.getUri(), prePersist), @@ -309,7 +280,6 @@ void importCreatesTermHierarchy() { void importSavesRelationshipsBetweenTerms() { when(vocabularyDao.exists(vocabulary.getUri())).thenReturn(true); when(vocabularyDao.find(vocabulary.getUri())).thenReturn(Optional.of(vocabulary)); - initIdentifierGenerator(Constants.DEFAULT_LANGUAGE, false); final Vocabulary result = sut.importVocabulary( new VocabularyImporter.ImportConfiguration(false, vocabulary.getUri(), prePersist), @@ -331,7 +301,6 @@ void importSavesRelationshipsBetweenTerms() { void importImportsTermsWhenAnotherLanguageSheetIsEmpty() { when(vocabularyDao.exists(vocabulary.getUri())).thenReturn(true); when(vocabularyDao.find(vocabulary.getUri())).thenReturn(Optional.of(vocabulary)); - initIdentifierGenerator(Constants.DEFAULT_LANGUAGE, false); final Vocabulary result = sut.importVocabulary( new VocabularyImporter.ImportConfiguration(false, vocabulary.getUri(), prePersist), @@ -357,7 +326,6 @@ void importImportsTermsWhenAnotherLanguageSheetIsEmpty() { void importFallsBackToEnglishColumnLabelsForUnknownLanguages() { when(vocabularyDao.exists(vocabulary.getUri())).thenReturn(true); when(vocabularyDao.find(vocabulary.getUri())).thenReturn(Optional.of(vocabulary)); - initIdentifierGenerator("de", false); final Vocabulary result = sut.importVocabulary( new VocabularyImporter.ImportConfiguration(false, vocabulary.getUri(), prePersist), @@ -383,7 +351,6 @@ void importSupportsTermIdentifiers() { vocabulary.setUri(URI.create("http://example.com")); when(vocabularyDao.exists(vocabulary.getUri())).thenReturn(true); when(vocabularyDao.find(vocabulary.getUri())).thenReturn(Optional.of(vocabulary)); - initIdentifierGenerator(Constants.DEFAULT_LANGUAGE, false); final Vocabulary result = sut.importVocabulary( new VocabularyImporter.ImportConfiguration(false, vocabulary.getUri(), prePersist), @@ -414,7 +381,6 @@ void importSupportsPrefixedTermIdentifiers() { vocabulary.setUri(URI.create("http://example.com")); when(vocabularyDao.exists(vocabulary.getUri())).thenReturn(true); when(vocabularyDao.find(vocabulary.getUri())).thenReturn(Optional.of(vocabulary)); - initIdentifierGenerator(Constants.DEFAULT_LANGUAGE, false); final Vocabulary result = sut.importVocabulary( new VocabularyImporter.ImportConfiguration(false, vocabulary.getUri(), prePersist), @@ -444,7 +410,6 @@ void importSupportsPrefixedTermIdentifiers() { void importAdjustsTermIdentifiersToUseExistingVocabularyIdentifierAndSeparatorAsNamespace() { when(vocabularyDao.exists(vocabulary.getUri())).thenReturn(true); when(vocabularyDao.find(vocabulary.getUri())).thenReturn(Optional.of(vocabulary)); - initIdentifierGenerator(Constants.DEFAULT_LANGUAGE, false); final Vocabulary result = sut.importVocabulary( new VocabularyImporter.ImportConfiguration(false, vocabulary.getUri(), prePersist), @@ -467,7 +432,6 @@ void importRemovesExistingInstanceWhenImportedTermAlreadyExists() { vocabulary.setUri(URI.create("http://example.com")); when(vocabularyDao.exists(vocabulary.getUri())).thenReturn(true); when(vocabularyDao.find(vocabulary.getUri())).thenReturn(Optional.of(vocabulary)); - initIdentifierGenerator(Constants.DEFAULT_LANGUAGE, false); final Term existingBuilding = Generator.generateTermWithId(); existingBuilding.setUri(URI.create("http://example.com/terms/building")); final Term existingConstruction = Generator.generateTermWithId(); @@ -494,7 +458,6 @@ void importSupportsReferencesToOtherVocabulariesViaTermIdentifiersWhenReferenced vocabulary.setUri(URI.create("http://example.com")); when(vocabularyDao.exists(vocabulary.getUri())).thenReturn(true); when(vocabularyDao.find(vocabulary.getUri())).thenReturn(Optional.of(vocabulary)); - initIdentifierGenerator(Constants.DEFAULT_LANGUAGE, false); when(termService.exists(any())).thenReturn(false); when(termService.exists(URI.create("http://example.com/another-vocabulary/terms/relatedMatch"))).thenReturn( true); @@ -524,7 +487,6 @@ void importSupportsReferencesToOtherVocabulariesViaTermIdentifiersWhenReferenced void importResolvesTermStateAndTypesUsingLabels() { when(vocabularyDao.exists(vocabulary.getUri())).thenReturn(true); when(vocabularyDao.find(vocabulary.getUri())).thenReturn(Optional.of(vocabulary)); - initIdentifierGenerator(Constants.DEFAULT_LANGUAGE, false); final Term type = Generator.generateTermWithId(); type.setUri(URI.create("http://onto.fel.cvut.cz/ontologies/ufo/object-type")); type.setLabel(MultilingualString.create("Object Type", Constants.DEFAULT_LANGUAGE)); @@ -558,7 +520,6 @@ void importSetsConfiguredInitialTermStateWhenSheetDoesNotSpecifyIt() { MultilingualString.create("Proposed term", Constants.DEFAULT_LANGUAGE), null, "http://onto.fel.cvut.cz/ontologies/slovnĂ­k/agendovĂ˝/popis-dat/pojem/stav-pojmu"); when(languageService.getInitialTermState()).thenReturn(Optional.of(state)); - initIdentifierGenerator(Constants.DEFAULT_LANGUAGE, false); final Vocabulary result = sut.importVocabulary( new VocabularyImporter.ImportConfiguration(false, vocabulary.getUri(), prePersist),