Skip to content

Commit

Permalink
Change options that control the "update from ontology".
Browse files Browse the repository at this point in the history
Change the OWLHelper.UpdateMode enum to remove the following values:

* DELETE_MISSING_SUBJECT,
* DELETE_OBSOLETE_SUBJECT,
* DELETE_MISSING_OBJECT,
* DELETE_OBSOLETE_OBJECT

and replace them with the following values instead:

* DELETE_MISSING,
* DELETE_OBSOLETE,
* ONLY_SUBJECT,
* ONLY_OBJECT.

The new logic is that the DELETE_MISSING and DELETE_OBSOLETE modes will
operate indiscriminately on both the subject side and the object side of
mappings, unless either ONLY_SUBJECT or ONLY_OBJECT is also specified
to instruct the updater to only consider one side.

ONLY_SUBJECT and ONLY_OBJECT also apply to the other update modes
(UPDATE_LABEL and UPDATE_SOURCE). For example, with a mode set to
ONLY_SUBJECT|UPDATE_LABEL, only the subject label will ever be updated,
even if the object also exists in the ontology.

The change is reflected in SSSOM-CLI's --update-from-ontology option,
which now accepts 3 flags to dictate what should be updated:

* label (update the labels),
* source (update the sources),
* existence (delete mapping if the subject or object does not exist or
  is obsolete);

and 2 flags to dictate whether to check/update only the object or only
the subject:

* subject (only update subject's label and source, and delete if subject
  is missing);
* object (only update object's label and source, and delete if object is
  missing).
  • Loading branch information
gouttegd committed Jul 12, 2024
1 parent c88cbc0 commit 60f3ad8
Show file tree
Hide file tree
Showing 5 changed files with 123 additions and 67 deletions.
38 changes: 33 additions & 5 deletions cli/src/main/java/org/incenp/obofoundry/sssom/cli/SimpleCLI.java
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ private void addExcludeRule(String[] args) {

private static class OntologyOptions {
@Option(names = "--update-from-ontology",
paramLabel = "ONTOLOGY[:subject,object]",
paramLabel = "ONTOLOGY[:subject,object,label,source,existence]",
description = "Update the set using data from the specified ontology.")
String[] ontologiesForUpdate;

Expand Down Expand Up @@ -346,19 +346,47 @@ private void postProcess(MappingSet ms) {

String[] parts = ontFile.split(":", 2);
if ( parts.length == 2 ) {
boolean replace = false;
EnumSet<UpdateMode> setMode = EnumSet.noneOf(UpdateMode.class);
for ( String flag : parts[1].split(",") ) {
switch ( flag ) {
case "subject":
mode.add(UpdateMode.DELETE_MISSING_SUBJECT);
mode.add(UpdateMode.DELETE_OBSOLETE_SUBJECT);
setMode.add(UpdateMode.ONLY_SUBJECT);
break;

case "object":
mode.add(UpdateMode.DELETE_MISSING_OBJECT);
mode.add(UpdateMode.DELETE_OBSOLETE_OBJECT);
setMode.add(UpdateMode.ONLY_OBJECT);
break;

case "label":
replace = true;
setMode.add(UpdateMode.UPDATE_LABEL);
break;

case "source":
replace = true;
setMode.add(UpdateMode.UPDATE_SOURCE);
break;

case "existence":
replace = true;
setMode.add(UpdateMode.DELETE_MISSING);
setMode.add(UpdateMode.DELETE_OBSOLETE);
break;
}
}

if ( setMode.contains(UpdateMode.ONLY_SUBJECT) && setMode.contains(UpdateMode.ONLY_OBJECT) ) {
// Accept "subject,object" as meaning that we want to check both sides
setMode.remove(UpdateMode.ONLY_SUBJECT);
setMode.remove(UpdateMode.ONLY_OBJECT);
}

if ( replace ) {
mode = setMode;
} else {
mode.addAll(setMode);
}
}

try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -345,11 +345,13 @@ void testUpdateFromOntology() throws IOException {

// Deleting missing subjects
TestUtils.runCommand(0, new String[] { "exo2c.sssom.tsv" }, "exo2c-subject-checked-against-ont1.sssom.tsv",
new String[] { "--update-from-ontology", "../core/src/test/resources/owl/ont1.ofn:subject" });
new String[] { "--update-from-ontology",
"../core/src/test/resources/owl/ont1.ofn:subject,existence,label" });

// Deleting missing objects
TestUtils.runCommand(0, new String[] { "exo2c.sssom.tsv" }, "exo2c-object-checked-against-ont1.sssom.tsv",
new String[] { "--update-from-ontology", "../core/src/test/resources/owl/ont1.ofn:object" });
new String[] { "--update-from-ontology",
"../core/src/test/resources/owl/ont1.ofn:object,existence,label" });
}

@Test
Expand Down
85 changes: 45 additions & 40 deletions core/src/main/java/org/incenp/obofoundry/sssom/owl/OWLHelper.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import java.util.ArrayList;
import java.util.EnumSet;
import java.util.function.Consumer;

import org.incenp.obofoundry.sssom.model.Mapping;
import org.incenp.obofoundry.sssom.model.MappingSet;
Expand Down Expand Up @@ -129,47 +130,24 @@ public static void updateMappingSet(MappingSet ms, OWLOntology ontology, String
* @param mode What to update in the mapping set.
*/
public static void updateMappingSet(MappingSet ms, OWLOntology ontology, String language, boolean langStrict, EnumSet<UpdateMode> mode) {
IRI ontologyIRI = ontology.getOntologyID().getOntologyIRI().orNull();
ArrayList<Mapping> mappings = new ArrayList<Mapping>();

for ( Mapping m : ms.getMappings() ) {
IRI subject = IRI.create(m.getSubjectId());
IRI object = IRI.create(m.getObjectId());
boolean keep = true;

if ( ontology.containsEntityInSignature(subject, Imports.INCLUDED) ) {
if ( isObsolete(ontology, subject) && mode.contains(UpdateMode.DELETE_OBSOLETE_SUBJECT) ) {
if ( !mode.contains(UpdateMode.ONLY_OBJECT) ) {
if ( !updateForEntity(m, IRI.create(m.getSubjectId()), ontology, language, langStrict, mode,
(s) -> m.setSubjectLabel(s),
(s) -> m.setSubjectSource(s)) ) {
keep = false;
}
if ( mode.contains(UpdateMode.UPDATE_LABEL) ) {
String label = getLabel(ontology, subject, language, langStrict);
if ( label != null ) {
m.setSubjectLabel(label);
}
}
if ( mode.contains(UpdateMode.UPDATE_SOURCE) && ontologyIRI != null ) {
m.setSubjectSource(ontologyIRI.toString());
}
}
else if ( mode.contains(UpdateMode.DELETE_MISSING_SUBJECT) ) {
keep = false;
}

if ( ontology.containsEntityInSignature(object, Imports.INCLUDED) ) {
if ( isObsolete(ontology, object) && mode.contains(UpdateMode.DELETE_OBSOLETE_OBJECT) ) {
if ( !mode.contains(UpdateMode.ONLY_SUBJECT) ) {
if ( !updateForEntity(m, IRI.create(m.getObjectId()), ontology, language, langStrict, mode,
(s) -> m.setObjectLabel(s), (s) -> m.setObjectSource(s)) ) {
keep = false;
}
if ( mode.contains(UpdateMode.UPDATE_LABEL) ) {
String label = getLabel(ontology, object, language, langStrict);
if ( label != null ) {
m.setObjectLabel(label);
}
}
if ( mode.contains(UpdateMode.UPDATE_SOURCE) && ontologyIRI != null ) {
m.setObjectSource(ontologyIRI.toString());
}
} else if ( mode.contains(UpdateMode.DELETE_MISSING_OBJECT) ) {
keep = false;
}

if ( keep ) {
Expand All @@ -180,6 +158,35 @@ else if ( mode.contains(UpdateMode.DELETE_MISSING_SUBJECT) ) {
ms.setMappings(mappings);
}

/*
 * Checks one side (subject or object) of a mapping against the ontology and
 * applies the requested update modes to it.
 *
 * The mapping itself is not touched directly: label and source changes are
 * pushed through the two consumers, so the same routine serves both sides.
 *
 * Returns false when the mapping should be dropped from the set, i.e. when
 * the entity is absent and DELETE_MISSING is set, or when it is present,
 * obsolete, and DELETE_OBSOLETE is set. Returns true otherwise.
 */
private static boolean updateForEntity(Mapping mapping, IRI entity, OWLOntology ontology, String language,
        boolean langStrict, EnumSet<UpdateMode> mode, Consumer<String> labelUpdater,
        Consumer<String> sourceUpdater) {
    if ( !ontology.containsEntityInSignature(entity, Imports.INCLUDED) ) {
        // Entity unknown to the ontology: nothing to update, only decide whether
        // the mapping survives.
        return !mode.contains(UpdateMode.DELETE_MISSING);
    }

    boolean dropAsObsolete = isObsolete(ontology, entity) && mode.contains(UpdateMode.DELETE_OBSOLETE);

    // Label and source are refreshed even if the mapping ends up being dropped.
    if ( mode.contains(UpdateMode.UPDATE_LABEL) ) {
        String ontologyLabel = getLabel(ontology, entity, language, langStrict);
        if ( ontologyLabel != null ) {
            labelUpdater.accept(ontologyLabel);
        }
    }

    if ( mode.contains(UpdateMode.UPDATE_SOURCE) ) {
        // An anonymous ontology has no IRI; leave the source field alone then.
        IRI sourceIRI = ontology.getOntologyID().getOntologyIRI().orNull();
        if ( sourceIRI != null ) {
            sourceUpdater.accept(sourceIRI.toString());
        }
    }

    return !dropAsObsolete;
}

/**
* Modes of operation for the
* {@link OWLHelper#updateMappingSet(MappingSet, OWLOntology, String, boolean, EnumSet)}
Expand All @@ -200,26 +207,24 @@ public enum UpdateMode {
UPDATE_SOURCE,

/**
* Removes any mapping whose subject does not exist in the ontology.
* Removes any mapping whose subject or object does not exist in the ontology.
*/
DELETE_MISSING_SUBJECT,
DELETE_MISSING,

/**
* Removes any mapping whose subject exists in the ontology but is marked as
* obsolete.
* Removes any mapping whose subject or object is marked as obsolete.
*/
DELETE_OBSOLETE_SUBJECT,
DELETE_OBSOLETE,

/**
* Removes any mapping whose object does not exist in the ontology.
* Only consider the subject side of a mapping.
*/
DELETE_MISSING_OBJECT,
ONLY_SUBJECT,

/**
* Removes any mapping whose object exists in the ontology but is marked as
* obsolete.
* Only consider the object side of a mapping.
*/
DELETE_OBSOLETE_OBJECT;
ONLY_OBJECT;

public static final EnumSet<UpdateMode> ALL = EnumSet.allOf(UpdateMode.class);
}
Expand Down
45 changes: 31 additions & 14 deletions core/src/site/apt/sssom-cli.apt
Original file line number Diff line number Diff line change
Expand Up @@ -116,26 +116,43 @@ sssom-cli --input set.sssom.tsv:metadata.yaml

The <<<--update-from-ontology>>> option allows checking and updating
the mapping set against an OWL ontology. It expects the filename of an
ontology in any format supported by the OWL API. If the subject
(respectively the object) of a mapping exists in the ontology, the
mapping’s <<<subject_label>>> (respectively <<<object_label>>>) field
will be updated if necessary to match the <<<rdfs:label>>> of the
corresponding entity in the ontology, and the <<<subject_source>>>
(respectively <<<object_source>>>) field will be set to the ontology’s
IRI.
ontology in any format supported by the OWL API.

The filename may be followed by a colon and a list of
comma-separated flags (<<<:flag1,flag2,...>>>). There are currently
two available flags, which enable additional behaviours:
comma-separated flags (<<<:flag1,flag2,...>>>) which will control the
exact behaviour of the option.

[subject] Remove any mapping whose subject does not correspond to
an existing and non-deprecated entity in the ontology.
Available flags are:

[label] If the subject (respectively the object) of a mapping exists
in the ontology, the mapping’s <<<subject_label>>> (resp.
<<<object_label>>>) will be updated to match the
<<<rdfs:label>>> of the corresponding entity in the ontology.

[source] If the subject (respectively the object) of a mapping exists
in the ontology, the mapping’s <<<subject_source>>> (resp.
<<<object_source>>>) will be set to the ontology’s IRI.

[existence] If the subject or the object of a mapping does not exist
in the ontology or is deprecated, the mapping is removed
from the set.

[subject] Only consider the subject side of mappings when updating the
labels, the sources, and/or checking for existence.

[object] Only consider the object side of mappings when updating the
labels, the sources, and/or checking for existence.

[object] Remove any mapping whose object does not correspond to an
existing and non-deprecated entity in the ontology.

[]

If no flags are specified, the default flags are <<<label,source>>>.
If only a <<<subject>>> or <<<object>>> flag is specified, it is added
to the default flags (so, <<<:subject>>> is equivalent to
<<<:subject,label,source>>>). Any other flag resets the default flags;
so to check for existence in addition to updating the labels and the
sources, all corresponding flags must be explicitly specified
(<<<:existence,label,source>>>).

The <<<--update-from-ontology>>> option may be specified several times
to check a mapping set against several ontologies consecutively.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,11 +151,13 @@ void testDeleteMissingSubject() {
Assertions.assertEquals(2, ms.getMappings().size());

// Likewise if we select to remove OBSOLETE subjects
OWLHelper.updateMappingSet(ms, ontology, null, false, EnumSet.of(UpdateMode.DELETE_OBSOLETE_SUBJECT));
OWLHelper.updateMappingSet(ms, ontology, null, false,
EnumSet.of(UpdateMode.DELETE_OBSOLETE, UpdateMode.ONLY_SUBJECT));
Assertions.assertEquals(2, ms.getMappings().size());

// Delete the second mapping
OWLHelper.updateMappingSet(ms, ontology, null, false, EnumSet.of(UpdateMode.DELETE_MISSING_SUBJECT));
OWLHelper.updateMappingSet(ms, ontology, null, false,
EnumSet.of(UpdateMode.DELETE_MISSING, UpdateMode.ONLY_SUBJECT));
Assertions.assertEquals(1, ms.getMappings().size());
Assertions.assertEquals(IRI_BASE + "no_label", ms.getMappings().get(0).getSubjectId());
}
Expand All @@ -171,11 +173,13 @@ void testDeleteMissingObject() {
Assertions.assertEquals(2, ms.getMappings().size());

// Likewise if we select to remove OBSOLETE objects
OWLHelper.updateMappingSet(ms, ontology, null, false, EnumSet.of(UpdateMode.DELETE_OBSOLETE_OBJECT));
OWLHelper.updateMappingSet(ms, ontology, null, false,
EnumSet.of(UpdateMode.DELETE_OBSOLETE, UpdateMode.ONLY_OBJECT));
Assertions.assertEquals(2, ms.getMappings().size());

// Delete the second mapping
OWLHelper.updateMappingSet(ms, ontology, null, false, EnumSet.of(UpdateMode.DELETE_MISSING_OBJECT));
OWLHelper.updateMappingSet(ms, ontology, null, false,
EnumSet.of(UpdateMode.DELETE_MISSING, UpdateMode.ONLY_OBJECT));
Assertions.assertEquals(1, ms.getMappings().size());
Assertions.assertEquals(IRI_BASE + "no_label", ms.getMappings().get(0).getObjectId());
}
Expand All @@ -192,7 +196,7 @@ void testDeleteObsoleteSubject() {
factory.getOWLAnnotationProperty(OWLRDFVocabulary.OWL_DEPRECATED.getIRI()),
IRI.create(IRI_BASE + "no_label"), factory.getOWLLiteral(true)));

OWLHelper.updateMappingSet(ms, ontology, null, false, EnumSet.of(UpdateMode.DELETE_OBSOLETE_SUBJECT));
OWLHelper.updateMappingSet(ms, ontology, null, false, EnumSet.of(UpdateMode.DELETE_OBSOLETE));
Assertions.assertTrue(ms.getMappings().isEmpty());
}

Expand All @@ -208,7 +212,7 @@ void testDeleteObsoleteObject() {
factory.getOWLAnnotationProperty(OWLRDFVocabulary.OWL_DEPRECATED.getIRI()),
IRI.create(IRI_BASE + "no_label"), factory.getOWLLiteral(true)));

OWLHelper.updateMappingSet(ms, ontology, null, false, EnumSet.of(UpdateMode.DELETE_OBSOLETE_OBJECT));
OWLHelper.updateMappingSet(ms, ontology, null, false, EnumSet.of(UpdateMode.DELETE_OBSOLETE));
Assertions.assertTrue(ms.getMappings().isEmpty());
}

Expand Down

0 comments on commit 60f3ad8

Please sign in to comment.