From fe8f41f704e8802a6d1d30cc7107062d338489b0 Mon Sep 17 00:00:00 2001 From: Robert Vacareanu Date: Tue, 30 Apr 2024 09:46:41 -0700 Subject: [PATCH] Adapter for Scala Grounders (rvacareanu/grounder) (#887) ## Summary of Changes Added an Adapter for the Scala Grounders package. --------- Co-authored-by: Gus Hahn-Powell --- skema/text_reading/scala/build.sbt | 1 + .../grounding/GrounderFactory.scala | 9 +- .../grounding/GroundingCandidate.scala | 2 +- .../grounding/scala_grounders/Adapter.scala | 82 +++++++++++++++++++ .../scala_grounders/TestAdapter.scala | 62 ++++++++++++++ 5 files changed, 154 insertions(+), 2 deletions(-) create mode 100644 skema/text_reading/scala/src/main/scala/org/ml4ai/skema/text_reading/grounding/scala_grounders/Adapter.scala create mode 100644 skema/text_reading/scala/src/test/scala/org/ml4ai/skema/grounding/scala_grounders/TestAdapter.scala diff --git a/skema/text_reading/scala/build.sbt b/skema/text_reading/scala/build.sbt index 95f9c6905db..2ee8cb8e4e7 100644 --- a/skema/text_reading/scala/build.sbt +++ b/skema/text_reading/scala/build.sbt @@ -37,6 +37,7 @@ libraryDependencies ++= { "com.lihaoyi" %% "requests" % "0.1.8", "io.cequence" %% "openai-scala-client" % "1.0.0.RC.1", "org.scalatest" %% "scalatest" % "3.0.9" % Test, + "org.clulab" %% "scala-grounders" % "0.0.35", ) } diff --git a/skema/text_reading/scala/src/main/scala/org/ml4ai/skema/text_reading/grounding/GrounderFactory.scala b/skema/text_reading/scala/src/main/scala/org/ml4ai/skema/text_reading/grounding/GrounderFactory.scala index 5c31941192d..63a3fa50109 100644 --- a/skema/text_reading/scala/src/main/scala/org/ml4ai/skema/text_reading/grounding/GrounderFactory.scala +++ b/skema/text_reading/scala/src/main/scala/org/ml4ai/skema/text_reading/grounding/GrounderFactory.scala @@ -5,6 +5,9 @@ import org.clulab.processors.Processor import org.clulab.processors.fastnlp.FastNLPProcessor import scala.collection.JavaConverters._ +import scala.io.Source +import 
org.ml4ai.skema.grounding.scala_grounders.ScalaGroundersAdapter + object GrounderFactory { @@ -52,7 +55,11 @@ object GrounderFactory { new PipelineGrounder(Seq(manualGrounder, grounder)) else grounder - case "manual" => manualGrounder + case "manual" => manualGrounder + case "scala-grounders" => + // Similar to `miraembeddings` + val ontologyFilePath = domainConfig.getString("ontologyPath") + ScalaGroundersAdapter.fromFile(groundingConceptsPath=ontologyFilePath) case other => throw new RuntimeException(s"$other - is not implemented as a grounding engine") } diff --git a/skema/text_reading/scala/src/main/scala/org/ml4ai/skema/text_reading/grounding/GroundingCandidate.scala b/skema/text_reading/scala/src/main/scala/org/ml4ai/skema/text_reading/grounding/GroundingCandidate.scala index 69ae452689e..3d8116f3c75 100644 --- a/skema/text_reading/scala/src/main/scala/org/ml4ai/skema/text_reading/grounding/GroundingCandidate.scala +++ b/skema/text_reading/scala/src/main/scala/org/ml4ai/skema/text_reading/grounding/GroundingCandidate.scala @@ -9,7 +9,7 @@ import org.json4s.JsonDSL._ * @param concept instance returned by a grounder implementations * @param score of the grounding algorithm given to concept */ -case class GroundingCandidate(concept: GroundingConcept, score: Float) { +case class GroundingCandidate(concept: GroundingConcept, score: Float, details: Option[String] = None) { def toJValue: JValue = { ("groundingConcept" -> concept.toJValue) ~ diff --git a/skema/text_reading/scala/src/main/scala/org/ml4ai/skema/text_reading/grounding/scala_grounders/Adapter.scala b/skema/text_reading/scala/src/main/scala/org/ml4ai/skema/text_reading/grounding/scala_grounders/Adapter.scala new file mode 100644 index 00000000000..01d4f7cc05c --- /dev/null +++ b/skema/text_reading/scala/src/main/scala/org/ml4ai/skema/text_reading/grounding/scala_grounders/Adapter.scala @@ -0,0 +1,82 @@ +package org.ml4ai.skema.grounding.scala_grounders + +import 
org.clulab.scala_grounders.grounding.GroundingConfig +import org.ml4ai.skema.text_reading.grounding.Grounder +import org.ml4ai.skema.text_reading.grounding.GroundingCandidate +import org.ml4ai.skema.text_reading.grounding.GroundingConcept +import com.typesafe.config.Config +import org.clulab.scala_grounders.grounding.SequentialGrounder +import org.clulab.scala_grounders.model.DKG +import org.clulab.scala_grounders.model.DKGSynonym +import com.typesafe.config.ConfigFactory +import org.clulab.scala_grounders.using +import org.clulab.scala_grounders.model.DKG +import scala.io.Source + + +/** + * This class adapts the data definitions from this project to work with scala-grounder's definition + * Concretely, the changes needed are: + * - SKEMA's GroundingConcept to scala-grounder's DKG (available in `fromConceptToDKG`) + * - scala-grounder's DKG to SKEMA's GroundingConcept (available in `fromDKGToConcept`) + * - Create the scala-grounder Grounder (`grounder = SequentialGrounder()`) + * - Changing `groundingCandidates` to call the right method from the scala-grounder side + * + * @param groundingConcepts -> The concepts which we will use to do the grounding + * Every candidate text for grounding (i.e. 
any text that we + want to ground) will be grounded on these concepts + (Note: depending on the implementation, it is possible that + none of these groundingConcepts candidates are suitable, so + we might not return anything; however, we will never return + a concept that is outside this set) + */ +class ScalaGroundersAdapter(groundingConcepts: Seq[GroundingConcept]) extends Grounder { + lazy val concepts = groundingConcepts.map(fromConceptToDKG) + lazy val grounder = SequentialGrounder().mkFast(concepts) + def groundingCandidates(texts: Seq[String], k: Int): Seq[Seq[GroundingCandidate]] = { + texts.map { text => + // TODO Maybe provide additional context (useful for NeuralGrounder) + grounder.ground(text, None, concepts, k) + .map { result => + GroundingCandidate(fromDKGToConcept(result.dkg), result.score, details = Some(result.groundingDetails.grounderName)) + } + .force.toSeq + } + } + + /** + * Transform a SKEMA's `GroundingConcept` to a scala-grounders' `DKG` + * They have similar meanings, so the map is 1:1 + * + * @param concept + * @return + */ + def fromConceptToDKG(concept: GroundingConcept): DKG = { + DKG(concept.id, concept.name, concept.description, concept.synonyms.map { synonyms => synonyms.map { s => DKGSynonym(s, None) } }.getOrElse(Seq.empty)) + } + + /** + * Transform a scala-grounders' `DKG` to SKEMA's `GroundingConcept` + * They have similar meanings, so the map is 1:1 + * + * @param dkg + * @return + */ + def fromDKGToConcept(dkg: DKG): GroundingConcept = { + GroundingConcept(dkg.id, dkg.name, dkg.description, Option(dkg.synonyms.map(_.value)), None) + } + +} +/** + * Provide alternative ways of creating a `ScalaGroundersAdapter` + */ +object ScalaGroundersAdapter { + def apply(groundingConcepts: Seq[GroundingConcept]): ScalaGroundersAdapter = new ScalaGroundersAdapter(groundingConcepts) + def fromDkgs(dkgs: Seq[DKG]): ScalaGroundersAdapter = new ScalaGroundersAdapter(dkgs.map(dkg => GroundingConcept(dkg.id, dkg.name, dkg.description, 
Option(dkg.synonyms.map(_.value)), None))) + def fromFile(groundingConceptsPath: String): ScalaGroundersAdapter = { + val concepts = using(Source.fromFile(groundingConceptsPath)) { it => + ujson.read(it.mkString).arr.map(it => DKG.fromJson(it)) + } + ScalaGroundersAdapter.fromDkgs(concepts) + } +} diff --git a/skema/text_reading/scala/src/test/scala/org/ml4ai/skema/grounding/scala_grounders/TestAdapter.scala b/skema/text_reading/scala/src/test/scala/org/ml4ai/skema/grounding/scala_grounders/TestAdapter.scala new file mode 100644 index 00000000000..3a5beafbabb --- /dev/null +++ b/skema/text_reading/scala/src/test/scala/org/ml4ai/skema/grounding/scala_grounders/TestAdapter.scala @@ -0,0 +1,62 @@ +package org.ml4ai.skema.grounding.scala_grounders + +import org.ml4ai.skema.test.Test + +import java.nio.{Buffer, ByteBuffer, ByteOrder} + +import org.ml4ai.skema.text_reading.grounding.Grounder +import org.ml4ai.skema.text_reading.grounding.GroundingCandidate +import org.ml4ai.skema.text_reading.grounding.GroundingConcept + +/** + * + * Running command: + * sbt "testOnly org.ml4ai.skema.grounding.scala_grounders.TestAdapter" + */ +class TestAdapter extends Test { + + behavior of "ScalaGroundersAdapter" + + val gcs = Seq( + GroundingConcept( + id = "id1", + name = "dog", + description = Some("this is a cute dog"), + synonyms = None, + embedding = None + ), + GroundingConcept( + id = "id2", + name = "cat", + description = Some("this is a cute cat"), + synonyms = None, + embedding = None + ), + GroundingConcept( + id = "id3", + name = "dog cat", + description = Some("here we have a dog and a cat"), + synonyms = None, + embedding = None + ), + GroundingConcept( + id = "id4", + name = "cat", + description = Some("this is a cute cat"), + synonyms = None, + embedding = None + ), + ) + + val sga = new ScalaGroundersAdapter(gcs) + + val result = sga.groundingCandidates(Seq("dog"), 10).head + + // Check that the first one is a GroundingCandidate with id1 + it should "ground" in { + 
result.foreach(println) + result.head.concept.id should be ("id1") + } + + +}