Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Question Type Classification #455

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,8 @@ lazy val saulExamples = (project in file("saul-examples")).
ccgGroupId % "saul-pos-tagger-models" % "1.4",
ccgGroupId % "saul-er-models" % "1.8",
ccgGroupId % "saul-srl-models" % "1.3",
ccgGroupId % "saul-qaTypeClassification-models" % "2.0",
ccgGroupId % "qustionTypeClassification-resources" % "1.0",
"org.json" % "json" % "20140107",
"com.twitter" % "hbc-core" % "2.2.0",
"org.rogach" %% "scallop" % "2.0.5"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/** This software is released under the University of Illinois/Research and Academic Use License. See
* the LICENSE file in the root folder for details. Copyright (c) 2016
*
* Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
* http://cogcomp.cs.illinois.edu/
*/
package edu.illinois.cs.cogcomp.saulexamples.nlp.QuestionTypeClassification

import edu.illinois.cs.cogcomp.annotation.Annotator
import edu.illinois.cs.cogcomp.core.datastructures.ViewNames
import edu.illinois.cs.cogcomp.core.datastructures.textannotation.{ Constituent, SpanLabelView, TextAnnotation }
import edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager
import edu.illinois.cs.cogcomp.saulexamples.nlp.QuestionTypeClassification.QuestionTypeClassificationClassifiers.{ CoarseTypeClassifier, FineTypeClassifier }

/** Annotator that labels a whole question with its fine- and coarse-grained question type.
  *
  * Both labels are added as constituents spanning every token of the text, inside a single
  * `SpanLabelView` registered under `finalViewName`. The fine-grained label is added first,
  * the coarse-grained label second.
  *
  * @param finalViewName name of the view added to each `TextAnnotation`; defaults to "QUESTION_TYPE"
  */
class QuestionTypeAnnotator(val finalViewName: String = "QUESTION_TYPE")
  extends Annotator(finalViewName, Array(ViewNames.TOKENS, ViewNames.NER_CONLL,
    ViewNames.SHALLOW_PARSE, ViewNames.POS, ViewNames.LEMMA)) {

  /** Model directory assigned to both classifiers before `load()` is called.
    * Declared as a constant (`final val` without a type) so it does not depend on
    * constructor initialization order.
    */
  private final val modelDirectory =
    "models/edu/illinois/cs/cogcomp/saulexamples/nlp/QuestionTypeClassification/models/"

  /** Intentionally empty: the classifiers below are lazy and are loaded on first use. */
  override def initialize(rm: ResourceManager): Unit = {}

  /** Coarse-grained type classifier; constructed and loaded on first access. */
  lazy val coarseClassifier: CoarseTypeClassifier = {
    val classifier = new CoarseTypeClassifier(QuestionTypeClassificationDataModel.propertyList)
    classifier.modelDir = modelDirectory
    classifier.load()
    classifier
  }

  /** Fine-grained type classifier; constructed and loaded on first access. */
  lazy val fineClassifier: FineTypeClassifier = {
    val classifier = new FineTypeClassifier(QuestionTypeClassificationDataModel.propertyList)
    classifier.modelDir = modelDirectory
    classifier.load()
    classifier
  }

  /** Builds a constituent of this annotator's view that covers all tokens of `ta`.
    *
    * @param ta    text annotation the constituent belongs to
    * @param label predicted question type
    * @param score score the classifier assigned to `label`
    */
  private def wholeQuestionConstituent(ta: TextAnnotation, label: String, score: Double): Constituent =
    new Constituent(label, score, finalViewName, ta, 0, ta.getTokens.length)

  /** Classifies the text of `ta` and adds the resulting view to it.
    *
    * @param ta text annotation to label; its text is treated as one question
    */
  override def addView(ta: TextAnnotation): Unit = {
    val question = QuestionTypeInstance(ta.getText, None, None, Some(ta))
    QuestionTypeClassificationDataModel.question.populate(List(question)) // TODO: is this step necessary?
    val view = new SpanLabelView(finalViewName, finalViewName, ta, 1.0)

    // Predict each label, then look up the score the same classifier gave to that label.
    val fineLabel = fineClassifier(question)
    val fineScore = fineClassifier.classifier.scores(question).get(fineLabel)
    val coarseLabel = coarseClassifier(question)
    val coarseScore = coarseClassifier.classifier.scores(question).get(coarseLabel)

    view.addConstituent(wholeQuestionConstituent(ta, fineLabel, fineScore))
    view.addConstituent(wholeQuestionConstituent(ta, coarseLabel, coarseScore))
    ta.addView(finalViewName, view)
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,14 @@ object QuestionTypeClassificationApp {
/** Trains, saves and then tests the given classifier.
  *
  * The steps run in this order: populate the instances, learn, save, test.
  *
  * @param classifier the classifier to train and evaluate
  */
def evaluate(classifier: TypeClassifier) = {
populateInstances()
// presumably 20 training iterations — TODO confirm against the learn() API
classifier.learn(20)
// Saved before testing; other code in this project calls load() on fresh classifier
// instances, presumably reading what is written here — verify the model directory matches.
classifier.save()
classifier.test()
}

def classifySampleQuestions() = {
val coarseClassifier = new CoarseTypeClassifier(propertyList)
val coarseClassifier = new CoarseTypeClassifier(QuestionTypeClassificationDataModel.propertyList)
coarseClassifier.load()
val fineClassifier = new FineTypeClassifier(propertyList)
val fineClassifier = new FineTypeClassifier(QuestionTypeClassificationDataModel.propertyList)
fineClassifier.load()
import QuestionTypeClassificationSensors._
val rawQuestions = Seq(
Expand All @@ -49,53 +50,29 @@ object QuestionTypeClassificationApp {
pipeline.addView(ta, ViewNames.POS)
pipeline.addView(ta, ViewNames.SHALLOW_PARSE)
pipeline.addView(ta, ViewNames.NER_CONLL)
val questioin = QuestionTypeInstance(q, None, None, Some(ta))
val question = QuestionTypeInstance(q, None, None, Some(ta))
println(q)
println(coarseClassifier(questioin))
println(fineClassifier(questioin))
println(coarseClassifier(question))
println(fineClassifier(question))
}
}

val propertyList = List(
QuestionTypeClassificationDataModel.surfaceWords,
QuestionTypeClassificationDataModel.lemma,
QuestionTypeClassificationDataModel.pos,
QuestionTypeClassificationDataModel.chunks,
QuestionTypeClassificationDataModel.headChunks,
QuestionTypeClassificationDataModel.ner,
QuestionTypeClassificationDataModel.containsFoodterm,
QuestionTypeClassificationDataModel.containsMountain,
QuestionTypeClassificationDataModel.containsProfession,
QuestionTypeClassificationDataModel.numberNormalizer,
QuestionTypeClassificationDataModel.wordnetSynsetsFirstSense,
QuestionTypeClassificationDataModel.wordnetSynsetsAllSenses,
QuestionTypeClassificationDataModel.wordnetLexicographerFileNamesFirstSense,
QuestionTypeClassificationDataModel.wordnetLexicographerFileNamesAllSenses,
QuestionTypeClassificationDataModel.wordnetHypernymFirstSenseLexicographerFileNames,
QuestionTypeClassificationDataModel.wordnetHypernymAllSensesLexicographerFileNames,
QuestionTypeClassificationDataModel.wordnetHypernymsFirstSense,
QuestionTypeClassificationDataModel.wordnetHypernymsAllSenses,
QuestionTypeClassificationDataModel.wordnetPointersFirstSense,
QuestionTypeClassificationDataModel.wordnetSynonymsFirstSense,
QuestionTypeClassificationDataModel.wordnetSynonymsAllSenses,
QuestionTypeClassificationDataModel.wordnetSynonymsAllSenses,
QuestionTypeClassificationDataModel.wordGroups
)

def coarseClassifier(): Unit = {
val classifier = new CoarseTypeClassifier(propertyList)
val classifier = new CoarseTypeClassifier(QuestionTypeClassificationDataModel.propertyList)
evaluate(classifier)
}

def fineClassifier(): Unit = {
val classifier = new FineTypeClassifier(propertyList)
val classifier = new FineTypeClassifier(QuestionTypeClassificationDataModel.propertyList)
evaluate(classifier)
}

def main(args: Array[String]): Unit = {
val parser = new ArgumentParser(args)
parser.experimentType() match {
case 1 => coarseClassifier()
case 1 =>
coarseClassifier()
fineClassifier()
case 2 => fineClassifier()
case 3 => classifySampleQuestions()
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ object QuestionTypeClassificationDataModel extends DataModel {

val containsProfession = property(question) { x: QuestionTypeInstance =>
val lemmas = x.textAnnotationOpt.get.getView(ViewNames.LEMMA).getConstituents.asScala.map { _.getSurfaceForm }.toList
lemmas.exists(lemma => QuestionTypeClassificationSensors.professons.contains(lemma)).toString
lemmas.exists(lemma => QuestionTypeClassificationSensors.professions.contains(lemma)).toString
}

val containsFoodterm = property(question) { x: QuestionTypeInstance =>
Expand Down Expand Up @@ -144,4 +144,31 @@ object QuestionTypeClassificationDataModel extends DataModel {
val cons = x.textAnnotationOpt.get.getView(ViewNames.TOKENS).getConstituents.asScala.map { _.getSurfaceForm.toLowerCase.trim }.toSet
QuestionTypeClassificationSensors.wordGroupLists.collect { case (label, set) if set.intersect(cons).nonEmpty => label }
}

/** Properties of this data model that are passed to the constructors of
  * `CoarseTypeClassifier` and `FineTypeClassifier`.
  */
val propertyList = List(
surfaceWords,
lemma,
pos,
chunks,
headChunks,
ner,
containsFoodterm,
containsMountain,
containsProfession,
numberNormalizer,
wordnetSynsetsFirstSense,
wordnetSynsetsAllSenses,
wordnetLexicographerFileNamesFirstSense,
wordnetLexicographerFileNamesAllSenses,
wordnetHypernymFirstSenseLexicographerFileNames,
wordnetHypernymAllSensesLexicographerFileNames,
wordnetHypernymsFirstSense,
wordnetHypernymsAllSenses,
wordnetPointersFirstSense,
wordnetSynonymsFirstSense,
wordnetSynonymsAllSenses,
// NOTE(review): wordnetSynonymsAllSenses is listed twice. This looks like a copy/paste slip
// (possibly another property was intended) — confirm before removing, since saved models
// may have been trained with this exact list.
wordnetSynonymsAllSenses,
wordGroups
)

}
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,17 @@ import java.io.File
import java.util.Properties

import edu.illinois.cs.cogcomp.core.datastructures.ViewNames
import edu.illinois.cs.cogcomp.core.io.LineIO
import edu.illinois.cs.cogcomp.nlp.common.PipelineConfigurator._
import edu.illinois.cs.cogcomp.saulexamples.nlp.TextAnnotationFactory

import scala.io.Source

object QuestionTypeClassificationSensors {
val dataFolder = "../data/QuestionTypeClassification/"
lazy val professons = Source.fromFile(new File(dataFolder + "prof.txt")).getLines().toSet
lazy val mountainKeywords = Source.fromFile(new File(dataFolder + "mount.txt")).getLines().toSet
lazy val foodKeywords = Source.fromFile(new File(dataFolder + "food.txt")).getLines().toSet

val resourceFolder = "lists/"
lazy val professions = openFileFromClassPath("prof.txt").toSet
lazy val mountainKeywords = openFileFromClassPath("mount.txt").toSet
lazy val foodKeywords = openFileFromClassPath("food.txt").toSet
lazy val pipeline = {
val settings = new Properties()
TextAnnotationFactory.disableSettings(settings, USE_SRL_NOM)
Expand All @@ -43,7 +43,8 @@ object QuestionTypeClassificationSensors {
}

def getInstances(fileName: String): List[QuestionTypeInstance] = {
val allLines = Source.fromFile(new File(dataFolder + fileName), "ISO-8859-1").getLines().toList
println("reading instances . . . ")
val allLines = openFileFromClassPath(fileName)
allLines.map { line =>
val split = line.split(" ")
val splitLabel = split(0).split(":")
Expand All @@ -57,18 +58,22 @@ object QuestionTypeClassificationSensors {
}
}

def getListOfFiles(dir: String): List[File] = {
val d = new File(dir)
if (d.exists && d.isDirectory) {
d.listFiles.filter(_.isFile).toList
} else {
List[File]()
}
import scala.collection.JavaConverters._

// Names of the word-group list resources. Each name is appended to `resourceFolder` and read
// from the classpath when `wordGroupLists` is built, and the name itself is paired with the
// resulting word set (it serves as that group's label).
val fileList = List("At", "Why", "body", "currency", "eff", "last", "ord", "prod", "stand", "title",
"How", "abb", "cause", "date", "event", "letter", "other", "prof", "state", "unit",
"In", "act", "city", "def", "fast", "pastBe", "quot", "substance", "univ",
"InOn", "an", "code", "desc", "food", "loca", "peop", "religion", "symbol", "vessel",
"On", "anim", "color", "dimen", "group", "money", "perc", "singleBe", "tech", "weight",
"What", "art", "comp", "dise", "instrument", "mount", "plant", "speak", "temp", "word",
"Where", "be", "country", "dist", "job", "name", "popu", "speed", "term",
"Who", "big", "culture", "do", "lang", "num", "presentBe", "sport", "time")

/** Reads the classpath resource `fileName` through `LineIO.readFromClasspath` and returns
  * the result as an immutable Scala list (callers treat each element as one line).
  *
  * NOTE(review): no charset is passed here — confirm the resources decode correctly with
  * whatever default `LineIO` applies.
  *
  * @param fileName name of the resource, relative to the classpath
  */
def openFileFromClassPath(fileName: String): List[String] = {
  val resourceLines = LineIO.readFromClasspath(fileName)
  resourceLines.asScala.toList
}

lazy val wordGroupLists = {
val files = getListOfFiles(dataFolder + "publish/lists")
assert(files.nonEmpty, "list of files not found")
files.map { f: File => f.getName -> Source.fromFile(f).getLines().toSet.map { line: String => line.toLowerCase.trim } }
val wordGroupLists = {
fileList.map { f: String => f -> openFileFromClassPath(resourceFolder + f).toSet.map { line: String => line.toLowerCase.trim } }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/** This software is released under the University of Illinois/Research and Academic Use License. See
* the LICENSE file in the root folder for details. Copyright (c) 2016
*
* Developed by: The Cognitive Computations Group, University of Illinois at Urbana-Champaign
* http://cogcomp.cs.illinois.edu/
*/
package edu.illinois.cs.cogcomp.saulexamples.nlp.QuestionTypeClassification

import edu.illinois.cs.cogcomp.core.datastructures.ViewNames
import org.scalatest._

/** Checks that `QuestionTypeAnnotator` adds its view to a handful of sample questions. */
class QuestionTypeAnnotatorTest extends FlatSpec with Matchers {

  val questionTypeAnnotator = new QuestionTypeAnnotator()

  "questionTypeClassifier " should " correctly add a view to TextAnnotation instances " in {

    val rawQuestions = Seq(
      "How's the weather in Champaign-Urbana?",
      "How far is Champaign to Chicago?",
      "Who found dinasours?", "Which day is Christmas?",
      "What can be cured by cheap pizza?",
      "What can be cured by cheese pizza?",
      "Who is Michael?",
      "When is Easter in 2017?"
    )
    import QuestionTypeClassificationSensors._

    // Views added on top of the basic annotation before the annotator runs, in the original order.
    val prerequisiteViews = Seq(ViewNames.LEMMA, ViewNames.POS, ViewNames.SHALLOW_PARSE, ViewNames.NER_CONLL)

    rawQuestions.foreach { q =>
      val ta = pipeline.createBasicTextAnnotation("", "", q)
      prerequisiteViews.foreach { viewName => pipeline.addView(ta, viewName) }
      questionTypeAnnotator.addView(ta)

      // Assert the annotator's own view directly; the count check alone would still pass
      // if some unrelated view were present in its place.
      ta.hasView(questionTypeAnnotator.finalViewName) should be(true)
      ta.getAvailableViews.size() should be >= 7
      println(ta.getView(questionTypeAnnotator.finalViewName))
    }
  }
}