Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Pairwise classifier for POS #246

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ lazy val commonSettings = Seq(
Resolver.mavenLocal,
"CogcompSoftware" at "http://cogcomp.cs.illinois.edu/m2repo/"
),
javaOptions ++= List("-Xmx6g"),
javaOptions ++= List("-Xmx6g", "-XX:+UseG1GC"),
libraryDependencies ++= Seq(
"edu.illinois.cs.cogcomp" % "LBJava" % "1.2.16" withSources,
"edu.illinois.cs.cogcomp" % "illinois-core-utilities" % cogcompNLPVersion withSources,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ object ConstrainedClassifier {
def constraint[HEAD <: AnyRef](f: HEAD => FirstOrderConstraint)(implicit headTag: ClassTag[HEAD]): LfsConstraint[HEAD] = {
val hash = f.hashCode()
ConstraintManager.getOrElseUpdate(hash, new LfsConstraint[HEAD] {
override def makeConstrainDef(x: HEAD): FirstOrderConstraint = f(x)
override def makeConstraintDef(x: HEAD): FirstOrderConstraint = f(x)
}).asInstanceOf[LfsConstraint[HEAD]]
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -96,38 +96,39 @@ class FirstOrderConstraints(val r: FirstOrderConstraint) {

}

class LHSFirstOrderEqualityWithValueLBP(cls: Learner, t: AnyRef) {
class LHSFirstOrderEqualityWithValueLBP(learner: Learner, t: AnyRef) {

// probably we need to write here
// LHSFirstOrderEqualityWithValueLBP(cls : Learner, t : AnyRef) extends ConstraintTrait

val lbjRepr = new FirstOrderVariable(cls, t)
// This is the implicit variable in the ILP
val lbjVariable = new FirstOrderVariable(learner, t)

def is(v: String): FirstOrderConstraint = {
new FirstOrderEqualityWithValue(true, lbjRepr, v)
new FirstOrderEqualityWithValue(true, lbjVariable, v)
}

//TODO: not sure if this works correctly. Make sure it works.
def is(v: LHSFirstOrderEqualityWithValueLBP): FirstOrderConstraint = {
new FirstOrderEqualityWithVariable(true, lbjRepr, v.lbjRepr)
new FirstOrderEqualityWithVariable(true, lbjVariable, v.lbjVariable)
}

def isTrue: FirstOrderConstraint = is("true")

def isNotTrue: FirstOrderConstraint = is("false")

def isNot(v: String): FirstOrderConstraint = {
new FirstOrderNegation(new FirstOrderEqualityWithValue(true, lbjRepr, v))
new FirstOrderNegation(new FirstOrderEqualityWithValue(true, lbjVariable, v))
}

def isNot(v: LHSFirstOrderEqualityWithValueLBP): FirstOrderConstraint = {
new FirstOrderNegation(new FirstOrderEqualityWithVariable(true, lbjRepr, v.lbjRepr))
new FirstOrderNegation(new FirstOrderEqualityWithVariable(true, lbjVariable, v.lbjVariable))
}

def in(v: Array[String]): FirstOrderConstraint = {
val falseConstant = new FirstOrderDisjunction(new FirstOrderConstant(false), new FirstOrderConstant(false))
v.foldRight(falseConstant) { (value, newConstraint) =>
new FirstOrderDisjunction(new FirstOrderEqualityWithValue(true, lbjRepr, value), newConstraint)
new FirstOrderDisjunction(new FirstOrderEqualityWithValue(true, lbjVariable, value), newConstraint)
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,19 @@ import scala.reflect.ClassTag

abstract class LfsConstraint[T <: AnyRef](implicit val tag: ClassTag[T]) {

def makeConstrainDef(x: T): FirstOrderConstraint
def makeConstraintDef(x: T): FirstOrderConstraint

def evalDiscreteValue(t: T): String = {
this.makeConstrainDef(t).evaluate().toString
this.makeConstraintDef(t).evaluate().toString
}

def apply(t: T) = makeConstrainDef(t)
def apply(t: T) = makeConstraintDef(t)

def transfer: ParameterizedConstraint = {
new ParameterizedConstraint() {
override def makeConstraint(__example: AnyRef): FirstOrderConstraint = {
val t: T = __example.asInstanceOf[T]
makeConstrainDef(t)
makeConstraintDef(t)
}

override def discreteValue(__example: AnyRef): String =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ trait DataModel {
r => r.to.tag.toString.equals(tag.toString) && r.from.tag.toString.equals(headTag.toString)
}
if (r.isEmpty) {
throw new Exception(s"Failed to found relations between $tag to $headTag")
throw new Exception(s"Failed to find relations between $tag to $headTag")
} else r flatMap (_.asInstanceOf[Edge[NEED, FROM]].backward.neighborsOf(t)) distinct
} else r flatMap (_.asInstanceOf[Edge[FROM, NEED]].forward.neighborsOf(t)) distinct
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ case class DiscreteProperty[T <: AnyRef](

private def _discreteValue(__example: AnyRef): String = {
val t: T = __example.asInstanceOf[T]
self.sensor(t).mkString("")
self.sensor(t)
}
}
case _ => new ClassifierContainsInLBP {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,17 @@
package edu.illinois.cs.cogcomp.saulexamples.nlp.POSTagger

import edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent
import java.io.PrintStream

import edu.illinois.cs.cogcomp.core.datastructures.textannotation.{ Sentence, Constituent }
import edu.illinois.cs.cogcomp.lbj.pos.POSBaselineLearner
import edu.illinois.cs.cogcomp.lbjava.learn.{ SparseAveragedPerceptron, SparseNetworkLearner }
import edu.illinois.cs.cogcomp.saul.classifier.Learnable
import edu.illinois.cs.cogcomp.lbjava.classify.{ FeatureVector, ScoreSet }
import edu.illinois.cs.cogcomp.lbjava.infer.{ FirstOrderConstant, FirstOrderConstraint, OJalgoHook }
import edu.illinois.cs.cogcomp.lbjava.learn.{ Learner, SparseAveragedPerceptron, SparseNetworkLearner }
import edu.illinois.cs.cogcomp.saul.classifier.{ ConstrainedClassifier, Learnable }
import edu.illinois.cs.cogcomp.saul.constraint.ConstraintTypeConversion._
import edu.illinois.cs.cogcomp.saulexamples.nlp.CommonSensors
import edu.illinois.cs.cogcomp.saulexamples.nlp.POSTagger.POSDataModel._

object POSClassifiers {
Expand All @@ -19,6 +27,31 @@ object POSClassifiers {
POSTaggerUnknown.classifier.valueOf(x, MikheevClassifier.classifier.allowableTags(x)).getStringValue
}

/** A [[Learner]] facade over the known-word and unknown-word taggers.
  *
  * Only `discreteValue` is implemented; every other `Learner` operation overridden here is left as `???`
  * and throws if called.
  */
object POSMixedClassifier extends Learner {
  override def write(out: PrintStream): Unit = ???

  override def scores(exampleFeatures: Array[Int], exampleValues: Array[Double]): ScoreSet = ???

  override def classify(exampleFeatures: Array[Int], exampleValues: Array[Double]): FeatureVector = ???

  override def learn(exampleFeatures: Array[Int], exampleValues: Array[Double], exampleLabels: Array[Int], labelValues: Array[Double]): Unit = ???

  /** Predicts the tag of a token.
    *
    * @param obj the example; cast to `Constituent` without checking
    * @return the string value predicted by `POSTaggerKnown` (restricted to the baseline's allowable tags)
    *         when the baseline has observed the token's word form, otherwise the one predicted by
    *         `POSTaggerUnknown` (restricted to the Mikheev classifier's allowable tags)
    */
  override def discreteValue(obj: Object): String = {
    val token = obj.asInstanceOf[Constituent]
    val seenByBaseline = BaselineClassifier.classifier.observed(wordForm(token))
    if (seenByBaseline) {
      val candidateTags = BaselineClassifier.classifier.allowableTags(wordForm(token))
      POSTaggerKnown.classifier.valueOf(token, candidateTags).getStringValue
    } else {
      val candidateTags = MikheevClassifier.classifier.allowableTags(token)
      POSTaggerUnknown.classifier.valueOf(token, candidateTags).getStringValue
    }
  }
}

/** Returns the score set for a token, choosing the tagger the same way the mixed classifier does.
  *
  * @param x the token to score
  * @return scores from `POSTaggerKnown` over the baseline's allowable tags when the baseline has observed
  *         the token's word form; otherwise scores from `POSTaggerUnknown` over the Mikheev classifier's
  *         allowable tags
  */
def POSClassifierScoreSet(x: Constituent): ScoreSet = {
  val seenByBaseline = BaselineClassifier.classifier.observed(wordForm(x))
  if (seenByBaseline) {
    val candidateTags = BaselineClassifier.classifier.allowableTags(wordForm(x))
    POSTaggerKnown.classifier.scores(x, candidateTags)
  } else {
    val candidateTags = MikheevClassifier.classifier.allowableTags(x)
    POSTaggerUnknown.classifier.scores(x, candidateTags)
  }
}

object POSTaggerKnown extends Learnable[Constituent](POSDataModel) {
def label = POSLabel
override def feature = using(wordForm, baselineTarget, labelTwoBefore, labelOneBefore,
Expand All @@ -29,7 +62,6 @@ object POSClassifiers {
p.thickness = 2
baseLTU = new SparseAveragedPerceptron(p)
}
override val loggging = true
}

object POSTaggerUnknown extends Learnable[Constituent](POSDataModel) {
Expand All @@ -42,20 +74,78 @@ object POSClassifiers {
p.thickness = 4
baseLTU = new SparseAveragedPerceptron(p)
}
override val loggging = true
}

object BaselineClassifier extends Learnable[Constituent](POSDataModel) {
def label = POSLabel
override def feature = using(wordForm)
override lazy val classifier = new POSBaselineLearner()
override val loggging = true
}

object MikheevClassifier extends Learnable[Constituent](POSDataModel) {
def label = POSLabel
override def feature = using(wordForm)
override lazy val classifier = new MikheevLearner
override val loggging = true
}

// Pairwise classifier
/** Baseline learner over token pairs, declared against POSDataModel. */
object BaselineClassifierPair extends Learnable[(Constituent, Constituent)](POSDataModel) {
// Target: the joint label property of the pair.
def label = POSLabelPair
// Single feature: the combined word form of the two tokens.
override def feature = using(wordFormPair)
// NOTE(review): this is the same learner class BaselineClassifier uses for single tokens —
// confirm it behaves as intended on pair-level features and labels.
override lazy val classifier = new POSBaselineLearner()
}

/** Pairwise tagger: predicts the joint label of a token pair. It is the classifier the constraint
  * helpers in this file query via `POSTaggerPairwise on (c1, c2)`.
  */
object POSTaggerPairwise extends Learnable[(Constituent, Constituent)](POSDataModel) {
// Target: the joint label property of the pair.
def label = POSLabelPair
// Features: the baseline classifier's outputs for the two tokens (see POSBaselineScoresPair).
override def feature = using(POSBaselineScoresPair)
// Constructed with the learner's default settings; no parameters are configured here.
override lazy val classifier = new SparseNetworkLearner
}

// def sentenceLabelsMatch = ConstrainedClassifier.constraint[Sentence] { s: Sentence =>
// val constituents = CommonSensors.getPOSConstituents(s.getSentenceConstituent.getTextAnnotation)
// constituents.sliding(3).toList._forall { cons: List[Constituent] =>
// consecutiveLabelPairsAreConsistent(cons.head, cons(1), cons(2))
// }
// }

// Tag inventory that the constraint helpers in this file quantify over.
// NOTE(review): looks like the Penn Treebank tag set plus "UNKNOWN" — confirm it matches exactly the
// label strings the POS label property can produce, since the constraints compare on string equality.
val posLabels = List("#", "$", "''", ",", "-LRB-", "-RRB-", ".", ":", "CC", "CD", "DT", "EX", "FW", "IN", "JJ", "JJR",
"JJS", "LS", "MD", "NN", "NNP", "NNPS", "NNS", "PDT", "POS", "PRP", "PRP$", "RB", "RBR", "RBS", "RP", "SYM", "TO",
"UH", "UNKNOWN", "VB", "VBD", "VBG", "VBN", "VBP", "VBZ", "WDT", "WP", "WP$", "WRB", "``")

/** Builds the consistency constraint between the overlapping token pairs `(c1, c2)` and `(c2, c3)`.
  *
  * For every label in `posLabels`: if the pairwise classifier assigns that label to the right element of
  * `(c1, c2)`, it must assign the same label to the left element of `(c2, c3)`, so the shared token `c2`
  * gets one tag in both pairs.
  *
  * @param c1 first token of the window
  * @param c2 middle token, shared by both pairs
  * @param c3 last token of the window
  * @return the first-order constraint described above
  */
def consecutiveLabelPairsAreConsistent(c1: Constituent, c2: Constituent, c3: Constituent): FirstOrderConstraint = {
  // The implication must hold for ALL labels. Under existential quantification it is vacuously
  // satisfied by any label that is not the right label of (c1, c2), so it would constrain nothing.
  posLabels._forall { label: String =>
    posRightLabelIs(label, c1, c2) ==> posLeftLabelIs(label, c2, c3)
  }
}

/** Constraint stating that the right (second) tag predicted for the pair `(c1, c2)` is `label`.
  *
  * It holds when some tag in `posLabels`, used as the left part, makes `POSTaggerPairwise` predict
  * `leftTag + labelSeparator + label` for the pair.
  */
def posRightLabelIs(label: String, c1: Constituent, c2: Constituent) = {
  posLabels._exists { leftTag: String =>
    val pairLabel = leftTag + POSTaggerSensors.labelSeparator + label
    (POSTaggerPairwise on (c1, c2)).is(pairLabel)
  }
}

/** Constraint stating that the left (first) tag predicted for the pair `(c1, c2)` is `label`.
  *
  * It holds when some tag in `posLabels`, used as the right part, makes `POSTaggerPairwise` predict
  * `label + labelSeparator + rightTag` for the pair.
  */
def posLeftLabelIs(label: String, c1: Constituent, c2: Constituent) = {
  posLabels._exists { rightTag: String =>
    val pairLabel = label + POSTaggerSensors.labelSeparator + rightTag
    (POSTaggerPairwise on (c1, c2)).is(pairLabel)
  }
}

/** Constrained wrapper around POSTaggerPairwise: instances are token pairs and the constraint is
  * stated over the Sentence they belong to.
  */
object POSConstrainedClassifier extends ConstrainedClassifier[(Constituent, Constituent), Sentence](POSDataModel, POSTaggerPairwise) {
// Constraint evaluated on the sentence.
// NOTE(review): sentenceLabelsMatch currently returns a constant-true constraint, so this should not
// restrict the predictions at all.
override def subjectTo = sentenceLabelsMatch
// Inference back-end used to solve the constrained problem.
override val solver = new OJalgoHook
// Edge used to go from a token pair to its sentence.
override val pathToHead = Some(POSDataModel.tokenPairToSentence)
}

def sentenceLabelsMatch = ConstrainedClassifier.constraint[Sentence] { s: Sentence =>
// val constituents = CommonSensors.getPOSConstituents(s.getSentenceConstituent.getTextAnnotation, s.getSentenceId)
// val posLabelPairs = for { x <- posLabels; y <- posLabels } yield (x, y)
// constituents.sliding(2).toList._forall {
// case c1 :: c2 :: _ =>
// posLabelPairs._exists { case (l1, l2) => posClassifierLabelCompatible(c1, c2, l1, l2) }
// }
//constituents.toList._forall { c => posLabels._exists { l => (POSMixedClassifier on c).is(l) } }
new FirstOrderConstant(true)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bhargav so I fixed the bug you pointed out, but it didn't help .. :-/
Another thing: I tested by replacing it with a trivial constraint, new FirstOrderConstant(true), which should always be feasible; it is still returning a weird result ...

}

/** Constraint tying the per-token predictions to the pairwise prediction.
  *
  * It holds when `POSMixedClassifier` predicts `l1` for `c1` and `l2` for `c2`, and `POSTaggerPairwise`
  * predicts `l1 + labelSeparator + l2` for the pair `(c1, c2)`.
  */
def posClassifierLabelCompatible(c1: Constituent, c2: Constituent, l1: String, l2: String) = {
  val firstTokenIsL1 = (POSMixedClassifier on c1).is(l1)
  val bothTokensMatch = firstTokenIsL1 and (POSMixedClassifier on c2).is(l2)
  bothTokensMatch and (POSTaggerPairwise on (c1, c2)).is(l1 + POSTaggerSensors.labelSeparator + l2)
}
}
Original file line number Diff line number Diff line change
@@ -1,15 +1,28 @@
package edu.illinois.cs.cogcomp.saulexamples.nlp.POSTagger

import edu.illinois.cs.cogcomp.core.datastructures.ViewNames
import edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent
import edu.illinois.cs.cogcomp.core.datastructures.textannotation.{ Sentence, Constituent }
import edu.illinois.cs.cogcomp.lbj.pos.POSLabeledUnknownWordParser
import edu.illinois.cs.cogcomp.saul.datamodel.DataModel
import edu.illinois.cs.cogcomp.saulexamples.nlp.POSTagger.POSClassifiers.{ POSTaggerUnknown, POSTaggerKnown, BaselineClassifier }
import edu.illinois.cs.cogcomp.saulexamples.nlp.POSTagger.POSClassifiers.{ MikheevClassifier, POSTaggerUnknown, POSTaggerKnown, BaselineClassifier }

object POSDataModel extends DataModel {

// Node of sentences; the other nodes below are linked to it by edges.
val sentence = node[Sentence]

// Node of individual tokens.
val tokens = node[Constituent]

// Node of token pairs, the instance type of the pairwise classifiers.
val tokenPair = node[(Constituent, Constituent)]

// Edge token -> pair: each token is paired with whatever getConstituentAfter returns for it.
// NOTE(review): what getConstituentAfter returns for the last token of a sentence is not visible
// here — confirm the resulting pair is valid.
val tokenToTokenPair = edge(tokens, tokenPair)
tokenToTokenPair.addSensor({ x: (Constituent) => (x, POSTaggerSensors.getConstituentAfter(x)) })

// Edge token -> sentence: looked up in the token's TextAnnotation by the token's sentence id.
val tokenToSentence = edge(tokens, sentence)
tokenToSentence.addSensor({ x: (Constituent) => x.getTextAnnotation.getSentence(x.getSentenceId) })

// Edge pair -> sentence: uses the sentence of the pair's FIRST token only.
val tokenPairToSentence = edge(tokenPair, sentence)
tokenPairToSentence.addSensor({ x: (Constituent, Constituent) => x._1.getTextAnnotation.getSentence(x._1.getSentenceId) })

import POSTaggerSensors._

val constituentAfter = edge(tokens, tokens)
Expand Down Expand Up @@ -187,4 +200,35 @@ object POSDataModel extends DataModel {

r + "-" + s + "-" + t
}
}

// Pairwise classifier properties
// Label of a token pair: POSLabel of the first token, labelSeparator, then POSLabel of the second token.
val POSLabelPair = property(tokenPair) { x: (Constituent, Constituent) =>
POSLabel(x._1) + labelSeparator + POSLabel(x._2)
}

// Word-form feature of a token pair: the two tokens' word forms joined with `+`.
// NOTE(review): unlike POSLabelPair, no separator is placed between the two parts; if wordForm yields
// strings, different pairs can produce the same value (e.g. "ab"+"c" and "a"+"bc") — confirm this is
// intended.
val wordFormPair = property(tokenPair) { x: (Constituent, Constituent) =>
wordForm(x._1) + wordForm(x._2)
}

// Three-element feature list for a token pair: BaselineClassifier applied to the first token, to the
// second token, and the two results combined with `+`.
// NOTE(review): despite the "Scores" name, the values are whatever BaselineClassifier(x) returns —
// presumably the predicted tag rather than a numeric score; confirm.
val POSBaselineScoresPair = property(tokenPair) { x: (Constituent, Constituent) =>
List(BaselineClassifier(x._1), BaselineClassifier(x._2), BaselineClassifier(x._1) + BaselineClassifier(x._2))
}

// Feature list for a token pair: the known-word tagger's scores for the first token followed by its
// scores for the second token, each converted with scoreSetToList.
val POSKnownScoresPair = property(tokenPair) { pair: (Constituent, Constituent) =>
  scoreSetToList(POSTaggerKnown.classifier.scores(pair._1)) ++
    scoreSetToList(POSTaggerKnown.classifier.scores(pair._2))
}

// Feature list for a token pair: the unknown-word tagger's scores for the first token followed by its
// scores for the second token, each converted with scoreSetToList.
val POSUnknownScoresPair = property(tokenPair) { pair: (Constituent, Constituent) =>
  scoreSetToList(POSTaggerUnknown.classifier.scores(pair._1)) ++
    scoreSetToList(POSTaggerUnknown.classifier.scores(pair._2))
}

// Feature list for a token pair: POSClassifierScoreSet of the first token followed by that of the
// second token, each converted with scoreSetToList.
val POSCombinedScoresPair = property(tokenPair) { pair: (Constituent, Constituent) =>
  scoreSetToList(POSClassifiers.POSClassifierScoreSet(pair._1)) ++
    scoreSetToList(POSClassifiers.POSClassifierScoreSet(pair._2))
}
}
Loading