Skip to content
Larysa Visengeriyeva edited this page Jan 6, 2014 · 32 revisions
  • Constants: 1, +, "sameer"
  • Variables: x, y
  • Function Applications: f(x)
  • Typed Lambda Expressions: \lambda x: T . f x
  • Namespaces/Packages: nlp
  • Directives: @chain
  • Comments: //

Types can be expressions.

Keywords

  • val
  • lambda/defun
  • memberOf, isA, elemOf, in

Constants

Core Constants

  • arithmetic: +, -, dot, *, /
  • optimization: argmax
  • boolean: and, or, neg, =>
  • set: union, intersect, size, \
  • e_ or 1 or |1 (one-hot vector)
  • if-else
  • set: List -> Set
  • ,: List x T -> List
  • ,: T x T -> List[T]
  • ,: Set x Set -> Set
  • I: Bool -> Double
  • filter (or suchthat): TermSet X -> Bool -> TermSet

Derived Constants

  • min, max, logZ, sample
  • map, reduce, filter
  • "learn"
  • sum, prod
  • set_diff
  • switch/case
  • <=> (or ===)
  • <~~~ is the same as e()*I()

Macros

  • conditioning: m | x=x1, w=w1 is the same as m(_,x1,w1)

Data Types

  • strings
  • ints
  • doubles
  • bools
  • vectors \in Any^\inf
  • tuples
  • sets
  • lists
  • symbols

Example Program

MLN Example

// Domain of individuals: the set holding the symbols 'Anna and 'Bob.
val persons = set('Anna, 'Bob)

// Variable declarations. smokes and cancer are declared over persons -> bools,
// friends over pairs of persons -> bools, and weights over vectors.
// NOTE(review): `isa` is read here as a type declaration; the keyword list spells it `isA` — confirm.
val smokes = isa persons -> bools
val cancer = isa persons -> bools
val friends = isa (persons,persons) -> bools
val weights = isa vectors

// Feature function f1: for every person p, adds the one-hot vector e('smokingIsBad)
// scaled by the indicator I(...) of the formula `smokes(p) |=> cancer(p)`
// (|=> is presumably implication — confirm against the boolean constants).
val f1 = (smokes, cancer) -> sum(persons, p => e('smokingIsBad)*I(smokes(p) |=> cancer(p)))

// Feature function f2: for every ordered pair (p1, p2) of persons, adds the one-hot vector
// e('peerPressure) scaled by the indicator of `friends(p1, p2) |=> (smokes(p1) <=> smokes(p2))`.
val f2 = (smokes, friends) -> sum((persons,persons), (p1, p2) => e('peerPressure)*I(friends(p1, p2) |=> (smokes(p1) <=> smokes(p2))))

// Model score: the sum of the f1 and f2 feature vectors, dotted with the weights.
val model = (smokes, cancer, friends, weights) -> (f1(smokes,cancer) + f2(smokes,friends)) dot weights

// Fix the weights to (0.1, 0.3) and take the argmax of the resulting model.
val best = argmax model | weights <- (0.1, 0.3)

Ideal Infix version

// Domain of individuals.
persons = set('Anna, 'Bob)
// Function types over the domain: S and C map a person to a bool,
// F maps a pair of persons to a bool.
S = persons -> bools
C = persons -> bools
F = (persons, persons) -> bools
// Model parameters, each declared with its type.
param smokes : S
param cancer : C
param friends : F
param weights : vectors

// pred: the per-person feature term, with smokes and cancer passed explicitly.
fun pred(p:persons, smokes, cancer) = e('smokingIsBad)*I(smokes(p) |=> cancer(p))
// f1: sums the same term over all persons, referring to smokes and cancer by name.
fun f1 = sum (p: persons) => e('smokingIsBad)*I(smokes(p) |=> cancer(p))
// f1p: variant of f1 that reuses pred and leaves its last two arguments as `_` placeholders.
fun f1p = sum (p: persons) => pred(p, _, _)

// f2: sums, over all pairs (p1, p2) of persons, the one-hot vector e('peerPressure)
// scaled by the indicator of `friends(p1, p2) |=> (smokes(p1) <=> smokes(p2))`.
fun f2(smokes, friends) = {
  sum (p1: persons, p2: persons) => e('peerPressure)*I(friends(p1, p2) |=> (smokes(p1) <=> smokes(p2)))
}

// model: the combined feature vector of f1 and f2, dotted with the weights.
fun model = (f1 + f2) dot weights

// model2: the same model with an explicit parameter list; `_` marks arguments left open.
fun model2(smokes, cancer, friends, weights) = (f1(_, _) + f2(smokes, _)) dot weights

// conditioned: model with the weights fixed to (0.1, 0.3).
fun conditioned = model | weights <- (0.1, 0.3)

// best: argmax of the conditioned model; @bruteForceMax is a directive attached to the argmax.
best = argmax conditioned @bruteForceMax

Question: How could we observe a few smokes, cancer, etc. atoms? Use "so that"?

Another MLN Example (annotation-heavy spec)

// Domain of individuals.
val persons = set('Anna, 'Bob)

// Predicates, annotated as hidden (smokes) or observed (friends).
@hidden val smokes = persons ~> boolean
@observed val friends= (persons, persons)~> boolean

// The model is a block of formulas, each preceded by a @weight annotation.
@model
def mln={
 @weight 1.0
 val f1= friends('x, 'y) |=>(smokes('x)<=>smokes('y))
 @weight 1.5
 val f2= friends('x, 'y)
}
// NOTE(review): @Weights(0.1, 0.3) differs from the per-formula @weight values above;
// which of the two takes effect is not stated here — confirm.
val best = argmax mln @Weights(0.1, 0.3)

Chunking Example

// Words are arbitrary strings; Chunks is the fixed set of chunk labels.
Words = Strings
Chunks = set("O", "B-VP", "B-NP", "B-PP", "I-VP", "I-NP", "I-PP", "B-SBAR", "I-SBAR", "B-ADJP", "I-ADJP")

// Model parameters: a word sequence, a chunk-label sequence, and a weight vector.
param word : Seq(Words)
param chunk : Seq(Chunks)
param weights : Vectors

// Bias feature: one one-hot vector per position, keyed by the chunk label at that position.
fun bias = sum (i : chunk.domain) => e('bias, chunk(i))
// Emission feature: keyed by the chunk label and the word at the same position;
// the `where` clause requires chunk and word to share the same index domain.
fun emission where chunk.domain == word.domain = sum (i : chunk.domain) => e('emission, chunk(i), word(i))
// Transition feature: keyed by the chunk labels at positions i and i+1.
// The key was 'transmission, which looks like a slip for 'transition (the function is `trans`).
// NOTE(review): the range must stop early enough for chunk(i+1) to exist — confirm whether `..` is end-inclusive.
fun trans = sum (i: 0 .. chunk.length-1) => e('transition, chunk(i), chunk(i+1))

// Linear model: the summed feature vectors dotted with the weights.
fun model = (bias + emission + trans) dot weights

// A training example pairs a word sequence with a chunk sequence.
// Uses `Chunks`, the label set declared above (`Chunk` is not declared anywhere).
Example = Seq(Words),Seq(Chunks)
// Loss, variant 1: for each example d, the maximum model score given the words d_1
// minus the model score of the chunk sequence d_2, both under weights w.
fun loss(data: Seq[Example],w: Vectors) =
    sum (d <- data) =>  (max model | word=d_1,weights=w) - (model | word=d_1,chunk=d_2,weights=w)
// Loss, variant 2: the same quantity, with the partially conditioned model named once.
// The second term is parenthesised so that `| chunk=d_2` clearly applies to `conditioned` only.
fun loss(data: Seq[Example],w: Vectors) =
    sum (d <- data) => {
      conditioned = model | word=d_1,weights=w
      (max conditioned) - (conditioned | chunk=d_2)
    }
// Training data as a parameter.
param data: Seq[Example]
// predict: maximum of the model over chunk, subject to chunk.domain=word.domain (`st`).
fun predict = max(chunk st chunk.domain=word.domain -> model)
// loss, variant 3: written in terms of predict and model, with `_` left open for the weights.
fun loss = sum (d <- data) => predict(d_1,_) - model(d_1,d_2,_)
// learned: the weights that minimise the loss.
fun learned = argmin(weights -> loss)
// predictor: predict with its weights fixed to the learned ones.
fun predictor = predict | weights=learned

MLN in Haskell

-- Data types for the MLN.

-- A person is identified by name.
type Person = String

-- Observed part of a world: the friendship relation over pairs of persons.
data Observed = Observed
  { friends :: (Person, Person) -> Bool
  }

-- Hidden part of a world: who smokes and who has cancer.
data Hidden = Hidden
  { smokes :: Person -> Bool
  , cancer :: Person -> Bool
  }

-- The individuals of the domain.
persons = ["Anna", "Bob"]

-- Search space: every pairing of a smokes-function with a cancer-function,
-- both drawn from funs(persons, bools).
hidden = do
  s <- funs (persons, bools)
  c <- funs (persons, bools)
  return (Hidden s c)
 
-- Model: scores an observed/hidden pair under the given weights.
mln (Observed friends, Hidden smokes cancer, weights) =
  dot (f1 + f2, weights)
  where
    -- Feature 1, summed over persons: "smokes p implies cancer p", written as (not a || b).
    f1 = sum [ feat ([1], ind (not (smokes p) || cancer p)) | p <- persons ]
    -- Feature 2, summed over ordered pairs: "friends implies equal smoking status".
    f2 = sum [ feat ([2], ind (not (friends (p1, p2)) || (smokes p1 == smokes p2)))
             | p1 <- persons, p2 <- persons ]

-- Observation: friendship holds for the pair ("Anna","Bob") and for nothing else.
fr pair = pair == ("Anna", "Bob")

-- Weights: a map from feature key to weight; only key [1] has an entry.
w = M.fromList [([1], 2.0)]

-- inference
-- prediction1: argmax of the model score over the whole hidden search space.
prediction1 = argmax (\y -> mln(Observed fr, y, w)) hidden 
-- prediction2: the same argmax, restricted to hidden states where Anna smokes and has cancer
-- (the restriction is expressed by pattern-matching the Hidden constructor).
prediction2 = argmax (\y -> mln(Observed fr, y, w)) [Hidden s c | Hidden s c <- hidden, s("Anna"),c("Anna")]  
-- prediction3: the same restriction as prediction2, written with the record accessors.
prediction3 = argmax (\y -> mln(Observed fr, y, w)) [y | y <- hidden, smokes(y)("Anna"),cancer(y)("Anna")]  

Equivalent code in Scala

/** A person is identified by a symbol such as 'Anna.
  *
  * Declared as Symbol (not String) because every person value in this example
  * is written as a symbol literal: the `persons` set, the observation pattern
  * and the prediction filter.
  */
type Person = Symbol

/** Observed part of a world: the friendship relation over pairs of persons. */
case class Observed(friends: (Person, Person) => Boolean)

/** Hidden part of a world: who smokes and who has cancer. */
case class Hidden(smokes: Person => Boolean, cancer: Person => Boolean)

/** The individuals of the domain. */
val persons = Set('Anna, 'Bob)

/** Scores an observed/hidden pair under the MLN.
  *
  * @param friends the observed part; its `friends` field is the friendship relation
  * @param hidden  the hidden part (smokes and cancer predicates)
  * @param weights the weight vector the summed features are dotted with
  * @return `(f1 + f2) dot weights`
  */
def mln(friends: Observed, hidden: Hidden, weights: Vector) = {

  // `friends` is a case class, not a function: unwrap the relation before applying it.
  val isFriend = friends.friends
  val smokes = hidden.smokes
  val cancer = hidden.cancer

  // Feature 1, summed over persons: smoking implies cancer.
  def f1 = sum (persons) { p => feat(1, indicator(smokes(p) -> cancer(p))) }

  // Feature 2, summed over pairs: friends have equal smoking status.
  // Wrapped in feat(2, indicator(...)) like f1, so that f1 + f2 adds two feature vectors.
  def f2 = sum (cartesian(persons, persons)) { p =>
    feat(2, indicator(isFriend(p._1, p._2) -> (smokes(p._1) <-> smokes(p._2)))) }

  (f1 + f2) dot weights
}

// observation: friendship holds for ('Anna, 'Bob) and for nothing else.
// Named `fr` because that is the name the predictions use, and wrapped in
// Observed because that is the type of mln's first parameter.
val fr = Observed {
  case ('Anna, 'Bob) => true
  case _ => false
}

// Weight vector passed to mln.
val weights = Vector(2.0, 0.0)

// Search space: every pairing of a smokes-function with a cancer-function,
// both drawn from funs(persons, bools).
val hidden =
  funs(persons, bools).flatMap { smokes =>
    funs(persons, bools).map { cancer => Hidden(smokes, cancer) }
  }

// prediction1: argmax of the model score over the whole hidden search space.
// Written as argmax(hidden) {...}; `argmax hidden {...}` would parse as the infix call argmax.hidden(...).
val prediction1 = argmax(hidden) { y => mln(fr, y, weights) }

// prediction2: the same argmax, restricted to hidden states where 'Anna smokes and has cancer.
val prediction2 = argmax(hidden filter { h => h.smokes('Anna) && h.cancer('Anna) }) { y => mln(fr, y, weights) }

Compilable equivalent code in Racket

;; The individuals of the domain, as a set of symbols.
(define persons (set 'Anna 'Bob))

;; Scores a world under the MLN: the dot product of the weights with the sum
;; of the two feature vectors.
;;   smokes, cancer : predicates applied to a person
;;   friends        : predicate applied to two persons
;;   weights        : the weight vector
(define (mln smokes cancer friends weights)
  ;; Feature 1, summed over persons: smoking implies cancer.
  ;; The parameter list is (p): a bare `p` would bind the whole argument list, not the person.
  (define f1 (sum persons (λ (p) (feature 1 (implies (smokes p) (cancer p))))))
  ;; Feature 2, summed over pairs: friends have equal smoking status.
  (define f2 (sum (cartesian-product (list persons persons)) 
                  (λ (p q) (feature 2 (implies (friends p q) 
                                               (iff (smokes p) (smokes q)))))))
  (dot weights (vector+ f1 f2)))

;; Weight vector passed to mln.
(define weights (list 2 0))

;; Observed friendship relation: true only when x is 'Anna and y is 'Bob.
(define (friends x y)
  (and (equal? x 'Anna)
       (equal? y 'Bob)))
      
;; Search space: every pairing of a smokes-function with a cancer-function,
;; both drawn from (funs persons booleans).
(define all-smokes-cancer-functions
   (cartesian-product (funs persons booleans)
                      (funs persons booleans)))

;; prediction1: argmax of the model score over the whole search space.
(define prediction1 
  (argmax all-smokes-cancer-functions
          (λ (smokes cancer) (mln smokes cancer friends weights))))


;; prediction2: the same argmax, restricted to pairs whose two functions both hold for 'Anna.
;; NOTE(review): argmax and filter take the collection first here, unlike racket/base's
;; (argmax proc lst) and (filter pred lst) — presumably project-defined versions; confirm.
(define prediction2 
  (argmax (filter all-smokes-cancer-functions
                  (λ (f1 f2) (and (f1 'Anna) (f2 'Anna))))
           (λ (smokes cancer) (mln smokes cancer friends weights))))

CRF in Haskell

-- Data structures

-- Observed part of an instance: the words and their tags.
data Observed = Observed
  { word :: [String]
  , tag  :: [String]
  }

-- Hidden part of an instance: chunk labels and NER labels.
data Hidden = Hidden
  { chunk :: [String]
  , ner   :: [String]
  }

-- Search space of the hidden variable for a given observation: every pairing of a
-- chunk sequence with a NER sequence, both built by `seqs` with n = number of words in x.
hidden x =
  [ Hidden chunk ner
  | chunk <- seqs ["VP", "NP"] n
  , ner   <- seqs ["PER", "ORG"] n
  ]
  where
    n = length (word x)
 
-- Model: scores an observed/hidden pair under the given weights as the dot
-- product of the weights with the summed bias, emission and transition features.
-- Lists are indexed with (!!), which is 0-based, so valid positions are 0 .. n - 1;
-- the transition feature also reads position i + 1 and therefore stops at n - 2.
crf (Observed word tag, Hidden chunk ner, weights) = 
    let n =        length(word)
        bias =     sum [ ft(1,[chunk !! i],1.0) | i <- [0 .. n - 1] ]
        emission = sum [ ft(2,[chunk !! i, word !! i], 1.0) | i <- [0 .. n - 1] ] 
        trans =    sum [ ft(3,[chunk !! i, chunk !! (i + 1)], 1.0) | i <- [0 .. n - 2]]  
    in dot(bias + emission + trans,weights)

-- Predictor: the hidden state from the search space of x with the highest CRF score under w.
predict w x = argmax score (hidden x)
  where
    score y = crf (x, y, w)

-- Loss, generic over model and search space so it can be re-used:
-- for each (x, g) in the training set, the maximum model score over space x
-- minus the model score of g, summed over all examples.
perceptronLoss trainset model weights space =
  sum (map exampleLoss trainset)
  where
    exampleLoss (x, g) =
      max (\y -> model (x, y, weights)) (space x) - model (x, g, weights)

-- training function, takes a training set and returns a predictor.
-- The weights are the argmin of the perceptron loss (for crf over the `hidden` search space)
-- taken over `vectors`; the returned function calls predict with those weights.
train trainset = \x -> predict (argmin (\w -> perceptronLoss trainset crf w hidden) (vectors)) x 

CRF in native Scala

/** Observed part of an instance: the words and their tags. */
case class Observed(words: Seq[String], tags: Seq[String])

/** Hidden part of an instance: one chunk label per word. */
case class Hidden(chunks: Seq[String])

/** Search space for an observation: all chunk-label sequences with one label per word.
  *
  * Each sequence is wrapped in [[Hidden]] because `crf` and `loss` consume Hidden values.
  * NOTE(review): assumes allSeqs returns a collection of Seq[String] — confirm.
  */
def space(x: Observed) = allSeqs(chunkLabels, x.words.length).map(Hidden(_))

/** Scores an observed/hidden pair under the CRF.
  *
  * Takes a single weight vector: the original signature listed both an unused
  * `w: Vector` and an untyped `weights`, while every caller passes three arguments.
  *
  * @param x       the observation
  * @param y       the hidden chunk labels (field `chunks`)
  * @param weights the weight vector the summed features are dotted with
  */
def crf(x: Observed, y: Hidden, weights: Vector) = {
  val n = x.words.size
  // Per-position feature over the chunk label.
  val f1 = sum { for (i <- 0 until n) yield feat(y.chunks(i)) }
  // Transition feature over adjacent chunk labels; stops at n - 2 so that i + 1 stays in range.
  val f2 = sum { for (i <- 0 until n - 1) yield feat(y.chunks(i), y.chunks(i + 1)) }
  (f1 + f2) dot weights
}
 
/** Generic loss: for each (x, g) in the data, the maximum model score over `space(x)`
  * minus the model score of g, summed over all examples.
  *
  * @param data    pairs of input x and target g
  * @param model   scoring function over (input, output, weights)
  * @param weights the weights to evaluate
  * @param space   search space of outputs for an input
  */
def loss[X,Y](data:Seq[(X,Y)],model:(X,Y,Vector) => Double,weights:Vector,space:X=>Seq[Y]) = {
     // Search the `space` parameter: the original called an undefined `hidden` and left `space` unused.
     sum{ for ((x,g) <- data) yield max(space(x)) { y => model(x,y,weights)} - model(x,g,weights) }
} 

/** Learns weights: the argmin over `vectors` of the loss of `crf` on the data.
  *
  * The data type is spelled out as (Observed, Hidden): this method has no type
  * parameters X and Y, and `crf` is defined on exactly these types.
  */
def train(data: Seq[(Observed, Hidden)]) = argmin(vectors) { w => loss(data, crf, w, space) }

/** Predicts the hidden state in `space(x)` with the highest CRF score under w.
  *
  * `w` is typed as Vector, the weight type used by `crf` and `loss`
  * (no `Weights` type is declared here).
  */
def predict(x: Observed, w: Vector) = argmax(space(x)) { y => crf(x, y, w) }

/** Trains on the data and returns a prediction function for new observations.
  *
  * Training runs once, when the predictor is built, rather than on every call
  * of the returned function.
  */
def predictor(data: Seq[(Observed, Hidden)]) = {
  val learned = train(data)
  (x: Observed) => predict(x, learned)
}