Language Spec

Constants: 1, +, "sameer"
Variables: x, y
Function Applications: f(x)
Typed Lambda Expressions: \lambda x: T . f x
Namespaces/Packages: nlp
Directives: @chain
Comments: //

Types can be expressions.

Keywords

val
lambda/defun
memberOf, isA, elemOf, in

Constants

Core Constants

arithmetic: +, -, dot, *, /
optimization: argmax
boolean: and, or, neg, =>
set: union, intersect, size, \
e_ or 1 or |1 (one-hot vector)
if-else
set: List -> Set
,: List x T -> List
,: T x T -> List[T]
,: Set X Set -> Set
I: Bool -> Double
filter (or suchthat): TermSet x (X -> Bool) -> TermSet

Derived Constants

min, max, logZ, sample
map, reduce, filter
"learn"
sum, prod
set_diff
switch/case
<=> (or ===)
<~~~ is same as e()*I()

Macros

conditioning: m | x=x1, w=w1 is same as m(_,x1,w1)

Data Types

strings
ints
doubles
bools
vectors \in Any^\infty
tuples
sets
lists
symbols

Example Program

MLN Example

val persons = set('Anna, 'Bob)

val smokes = isA persons -> bools
val cancer = isA persons -> bools
val friends = isA (persons,persons) -> bools
val weights = isA vectors

val f1 = (smokes, cancer) -> sum(persons, p => e('smokingIsBad)*I(smokes(p) |=> cancer(p)))

val f2 = (smokes, friends) -> sum((persons,persons), (p1, p2) => e('peerPressure)*I(friends(p1, p2) |=> (smokes(p1) <=> smokes(p2))))

val model = (smokes, cancer, friends, weights) -> (f1(smokes,cancer) + f2(smokes,friends)) dot weights

val best = argmax model | weights <- (0.1, 0.3)

Ideal Infix version

persons = set('Anna, 'Bob)
S = persons -> bools
C = persons -> bools
F = (persons, persons) -> bools
param smokes : S
param cancer : C
param friends : F
param weights : vectors

fun pred(p:persons, smokes, cancer) = e('smokingIsBad)*I(smokes(p) |=> cancer(p))
fun f1 = sum (p: persons) => e('smokingIsBad)*I(smokes(p) |=> cancer(p))
fun f1p = sum (p: persons) => pred(p, _, _)

fun f2(smokes, friends) = {
  sum (p1: persons, p2: persons) => e('peerPressure)*I(friends(p1, p2) |=> (smokes(p1) <=> smokes(p2)))
}

fun model = (f1 + f2) dot weights

fun model2(smokes, cancer, friends, weights) = (f1(_, _) + f2(smokes, _)) dot weights

fun conditioned = model | weights <- (0.1, 0.3)

best = argmax conditioned @bruteForceMax

Question: How could we observe only a few of the atoms (smokes, cancer, etc.)? Use "so that"?

Another MLN Example (annotation-heavy spec)

val persons = set('Anna, 'Bob)

@hidden val smokes = persons ~> boolean
@observed val friends= (persons, persons)~> boolean

@model
def mln={
 @weight 1.0
 val f1= friends('x, 'y) |=>(smokes('x)<=>smokes('y))
 @weight 1.5
 val f2= friends('x, 'y)
}
val best = argmax mln @Weights(0.1, 0.3)

Chunking Example

Words = Strings
Chunks = set("O", "B-VP", "B-NP", "B-PP", "I-VP", "I-NP", "I-PP", "B-SBAR", "I-SBAR", "B-ADJP", "I-ADJP")

param word : Seq(Words)
param chunk : Seq(Chunks)
param weights : Vectors

fun bias = sum (i : chunk.domain) => e('bias, chunk(i))
fun emission where chunk.domain == word.domain = sum (i : chunk.domain) => e('emission, chunk(i), word(i))
fun trans = sum (i: 0 .. chunk.length-1) => e('transition, chunk(i), chunk(i+1))

fun model = (bias + emission + trans) dot weights

Example = Seq(Words),Seq(Chunks)
fun loss(data: Seq[Example],w: Vectors) =
    sum (d <- data) =>  (max model | word=d_1,weights=w) - (model | word=d_1,chunk=d_2,weights=w)
fun loss(data: Seq[Example],w: Vectors) =
    sum (d <- data) => {
      conditioned = model | word=d_1,weights=w
      (max conditioned) - conditioned | chunk=d_2
    }
param data: Seq[Example]
fun predict = max(chunk st chunk.domain=word.domain -> model)
fun loss = sum (d <- data) => predict(d_1,_) - model(d_1,d_2,_)
fun learned = argmin(weights -> loss)
fun predictor = predict | weights=learned

MLN in Haskell

-- data type definition
-- A person is identified by name.
type Person = String
-- Observed evidence: a predicate over ordered pairs of persons.
data Observed = Observed {friends:: (Person,Person) -> Bool}
-- Hidden state: one predicate per person for smoking and one for cancer.
data Hidden = Hidden {smokes::Person -> Bool, cancer::Person -> Bool}

-- data lists
-- The domain of persons the model ranges over.
persons = ["Anna","Bob"]
-- Every combination of a candidate `smokes` and a candidate `cancer` drawn from
-- funs(persons,bools). `funs` and `bools` are not defined in this snippet;
-- presumably funs enumerates all functions from persons to bools -- TODO confirm.
hidden = [Hidden s c | s <- funs(persons,bools), c <- funs(persons,bools)] -- this is the search space
 
-- model
-- Scores one joint assignment: sums two groups of features and takes the dot
-- product of that sum with `weights`. `feat`, `ind` and `dot` are defined
-- elsewhere; presumably `ind` maps a Bool to a number and `feat` builds a
-- feature vector keyed by its first argument -- TODO confirm.
mln(Observed friends, Hidden smokes cancer, weights) =
   -- f1: one feature per person, keyed [1]; implication written as (not a || b).
   let f1 = sum [ feat([1], ind $ not(smokes(p)) || cancer(p)) | p<-persons ]
       -- f2: one feature per ordered pair of persons, keyed [2];
       -- friendship implies equal smoking status.
       f2 = sum [ feat([2], ind $ not(friends(p1,p2)) || (smokes(p1) == smokes(p2))) | p1 <- persons, p2 <- persons ]   
   in dot(f1 + f2,weights)

-- observation
-- Friendship holds only for the ordered pair ("Anna","Bob"); every other pair is False.
fr ("Anna","Bob") = True
fr _ = False 

-- weights
-- Only key [1] gets a weight (2.0); there is no entry for key [2].
-- How `dot` treats a missing key is not shown here -- TODO confirm.
w = M.fromList [([1],2.0)]

-- inference
-- prediction1 searches the whole `hidden` space for the highest-scoring assignment.
prediction1 = argmax (\y -> mln(Observed fr, y, w)) hidden 
-- prediction2 restricts the search to assignments where both predicates hold for
-- "Anna", selecting them by pattern-matching on the Hidden constructor.
prediction2 = argmax (\y -> mln(Observed fr, y, w)) [Hidden s c | Hidden s c <- hidden, s("Anna"),c("Anna")]  
-- prediction3 expresses the same restriction through the record accessors.
prediction3 = argmax (\y -> mln(Observed fr, y, w)) [y | y <- hidden, smokes(y)("Anna"),cancer(y)("Anna")]

Equivalent code in Scala

/** A person is identified by a symbol, matching the symbol literals ('Anna, 'Bob)
  * used for `persons`, the observation and the prediction filters. */
type Person = Symbol

/** Observed evidence: the friendship relation over ordered pairs of persons. */
case class Observed(friends: (Person, Person) => Boolean)

/** One candidate assignment of the hidden predicates `smokes` and `cancer`. */
case class Hidden(smokes: Person => Boolean, cancer: Person => Boolean)

/** The domain of persons the model ranges over. */
val persons: Set[Person] = Set('Anna, 'Bob)

/** Scores one joint assignment of observed and hidden predicates.
  *
  * Sums two groups of features and takes the dot product with `weights`.
  * `sum`, `cartesian`, `feat`, `indicator` and the `->` / `<->` operators on
  * Booleans are supplied elsewhere; presumably `->` is implication and `<->`
  * equivalence -- TODO confirm.
  *
  * @param friends the observed evidence (its `friends` field is the friendship relation)
  * @param hidden  the candidate assignment of `smokes` and `cancer`
  * @param weights the weight vector the summed features are dotted with
  */
def mln(friends: Observed, hidden: Hidden, weights: Vector) = {

  // Observed is a case class, not a function: unwrap the relation before applying it.
  val isFriend = friends.friends
  val smokes = hidden.smokes
  val cancer = hidden.cancer

  // Feature 1, one term per person: smoking implies cancer.
  def f1 = sum (persons) { p => feat(1, indicator(smokes(p) -> cancer(p))) }

  // Feature 2, one term per ordered pair: friendship implies equal smoking status.
  // Wrapped in feat(2, indicator(...)) like f1 so that f1 + f2 adds two feature vectors.
  def f2 = sum (cartesian(persons, persons)) { p =>
    feat(2, indicator(isFriend(p._1, p._2) -> (smokes(p._1) <-> smokes(p._2)))) }

  (f1 + f2) dot weights
}

// observation: friendship holds only for the ordered pair ('Anna, 'Bob).
// Named `fr` because that is the name the predictions refer to; the explicit
// function type is required for the compiler to type the pattern-matching lambda.
val fr: (Symbol, Symbol) => Boolean = {
  case ('Anna, 'Bob) => true
  case _ => false
}

// Weight vector with two entries: 2.0 and 0.
val weights = Vector(2.0, 0)

// Search space: every pairing of a candidate `smokes` function with a candidate
// `cancer` function, each drawn from funs(persons, bools).
val hidden = for {
  smokes <- funs(persons, bools)
  cancer <- funs(persons, bools)
} yield Hidden(smokes, cancer)

// Highest-scoring assignment over the whole hidden search space.
// argmax is called with explicit parentheses: the infix form `argmax hidden {...}`
// would parse as the method call `argmax.hidden(...)`. The observation function is
// wrapped in Observed because that is the type mln's first parameter declares.
val prediction1 = argmax(hidden) { y => mln(Observed(fr), y, weights) }

// Same search, restricted to assignments where Anna both smokes and has cancer.
val prediction2 = argmax(hidden filter { h => h.smokes('Anna) && h.cancer('Anna) }) { y => mln(Observed(fr), y, weights) }

Compilable equivalent code in Racket

;; Domain of persons the model ranges over.
(define persons (set 'Anna 'Bob))

;; Scores one joint assignment: sums two groups of features and takes the dot
;; product with `weights`. `sum`, `feature`, `iff`, `dot` and `vector+` are
;; defined elsewhere.
;;   smokes, cancer : person -> boolean
;;   friends        : person person -> boolean
;;   weights        : the weights dotted with the summed features
(define (mln smokes cancer friends weights)
  ;; Feature 1, one term per person: smoking implies cancer.
  ;; The formals must be (p): a bare `p` would bind the whole argument *list*,
  ;; so (smokes p) would receive a list instead of a person.
  (define f1 (sum persons (λ (p) (feature 1 (implies (smokes p) (cancer p))))))
  ;; Feature 2, one term per ordered pair: friendship implies equal smoking status.
  (define f2 (sum (cartesian-product (list persons persons)) 
                  (λ (p q) (feature 2 (implies (friends p q) 
                                               (iff (smokes p) (smokes q)))))))
  (dot weights (vector+ f1 f2)))

;; Model weights: a two-element list, 2 and 0.
(define weights (list 2 0))

;; Friendship observation: true only for the ordered pair (Anna, Bob).
(define (friends x y)
  (and (equal? x 'Anna)
       (equal? y 'Bob)))
      
;; Search space: every pairing of a candidate `smokes` function with a candidate
;; `cancer` function. `funs` and `booleans` are defined elsewhere; presumably
;; (funs persons booleans) enumerates all functions from persons to booleans
;; -- TODO confirm.
;; NOTE(review): cartesian-product is called here with two arguments but inside
;; mln with a single list of sets -- confirm the helper accepts both shapes.
(define all-smokes-cancer-functions
   (cartesian-product (funs persons booleans)
                      (funs persons booleans)))

;; Highest-scoring (smokes, cancer) pair over the whole search space.
;; NOTE(review): argmax and filter take the collection first and the function
;; second, the reverse of racket/base's argmax and filter, so these are
;; presumably project-defined helpers -- confirm.
(define prediction1 
  (argmax all-smokes-cancer-functions
          (λ (smokes cancer) (mln smokes cancer friends weights))))


;; Same search, restricted to pairs whose two functions both hold for 'Anna.
(define prediction2 
  (argmax (filter all-smokes-cancer-functions
                  (λ (f1 f2) (and (f1 'Anna) (f2 'Anna))))
           (λ (smokes cancer) (mln smokes cancer friends weights))))

CRF in Haskell

-- Data structures
-- Observed input: two parallel lists of strings, `word` and `tag`.
data Observed = Observed { word :: [String], tag :: [String]}
-- Hidden output: two label lists, `chunk` and `ner`.
data Hidden = Hidden { chunk :: [String], ner :: [String] } 

-- Search space of hidden variable for a given observation
-- Pairs every chunk sequence over ["VP","NP"] with every ner sequence over
-- ["PER","ORG"], both built by `seqs` with n = number of words in x.
-- `seqs` is defined elsewhere; presumably it enumerates all length-n label
-- sequences -- TODO confirm.
hidden x = let n = length(word(x)) in [Hidden chunk ner | chunk <- seqs ["VP","NP"] n, ner <- seqs ["PER","ORG"] n ]
 
-- Model
-- Scores one (observation, labelling) pair: sums bias, emission and transition
-- features and takes the dot product with `weights`. `ft` and `dot` are defined
-- elsewhere. `tag` and `ner` are bound by the patterns but not used here.
crf (Observed word tag, Hidden chunk ner, weights) = 
    let n =        length(word)
        -- Haskell ranges are inclusive, so valid list positions are 0 .. n - 1;
        -- [0 .. n] would make (!!) index one past the end.
        bias =     sum [ ft(1,[chunk !! i],1.0) | i <- [0 .. n - 1] ]
        emission = sum [ ft(2,[chunk !! i, word !! i], 1.0) | i <- [0 .. n - 1] ]
        -- Transitions read position i + 1, so the last valid i is n - 2.
        trans =    sum [ ft(3,[chunk !! i, chunk !! (i + 1)], 1.0) | i <- [0 .. n - 2] ]
    in dot(bias + emission + trans,weights)

-- Predictor 
-- Picks, from the search space `hidden x`, the labelling y with the highest crf score under w.
predict w x = argmax(\y -> crf(x,y,w)) (hidden x)

-- Loss, this is generic and can be re-used!
-- For each (x,g) pair in trainset: best model score over `space x` minus the
-- model score of g; the differences are summed.
-- NOTE(review): `max` is applied to a function and a list, which is not the
-- Prelude's binary max -- presumably a project-defined "maximum over a space"; confirm.
perceptronLoss trainset model weights space = 
     sum [(max (\y -> model(x,y,weights)) (space x)) - model(x,g,weights) | (x,g) <- trainset] 

-- training function, takes a training set and returns a predictor
-- Chooses the weights minimising perceptronLoss of crf over `vectors`, then
-- predicts with them. `argmin` and `vectors` are defined elsewhere.
train trainset = \x -> predict (argmin (\w -> perceptronLoss trainset crf w hidden) (vectors)) x

CRF in native Scala

/** Observed input: parallel sequences `words` and `tags`. */
case class Observed(
  words: Seq[String],
  tags: Seq[String]
)

/** Hidden output: one chunk label sequence. */
case class Hidden(chunks: Seq[String])

/** Search space for an observation: `allSeqs` over `chunkLabels`, with length
  * equal to the number of words. Both helpers are defined elsewhere. */
def space(x: Observed) = {
  val sentenceLength = x.words.length
  allSeqs(chunkLabels, sentenceLength)
}

/** Scores one (observation, labelling) pair: sums per-position and
  * adjacent-pair features and takes the dot product with `weights`.
  * `sum`, `feat` and `dot` are defined elsewhere.
  *
  * Takes exactly three parameters, matching the `(X, Y, Vector) => Double`
  * model shape that `loss` expects and the three-argument call in `predict`.
  *
  * @param x       the observed sentence
  * @param y       the candidate chunk labelling
  * @param weights the weight vector the summed features are dotted with
  */
def crf(x: Observed, y: Hidden, weights: Vector) = {
  val n = x.words.size
  // One feature per position; the case-class field is `chunks`.
  val f1 = sum { for (i <- 0 until n) yield feat(y.chunks(i)) }
  // One feature per adjacent pair; `until n - 1` keeps i + 1 in range.
  val f2 = sum { for (i <- 0 until n - 1) yield feat(y.chunks(i), y.chunks(i + 1)) }
  (f1 + f2) dot weights
}
 
/** Generic perceptron-style loss: for each (x, g) pair, the best model score
  * over `space(x)` minus the model score of g, summed over `data`.
  * `sum` and `max` are defined elsewhere.
  *
  * @param data    training pairs of input and gold output
  * @param model   scoring function over (input, output, weights)
  * @param weights the weights under which the model is scored
  * @param space   the candidate outputs to search for a given input
  */
def loss[X,Y](data: Seq[(X,Y)], model: (X,Y,Vector) => Double, weights: Vector, space: X => Seq[Y]) = {
  // Search the caller-supplied `space`, so the loss stays generic in X and Y.
  sum { for ((x, g) <- data) yield max(space(x)) { y => model(x, y, weights) } - model(x, g, weights) }
}

/** Returns the weights that minimise `loss` of `crf` over `vectors`.
  * The data is typed with the concrete Observed/Hidden pair because `crf`
  * is fixed to those types; `argmin` and `vectors` are defined elsewhere. */
def train(data: Seq[(Observed, Hidden)]) = argmin(vectors) { w => loss(data, crf, w, space) }

/** Picks, from `space(x)`, the labelling with the highest crf score under `w`.
  * `w` is typed as Vector, the weight type used by `crf` and `loss`. */
def predict(x: Observed, w: Vector) = argmax(space(x)) { y => crf(x, y, w) }

/** Trains on `data` and returns a function that predicts a labelling for an
  * observation. A typed lambda parameter must be parenthesised. */
def predictor(data: Seq[(Observed, Hidden)]) = (x: Observed) => predict(x, train(data))

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Language Spec

Keywords

Constants

Core Constants

Derived Constants

Macros

Data Types

Example Program

MLN Example

Ideal Infix version

Another MLN Example (annotation-heavy spec)

Chunking Example

MLN in Haskell

Equivalent code in Scala

Compilable equivalent code in Racket

CRF in Haskell

CRF in native Scala

Clone this wiki locally