Added ROUGE Score to TextAnalysis.jl #156

Merged: 25 commits, Jun 9, 2019. Changes shown are from 11 commits.
5 changes: 5 additions & 0 deletions src/TextAnalysis.jl
@@ -50,6 +50,8 @@ module TextAnalysis
export strip_numbers, strip_non_letters, strip_indefinite_articles, strip_definite_articles, strip_articles
export strip_prepositions, strip_pronouns, strip_stopwords, strip_sparse_terms, strip_frequent_terms, strip_html_tags
export SentimentAnalyzer
export jackknife_avg, listify_ngrams, weighted_lcs, FMeasureLCS
export rouge_l_summary, rouge_l_sentence, rouge_n

include("tokenizer.jl")
include("ngramizer.jl")
@@ -75,4 +77,7 @@ module TextAnalysis
include("sentiment.jl")
include("bayes.jl")
include("deprecations.jl")
include("utils.jl")
include("rouge.jl")
include("evaluate.jl")
end
152 changes: 152 additions & 0 deletions src/rouge.jl
@@ -0,0 +1,152 @@
#= ROUGE score implementation
Lin, C.-Y. (2004).
ROUGE: A Package for Automatic Evaluation of Summaries.
Proceedings of the Workshop on Text Summarization Branches Out (WAS 2004), pp. 25-26.
Link to paper:
http://www.aclweb.org/anthology/W04-1013 =#

using TextAnalysis
using WordTokenizers
function rouge_n(references, candidate, n, averaging = true)
#= Computes the n-gram recall between a candidate summary
and a set of reference summaries.

param references : list of reference strings
type (references) : Array{String,1}

param candidate : the candidate string
type (candidate) : String

param n : length of the n-grams
type (n) : Int

ngram_cand : list of n-grams in the candidate
ngram_ref : list of n-grams in a reference
rouge_recall : list containing one ROUGE-N score for
every reference against the candidate =#

ngram_cand = listify_ngrams(ngrams(StringDocument(candidate), n))
rouge_recall = []

for ref in references
matches = 0 # number of candidate n-grams found in the reference
ngram_ref = listify_ngrams(ngrams(StringDocument(ref), n))

for ngr in ngram_cand
if ngr in ngram_ref
matches += 1
end

end

push!(rouge_recall, matches/length(ngram_ref))

end

if averaging == true
rouge_recall = jackknife_avg(rouge_recall)
end

return(rouge_recall)

end
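A minimal usage sketch for rouge_n, assuming this branch of TextAnalysis is loaded; the strings below are illustrative only:

refs = ["Brazil, Russia, India and China are growing nations",
"Brazil and India are both part of BRIC"]
cand = "Brazil, Russia, China and India are growing nations"
rouge_n(refs, cand, 1) # jackknife-averaged unigram recall
rouge_n(refs, cand, 2, false) # one bigram recall score per reference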


function rouge_l_sentence(references, candidate, beta=8, averaging = true)
#= Calculates the ROUGE-L score between the candidate
and the reference at the sentence level.

param references : list of reference strings
type (references) : Array{String,1}

param candidate : the candidate string
type (candidate) : String

param beta : user-defined parameter. Default value = 8
type (beta) : Number

rouge_l_list : list containing all the rouge scores for
every reference against the candidate
r_lcs : recall factor
p_lcs : precision factor
score : rouge-l score between the reference sentence and
the candidate sentence =#

ngram_cand = tokenize(candidate)
rouge_l_list = []

for ref in references
ngram_ref = tokenize(ref)
lcs = weighted_lcs(ngram_ref, ngram_cand, true, false, sqrt)
r_lcs = lcs/length(ngram_ref)
p_lcs = lcs/length(ngram_cand)
score = FMeasureLCS(r_lcs, p_lcs, beta)
push!(rouge_l_list,score)

end
if averaging == true
rouge_l_list = jackknife_avg(rouge_l_list)
end
return rouge_l_list

end
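A similar sketch for rouge_l_sentence; beta defaults to 8, which weights recall more heavily than precision in the F-measure:

refs = ["Brazil, Russia, India and China are growing nations"]
cand = "Brazil, Russia, China and India are growing nations"
rouge_l_sentence(refs, cand) # default beta = 8, jackknife-averaged
rouge_l_sentence(refs, cand, 8, false) # raw per-reference ROUGE-L scores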

function rouge_l_summary(references, candidate, beta, averaging=true)
#= Calculates the ROUGE-L score between the candidate
and the reference at the summary level.

param references : list of reference summary strings
type (references) : Array{String,1}

param candidate : the candidate summary string
type (candidate) : String

param beta : user-defined parameter
type (beta) : Number

rouge_l_list : list containing all the rouge scores for
every reference against the candidate

r_lcs : recall factor
p_lcs : precision factor
score : rouge-l score between a reference and the candidate
=#

rouge_l_list = []
cand_sent_list = split_sentences(candidate)

for ref in references
ref_sent_list = split_sentences(ref)
sum_value = 0

for ref_sent in ref_sent_list
l_ = []
arg1 = tokenize(ref_sent)

for cand_sent in cand_sent_list
arg2 = tokenize(cand_sent)
d = tokenize(weighted_lcs(arg1, arg2, false, true, sqrt))
append!(l_,d)
end

sum_value += length(unique(l_))

end

r_lcs = sum_value/length(tokenize(ref))
p_lcs = sum_value/length(tokenize(candidate))
score = FMeasureLCS(r_lcs, p_lcs, beta)
push!(rouge_l_list,score)

end

if averaging == true
rouge_l_list = jackknife_avg(rouge_l_list)
end

return rouge_l_list

end
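And a sketch for rouge_l_summary, which compares summaries sentence by sentence (beta has no default here):

refs = ["Brazil, Russia, India and China are growing nations. They form part of BRIC."]
cand = "Brazil, Russia, China and India are growing nations. They are members of BRIC."
rouge_l_summary(refs, cand, 8) # jackknife-averaged summary-level score
rouge_l_summary(refs, cand, 8, false) # per-reference scores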
158 changes: 158 additions & 0 deletions src/utils.jl
@@ -0,0 +1,158 @@
# JuliaText TextAnalysis.jl Utility Functions

function jackknife_avg(scores)

#= The jackknife is a resampling technique especially useful for variance and bias estimation.
Currently used for averaging ROUGE scores over multiple references.
param scores : list of scores to average
type (scores) : Array{Float64,1} =#

if length(collect(Set(scores))) == 1

#= In case the elements of the array are all equal=#
return scores[1]

else

#= store the maximum score from each of the
m different sets of m-1 scores, where m = length(scores) =#

average = []

for i in scores
# dummy : the m-1 scores that remain when the current score is left out
dummy = [j for j in scores if i != j]
push!(average, maximum(dummy))
end

return sum(average)/length(average)

end
end
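A worked example of the leave-one-out averaging this implements (the values are made up):

# For [0.2, 0.4, 0.6] the three leave-one-out maxima are 0.6, 0.6 and 0.4,
# so the result is (0.6 + 0.6 + 0.4) / 3 ≈ 0.533.
jackknife_avg([0.2, 0.4, 0.6])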

function listify_ngrams(ngram_doc)
# Flatten an n-gram => count dictionary into a list in which every
# n-gram appears as many times as its count.
flattened = []
for item in ngram_doc
for i in 1:item[2]
push!(flattened, item[1])
end
end
return flattened
end
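For context, a sketch of what the flattening produces (the sentence is made up; ordering follows the underlying Dict and is not guaranteed):

ngram_counts = ngrams(StringDocument("the cat and the dog"), 1)
listify_ngrams(ngram_counts) # e.g. ["the", "the", "cat", "and", "dog"], in no particular order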

function weighted_lcs(X, Y, weighted = true, return_string = false, f = sqrt)
#= Computes the (weighted) longest common subsequence of two
tokenized strings using dynamic programming.
param X : first string in tokenized form
type (X) : Array{SubString{String},1}
param Y : second string in tokenized form
type (Y) : Array{SubString{String},1}
param weighted : the weighted LCS is computed when true (default)
type (weighted) : Bool
param return_string : the function returns the (weighted) LCS length when false (default)
and the longest common subsequence itself when true
type (return_string) : Bool
param f : weighting function. It must have the property
that f(x+y) > f(x) + f(y) for any positive integers x and y.
type (f) : function that takes a number and returns a number
=#

m, n = length(X), length(Y)
c_table = [zeros(n+1) for i in 1:m+1]
w_table = [zeros(n+1) for i in 1:m+1]

for i in 1:(m+1)

for j in 1:(n+1)

if i == 1 || j == 1
continue

elseif X[i-1] == Y[j-1]

k = w_table[i-1][j-1]
if weighted == true
increment = (f(k+1)) - (f(k))
else
increment = 1
end
c_table[i][j] = c_table[i-1][j-1] + increment
w_table[i][j] = k + 1

else

if c_table[i-1][j] > c_table[i][j-1]
c_table[i][j] = c_table[i-1][j]
w_table[i][j] = 0 # no match at i,j
else
c_table[i][j] = c_table[i][j-1]
w_table[i][j] = 0 # no match at i,j
end

end

end

end

lcs_length = (c_table[m+1][n+1])
if return_string == false
return lcs_length
end

if weighted == true
lcs_length = c_table[m][n]^(2)
end

lcs_length = round(Int64, lcs_length)
lcs = ["" for i in 1:(lcs_length+1)]
i = m+1
j = n+1

while i>1 && j>1
if X[i-1] == Y[j-1]
lcs[lcs_length+1] = X[i-1]
i -= 1
j -= 1
lcs_length -= 1

elseif c_table[i-1][j] > c_table[i][j-1]
i -= 1
else
j -= 1
end
end

return (join(lcs, " ")) # the lcs string

end
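A small sketch of the three calling modes on made-up token vectors:

X = ["the", "cat", "sat", "on", "the", "mat"]
Y = ["the", "cat", "lay", "on", "a", "mat"]
weighted_lcs(X, Y, false, false, sqrt) # plain LCS length, 4.0 here ("the cat on mat")
weighted_lcs(X, Y, false, true, sqrt) # the common subsequence joined into a string
weighted_lcs(X, Y, true, false, sqrt) # weighted LCS score, increments governed by f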

function FMeasureLCS(RLCS, PLCS, beta=1)
#= F-measure based on the weighted LCS (WLCS)

param beta : user-defined parameter weighting recall relative to precision
type (beta) : Number

param RLCS : recall factor
type (RLCS) : Float64

param PLCS : precision factor
type (PLCS) : Float64

score : F-measure score between a candidate
and a reference
=#

# floating-point division by zero does not throw DivideError, so guard explicitly
denominator = RLCS + (beta^2)*PLCS
if denominator == 0
return 0
end
return ((1+beta^2)*RLCS*PLCS)/denominator
end
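Finally, a worked instance of the F-measure used above:

# (1 + beta^2) * R * P / (R + beta^2 * P); with R = 0.6, P = 0.5 and beta = 1
# this is the harmonic mean: 2 * 0.6 * 0.5 / 1.1 ≈ 0.545.
FMeasureLCS(0.6, 0.5) # default beta = 1
FMeasureLCS(0.6, 0.5, 8) # beta = 8, the recall-heavy setting used by the ROUGE-L functions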
19 changes: 19 additions & 0 deletions test/rouge.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
@testset "ROUGE" begin

candidate_sentence = "Brazil, Russia, China and India are growing nations"
candidate_summary = "Brazil, Russia, China and India are growing nations. They are all an important part of BRIC as well as regular part of G20 summits."

reference_sentences = ["Brazil, Russia, India and China are growing nations", "Brazil and India are two of the developing nations that are part of the BRIC"]
reference_summaries = ["Brazil, Russia, India and China are the next big political powers in the global economy. Together referred to as BRIC(S) along with South Korea.", "Brazil, Russia, India and China are together known as the BRIC(S) and have been invited to the G20 summit."]

@test rouge_l_summary(reference_summaries, candidate_summary, 8, true) == 0.42565327352779836

@test rouge_n(reference_summaries, candidate_summary, 1, true) == 0.5051282051282051
@test rouge_n(reference_summaries, candidate_summary, 2, true) == 0.1317241379310345

@test rouge_n(reference_sentences, candidate_sentence, 2, true) == 0.3492063492063492
@test rouge_n(reference_sentences, candidate_sentence, 1, true) == 0.6666666666666666

@test rouge_l_sentence(reference_sentences, candidate_sentence, 8, true) == 0.36164547980729794

end