-
Notifications
You must be signed in to change notification settings - Fork 0
/
eval.py
93 lines (81 loc) · 3.04 KB
/
eval.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import pickle
import os
import collections
import sys
sys.path.append('./pycocoevalcap')
from pycocoevalcap.bleu.bleu import Bleu
from pycocoevalcap.rouge.rouge import Rouge
from pycocoevalcap.meteor.meteor import Meteor
#from pycocoevalcap.cider.cider import Cider
class Evaluate(object):
    """Score candidate captions against reference captions.

    Runs the pycocoevalcap BLEU (1-4), METEOR and ROUGE-L scorers and
    gathers their results into a single ``{metric_name: score}`` dict.
    """

    def __init__(self):
        # Each entry pairs a scorer with the name(s) its result is stored
        # under: a list of names when the scorer returns a list of scores,
        # a single string when it returns one value.
        self.scorers = [
            (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
            (Meteor(), "METEOR"),
            (Rouge(), "ROUGE_L"),
        ]  # , (Cider(), "CIDEr")

    def convert(self, data):
        """Recursively encode every text string in *data* as UTF-8 bytes.

        Args:
            data: A text string, a mapping, an iterable container, or any
                other object.

        Returns:
            Text strings are returned UTF-8 encoded; byte strings are
            returned unchanged. Mappings become a plain ``dict`` with
            converted keys and values. Other iterables are rebuilt as
            ``type(data)`` from their converted items. Anything else is
            returned as-is.
        """
        # Imported locally so the method does not rely on the removed
        # ``collections.Mapping`` / ``collections.Iterable`` aliases.
        try:
            from collections.abc import Iterable, Mapping
        except ImportError:  # Python 2 has no ``collections.abc``
            from collections import Iterable, Mapping

        text_type = type(u'')  # ``unicode`` on Python 2, ``str`` on Python 3

        # Strings must be handled before the generic iterable branch:
        # on Python 3 they are iterable and would recurse forever.
        if isinstance(data, bytes):
            return data
        if isinstance(data, text_type):
            return data.encode('utf-8')
        if isinstance(data, Mapping):
            return {
                self.convert(key): self.convert(value)
                for key, value in data.items()
            }
        if isinstance(data, Iterable):
            return type(data)(self.convert(item) for item in data)
        return data

    def score(self, ref, hypo):
        """Run every configured scorer and collect the results.

        Args:
            ref: Reference captions, passed unchanged to each scorer's
                ``compute_score``.
            hypo: Candidate captions, passed unchanged to each scorer's
                ``compute_score``.

        Returns:
            A dict mapping metric name to the score reported for it.
        """
        final_scores = {}
        for scorer, method in self.scorers:
            # compute_score returns (overall, per_item); only the overall
            # value is kept.
            overall, _ = scorer.compute_score(ref, hypo)
            if isinstance(overall, list):
                # One value per metric name (e.g. Bleu_1 .. Bleu_4).
                final_scores.update(zip(method, overall))
            else:
                final_scores[method] = overall
        return final_scores

    def evaluate(self, get_scores=True, live=False, **kwargs):
        """Score candidates against references and print the metrics.

        Args:
            get_scores: When true, return the score dict; otherwise the
                method returns ``None`` after printing.
            live: When true, ``ref`` and ``cand`` are in-memory dicts.
                When false, they are paths to pickle files holding such
                dicts.
            **kwargs:
                ref: Dict mapping an id to its list of reference captions
                    (or the path of a pickle of one).
                cand: Dict mapping an id to one candidate caption (or the
                    path of a pickle of one).

        Returns:
            The ``{metric_name: score}`` dict if ``get_scores`` is true,
            else ``None``.

        Raises:
            KeyError: If a candidate id has no entry in the references.
        """
        if live:
            temp_ref = kwargs.pop('ref', {})
            cand = kwargs.pop('cand', {})
        else:
            reference_path = kwargs.pop('ref', '')
            candidate_path = kwargs.pop('cand', '')

            # load caption data
            # NOTE: pickle.load can execute arbitrary code; only pass
            # paths to files from a trusted source.
            with open(reference_path, 'rb') as f:
                temp_ref = pickle.load(f)
            with open(candidate_path, 'rb') as f:
                cand = pickle.load(f)

        # make dictionary: re-key both sides by a running integer so the
        # scorers see matching ids, one single-item list per candidate.
        hypo = {}
        ref = {}
        for i, (vid, caption) in enumerate(cand.items()):
            try:
                ref[i] = temp_ref[vid]
            except KeyError:
                raise KeyError(
                    'no reference captions for candidate id %r' % (vid,)
                )
            hypo[i] = [caption]

        # compute scores
        final_scores = self.score(ref, hypo)

        # print out scores
        for label, key in (
            ('Bleu_1:\t', 'Bleu_1'),
            ('Bleu_2:\t', 'Bleu_2'),
            ('Bleu_3:\t', 'Bleu_3'),
            ('Bleu_4:\t', 'Bleu_4'),
            ('METEOR:\t', 'METEOR'),
            ('ROUGE_L:', 'ROUGE_L'),
            # ('CIDEr:\t', 'CIDEr'),
        ):
            print(label, final_scores[key])

        if get_scores:
            return final_scores
if __name__ == '__main__':
    # Command-line entry point.
    #   argv[1]: text file with one candidate caption per line
    #   argv[2]: text file with one reference caption per line
    # Line i of each file is paired under the id 'generated_description<i>'.
    #
    # Example of the in-memory structures passed to Evaluate.evaluate:
    #   cands = {'generated_description1': 'how are you',
    #            'generated_description2': 'Hello how are you'}
    #   refs = {'generated_description1': ['what are you', 'where are you'],
    #           'generated_description2': ['Hello how are you',
    #                                      'Hello how is your day']}
    if len(sys.argv) < 3:
        sys.exit('usage: %s CANDIDATES_FILE REFERENCES_FILE' % sys.argv[0])

    with open(sys.argv[1]) as f:
        cands = {
            'generated_description' + str(i): line.strip()
            for i, line in enumerate(f)
        }
    with open(sys.argv[2]) as f:
        refs = {
            'generated_description' + str(i): [line.strip()]
            for i, line in enumerate(f)
        }

    # Every candidate line needs a reference line with the same index;
    # otherwise the lookup inside evaluate() would fail with a KeyError.
    if len(refs) < len(cands):
        sys.exit(
            'error: %d candidate lines but only %d reference lines'
            % (len(cands), len(refs))
        )

    evaluator = Evaluate()
    evaluator.evaluate(live=True, cand=cands, ref=refs)