-
Notifications
You must be signed in to change notification settings - Fork 0
/
baseline_evaluation.py
124 lines (101 loc) · 4.54 KB
/
baseline_evaluation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
from argparse import ArgumentParser
import sys
import os
import numpy as np
### read/write files ###
def read_synonyms(fn_synonyms, skip_first=False):
    """Read a synonyms file and return a list of integer sets.

    Each line of the file holds whitespace-separated integer ids that
    together form one synonym group.

    Parameters:
        fn_synonyms: path of the synonyms file to read.
        skip_first: when True, drop the first line (e.g. a header that only
            contains the number of items).

    Returns:
        list of set[int], one set per kept line.
    """
    # context manager guarantees the handle is closed even if parsing fails
    with open(fn_synonyms, "r") as file_synonyms:
        lines = file_synonyms.readlines()[1 if skip_first else 0:]
    return [set(int(token) for token in line.split()) for line in lines]
def write_file(fn_output, lines):
    """Write the given lines to a newly created file.

    Parameters:
        fn_output: destination path; mode "x" preserves the original
            exclusive-create behavior (raises FileExistsError if the file
            already exists, so results are never silently overwritten).
        lines: iterable of strings, each already newline-terminated as desired.
    """
    # the context manager flushes and closes the file, even on write errors;
    # the explicit flush()/close() pair of the original is no longer needed
    with open(fn_output, "x") as file_output:
        file_output.writelines(lines)
### precision, recall, f-scores ###
def filtered(synonyms, ground_truth):
    """Return the synonym groups that also occur in the ground truth."""
    return [group for group in synonyms if group in ground_truth]
def precision(synonyms, ground_truth):
    """Fraction of found synonym groups that are correct; 0.0 if none found."""
    if not synonyms:
        return 0.0
    hits = sum(1 for group in synonyms if group in ground_truth)
    return hits / len(synonyms)
def recall(synonyms, ground_truth):
    """Fraction of ground-truth groups that were found; 0.0 if truth is empty."""
    if not ground_truth:
        return 0.0
    hits = sum(1 for group in synonyms if group in ground_truth)
    return hits / len(ground_truth)
def pr(synonyms, ground_truth):
    """Return [precision, recall] for the given synonyms vs. the ground truth."""
    p = precision(synonyms, ground_truth)
    r = recall(synonyms, ground_truth)
    return [p, r]
def f_score(p_vals, r_vals, beta=1.0, func_mean=np.mean):
    """Compute the F-beta score from lists of precision and recall values.

    Parameters:
        p_vals: list of precision values.
        r_vals: list of recall values.
        beta: recall weight (beta > 1 favors recall, beta < 1 favors precision).
        func_mean: aggregation applied to p_vals and r_vals (default np.mean).

    Returns:
        float F-beta score; 0.0 when mean precision and mean recall are
        both zero (the unguarded 0/0 previously yielded NaN with numpy
        scalars).
    """
    beta2 = beta ** 2.0
    p = func_mean(p_vals)
    r = func_mean(r_vals)
    denominator = beta2 * p + r
    # guard against 0/0 when there are no correct predictions at all
    if denominator == 0:
        return 0.0
    return (1.0 + beta2) * (p * r / denominator)
def f1(p_vals, r_vals):
    """F1 score: precision and recall weighted equally."""
    return f_score(p_vals, r_vals)  # beta defaults to 1.0
def f05(p_vals, r_vals):
    """F0.5 score: weighs precision higher than recall."""
    return f_score(p_vals, r_vals, 0.5)
def f2(p_vals, r_vals):
    """F2 score: weighs recall higher than precision."""
    return f_score(p_vals, r_vals, 2.0)
### calculate p-r-values from list of synonyms ###
def calc_pr_vals(synonyms, ground_truth):
    """Build a precision-recall curve over top-k prefixes of the synonyms.

    For every prefix synonyms[:k] (k = 1..len(synonyms)) a
    [precision, recall] pair is computed; only the first pair seen for each
    recall value is kept, because for a fixed recall the earliest (shortest)
    prefix has the best precision.

    Parameters:
        synonyms: ranked list of found synonym groups (best first).
        ground_truth: list of correct synonym groups.

    Returns:
        Tuple ([f0.5, f1, f2], pr_vals) where pr_vals is the list of kept
        [precision, recall] pairs.

    Note: the original carried a commented-out branch (and a
    `last_precision` tracker used only by it) that deduplicated by
    precision; both were dead code and have been removed.
    """
    # number of candidate p-r-values equals the number of synonyms
    # (top-k, variable minConf) for a fixed minSup
    pr_vals = []
    last_recall = -1
    for k in range(1, len(synonyms) + 1):
        pr_val = pr(synonyms[:k], ground_truth)
        # keep only the best precision for each recall value: for one recall
        # value the first pr_val has the best precision (the last would have
        # the best recall for one precision)
        if pr_val[1] != last_recall:
            last_recall = pr_val[1]
            pr_vals.append(pr_val)
    # aggregate the curve into the different f-scores
    p_vals = [val[0] for val in pr_vals]
    r_vals = [val[1] for val in pr_vals]
    f05_score = f05(p_vals, r_vals)
    f1_score = f1(p_vals, r_vals)
    f2_score = f2(p_vals, r_vals)
    # return a tuple of two lists: f-scores list and p-r-values list
    return [f05_score, f1_score, f2_score], pr_vals
### main function ###
def main():
    """Evaluate baseline synonyms against ground truth and write results.

    Expects two file paths on the command line; computes f-scores plus a
    precision-recall curve and writes them next to the synonyms file as
    <basename>_fscores.txt and <basename>_evaluation.txt.
    """
    # parse command-line arguments
    parser = ArgumentParser()
    parser.add_argument("INPUT_SYNONYMS", type=str,
                        help="The synonyms file which was calculated by baseline.py.")
    parser.add_argument("INPUT_GROUND_TRUTH", type=str,
                        help="The ground-truth synonyms file e.g. which was generated by synonym_inject.py.")
    args = parser.parse_args()
    # validate both input paths before doing any work
    for arg_name in ("INPUT_SYNONYMS", "INPUT_GROUND_TRUTH"):
        if not os.path.exists(getattr(args, arg_name)):
            print(arg_name + " invalid.")
            sys.exit(1)
    # derive the output paths from the synonyms file location
    base = os.path.splitext(os.path.basename(args.INPUT_SYNONYMS))[0]
    out_dir = os.path.dirname(args.INPUT_SYNONYMS)
    fn_fscores = os.path.join(out_dir, base + "_fscores.txt")
    fn_evaluation = os.path.join(out_dir, base + "_evaluation.txt")
    # evaluate; only the ground-truth file carries a header line to skip
    synonyms = read_synonyms(args.INPUT_SYNONYMS, skip_first=False)
    ground_truth = read_synonyms(args.INPUT_GROUND_TRUTH, skip_first=True)
    fscores, pr_vals = calc_pr_vals(synonyms, ground_truth)
    # write one f-score per line, and tab-separated precision/recall pairs
    write_file(fn_fscores, ["%s\n" % score for score in fscores])
    write_file(fn_evaluation, ["%s\t%s\n" % (val[0], val[1]) for val in pr_vals])


if __name__ == "__main__":
    main()