This repository has been archived by the owner on Feb 25, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
cpt.py
78 lines (67 loc) · 2.92 KB
/
cpt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
class CPT:
    """First-order letter-transition conditional probability table.

    ``self.cpt`` is a 27x27 list of lists. ``self.cpt[i][j]`` holds
    Pr(next symbol = j | previous symbol = i). Index 0 is the word-boundary
    marker (written as a backtick, chr(96)); indices 1..26 are 'a'..'z'.
    """

    def __init__(self):
        self.create_cpt()

    @staticmethod
    def _symbol_index(symbol):
        """Return the table index (0..26) of *symbol*, or None if it has none.

        Accepts the backtick boundary marker and ASCII letters; upper-case
        letters are folded to lower case. Everything else yields None so that
        callers never index the table with a negative (wrapping) or
        out-of-range value.
        """
        code = ord(symbol)
        if 65 <= code <= 90:  # 'A'..'Z' -> 'a'..'z'
            code += 32
        index = code - 96
        if 0 <= index <= 26:
            return index
        return None

    def add_cpt_count(self, word):
        """Add the transitions of *word* to the raw counts in ``self.cpt``.

        Counts boundary -> first letter, each letter -> next letter, and
        last letter -> boundary. Characters that have no table index
        (whitespace, digits, punctuation, ...) are ignored, and a word with
        no usable characters (e.g. a blank line) contributes nothing.
        """
        indices = [i for i in map(self._symbol_index, word) if i is not None]
        if not indices:
            return
        previous = 0  # every word starts from the boundary marker
        for current in indices:
            self.cpt[previous][current] += 1
            previous = current
        self.cpt[previous][0] += 1  # close the word with a boundary

    def _normalize(self):
        """Convert every row of raw counts into a probability distribution."""
        for row in self.cpt:
            total = sum(row)
            if total == 0:
                # Context never observed: keep the all-zero row instead of
                # dividing by zero. print_cpt() reports such rows.
                continue
            row[:] = [count / total for count in row]

    def create_cpt(self, path="dictionSample.txt"):
        """Build ``self.cpt`` from a word list, one word per line.

        path -- file to read; defaults to "dictionSample.txt" in the current
                working directory.
        Raises whatever ``open`` raises if the file cannot be read.
        """
        # Float cells so that normalisation is true division on Python 2 too.
        self.cpt = [[0.0] * 27 for _ in range(27)]
        #####################################
        # Read the dictionary and build the Conditional Probability Table (CPT).
        with open(path, "r") as f:
            for line in f.read().splitlines():
                self.add_cpt_count(line)
        self._normalize()

    def print_cpt(self):
        """Print the table as percentages, then flag rows not summing to 1.

        Each print call passes a single string, so the output is the same on
        Python 2 and Python 3.
        """
        # NOTE(review): the header labels are single-spaced while each data
        # column is 5 characters wide, so they do not line up -- confirm the
        # intended spacing.
        print("% ` a b c d e f "
              "g h i j k l m n "
              "o p q r s t u v w x y z")
        print("==========================================="
              "==========================================="
              "====================================================")
        for index, row in enumerate(self.cpt):
            cells = "|".join(("%.1f" % (p * 100)).rjust(4, ' ') for p in row)
            # chr(96) is the backtick, so row 0 is labelled with the marker.
            print("%s | %s" % (chr(index + 96), cells))
        for index, row in enumerate(self.cpt):
            total = sum(row)
            if abs(total - 1.0) > 0.01:
                print("[ERROR] The conditional probability of Pr(*|%s) "
                      "does not add up to 1 (actual: %f)."
                      % (chr(index + 96), total))

    def conditional_prob(self, v, given):
        """Return Pr(v | given) for two single-character symbols.

        Each symbol is the backtick boundary marker or an ASCII letter
        (case-insensitive). Raises IndexError for any other symbol.
        """
        row = self._symbol_index(given)
        col = self._symbol_index(v)
        if row is None or col is None:
            raise IndexError("symbol outside the table alphabet: %r given %r"
                             % (v, given))
        return self.cpt[row][col]
class CPT_2Order:
    """Second-order letter-transition conditional probability table.

    ``self.cpt`` has 27*27 rows of 27 cells. Row ``older * 27 + newer`` is
    the context formed by the two preceding symbols, and the column is the
    next symbol. Index 0 is the word-boundary marker (a backtick, chr(96));
    indices 1..26 are 'a'..'z'.
    """

    def __init__(self):
        self.create_cpt()

    @staticmethod
    def _symbol_index(symbol):
        """Return the table index (0..26) of *symbol*, or None if it has none.

        Accepts the backtick boundary marker and ASCII letters; upper-case
        letters are folded to lower case. Everything else yields None so that
        callers never index the table with a negative (wrapping) or
        out-of-range value.
        """
        code = ord(symbol)
        if 65 <= code <= 90:  # 'A'..'Z' -> 'a'..'z'
            code += 32
        index = code - 96
        if 0 <= index <= 26:
            return index
        return None

    def add_cpt_count(self, word):
        """Add the transitions of *word* to the raw counts in ``self.cpt``.

        Characters that have no table index (whitespace, digits,
        punctuation, ...) are ignored, and a word with no usable characters
        (e.g. a blank line) contributes nothing.
        """
        indices = [i for i in map(self._symbol_index, word) if i is not None]
        if not indices:
            return
        older = newer = 0  # a word starts with two boundary markers behind it
        for current in indices:
            self.cpt[older * 27 + newer][current] += 1
            older, newer = newer, current
        # (second-to-last, last) -> boundary
        self.cpt[older * 27 + newer][0] += 1
        # (last, boundary) -> boundary
        self.cpt[newer * 27][0] += 1

    def _normalize(self):
        """Convert every row of counts into a probability distribution."""
        for row in self.cpt:
            total = sum(row)
            row[:] = [count / total for count in row]

    def create_cpt(self, path="dictionSample.txt"):
        """Build ``self.cpt`` from a word list, one word per line.

        path -- file to read; defaults to "dictionSample.txt" in the current
                working directory.
        Raises whatever ``open`` raises if the file cannot be read.
        """
        # Every cell starts at 0.1 rather than 0, so no transition ends up
        # with probability zero and every row sum is positive.
        self.cpt = [[0.1] * 27 for _ in range(27 * 27)]
        #####################################
        # Read the dictionary and build the Conditional Probability Table (CPT).
        with open(path, "r") as f:
            for line in f.read().splitlines():
                self.add_cpt_count(line)
        self._normalize()

    def conditional_prob(self, v, given1, given2):
        """Return Pr(v | given1, given2).

        *given1* is the older of the two context symbols and *given2* the
        one immediately before *v*. Each symbol is the backtick boundary
        marker or an ASCII letter (case-insensitive). Raises IndexError for
        any other symbol.
        """
        older = self._symbol_index(given1)
        newer = self._symbol_index(given2)
        col = self._symbol_index(v)
        if older is None or newer is None or col is None:
            raise IndexError("symbol outside the table alphabet: %r given %r, %r"
                             % (v, given1, given2))
        return self.cpt[older * 27 + newer][col]