-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
69 lines (47 loc) · 1.57 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import torch
import torch.nn as nn
def pad_collate_reddit(batch):
    """Collate a batch of ``(target, post)`` samples into padded tensors.

    Each element of ``batch`` is indexed positionally: ``item[0]`` is the
    target and ``item[1]`` is the post sequence (a tensor; presumably a
    token-id or embedding sequence -- confirm against the dataset class).

    Args:
        batch: Sequence of samples as described above.

    Returns:
        A list ``[target, post, lens]`` where ``target`` is a tensor built
        from all targets, ``post`` is the batch-first stack of the post
        sequences right-padded with 0 to the longest one, and ``lens`` is a
        tensor holding the original (unpadded) length of each sequence.
    """
    labels = []
    sequences = []
    for sample in batch:
        labels.append(sample[0])
        sequences.append(sample[1])

    # Record the true lengths before padding hides them.
    seq_lengths = list(map(len, sequences))

    padded = nn.utils.rnn.pad_sequence(
        sequences,
        batch_first=True,
        padding_value=0,
    )
    label_tensor = torch.tensor(labels)
    length_tensor = torch.tensor(seq_lengths)
    return [label_tensor, padded, length_tensor]
def class_FScore(op, t, expt_type):
    """Compute one F-score per class, restricted to samples of that class.

    For every class index ``c`` in ``range(expt_type)`` only the positions
    where ``t == c`` are considered. Within that subset:

    * a hit is a position where ``op`` equals ``t``;
    * a "false negative" is a position where ``t`` is greater than ``op``;
    * a "false positive" is a position where ``t`` is less than ``op``.

    Precision, recall and their harmonic mean are then formed from these
    counts, with ``1e-8`` added to every denominator so empty classes yield
    0 instead of a division by zero.

    Args:
        op: Array-like supporting boolean-mask indexing and elementwise
            comparison (e.g. NumPy array or torch tensor); presumably the
            predicted labels.
        t: Array-like of the same kind and length; presumably the target
            labels.
        expt_type: Number of classes to iterate over.

    Returns:
        A list of length ``expt_type`` with the F-score of each class.
    """
    eps = 1e-8
    per_class_scores = []
    for label in range(expt_type):
        in_class = t == label
        preds = op[in_class]
        truth = t[in_class]

        hits = (preds == truth).sum()
        under = (truth > preds).sum()   # prediction below the target
        over = (truth < preds).sum()    # prediction above the target

        precision = hits / (hits + over + eps)
        recall = hits / (hits + under + eps)
        f_score = 2 * precision * recall / (precision + recall + eps)
        per_class_scores.append(f_score)
    return per_class_scores
def _safe_ratio(num, den):
    """Return ``num / den``, or a zero of matching numeric kind if ``den`` is 0."""
    # ``num * 0.0`` (rather than a literal 0.0) keeps the result the same kind
    # of object a real division would have produced (NumPy scalar, 0-d tensor).
    return num / den if den != 0 else num * 0.0


def gr_metrics(op, t):
    """Compute precision, recall, F-score and an ordinal-error rate.

    Counts are taken over all positions:

    * TP: ``op == t``;
    * FN: ``t > op`` (prediction below the target);
    * FP: ``t < op`` (prediction above the target).

    The ordinal error is the fraction of positions where ``t - op > 1``,
    i.e. the prediction is more than one level below the target.

    Any ratio whose denominator is zero (no predictions counted, or empty
    input) is reported as 0 instead of NaN / a division error. Results for
    non-zero denominators are unchanged.

    Args:
        op: Array-like supporting elementwise comparison, ``.sum()`` and
            ``.shape`` (e.g. NumPy array or torch tensor); presumably the
            predicted labels.
        t: Array-like of the same kind and length; presumably the target
            labels.

    Returns:
        Tuple ``(GP, GR, FS, OE)``: precision, recall, F-score and
        ordinal-error rate.
    """
    TP = (op == t).sum()
    FN = (t > op).sum()
    FP = (t < op).sum()

    GP = _safe_ratio(TP, TP + FP)
    GR = _safe_ratio(TP, TP + FN)
    # With no true positives both GP and GR are 0; define the F-score as 0.
    FS = _safe_ratio(2 * GP * GR, GP + GR)

    OE = _safe_ratio((t - op > 1).sum(), op.shape[0])
    return GP, GR, FS, OE
def splits(df, dist_values, num_classes=5):
    """Split ``df`` into a remainder set and a per-class-sized test set.

    The frame is shuffled, then ordered by its ``'label'`` column, so the
    rows held out for each class are a random pick. For every class
    ``c`` in ``range(num_classes)`` the first ``dist_values[c]`` rows with
    ``label == c`` go to the test set; everything else (including rows
    whose label is outside that range) stays in the remainder.

    The caller's frame is not modified: all work is done on the shuffled
    copy returned by ``DataFrame.sample``.

    Args:
        df: pandas DataFrame with a ``'label'`` column.
        dist_values: Indexable giving, for each class index, how many rows
            of that class to hold out.
        num_classes: Number of class indices to draw from. Defaults to 5,
            the value that used to be hard-coded.

    Returns:
        Tuple ``(df, df_test)``: the remaining rows and the held-out rows,
        both ordered by label and carrying a fresh 0..n-1 index.
    """
    df = df.sample(frac=1).reset_index(drop=True)
    df = df.sort_values(by='label').reset_index(drop=True)

    # After the reset above the index is 0..n-1, so index labels double as
    # row positions and can be used with both ``iloc`` and ``drop``.
    test_rows = []
    for label in range(num_classes):
        picked = df.index[df['label'] == label][:dist_values[label]]
        test_rows.extend(picked)

    # Positional selection replaces the DataFrame.append chain, which no
    # longer exists in pandas >= 2.0.
    df_test = df.iloc[test_rows].reset_index(drop=True)
    df = df.drop(index=test_rows).reset_index(drop=True)
    return df, df_test
def make_31(five_class):
    """Shift every non-zero label down by one, leaving 0 unchanged.

    The effect is that labels 0 and 1 both map to 0 and every higher label
    ``k`` maps to ``k - 1`` (presumably collapsing a five-class scheme into
    a four-class one -- confirm against the caller).

    Args:
        five_class: A scalar label.

    Returns:
        The remapped label.
    """
    return five_class - 1 if five_class != 0 else five_class