-
Notifications
You must be signed in to change notification settings - Fork 0
/
knn_MAB.py
127 lines (113 loc) · 4.49 KB
/
knn_MAB.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import numpy as np
import pandas as pandas
import os
os.chdir('C:/Kaige_Research/Graph Learning/graph_learning_code/')
from utils import *
from synthetic_data import *
from primal_dual_gl import Primal_dual_gl
from sklearn.metrics.pairwise import rbf_kernel, euclidean_distances
class KNN_MAB():
    """UCB-style linear bandit that pools per-user estimates over a user graph.

    Every served user keeps a regularised least-squares estimate of its
    feature vector (``cov_matrix`` starts as the identity, ``bias`` as zeros).
    Items are scored with a graph-weighted average of those estimates
    (``learned_cluster_features``); the weights are the user's row of ``adj``.
    ``adj`` starts as the identity and is refreshed every ``jump_step`` rounds
    by ``learn_knn_graph``, a helper defined outside this class (presumably a
    k-nearest-neighbour graph learner from ``utils`` -- confirm there).
    """

    def __init__(self, user_num, item_num, dimension, item_pool_size, alpha,
                 K=10, jump_step=10, mode=1, true_user_features=None,
                 true_graph=None):
        """Store the configuration and reset all learning state.

        Args:
            user_num: Number of users; sizes ``adj`` and the feature tables.
            item_num: Number of items (stored, not used inside this class).
            dimension: Length of the item / user feature vectors.
            item_pool_size: Stored, not used inside this class.
            alpha: Multiplier of the exploration bonus in the item score.
            K: Passed as ``k`` to ``learn_knn_graph``.
            jump_step: The graph is re-learned when ``time % jump_step == 0``.
            mode: ``1`` learns the graph from the observed noisy signal; any
                other value uses ``denoised_signal`` once it is available.
            true_user_features: Optional reference used only to log the
                estimation error each round.
            true_graph: Optional reference used only to log the graph error
                each round.
        """
        # Configuration.
        self.user_num = user_num
        self.item_num = item_num
        self.dimension = dimension
        self.alpha = alpha
        self.item_pool_size = item_pool_size
        self.K = K
        self.mode = mode
        self.jump_step = jump_step
        self.true_user_features = true_user_features
        self.true_graph = true_graph

        # Data handed over by run().
        self.item_features = None
        self.noisy_signal = None
        self.true_signal = None
        self.iteration = None

        # Interaction history.
        self.served_user = []
        self.picked_items_per_user = {}
        self.payoffs_per_user = {}
        self.picked_items = []
        # Rows of noisy_signal for the items picked so far.  Initialised here
        # so the attribute exists before the first pick (it used to be
        # created only inside pick_item_and_payoff).  The spelling is kept
        # because external code may read this attribute.
        self.avaiable_noisy_signal = None
        self.denoised_signal = None

        # Per-user least-squares state, keyed by user id.
        self.cov_matrix = {}
        self.bias = {}
        self.learned_user_features = np.zeros((self.user_num, self.dimension))
        self.learned_cluster_features = np.zeros((self.user_num, self.dimension))

        # User graph; identity means every user only trusts itself.
        self.adj = np.identity(self.user_num)
        self.lap = None

        # Logged metrics.
        self.cum_regret = [0]
        self.learning_error = []
        self.graph_error = []

    def pick_item_and_payoff(self, user, item_pool, time):
        """Choose the best-scoring item of ``item_pool`` and observe its payoff.

        The score of an item with features ``x`` is
        ``x . theta_cluster + alpha * sqrt(x^T A^-1 x * log(time + 1))`` where
        ``A`` is the user's covariance matrix.  At ``time == 0`` the bonus is
        zero because ``log(1) == 0``.

        Args:
            user: Id of the user being served; must already have entries in
                ``cov_matrix``, ``payoffs_per_user`` and
                ``picked_items_per_user``.
            item_pool: Indexable collection of candidate item ids.
            time: Current round index.

        Returns:
            ``(picked_item, payoff)`` where ``payoff`` is
            ``noisy_signal[picked_item, user]``.
        """
        # Index the candidate features once instead of on every use.
        pool_features = self.item_features[item_pool]
        mean = np.dot(pool_features, self.learned_cluster_features[user])

        inv_cov = np.linalg.inv(self.cov_matrix[user])
        # Row-wise quadratic form x^T A^-1 x for every candidate.
        quad_form = np.sum(np.dot(pool_features, inv_cov) * pool_features, axis=1)
        bonus = np.sqrt(quad_form * np.log(time + 1))

        scores = mean + self.alpha * bonus
        picked_item = item_pool[np.argmax(scores)]
        payoff = self.noisy_signal[picked_item, user]

        self.payoffs_per_user[user].append(payoff)
        self.picked_items_per_user[user].append(picked_item)
        # picked_items stays an ordered list of distinct items because it is
        # used directly as a row index below.
        if picked_item not in self.picked_items:
            self.picked_items.append(picked_item)
        self.avaiable_noisy_signal = self.noisy_signal[self.picked_items]
        return picked_item, payoff

    def knn_graph(self, time):
        """Re-learn ``adj`` / ``lap`` when ``time`` is a multiple of ``jump_step``.

        The graph is learned from the observed noisy signal when ``mode == 1``
        or no denoised signal exists yet, otherwise from ``denoised_signal``.
        If no signal is available at all (nothing has been picked yet) the
        current graph is left unchanged.
        """
        if time % self.jump_step != 0:
            return

        if self.mode == 1 or self.denoised_signal is None:
            signal = self.avaiable_noisy_signal
        else:
            signal = self.denoised_signal
        if signal is None:
            # Nothing observed yet: keep the existing graph.
            return

        print('Update Graph')
        self.adj, self.lap = learn_knn_graph(signal, self.user_num, k=self.K)

    def knn_signal(self):
        """Refresh ``denoised_signal`` from the current graph and observations.

        Delegates to ``learn_knn_signal`` (defined outside this class).  Does
        nothing while no item has been picked.
        """
        if self.avaiable_noisy_signal is None:
            return
        self.denoised_signal = learn_knn_signal(
            self.adj,
            self.avaiable_noisy_signal,
            len(self.avaiable_noisy_signal),
            self.user_num,
        )

    def update_cluster_features(self, user):
        """Set the user's cluster estimate to a graph-weighted feature average.

        The weights are the user's row of ``adj`` with the diagonal forced to
        1, so the user's own estimate always takes part in the average.
        """
        adj_copy = self.adj.copy()
        np.fill_diagonal(adj_copy, 1)
        weights = adj_copy[user]
        self.learned_cluster_features[user] = np.average(
            self.learned_user_features, axis=0, weights=weights
        )

    def update_user_features(self, user, picked_item, payoff):
        """Fold one (item, payoff) observation into the user's estimate.

        Adds the rank-one term ``x x^T`` to the covariance, ``x * payoff`` to
        the bias, and re-solves ``A theta = b`` for the user's features.
        """
        item_f = self.item_features[picked_item]
        self.cov_matrix[user] += np.outer(item_f, item_f)
        self.bias[user] += item_f * payoff
        # Solve the linear system directly rather than forming A^-1 first.
        self.learned_user_features[user] = np.linalg.solve(
            self.cov_matrix[user], self.bias[user]
        )

    def find_regret(self, user, item_pool, payoff):
        """Append the running regret after receiving ``payoff``.

        Regret is measured against the largest ``noisy_signal`` value the user
        could have obtained from ``item_pool``.
        """
        max_payoff = np.max(self.noisy_signal[item_pool][:, user])
        regret = max_payoff - payoff
        self.cum_regret.append(self.cum_regret[-1] + regret)

    def run(self, user_pool, item_pools, item_features, noisy_signal, true_signal, iteration):
        """Play ``iteration`` rounds and return the collected results.

        Args:
            user_pool: ``user_pool[i]`` is the user served in round ``i``.
            item_pools: ``item_pools[i]`` is the candidate item set of round
                ``i``.
            item_features: Array indexed by item id along its first axis.
            noisy_signal: Payoff table indexed as ``[item, user]``.
            true_signal: Stored on the instance; not used inside this class.
            iteration: Number of rounds to play.

        Returns:
            Tuple ``(cum_regret, adj, learned_user_features, learning_error,
            graph_error, denoised_signal)``.
        """
        self.iteration = iteration
        self.noisy_signal = noisy_signal
        self.true_signal = true_signal
        self.item_features = item_features

        for i in range(self.iteration):
            print('KNN MAB Time ~~~~~~~~~~~~ ', i)
            user = user_pool[i]
            item_pool = item_pools[i]

            # cov_matrix gains an entry exactly when a user is first served,
            # so a dict lookup replaces the linear scan of served_user.
            if user not in self.cov_matrix:
                self.cov_matrix[user] = np.identity(self.dimension)
                self.bias[user] = np.zeros(self.dimension)
                self.picked_items_per_user[user] = []
                self.payoffs_per_user[user] = []
                self.served_user.append(user)

            picked_item, payoff = self.pick_item_and_payoff(user, item_pool, i)
            self.knn_graph(i)
            # NOTE: the denoising step (self.knn_signal()) is intentionally
            # not run here, so denoised_signal keeps its initial value.
            # The cluster estimate is refreshed before this round's
            # observation is folded into the user's own estimate.
            self.update_cluster_features(user)
            self.update_user_features(user, picked_item, payoff)
            self.find_regret(user, item_pool, payoff)

            if self.true_user_features is not None:
                error = np.linalg.norm(self.learned_user_features - self.true_user_features)
                self.learning_error.append(error)
            if self.true_graph is not None:
                error = np.linalg.norm(self.adj - self.true_graph)
                self.graph_error.append(error)

        return (
            self.cum_regret,
            self.adj,
            self.learned_user_features,
            self.learning_error,
            self.graph_error,
            self.denoised_signal,
        )