-
Notifications
You must be signed in to change notification settings - Fork 20
/
likely.js
474 lines (404 loc) · 13.9 KB
/
likely.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
/**
* Likely.js
*
* Javascript library for collaborative filtering / recommendation engine.
*/
var sylvester = require('sylvester');
// Learning parameters
var DESCENT_STEPS = 5000; // number of iterations to execute gradient descent
var ALPHA = 0.0005; // learning rate, should be small
var BETA = 0.0007; // regularization factor, should be small
var K = 5; // number of features to simulate
var MAX_ERROR = 0.0005; // threshold which, if reached, will stop descent automatically
/** Builds a complete model from the input array
*
* @param inputArray A two dimensional array of the input, where each cell is the rating given to Col by Row
* @param [optional] rowLabels A one dimensional array of string labels for each row of inputArray
* @param [optional] colLabels A one dimensional array of string labels for each column of inputArray
* @returns Model An instance of a Model object
*/
function buildModel(inputArray, rowLabels, colLabels)
{
return buildModelWithBias(inputArray, undefined, rowLabels, colLabels);
}
/** Builds a complete model from the input array with bias
*
* @param inputArray A two dimensional array of the input, where each cell is the rating given to Col by Row
* @param bias A two dimensional matrix of the bias of the inputArray
* @param [optional] rowLabels A one dimensional array of string labels for each row of inputArray
* @param [optional] colLabels A one dimensional array of string labels for each column of inputArray
* @returns Model An instance of a Model object
*/
function buildModelWithBias(inputArray, bias, rowLabels, colLabels)
{
var model = new Model($M(inputArray), rowLabels, colLabels);
model.estimated = train(sylvester.Matrix.create(inputArray), bias);
return model
}
/**
* Trains the model on the given input by composing two matrices P and Q which
* approximate the input through their product.
* @param inputMatrix A two dimensional array representing input values
* @returns Model A model entity, with estimated values based on the input
*/
function train(inputMatrix, bias)
{
N = inputMatrix.rows(); // number of rows
M = inputMatrix.cols(); // number of columns
// Generate random P and Q based on the dimensions of inputMatrix
var P_model = generateRandomMatrix(N, K);
var Q_model = generateRandomMatrix(K, M);
var i = 0
for(i = 0; i < DESCENT_STEPS; i++)
{
//console.log('------------------ Iteration --------------------');
// Calculate error
var error = calculateError(P_model.x(Q_model), inputMatrix, bias);
P_prime = P_model.elements;
Q_prime = Q_model.elements;
// For debugging
//console.log('P: ' + JSON.stringify(P_prime));
//console.log('Q: ' + JSON.stringify(Q_prime));
// Update P and Q to reduce error
for (var row = 0; row < N; row++)
{
for (var col = 0; col < M; col++)
{
for(var feature = 0; feature < K; feature++)
{
// update formulas will change values in the opposite direction of the gradient.
// P Update Formula
// p_ik = p_ik + alpha * (e_ij * q_kj - beta * p_ik)
// Reverse Gradient: alpha * e_ij * q_kj -- Note that we omit the 2* factor since it's not necessary for convergence.
// Regularization factor: alpha * beta * p_ik
var p_prev = P_prime[row][feature];
P_prime[row][feature] = P_prime[row][feature] +
ALPHA*(error.e(row+1, col+1)*Q_prime[feature][col] -
BETA * P_prime[row][feature]);
//console.log('P['+row+']['+feature+'] ('+p_prev+') <- ('+P_prime[row][feature]+')');
// Q Update Formula
// q_kj = q_kj + alpha x (e_ij x p_ik - beta x q_kj)
// Reverse Gradient: alpha * e_ij * p_ik -- Note that we omit the 2* factor since it's not necessary for convergence.
// Regularization factor: alpha * beta * q_kj
var q_prev = Q_prime[feature][col];
Q_prime[feature][col] = Q_prime[feature][col] +
ALPHA *(error.e(row+1, col+1)*P_prime[row][feature] -
BETA * Q_prime[feature][col]);
//console.log('Q['+feature+']['+col+'] ('+q_prev+') <- ('+Q_prime[feature][col]+')');
}
}
}
// if we've already reached the error threshold, no need to descend further
var totError = calculateTotalError(error);
if(totError < MAX_ERROR)
{
//console.log('Reached error threshold early, no more descent needed.');
break;
}
}
//console.log('Descent steps used: ' + i);
// produce the final estimation by multiplying P and Q
var finalModel = P_model.x(Q_model);
// if we were considering bias, we have to add it back in
if(bias)
{
// add back the overall average
finalModel = finalModel.map(function(x) { return x + bias.average; });
var finalElements = finalModel.elements;
// add back the row bias from each row
for(var i = 1; i <= finalModel.rows(); i++)
{
for(var j = 1; j <= finalModel.cols(); j++)
{
finalElements[i-1][j-1] += bias.rowBiases.e(i);
}
}
// add back the column bias from each column
for(var i = 1; i <= finalModel.rows(); i++)
{
for(var j = 1; j <= finalModel.cols(); j++)
{
finalElements[i-1][j-1] += bias.colBiases.e(j);
}
}
}
return finalModel;
}
/**
* Generates a random Matrix of size rows x columns.
* @param rows Number of rows in the random matrix
* @param columns Number of columns in the random matrix
* @returns Matrix A matrix of size rows x columns filled with random values.
*/
function generateRandomMatrix(rows, columns)
{
return sylvester.Matrix.Random(rows, columns);
}
/**
* Computes the error from model matrices P and Q against the given input.
* @param estimated A matrix of the estimated values.
* @param input A matrix of the input values
* @returns A matrix of size input.rows by input.columns where each entry is the difference between the input and estimated values.
*/
function calculateError(estimated, input, bias)
{
var adjustedInput = input.dup();
var adjustedElements = adjustedInput.elements;
// If bias adjustment is provided, adjust for it
if(bias)
{
// subtract the row and column bias from each row
for(var i = 0; i <= adjustedInput.rows()-1; i++)
{
for(var j = 0; j <= adjustedInput.cols()-1; j++)
{
if(adjustedElements[i][j] == 0) continue; // skip zeroes
adjustedElements[i][j] -= bias.average;
adjustedElements[i][j] -= bias.rowBiases.e(i+1);
adjustedElements[i][j] -= bias.colBiases.e(j+1);
}
}
}
var estimatedElements = estimated.elements;
// Error is (R - R')
// (but we ignore error on the zero entries since they are unknown)
for(var i = 0; i <= adjustedInput.rows()-1; i++)
{
for(var j = 0; j <= adjustedInput.cols()-1; j++)
{
if(adjustedElements[i][j] == 0) continue; // skip zeroes
adjustedElements[i][j] -= estimatedElements[i][j];
}
}
// Error is (R - R')
return adjustedInput;
}
/**
* Computes the total error based on a matrix of error values.
* @param estimated A matrix of the estimated values.
* @param input A matrix of the input values
* @returns float Total error, defined as the sum of the squared difference between the estimated and input values.
*/
function calculateTotalError(estimated, input)
{
return calculateTotalError(calculateError(estimated, input));
}
/**
* Computes the total error based on a matrix of error values.
* @param errorMatrix A matrix of the error values.
* @returns float Total error, defined as the sum of the squared errors.
*/
function calculateTotalError(errorMatrix)
{
var totError = 0.0;
for(var i = 1; i <= errorMatrix.rows(); i++)
{
for(var j = 1; j <= errorMatrix.cols(); j++)
{
totError += Math.pow(errorMatrix.e(i, j), 2);
}
}
return totError;
}
/**
* Computes the biases from a matrix of values.
* @param input A matrix of the input values
* @returns Bias Object containing the average, bias for rows and bias for columns.
*/
function calculateBias(input)
{
var inputMatrix = $M(input);
var average = calculateMatrixAverage(inputMatrix);
var rowAverages = calculateRowAverage(inputMatrix);
var colAverages = calculateColumnAverage(inputMatrix);
var rowBiases = new Array();
var colBiases = new Array();
// The row bias is the difference between the row average and the overall average
for(var i = 1; i <= rowAverages.dimensions().cols; i++)
{
rowBiases[i-1] = rowAverages.e(i) - average;
}
// the column bias is the difference between the column average and the overall average
for(var i = 1; i <= colAverages.dimensions().cols; i++)
{
colBiases[i-1] = colAverages.e(i) - average;
}
var biases = new Bias(average, $V(rowBiases), $V(colBiases));
return biases;
}
/**
* Bias representation object. Contains all bias elements.
*/
function Bias(average, rowBiases, colBiases) {
this.average = average; // Overall value average
this.rowBiases = rowBiases; // Bias for each row
this.colBiases = colBiases; // Bias for each column
}
/**
* Computes the overall average value from a matrix of values.
* @param input A matrix of the input values
* @returns float Average value.
*/
function calculateMatrixAverage(inputMatrix)
{
var cells = inputMatrix.rows() * inputMatrix.cols();
var sum = 0;
for(var i = 1; i <= inputMatrix.rows(); i++)
{
for(var j = 1; j <= inputMatrix.cols(); j++)
{
sum += inputMatrix.e(i, j);
}
}
return sum/cells;
}
/**
* Computes the average value for each column of a matrix of values.
* @param input A matrix of the input values
* @returns Vector Average value for each column. Vector[i] is the average for column i.
*/
function calculateColumnAverage(inputMatrix)
{
var rows = inputMatrix.rows();
var averages = new Array();
for(var i = 1; i <= inputMatrix.cols(); i++)
{
var sum = 0;
for(var j = 1; j <= inputMatrix.rows(); j++)
{
sum += inputMatrix.e(j, i);
}
averages[i-1] = sum/rows;
}
return $V(averages);
}
/**
* Computes the average value for each row of a matrix of values.
* @param input A matrix of the input values
* @returns Vector Average value for each row. Vector[i] is the average for row i.
*/
function calculateRowAverage(inputMatrix)
{
var cols = inputMatrix.cols();
var averages = new Array();
for(var i = 1; i <= inputMatrix.rows(); i++)
{
var sum = 0;
for(var j = 1; j <= inputMatrix.cols(); j++)
{
sum += inputMatrix.e(i, j);
}
averages[i-1] = sum/cols;
}
return $V(averages);
}
/**
* Model representation object. Contains both input and estimated values.
*/
function Model(inputMatrix, rowLabels, colLabels) {
this.rowLabels = rowLabels; // labels for the rows
this.colLabels = colLabels; // labels for the columns
this.input = inputMatrix; // input data
// estimated data, initialized to all zeros
this.estimated = sylvester.Matrix.Zeros(this.input.rows(),this.input.cols());
}
Model.prototype = {
/**
* Returns all items for a given row, sorted by rating.
* @param row The row to return values for
* @returns Array An array of arrays, each containing two entries. [0] is the index and [1] is the value, sorted by value.
*/
rankAllItems: function(row)
{
var rowIndex = row; // assume row is a number
// If we're using labels we have to look up the row index
if(this.rowLabels)
{
rowIndex = findInArray(this.rowLabels, row);
}
// estimates for this user
var ratingElements = this.estimated.row(rowIndex+1).elements;
// build a two dimensional array from the ratings and indexes
// [[index, rating], [index, rating]]
var outputArray = new Array();
for(var i=0; i<ratingElements.length; i++)
{
outputArray[i] = [i, ratingElements[i]];
// if we have column labels, use those
if(this.colLabels)
{
outputArray[i][0] = this.colLabels[i];
}
}
// Sort the array by index
return outputArray.sort(function(a, b) {return a[1] < b[1]})
},
/**
* Returns all items for the given row where there was no input value, sorted by rating.
* @param row The row to return values for
* @returns Array An array of arrays, each containing two entries. [0] is the index and [1] is the value, sorted by value.
*/
recommendations: function(row)
{
var recommendedItems = new Array();
var allItems = this.rankAllItems(row);
var rowIndex = row; // assume row is a number
// If we're using labels we have to look up the row index
if(this.rowLabels)
{
rowIndex = findInArray(this.rowLabels, row);
}
for(var i=0; i< allItems.length; i++)
{
// look up the value in the input
var colIndex = allItems[i][0];
// see if we're using column labels or not
if(this.colLabels)
{
colIndex = findInArray(this.colLabels, allItems[i][0]);
}
var inputRating = this.input.e(rowIndex+1, colIndex+1);
// if there was no rating its a recommendation so add it
if(inputRating == 0)
{
recommendedItems.push(allItems[i]);
}
}
return recommendedItems;
}
}
/**
* Finds the specified value in the array and returns the index. Returns -1 if not found.
* @param array The array to look for the value within
* @param value The value to look for
* @returns int Index of the value in the array. -1 if not found.
*/
function findInArray(array, value)
{
var index = -1;
for(var i=0;i<array.length;i++)
{
if(array[i] == value)
{
index = i;
break;
}
}
return index;
}
module.exports.buildModel = buildModel;
module.exports.buildModelWithBias = buildModelWithBias;
module.exports.generateRandomMatrix = generateRandomMatrix;
module.exports.calculateError = calculateError;
module.exports.calculateTotalError = calculateTotalError;
module.exports.calculateBias = calculateBias;
module.exports.calculateMatrixAverage = calculateMatrixAverage;
module.exports.calculateColumnAverage = calculateColumnAverage;
module.exports.calculateRowAverage = calculateRowAverage;
module.exports.DESCENT_STEPS = DESCENT_STEPS;
module.exports.ALPHA = ALPHA;
module.exports.BETA = BETA;
module.exports.K = K;
module.exports.MAX_ERROR = MAX_ERROR;
module.exports.Bias = Bias;
module.exports.Model = Model;