Merge pull request #59 from takuti/bpr

Implement BPR Matrix Factorization recommender
takuti · Mar 11, 2022 · 3d7ed2e · 3d7ed2e
2 parents bc06033 + ad98366
commit 3d7ed2e
Show file tree

Hide file tree

Showing 7 changed files with 180 additions and 2 deletions.
diff --git a/docs/src/collaborative_filtering.md b/docs/src/collaborative_filtering.md
@@ -86,6 +86,11 @@ MF
 
 MF is attractive in terms of not only efficiency but extensibility. In the above formulation, prediction for each user-item pair can be written by a simple vector product as $r_{u,i} = \mathbf{p}_u^{\mathrm{T}} \mathbf{q}_i$, and extension of the formula is also possible. That is, we can incorporate different features (e.g., biases and temporal factors) into the model as linear combinations. For example, let $\mu$ be a global mean of all elements in $R$, and $b_u, b_i$ be respectively a user and item bias term. Here, we assume that each observation can be represented as $r_{u,i} = \mu + b_u + b_i + \mathbf{p}_u^{\mathrm{T}} \mathbf{q}_i$. This formulation is known as [biased MF](https://ieeexplore.ieee.org/document/5197422), and it is possible to capture more information than the original MF even on the same set of events $\mathcal{S}$.
 
-Additionally, options for loss functions are also abundant. To give an example, [Chen et al.](https://arxiv.org/abs/1109.2271) showed various types of features and loss functions which can be incorporated into a MF scheme. An appropriate choice of their combinations is likely to lead surprisingly better accuracy compared to the classical MF.
+Additionally, options for loss functions are also abundant. To give an example, [Chen et al.](https://arxiv.org/abs/1109.2271) showed various types of features and loss functions which can be incorporated into an MF scheme. An appropriate choice of their combinations is likely to lead to surprisingly better accuracy compared to the classical MF, and `Recommendation.jl` currently supports [Bayesian personalized ranking (BPR) loss](https://dl.acm.org/doi/10.5555/1795114.1795167) as an alternative option.
+
+```@docs
+BPRMatrixFactorization
+BPRMF
+```
 
 It should be noted that the technique has many limitations behind great success of MF-based recommendation. Since extension of MF only allows us to append new features as linear combinations, representing more complex relationships between features is not straightforward. As a consequence, modern recommenders tend to use more complex models which are higher dimensional and hard to optimize such as [tensor factorization](https://dl.acm.org/citation.cfm?id=1864727).
diff --git a/src/Recommendation.jl b/src/Recommendation.jl
@@ -29,6 +29,7 @@ include("model/user_knn.jl")
 include("model/item_knn.jl")
 include("model/svd.jl")
 include("model/matrix_factorization.jl")
+include("model/bpr_matrix_factorization.jl")
 include("model/factorization_machines.jl")
 
 include("metrics/base.jl")

diff --git a/src/model/bpr_matrix_factorization.jl b/src/model/bpr_matrix_factorization.jl
@@ -0,0 +1,101 @@
+export BPRMatrixFactorization, BPRMF
+
+"""
+    BPRMatrixFactorization(
+        data::DataAccessor,
+        n_factors::Integer
+    )
+
+Matrix factorization (MF) recommender trained with the Bayesian personalized ranking (BPR) loss. `n_factors` sets the number of latent factors ``k``.
+
+The constructor only allocates the user factors `P` (one row per user) and the item factors `Q` (one row per item), each with `n_factors` columns; the shapes are taken from `size(data.R)`.
+
+- [BPR: Bayesian Personalized Ranking from Implicit Feedback](https://dl.acm.org/doi/10.5555/1795114.1795167)
+"""
+struct BPRMatrixFactorization <: Recommender
+    data::DataAccessor
+    n_factors::Integer
+    P::AbstractMatrix  # user factors, n_users x n_factors
+    Q::AbstractMatrix  # item factors, n_items x n_factors
+
+    function BPRMatrixFactorization(data::DataAccessor, n_factors::Integer)
+        user_count, item_count = size(data.R)
+        user_factors = matrix(user_count, n_factors)
+        item_factors = matrix(item_count, n_factors)
+
+        return new(data, n_factors, user_factors, item_factors)
+    end
+end
+
+"""
+    BPRMF(
+        data::DataAccessor,
+        n_factors::Integer
+    )
+
+Shorthand name for `BPRMatrixFactorization`.
+"""
+const BPRMF = BPRMatrixFactorization
+
+# convenience constructor: use 20 latent factors when `n_factors` is omitted
+function BPRMF(data::DataAccessor)
+    return BPRMF(data, 20)
+end
+
+# the recommender is considered defined once `isfilled` accepts its user factors `P`
+function isdefined(recommender::BPRMatrixFactorization)
+    return isfilled(recommender.P)
+end
+
+# Learn the factor matrices `P` and `Q` by stochastic gradient ascent on the
+# BPR objective: the sum of ln(sigmoid(x_uij)) over (u, i, j) triples, minus an
+# L2 penalty on the parameters, where x_uij = p_u' * q_i - p_u' * q_j.
+#
+# Keywords:
+# - `reg`: strength of the L2 penalty.
+# - `learning_rate`: SGD step size.
+# - `eps`: stop early once the absolute mean log-likelihood per processed
+#   sample of an epoch falls below this threshold.
+# - `max_iter`: maximum number of epochs.
+# - `random_init`: draw the initial factors with `rand` instead of filling
+#   them with the constant 0.1.
+# - `bootstrap_sampling`: if true, each epoch draws as many random triples
+#   (with replacement) as there are non-zero entries in `R`; otherwise each
+#   epoch sweeps over every triple in order.
+function fit!(recommender::BPRMatrixFactorization;
+              reg::Float64=1e-3, learning_rate::Float64=1e-3,
+              eps::Float64=1e-3, max_iter::Int=100,
+              random_init::Bool=false,
+              bootstrap_sampling::Bool=true)
+    if random_init
+        P = rand(Float64, size(recommender.P))
+        Q = rand(Float64, size(recommender.Q))
+    else
+        # initialize with small constants
+        P = fill(0.1, size(recommender.P))
+        Q = fill(0.1, size(recommender.Q))
+    end
+
+    samples = get_pairwise_preference_triples(recommender.data.R)
+
+    # the number of SGD steps per epoch does not change between epochs
+    batch_size = if bootstrap_sampling
+        # optimize by SGD with bootstrap sampling; each step relies on
+        # a randomly drawn user-item-item triple, assuming `u` prefers `i` over `j`
+        # rather than sequentially iterating all possible samples.
+        # the total num of iterations linearly depends on the num of positive (nnz) samples
+        count(!iszero, recommender.data.R)
+    else
+        length(samples)
+    end
+
+    # without any (u, i, j) triple there is nothing to learn from (and nothing
+    # to draw from in bootstrap mode), so the initial factors are kept as they are
+    n_epochs = isempty(samples) ? 0 : max_iter
+
+    for _ in 1:n_epochs
+        loss = 0.0
+
+        for idx in 1:batch_size
+            u, i, j = if bootstrap_sampling
+                rand(samples)  # random draw
+            else
+                samples[idx]
+            end
+
+            # copies on purpose: all three updates below must use the old values
+            uv, iv, jv = P[u, :], Q[i, :], Q[j, :]
+
+            x_uij = dot(uv, iv) - dot(uv, jv)
+
+            sigmoid = 1 / (1 + exp(-x_uij))
+            loss += log(sigmoid)
+
+            # ascent step on ln(sigmoid(x_uij)) - reg * ||theta||^2;
+            # the penalty is subtracted so that it shrinks the factors
+            grad_scale = 1 - sigmoid
+            P[u, :] = uv .+ learning_rate * (grad_scale * (iv .- jv) .- reg * uv)
+            Q[i, :] = iv .+ learning_rate * (grad_scale * uv .- reg * iv)
+            Q[j, :] = jv .+ learning_rate * (-grad_scale * uv .- reg * jv)
+        end
+
+        # average over the samples actually processed in this epoch
+        if abs(loss / batch_size) < eps; break; end
+    end
+
+    recommender.P[:] = P[:]
+    recommender.Q[:] = Q[:]
+end
+
+# Score item `i` for user `u` as the inner product of row `u` of `P` and
+# row `i` of `Q`; `validate` is invoked on the recommender first.
+function predict(recommender::BPRMatrixFactorization, u::Integer, i::Integer)
+    validate(recommender)
+    user_factors = recommender.P[u, :]
+    item_factors = recommender.Q[i, :]
+    return dot(user_factors, item_factors)
+end
diff --git a/src/utils.jl b/src/utils.jl
@@ -1,4 +1,4 @@
-export matrix, vector, isfilled, onehot, binarize_multi_label
+export matrix, vector, isfilled, get_pairwise_preference_triples, onehot, binarize_multi_label
 
 function matrix(m::Integer, n::Integer)
     Array{Union{Missing, AbstractFloat}}(missing, m, n)
@@ -12,6 +12,20 @@ function isfilled(a::AbstractArray)
     findfirst(v -> isa(v, Unknown), a) == nothing
 end
 
+"""
+    get_pairwise_preference_triples(R::AbstractMatrix) -> Vector{Tuple{Int, Int, Int}}
+
+Return user-item-item triples corresponding to a user-item matrix `R`
+(i.e., ``(u, i, j) \\in D_s`` in [BPR: Bayesian Personalized Ranking from Implicit Feedback](https://dl.acm.org/doi/10.5555/1795114.1795167)).
+In the pairwise item ranking context, each triple represents that user ``u`` prefers item ``i`` over ``j``.
+
+For every row `u` of `R`, each non-zero column `i` is paired with each zero column `j`.
+Rows that have no non-zero entry or no zero entry contribute nothing, and an empty
+(but still typed) vector is returned when no row yields a triple.
+"""
+function get_pairwise_preference_triples(R::AbstractMatrix)
+    triples = Tuple{Int, Int, Int}[]
+    for (u, row) in enumerate(eachrow(R))
+        preferred = findall(!iszero, row)
+        others = findall(iszero, row)
+        # within a user, the preferred item `i` varies fastest
+        for j in others, i in preferred
+            push!(triples, (u, i, j))
+        end
+    end
+    return triples
+end
+
 """
     onehot(value, value_set::AbstractVector) -> Vector{Float64}
 

diff --git a/test/model/test_bpr_matrix_factorization.jl b/test/model/test_bpr_matrix_factorization.jl
@@ -0,0 +1,45 @@
+function run(recommender::Type{T}, v) where {T<:Recommender}
+    ratings = [v 3 v 1 2 1 v 4
+               1 2 v v 3 2 v 3
+               v 2 3 3 v 5 v 1]
+    accessor = DataAccessor(isa(v, Unknown) ? ratings : sparse(ratings))
+
+    model = recommender(accessor, 2)
+    fit!(model, learning_rate=15e-4, max_iter=100, bootstrap_sampling=false)
+
+    # user 1's top-4 item set is expected to match the CF/SVD-based recommenders
+    candidates = collect(1:8)
+    top4 = recommend(model, 1, 4, candidates)
+    recommended_items = Set([first(entry) for entry in top4])
+    @test recommended_items == Set([2, 5, 6, 8])
+end
+
+# run the shared scenario through the `BPRMF` alias, once per placeholder value
+function test_bprmf()
+    println("-- Testing BPRMF-based (aliased) recommender")
+    for placeholder in (nothing, 0)
+        run(BPRMF, placeholder)
+    end
+end
+
+# run the shared scenario through the full type name, once per placeholder value
+function test_bpr_matrix_factorization()
+    println("-- Testing BPR Matrix Factorization-based recommender")
+    for placeholder in (nothing, 0)
+        run(BPRMatrixFactorization, placeholder)
+    end
+end
+
+function test_bprmf_with_random_init(v)
+    ratings = [v 3 v 1 2 1 v 4
+               1 2 v v 3 2 v 3
+               v 2 3 3 v 5 v 1]
+    accessor = DataAccessor(isa(v, Unknown) ? ratings : sparse(ratings))
+
+    model = BPRMF(accessor, 2)
+    fit!(model, random_init=true)
+
+    # the factors start from random values, so only the number of
+    # recommendations is checked, not which items come back
+    candidates = collect(1:8)
+    top4 = recommend(model, 1, 4, candidates)
+    @test size(top4, 1) == 4  # top-4 recos
+end
+
+# constant-initialized, non-bootstrap runs via the alias and the full type name
+test_bprmf()
+test_bpr_matrix_factorization()
+
+println("-- Testing BPR MF-based recommender with randomly initialized params")
+# once with `nothing` and once with `0` as the placeholder for unrated entries
+test_bprmf_with_random_init(nothing)
+test_bprmf_with_random_init(0)
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -45,6 +45,7 @@ end
         include("model/test_item_knn.jl")
         include("model/test_svd.jl")
         include("model/test_matrix_factorization.jl")
+        include("model/test_bpr_matrix_factorization.jl")
         include("model/test_factorization_machines.jl")
     end
 end

diff --git a/test/test_utils.jl b/test/test_utils.jl
@@ -47,7 +47,18 @@ function test_binarize_multi_label()
     @test_throws ErrorException binarize_multi_label([1, 2, 3, 4], [1, 1, 2, 3, 4])
 end
 
+function test_uij_triples()
+    println("-- Testing user-item-item triples generator")
+    # row 1 has non-zero entries at columns 1 and 3; row 2 at columns 2, 3 and 4
+    ratings = [1 0 3 0
+               0 2 3 4]
+    expected = [
+        (1, 1, 2), (1, 1, 4), (1, 3, 2), (1, 3, 4),
+        (2, 2, 1), (2, 3, 1), (2, 4, 1)
+    ]
+    actual = get_pairwise_preference_triples(ratings)
+    # compare order-insensitively
+    @test sort(actual) == sort(expected)
+end
+
 test_onehot_value()
 test_onehot_vector()
 test_onehot_matrix()
 test_binarize_multi_label()
+test_uij_triples()