Merge pull request #60 from takuti/notebook

Prepare for cross validation-based benchmarking

takuti authored Apr 3, 2022
2 parents 3d7ed2e + 79a757a commit 6082408
Showing 27 changed files with 246 additions and 174 deletions.
2 changes: 1 addition & 1 deletion docs/src/data.md

@@ -22,7 +22,7 @@ using Recommendation

data = load_movielens_100k()
recall = cross_validation(
-    1,           # N-fold
+    3,           # N-fold
    Recall,      # Metric
    5,           # Top-k
    MostPopular, # Recommender
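The hunk above is truncated by the diff view; for reference, the complete call would look roughly like this (a sketch, assuming MostPopular needs no constructor arguments beyond the data, as with this repository's other baselines):

```julia
using Recommendation

data = load_movielens_100k()
recall = cross_validation(
    3,           # N-fold
    Recall,      # Metric
    5,           # Top-k
    MostPopular, # Recommender
    data,
)
```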
1 change: 1 addition & 0 deletions docs/src/evaluation.md

@@ -8,6 +8,7 @@ Pages = ["evaluation.md"]

```@docs
cross_validation
+leave_one_out
```

## Rating metrics
8 changes: 4 additions & 4 deletions src/base_recommender.jl

@@ -28,17 +28,17 @@ function fit!(recommender::Recommender; kwargs...)
    error("fit! is not implemented for recommender type $(typeof(recommender))")
end

-function recommend(recommender::Recommender, u::Integer, k::Integer, candidates::AbstractVector{T}) where {T<:Integer}
+function recommend(recommender::Recommender, user::Integer, topk::Integer, candidates::AbstractVector{T}) where {T<:Integer}
    d = Dict{T,AbstractFloat}()
    for candidate in candidates
-        score = predict(recommender, u, candidate)
+        score = predict(recommender, user, candidate)
        if isnan(score); continue; end
        d[candidate] = score
    end
    ranked_recs = sort(collect(d), lt=((k1,v1), (k2,v2)) -> v1>v2 || ((v1==v2) && k1<k2))
-    ranked_recs[1:min(length(ranked_recs), k)]
+    ranked_recs[1:min(length(ranked_recs), topk)]
end

-function predict(recommender::Recommender, u::Integer, i::Integer)
+function predict(recommender::Recommender, user::Integer, item::Integer)
    error("predict is not implemented for recommender type $(typeof(recommender))")
end
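A minimal sketch of how the renamed recommend API reads in practice (hypothetical usage, assuming a fitted MostPopular recommender):

```julia
using Recommendation

data = load_movielens_100k()
recommender = MostPopular(data)
fit!(recommender)

# Top-5 item => score pairs for user 1, scanning every item ID as a candidate.
# Results are sorted by descending score, with ties broken by the smaller item ID.
recommend(recommender, 1, 5, collect(1:size(data.R, 2)))
```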
24 changes: 12 additions & 12 deletions src/baseline/co_occurrence.jl

@@ -4,38 +4,38 @@ export CoOccurrence
    CoOccurrence(
        data::DataAccessor,
-        i_ref::Integer
+        item_ref::Integer
    )
-Recommend items which are most frequently co-occurred with a reference item `i_ref`.
+Recommend items which most frequently co-occur with a reference item `item_ref`.
"""
struct CoOccurrence <: Recommender
    data::DataAccessor
-    i_ref::Integer
+    item_ref::Integer
    scores::AbstractVector

-    function CoOccurrence(data::DataAccessor, i_ref::Integer)
+    function CoOccurrence(data::DataAccessor, item_ref::Integer)
        n_items = size(data.R, 2)
-        new(data, i_ref, vector(n_items))
+        new(data, item_ref, vector(n_items))
    end
end

isdefined(recommender::CoOccurrence) = isfilled(recommender.scores)

function fit!(recommender::CoOccurrence)
-    # bit vector representing whether the reference item `i_ref` is rated by a user or not
-    v_ref = (!iszero).(recommender.data.R[:, recommender.i_ref])
+    # bit vector representing whether the reference item `item_ref` is rated by a user or not
+    vec_ref = (!iszero).(recommender.data.R[:, recommender.item_ref])

    # total num of ratings for the reference item
-    c = sum(v_ref)
+    c = sum(vec_ref)

-    # for each item `i`, count num of users who rated both `i` and `i_ref`
-    CC = vec(v_ref' * (!iszero).(recommender.data.R))
+    # for each item `i`, count num of users who rated both `i` and `item_ref`
+    CC = vec(vec_ref' * (!iszero).(recommender.data.R))

    recommender.scores[:] = CC / c * 100.0
end

-function predict(recommender::CoOccurrence, u::Integer, i::Integer)
+function predict(recommender::CoOccurrence, user::Integer, item::Integer)
    validate(recommender)
-    recommender.scores[i]
+    recommender.scores[item]
end
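To make the percentage scoring in fit! concrete, here is a small hypothetical walk-through; the Event(user, item, value) constructor is an assumption about this package's event type:

```julia
using Recommendation

# 3 users x 3 items; a nonzero entry means "rated". Event signature assumed.
events = [Event(1, 1, 5.0), Event(2, 1, 4.0), Event(2, 2, 3.0), Event(3, 1, 2.0)]
data = DataAccessor(events, 3, 3)

recommender = CoOccurrence(data, 1)  # item 1 is the reference item
fit!(recommender)

# All 3 users rated item 1 (c = 3); item 2 co-occurs with it once and item 3 never,
# so scores ≈ [100.0, 33.3, 0.0] and predict(recommender, 1, 2) ≈ 33.3.
```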
4 changes: 2 additions & 2 deletions src/baseline/item_mean.jl

@@ -21,7 +21,7 @@ function fit!(recommender::ItemMean)
    recommender.scores[:] = vec(mean(recommender.data.R, dims=1))
end

-function predict(recommender::ItemMean, u::Integer, i::Integer)
+function predict(recommender::ItemMean, user::Integer, item::Integer)
    validate(recommender)
-    recommender.scores[i]
+    recommender.scores[item]
end
4 changes: 2 additions & 2 deletions src/baseline/most_popular.jl

@@ -22,7 +22,7 @@ function fit!(recommender::MostPopular)
    recommender.scores[:] = vec(sum(!iszero, recommender.data.R, dims=1))
end

-function predict(recommender::MostPopular, u::Integer, i::Integer)
+function predict(recommender::MostPopular, user::Integer, item::Integer)
    validate(recommender)
-    recommender.scores[i]
+    recommender.scores[item]
end
4 changes: 2 additions & 2 deletions src/baseline/threshold_percentage.jl

@@ -28,7 +28,7 @@ function fit!(recommender::ThresholdPercentage)
    recommender.scores[:] = vec(users_rated_higher ./ users_rated * 100.0)
end

-function predict(recommender::ThresholdPercentage, u::Integer, i::Integer)
+function predict(recommender::ThresholdPercentage, user::Integer, item::Integer)
    validate(recommender)
-    recommender.scores[i]
+    recommender.scores[item]
end
4 changes: 2 additions & 2 deletions src/baseline/user_mean.jl

@@ -23,7 +23,7 @@ function fit!(recommender::UserMean)
    recommender.scores[:] = vec(sum(recommender.data.R, dims=2)) / size(recommender.data.R, 2)
end

-function predict(recommender::UserMean, u::Integer, i::Integer)
+function predict(recommender::UserMean, user::Integer, item::Integer)
    validate(recommender)
-    recommender.scores[u]
+    recommender.scores[user]
end
43 changes: 42 additions & 1 deletion src/data_accessor.jl

@@ -1,5 +1,5 @@
export DataAccessor
-export create_matrix, set_user_attribute, get_user_attribute, set_item_attribute, get_item_attribute
+export create_matrix, set_user_attribute, get_user_attribute, set_item_attribute, get_item_attribute, split_events

struct DataAccessor
    events::Array{Event,1}

@@ -62,3 +62,44 @@ end
function get_item_attribute(data::DataAccessor, item::Integer)
    get(data.item_attributes, item, [])
end
+
+function split_events(data::DataAccessor, n_folds::Integer)
+    if n_folds < 2
+        error("`n_folds` must be greater than 1 to split the samples into train and test sets.")
+    end
+
+    events = shuffle(data.events)
+    n_events = length(events)
+
+    if n_folds > n_events
+        error("`n_folds = $n_folds` must not exceed $n_events, the number of all samples.")
+    end
+
+    n_users, n_items = size(data.R)
+
+    step = convert(Integer, round(n_events / n_folds))
+
+    if n_folds == n_events
+        @info "Splitting $n_events samples for leave-one-out cross validation"
+    else
+        @info "Splitting $n_events samples for $n_folds-fold cross validation"
+    end
+
+    train_test_pairs = Array{Tuple{DataAccessor, DataAccessor},1}()
+
+    for (index, head) in enumerate(1:step:n_events)
+        tail = min(head + step - 1, n_events)
+
+        truth_events = events[head:tail]
+        truth_data = DataAccessor(truth_events, n_users, n_items)
+
+        train_events = vcat(events[1:head - 1], events[tail + 1:end])
+        train_data = DataAccessor(train_events, n_users, n_items)
+
+        push!(train_test_pairs, (train_data, truth_data))
+
+        @debug "fold#$index will test the samples in [$head, $tail]"
+    end
+
+    train_test_pairs
+end
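A minimal sketch of the new helper in isolation (MovieLens 100k is used only for illustration):

```julia
using Recommendation

data = load_movielens_100k()

# Five (train, truth) pairs; each fold keeps roughly 1/5 of the shuffled events
# for testing and trains on the remaining ~4/5.
for (train_data, truth_data) in split_events(data, 5)
    @show length(train_data.events), length(truth_data.events)
end
```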
91 changes: 39 additions & 52 deletions src/evaluation/cross_validation.jl

@@ -1,46 +1,25 @@
-export cross_validation
+export cross_validation, leave_one_out

"""
    cross_validation(
        n_folds::Integer,
        metric::Type{<:RankingMetric},
-        k::Integer,
+        topk::Integer,
        recommender_type::Type{<:Recommender},
        data::DataAccessor,
        recommender_args...
    )
Conduct `n_folds` cross validation for a combination of recommender `recommender_type` and ranking metric `metric`. A recommender is initialized with `recommender_args` and runs top-`k` recommendation.
"""
-function cross_validation(n_folds::Integer, metric::Type{<:RankingMetric}, k::Integer, recommender_type::Type{<:Recommender}, data::DataAccessor, recommender_args...)
-
-    n_users, n_items = size(data.R)
-
-    events = shuffle(data.events)
-    n_events = length(events)
-
-    step = convert(Integer, round(n_events / n_folds))
-    accum = 0.0
-
-    for head in 1:step:n_events
-        tail = min(head + step - 1, n_events)
-
-        truth_events = events[head:tail]
-        truth_data = DataAccessor(truth_events, n_users, n_items)
-
-        train_events = vcat(events[1:head - 1], events[tail + 1:end])
-        train_data = DataAccessor(train_events, n_users, n_items)
-
-        # get recommender from the specified data type
+function cross_validation(n_folds::Integer, metric::Type{<:RankingMetric}, topk::Integer, recommender_type::Type{<:Recommender}, data::DataAccessor, recommender_args...)
+    accum_accuracy = 0.0
+    for (train_data, truth_data) in split_events(data, n_folds)
        recommender = recommender_type(train_data, recommender_args...)
        fit!(recommender)

-        accuracy = evaluate(recommender, truth_data, metric(), k)
-        if isnan(accuracy); continue; end
-        accum += accuracy
+        accum_accuracy += evaluate(recommender, truth_data, metric(), topk)
    end

-    accum / n_folds
+    accum_accuracy / n_folds
end

@@ -55,32 +34,40 @@ end
Conduct `n_folds` cross validation for a combination of recommender `recommender_type` and accuracy metric `metric`. A recommender is initialized with `recommender_args`.
"""
function cross_validation(n_folds::Integer, metric::Type{<:AccuracyMetric}, recommender_type::Type{<:Recommender}, data::DataAccessor, recommender_args...)
-
-    n_users, n_items = size(data.R)
-
-    events = shuffle(data.events)
-    n_events = length(events)
-
-    step = convert(Integer, round(n_events / n_folds))
-    accum = 0.0
-
-    for head in 1:step:n_events
-        tail = min(head + step - 1, n_events)
-
-        truth_events = events[head:tail]
-        truth_data = DataAccessor(truth_events, n_users, n_items)
-
-        train_events = vcat(events[1:head - 1], events[tail + 1:end])
-        train_data = DataAccessor(train_events, n_users, n_items)
-
-        # get recommender from the specified data type
+    accum_accuracy = 0.0
+    for (train_data, truth_data) in split_events(data, n_folds)
        recommender = recommender_type(train_data, recommender_args...)
        fit!(recommender)

-        accuracy = evaluate(recommender, truth_data, metric())
-        if isnan(accuracy); continue; end
-        accum += accuracy
+        accum_accuracy += evaluate(recommender, truth_data, metric())
    end
-
-    accum / n_folds
+    accum_accuracy / n_folds
end
+
+"""
+    leave_one_out(
+        metric::Type{<:RankingMetric},
+        topk::Integer,
+        recommender_type::Type{<:Recommender},
+        data::DataAccessor,
+        recommender_args...
+    )
+Conduct leave-one-out cross validation (LOOCV) for a combination of recommender `recommender_type` and ranking metric `metric`. A recommender is initialized with `recommender_args` and runs top-`k` recommendation.
+"""
+function leave_one_out(metric::Type{<:RankingMetric}, topk::Integer, recommender_type::Type{<:Recommender}, data::DataAccessor, recommender_args...)
+    cross_validation(length(data.events), metric, topk, recommender_type, data, recommender_args...)
+end
+
+"""
+    leave_one_out(
+        metric::Type{<:AccuracyMetric},
+        recommender_type::Type{<:Recommender},
+        data::DataAccessor,
+        recommender_args...
+    )
+Conduct leave-one-out cross validation (LOOCV) for a combination of recommender `recommender_type` and accuracy metric `metric`. A recommender is initialized with `recommender_args`.
"""
+function leave_one_out(metric::Type{<:AccuracyMetric}, recommender_type::Type{<:Recommender}, data::DataAccessor, recommender_args...)
+    cross_validation(length(data.events), metric, recommender_type, data, recommender_args...)
+end
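A quick illustration of the new entry point (a sketch; since LOOCV trains one model per event, running it over the full MovieLens 100k set would be slow):

```julia
using Recommendation

data = load_movielens_100k()

# Equivalent to cross_validation(length(data.events), Recall, 5, MostPopular, data).
recall = leave_one_out(
    Recall,      # Metric
    5,           # Top-k
    MostPopular, # Recommender
    data,
)
```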
39 changes: 20 additions & 19 deletions src/evaluation/evaluate.jl

@@ -3,37 +3,38 @@ export evaluate
function evaluate(recommender::Recommender, truth_data::DataAccessor,
                  metric::AccuracyMetric)
    validate(recommender, truth_data)
-    n_users, n_items = size(truth_data.R)

-    accum = 0.0
+    nonzero_indices = findall(!iszero, truth_data.R)

-    for u in 1:n_users
-        pred = zeros(n_items)
-        for i in 1:n_items
-            pred[i] = predict(recommender, u, i)
-        end
-        accum += measure(metric, truth_data.R[u, :], pred)
+    truth = zeros(length(nonzero_indices))
+    pred = zeros(length(nonzero_indices))
+    for (j, idx) in enumerate(nonzero_indices)
+        truth[j] = truth_data.R[idx]
+        pred[j] = predict(recommender, idx[1], idx[2])
    end

-    # return average accuracy over the all target users
-    accum / n_users
+    measure(metric, truth, pred)
end

function evaluate(recommender::Recommender, truth_data::DataAccessor,
-                  metric::RankingMetric, k::Integer=0)
+                  metric::RankingMetric, topk::Integer)
    validate(recommender, truth_data)
    n_users, n_items = size(truth_data.R)

-    accum = 0.0
+    accum = Threads.Atomic{Float64}(0.0)

-    candidates = Array(1:n_items)
-    for u in 1:n_users
-        truth = [first(t) for t in sort(collect(zip(candidates, truth_data.R[u, :])), by=t->last(t), rev=true)]
-        recos = recommend(recommender, u, k, candidates)
-        pred = [first(t) for t in sort(recos, by=t->last(t), rev=true)]
-        accum += measure(metric, truth, pred, k)
+    Threads.@threads for u in 1:n_users
+        observed_items = findall(!iszero, truth_data.R[u, :])
+        if length(observed_items) == 0
+            @warn "user#$u does not have any test samples that are observed but not used for training; $metric defaults to 0.0"
+            continue
+        end
+        truth = [first(t) for t in sort(collect(zip(observed_items, truth_data.R[u, observed_items])), by=t->last(t), rev=true)]
+        candidates = findall(iszero, recommender.data.R[u, :])  # items that were unobserved as of building the model
+        pred = [first(item_score_pair) for item_score_pair in recommend(recommender, u, topk, candidates)]
+        Threads.atomic_add!(accum, measure(metric, truth, pred, topk))
    end

    # return average accuracy over all target users
-    accum / n_users
+    accum[] / n_users
end
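The threaded loop accumulates per-user scores through Threads.Atomic so additions from different threads cannot race; the same pattern in isolation:

```julia
# Start Julia with e.g. `julia --threads 4` for the loop to actually run in parallel.
accum = Threads.Atomic{Float64}(0.0)
Threads.@threads for x in 1:100
    Threads.atomic_add!(accum, Float64(x))
end
accum[]  # 5050.0
```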
8 changes: 4 additions & 4 deletions src/metrics/accuracy.jl

@@ -19,8 +19,8 @@ function measure(metric::RMSE, truth::AbstractVector, pred::AbstractVector)
    if n != length(pred)
        error("`truth` and `pred` have different sizes, which are $(n) and $(length(pred)), respectively")
    end
-    if iszero(n)
-        0
+    if n == 0
+        0.0
    else
        sqrt(sum((truth - pred).^2) / n)
    end

@@ -45,8 +45,8 @@ function measure(metric::MAE, truth::AbstractVector, pred::AbstractVector)
    if n != length(pred)
        error("`truth` and `pred` have different sizes, which are $(n) and $(length(pred)), respectively")
    end
-    if iszero(n)
-        0
+    if n == 0
+        0.0
    else
        sum(abs.(truth - pred)) / n
    end
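A small worked check of the two metrics with hand-picked values (assuming measure is exported; otherwise qualify it as Recommendation.measure):

```julia
using Recommendation

truth = [3.0, 4.0]
pred = [2.0, 6.0]

measure(RMSE(), truth, pred)  # sqrt((1 + 4) / 2) ≈ 1.58
measure(MAE(), truth, pred)   # (1 + 2) / 2 = 1.5
```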