# test_learning_to_rank.R
context("Learning to rank")
# numerical tolerance to use when checking metric values
TOLERANCE <- 1e-06
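# These tests fit small lambdarank models on a subset of the agaricus data
# and check the NDCG results reported by lgb.train() and lgb.cv()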
test_that("learning-to-rank with lgb.train() works as expected", {
    set.seed(708L)
    data(agaricus.train, package = "lightgbm")
    # just keep a few features, to produce a model with an imperfect fit
    train <- agaricus.train
    train_data <- train$data[1L:6000L, 1L:20L]
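    # rep(150L, 40L) defines 40 query groups of 150 rows each, covering all 6000 training rows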
    dtrain <- lgb.Dataset(
        train_data
        , label = train$label[1L:6000L]
        , group = rep(150L, 40L)
    )
    ndcg_at <- "1,2,3"
    eval_names <- paste0("ndcg@", strsplit(ndcg_at, ",")[[1L]])
    params <- list(
        objective = "lambdarank"
        , metric = "ndcg"
        , ndcg_at = ndcg_at
        , lambdarank_truncation_level = 3L
        , learning_rate = 0.001
    )
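    # fit a small ranking model (10 boosting rounds)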
    model <- lgb.train(
        params = params
        , data = dtrain
        , nrounds = 10L
    )
    expect_true(lgb.is.Booster(model))

    dumped_model <- jsonlite::fromJSON(
        model$dump_model()
    )
    expect_equal(dumped_model[["objective"]], "lambdarank")
    expect_equal(dumped_model[["max_feature_idx"]], ncol(train_data) - 1L)

    # check that evaluation results make sense (0.0 < nDCG < 1.0)
    eval_results <- model$eval_train()
    expect_equal(length(eval_results), length(eval_names))
    for (result in eval_results) {
        expect_true(result[["value"]] > 0.0 && result[["value"]] < 1.0)
        expect_true(result[["higher_better"]])
        expect_identical(result[["data_name"]], "training")
    }
    expect_identical(sapply(eval_results, function(x) {x$name}), eval_names)
    expect_equal(eval_results[[1L]][["value"]], 0.775)
    expect_true(abs(eval_results[[2L]][["value"]] - 0.745986) < TOLERANCE)
    expect_true(abs(eval_results[[3L]][["value"]] - 0.7351959) < TOLERANCE)
})

test_that("learning-to-rank with lgb.cv() works as expected", {
    set.seed(708L)
    data(agaricus.train, package = "lightgbm")
    # just keep a few features, to produce a model with an imperfect fit
    train <- agaricus.train
    train_data <- train$data[1L:6000L, 1L:20L]
    dtrain <- lgb.Dataset(
        train_data
        , label = train$label[1L:6000L]
        , group = rep(150L, 40L)
    )
    ndcg_at <- "1,2,3"
    eval_names <- paste0("ndcg@", strsplit(ndcg_at, ",")[[1L]])
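    # label_gain assigns gains of 0, 1, and 3 to relevance labels 0, 1, and 2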
    params <- list(
        objective = "lambdarank"
        , metric = "ndcg"
        , ndcg_at = ndcg_at
        , lambdarank_truncation_level = 3L
        , label_gain = "0,1,3"
    )
    nfold <- 4L
    nrounds <- 10L
    cv_bst <- lgb.cv(
        params = params
        , data = dtrain
        , nrounds = nrounds
        , nfold = nfold
        , min_data = 1L
        , learning_rate = 0.01
    )
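    # lgb.cv() should return one booster per fold, plus the evaluation history in record_evals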
    expect_is(cv_bst, "lgb.CVBooster")
    expect_equal(length(cv_bst$boosters), nfold)

    # "valid" should contain results for each metric
    eval_results <- cv_bst$record_evals[["valid"]]
    eval_names <- c("ndcg@1", "ndcg@2", "ndcg@3")
    expect_identical(names(eval_results), eval_names)

    # check that best score and iter make sense (0.0 < nDCG < 1.0)
    best_iter <- cv_bst$best_iter
    best_score <- cv_bst$best_score
    expect_true(best_iter > 0L && best_iter <= nrounds)
    expect_true(best_score > 0.0 && best_score < 1.0)
    expect_true(abs(best_score - 0.75) < TOLERANCE)

    # best_score should be set for the first metric
    first_metric <- eval_names[[1L]]
    expect_equal(best_score, eval_results[[first_metric]][["eval"]][[best_iter]])

    for (eval_name in eval_names) {
        results_for_this_metric <- eval_results[[eval_name]]
        # each set of metrics should have eval and eval_err
        expect_identical(names(results_for_this_metric), c("eval", "eval_err"))
        # there should be one "eval" and "eval_err" per round
        expect_equal(length(results_for_this_metric[["eval"]]), nrounds)
        expect_equal(length(results_for_this_metric[["eval_err"]]), nrounds)
        # check that evaluation results make sense (0.0 < nDCG < 1.0)
        all_evals <- unlist(results_for_this_metric[["eval"]])
        expect_true(all(all_evals > 0.0 & all_evals < 1.0))
    }
    # per-round values of each metric should match the expected values
    ndcg1_values <- c(0.675, 0.725, 0.65, 0.725, 0.75, 0.725, 0.75, 0.725, 0.75, 0.75)
    expect_true(all(abs(unlist(eval_results[["ndcg@1"]][["eval"]]) - ndcg1_values) < TOLERANCE))

    ndcg2_values <- c(
        0.6556574, 0.6669721, 0.6306574, 0.6476294, 0.6629581,
        0.6476294, 0.6629581, 0.6379581, 0.7113147, 0.6823008
    )
    expect_true(all(abs(unlist(eval_results[["ndcg@2"]][["eval"]]) - ndcg2_values) < TOLERANCE))

    ndcg3_values <- c(
        0.6484639, 0.6571238, 0.6469279, 0.6540516, 0.6481857,
        0.6481857, 0.6481857, 0.6466496, 0.7027939, 0.6629898
    )
    expect_true(all(abs(unlist(eval_results[["ndcg@3"]][["eval"]]) - ndcg3_values) < TOLERANCE))

    # check details of each booster
    for (bst in cv_bst$boosters) {
        dumped_model <- jsonlite::fromJSON(
            bst$booster$dump_model()
        )
        expect_equal(dumped_model[["objective"]], "lambdarank")
        expect_equal(dumped_model[["max_feature_idx"]], ncol(train_data) - 1L)
    }
})