-
Notifications
You must be signed in to change notification settings - Fork 3.8k
/
cross_validation.R
72 lines (64 loc) · 2.14 KB
/
cross_validation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
library(lightgbm)
# load in the agaricus dataset
data(agaricus.train, package = "lightgbm")
data(agaricus.test, package = "lightgbm")
dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label)
dtest <- lgb.Dataset.create.valid(dtrain, data = agaricus.test$data, label = agaricus.test$label)
nrounds <- 2L
param <- list(
num_leaves = 4L
, learning_rate = 1.0
, objective = "binary"
)
print("Running cross validation")
# Do cross validation, this will print result out as
# [iteration] metric_name:mean_value+std_value
# std_value is standard deviation of the metric
lgb.cv(
param
, dtrain
, nrounds
, nfold = 5L
, eval = "binary_error"
)
print("Running cross validation, disable standard deviation display")
# do cross validation, this will print result out as
# [iteration] metric_name:mean_value+std_value
# std_value is standard deviation of the metric
lgb.cv(
param
, dtrain
, nrounds
, nfold = 5L
, eval = "binary_error"
, showsd = FALSE
)
# You can also do cross validation with cutomized loss function
print("Running cross validation, with cutomsized loss function")
logregobj <- function(preds, dtrain) {
labels <- get_field(dtrain, "label")
preds <- 1.0 / (1.0 + exp(-preds))
grad <- preds - labels
hess <- preds * (1.0 - preds)
return(list(grad = grad, hess = hess))
}
# User-defined evaluation function returns a pair (metric_name, result, higher_better)
# NOTE: when you do customized loss function, the default prediction value is margin
# This may make built-in evalution metric calculate wrong results
# For example, we are doing logistic loss, the prediction is score before logistic transformation
# Keep this in mind when you use the customization, and maybe you need write customized evaluation function
evalerror <- function(preds, dtrain) {
labels <- get_field(dtrain, "label")
preds <- 1.0 / (1.0 + exp(-preds))
err <- as.numeric(sum(labels != (preds > 0.5))) / length(labels)
return(list(name = "error", value = err, higher_better = FALSE))
}
# train with customized objective
lgb.cv(
params = param
, data = dtrain
, nrounds = nrounds
, obj = logregobj
, eval = evalerror
, nfold = 5L
)