# exp-iso1.R
# Simulation code corresponding to Figure 2 of the paper (Scenario 1)
library(devtools)
setwd("../npparam/")
load_all()
library(npparam)
source("../sim-npparam/synthetic.R")
source("../sim-npparam/estimators.R")
# Draw one training sample of size n with lipschitz.fn (sigma = 0.1) and fit
# each competing estimator to it.
#
# Arguments:
#   n - training sample size.
#
# Returns a named list with entries iso.est, iso.est.cv, sieve.est, krr.est,
# rf.est and gbm.est. For n > 2000 the KRR and random-forest fits are not
# run and their entries are NULL.
simulate.one <- function(n){
  # Progress line: message, then the sample size, then a newline.
  report <- function(msg) cat(msg, n, "\n")

  train <- lipschitz.fn(sample.size = n, sigma = 0.1)

  report("Fitting isotonic for sample size")
  fit.iso <- iso.lin.est(train$x, train$y)

  report("Fitting isotonic (CV) for sample size")
  fit.iso.cv <- iso.lin.est(train$x, train$y, run.optim = FALSE)

  report("Fitting sieve for sample size")
  fit.sieve <- .fit.sieve(train$x, train$y)

  report("Fitting GBM for sample size")
  fit.gbm <- .fit.gbm(train$x, train$y)

  if (n <= 2000) {
    report("Fitting KRR for sample size")
    fit.krr <- .fit.krr(train$x, train$y)
    report("Fitting random forest for sample size")
    fit.rf <- .fit.random.forest(train$x, train$y)
  } else {
    # Above 2000 observations these two fits are skipped.
    report("Skipping KRR for sample size")
    fit.krr <- NULL
    report("Skipping random forest for sample size")
    fit.rf <- NULL
  }

  # Built with list() so that NULL entries are kept as named slots.
  list("iso.est" = fit.iso,
       "iso.est.cv" = fit.iso.cv,
       "sieve.est" = fit.sieve,
       "krr.est" = fit.krr,
       "rf.est" = fit.rf,
       "gbm.est" = fit.gbm)
}
# Run one seeded batch of the simulation and score every fitted estimator on
# a common test set.
#
# Arguments:
#   seed   - value passed to set.seed() before any random draw.
#   n      - training sample size handed to simulate.one().
#   n.rep  - number of independent calls to simulate.one() (default 10).
#   n.test - number of uniform test points (default 1e4).
#
# Returns a data frame with one row per (replicate, estimator) and columns
# n, seed, risk, estimator, L. risk is the mean squared difference between an
# estimator's predictions and data$fn at the test points. L is the fit's
# l.value for the two iso+lin estimators and 0 for all other estimators.
# Estimators whose fit is NULL in every replicate (simulate.one() skips KRR
# and random forest for large n) contribute no rows.
run.experiment <- function(seed, n, n.rep = 10, n.test = 1e4){
  stopifnot(n.rep >= 1, n.test >= 1)

  set.seed(seed)
  test.x <- runif(n.test)
  # Only data$fn is used from this draw (presumably the noiseless target
  # function - confirm in synthetic.R). The call also advances the RNG, so it
  # must stay between runif() and the replicates to keep results reproducible.
  data <- lipschitz.fn(sample.size = 10, sigma = 0.1)
  test.y <- data$fn(test.x)

  res <- replicate(n.rep, simulate.one(n), simplify = FALSE)

  # One entry per estimator, in output row order. is.iso marks fits that are
  # scored through predict() and carry an l.value; the other fits are called
  # directly as functions of the test points.
  specs <- list(
    list(label = "iso+lin",    field = "iso.est",    is.iso = TRUE),
    list(label = "iso+lin cv", field = "iso.est.cv", is.iso = TRUE),
    list(label = "sieve",      field = "sieve.est",  is.iso = FALSE),
    list(label = "gbm",        field = "gbm.est",    is.iso = FALSE),
    list(label = "krr",        field = "krr.est",    is.iso = FALSE),
    list(label = "rf",         field = "rf.est",     is.iso = FALSE)
  )

  # Score one estimator across all replicates; NULL if it was skipped.
  score <- function(spec){
    fits <- lapply(res, function(m) m[[spec$field]])
    # Rely on the NULL placeholders from simulate.one() instead of repeating
    # its sample-size threshold here.
    if (all(vapply(fits, is.null, logical(1)))) {
      return(NULL)
    }
    risk <- vapply(fits, function(fit){
      pred <- if (spec$is.iso) predict(fit, test.x) else fit(test.x)
      mean((pred - test.y)^2)
    }, numeric(1))
    l.val <- if (spec$is.iso) {
      unlist(lapply(fits, function(fit) fit$l.value))
    } else {
      0
    }
    data.frame(n = n, seed = seed, risk = risk, estimator = spec$label,
               L = l.val, stringsAsFactors = FALSE)
  }

  pieces <- Filter(Negate(is.null), lapply(specs, score))
  do.call(rbind, pieces)
}
# Command-line entry point.
# Usage: Rscript exp-iso1.R <seed> <sample size>   e.g. Rscript exp-iso1.R 100 500
# Runs one experiment and saves the resulting data frame as
# ../sim-npparam/results/iso-exp1/seed<seed>N<sample size>.RData
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 2) {
  stop("Usage: Rscript exp-iso1.R <seed> <sample size>", call. = FALSE)
}
seed <- suppressWarnings(as.numeric(args[1])) # base seed
sample.n <- suppressWarnings(as.numeric(args[2])) # training sample size
# Fail early with a clear message instead of deep inside run.experiment().
if (is.na(seed) || is.na(sample.n)) {
  stop("Both <seed> and <sample size> must be numeric", call. = FALSE)
}
cat("Base seed = ", seed, "\n")
cat("Sample size = ", sample.n, "\n")
base.dir <- "../sim-npparam/results"
save.dir <- file.path(base.dir, "iso-exp1")
# recursive: also create base.dir on a fresh checkout.
# showWarnings = FALSE: the directory already exists on every run but the first.
dir.create(save.dir, recursive = TRUE, showWarnings = FALSE)
if (!dir.exists(save.dir)) {
  stop("Could not create output directory: ", save.dir, call. = FALSE)
}
out.df <- run.experiment(seed, sample.n)
filename <- paste0("seed", seed, "N", sample.n, ".RData")
save(out.df, file=file.path(save.dir, filename))