-
Notifications
You must be signed in to change notification settings - Fork 0
/
fwd_stepwise.R
113 lines (100 loc) · 2.47 KB
/
fwd_stepwise.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
###############################################################################
# Author: Eugene Chuvyrov
###############################################################################
setwd("C:\\Projects\\R")
pdata <- read.table(file="wines.data", sep = ";", header = TRUE)
#put predictors into X
X <- pdata[,1:11]
#put response into Y
Y <- pdata[,12]
#demean X and bring to unit variance
for(i in 1:8){
m <- sum(X[,i])
m <- m/length(X[,i])
X[,i] <- X[,i]-m
v <- var(X[,i])
X[,i] <- X[,i]/sqrt(v)
}
#put X into matrix form
X <- as.matrix(X)
#check covariance
cor(X)
#linear model on full set (train + test)
linMod <- lm(Y ~ X)
linMod
#begin forward step-wise regression modeling work
#pick the first variable
Index <- 1:nrow(X)
colIndex <- 1:11
seBest <- 1000000.0
seArray <- rep(0.0,7)
Xtemp <- X[,1]
nxval <- 12
#loop through all variables
for(iTry in 1:11){
Xtemp <- X[,iTry]
se <- 0.0
for(ixval in 1:nxval){
Iout <- which(Index%%nxval==(ixval-1))
XtempTemp <- Xtemp[-Iout]
Xnew <- Xtemp[Iout]
Ytemp <- Y[-Iout]
Ynew <- Y[Iout]
linMod <- lm(Ytemp ~ XtempTemp)
v <- as.array(linMod$coefficients)
yHat <- rep(0.0,length(Xnew))
for(i in 1:length(Xnew)){
yHat[i] <- v[1] + Xnew[i]*v[2]
}
dY <- yHat -Ynew
seTemp <- (1/length(Xnew))*sum(dY*dY)
se <- se + seTemp/nxval
}
#print(se)
if(se<seBest){
seBest <- se
iBest <- iTry
}
}
seArray[1] <- seBest
I <- iBest
#run through the same calculation for the next 10 variables
for(iStep in 1:10){
colSelection <- colIndex[-I]
seBest <- 1000000
for(iTry in 1:length(colSelection)){
iCols <- c(I,colSelection[iTry])
Xtemp <- as.matrix(X[,iCols])
se <- 0.0
for(ixval in 1:nxval){
Iout <- which(Index%%nxval==(ixval-1))
XtempTemp <- Xtemp[-Iout,]
Xnew <- Xtemp[Iout,]
Ytemp <- Y[-Iout]
Ynew <- Y[Iout]
linMod <- lm(Ytemp ~ XtempTemp)
v <- as.array(linMod$coefficients)
isize <- length(v) - 1
yHat <- rep(0.0,nrow(Xnew))
for(i in 1:nrow(Xnew)){
yHat[i] <- v[1]
for(j in 1:isize){
yHat[i] <- yHat[i] + Xnew[i,j]*v[j+1]
}
}
dY <- yHat - Ynew
seTemp <- ((1/nrow(Xnew))*sum(dY*dY))
se <- se + seTemp/nxval
}
#print(se)
if(se<seBest){
seBest <- se
iBest <- colSelection[iTry]
}
}
I <- c(I,iBest)
print(I)
seArray[iStep + 1] <- seBest
}
plot(sqrt(seArray))
points(sqrt(seArray), pch=".", cex=3, col=2)