library(makedummies)
setwd("/Users/skatayama/R") ##データは各自の設定下で読みこむ
##stringsAsFactorsをTrueにしておく.Factor型と読んでくれないので.
credit <- read.csv("Credit.csv",header=T,row.names=1,stringsAsFactors=T)
dcredit <- makedummies(credit)
y <- dcredit[,"Balance"]
rss0 <- sum((y-mean(y))^2)
rss <- rep(0,11)
for(j in 1:11){
x1 <- cbind(rep(1,length(y)),dcredit[,j])
x1 <- as.matrix(x1)
beta1 <- solve(t(x1)%*%x1)%*%t(x1)%*%y
rss[j] <- sum((y-x1%*%beta1)^2)
}
(m1 <- names(dcredit)[which.min(rss)])
##選ばれた順にオブジェクトmodelsに追加していく
models <- "Rating"
x1 <- as.matrix(cbind(rep(1,length(y)),dcredit[,m1]))
for(k in 1:10){
rss <- rep(0,11-k)
dcredit0 <- dcredit[,-which(names(dcredit) %in% models)]
for(j in 1:(11-k)){
x2 <- as.matrix(cbind(x1,dcredit0[,j]))
beta2 <- solve(t(x2)%*%x2)%*%t(x2)%*%y
rss[j] <- sum((y-x2%*%beta2)^2)
}
models <- c(models,names(dcredit0)[which.min(rss)])
x1 <- as.matrix(cbind(rep(1,length(y)),dcredit[,models]))
}
models
#Cp規準を最小にするモデルを求める
xf <- as.matrix(cbind(rep(1,length(y)),dcredit[,1:11]))
betaf <- solve(t(xf)%*%xf)%*%t(xf)%*%y
rssf <- sum((y-xf%*%betaf)^2)
sighat <- sum((y - xf%*%betaf)^2)/(400-11-1)
cp <- rep(0,12)
cp[1] <- rss0
cp[12] <- rssf + log(length(y))*sighat*11
for(j in 2:11){
x <- cbind(rep(1,length(y)),dcredit[,models[1:(j-1)]])
x <- as.matrix(x); beta <- solve(t(x)%*%x)%*%t(x)%*%y
rss <- sum((y-x%*%beta)^2)
cp[j] <- rss + log(length(y))*sighat*(j-1)
}
models[1:which.min(cp)]
credit <- read.csv("Credit.csv",header=T,row.names=1,stringsAsFactors=T)
dcredit <- makedummies(credit)
y <- dcredit[,"Balance"]
x <- dcredit[,1:11]
y <- y-mean(y)
x <- scale(x, center=T,scale=F)
lambda <- 0.5
b.ridge <- solve(t(x) %*% x + lambda * diag(11)) %*% t(x) %*% y
b.ridge
Income | -7.7995690 |
---|---|
Limit | 0.1897533 |
Rating | 1.1531803 |
Cards | 17.6082799 |
Age | -0.6184436 |
Education | -1.0525056 |
Gender | 10.3991254 |
Student | 419.7395264 |
Married | -8.8343121 |
Ethnicity_Asian | 16.7904729 |
Ethnicity_Caucasian | 9.8852659 |
ridge <- function(lambda){
solve(t(x)%*%x + lambda*diag(11))%*%t(x)%*%y
}
cand <- seq(0,1000,length=100) ##lambdaの候補を用意
sols <- sapply(cand, function(x){ridge(x)}) ##用意したlambdaに対する推定値を一気に計算
matplot(cand,t(sols),type="l")
matplot(cand,t(sols),type="l",ylim=c(-10,10)) ##yを制限して拡大してみる
cda <- function(y,x,lambda){
n <- dim(x)[1]; p <- dim(x)[2]
covx <- t(x)%*%x; covxy <- t(x)%*%y ##t(X)X and t(X)y
beta <- rep(0,p) ##betaの初期値
judge <- 100 ##収束を判定する
z <- rep(0,p); old <- rep(0,p)
repeat{
if(judge >= 0.001){
for(j in 1:p){
}
judge <- sum(abs(beta-old))
}else break
}
return(beta)
}
##toy data
x <- matrix(rnorm(100*20),100,20)
beta <- c(c(1,1,1),rep(0,17))
y <- x %*% beta + rnorm(100)
cda(y,x,15)
##solution path
lambdas <- seq(0.001,max(abs(t(x)%*%y)),length=100)
betas <- sapply(lambdas,function(lam){round(cda(y,x,lam),4)})
matplot(lambdas,t(betas),type="l",lwd=2,ylab="beta")