Friday, October 2, 2009

some R code for conference #2

### Conférence de méthode #2

library("arm")

###########
### 3.1 ###
###########
getwd()
setwd("/home/argon/ScPo")

# first try... does not work: read.table() defaults to header=FALSE,
# so the header row "y x1 x2" is read as data
#Pyth <- read.table("exercise2.1.dat")
#str(Pyth)
#Pyth


Pyth <- read.table("exercise2.1.dat", header=T)
str(Pyth)
Pyth



# only the first 40 rows have observed y; x1 and x2 in rows 41-60
# are kept for prediction below
summary(Pyth[1:40,])

fit.Pyth <- lm(y ~ x1 + x2, data=Pyth[1:40,])
display(fit.Pyth)

## try to understand what kind of objects fit.Pyth and coef(fit.Pyth) are
attributes(fit.Pyth)
attributes(coef(fit.Pyth))
class(coef(fit.Pyth))
is.vector(coef(fit.Pyth))
# coef(fit.Pyth) is a named vector
coef(fit.Pyth)["x1"]
coef(fit.Pyth)[2]

## two plot panels side by side in one window instead of one plot
par(mfrow=c(1,2))


plot(Pyth$x1[1:40], Pyth$y[1:40], cex=.25)
# curve() takes an expression in 'x' as its first argument;
# here the expression is built as a matrix product: each row (1, x, mean(x2))
# is multiplied by the coefficient vector
# the choice of the intercepts is explained below
curve(cbind(1,x,mean(Pyth$x2[1:40])) %*% coef(fit.Pyth), add=T )

plot(Pyth$x2[1:40], Pyth$y[1:40], cex=.25)
curve(cbind(1,mean(Pyth$x1[1:40]),x) %*% coef(fit.Pyth), add=T )

# x1 against x2 (used below to argue that they are roughly independent)
plot(Pyth$x1[1:40], Pyth$x2[1:40], cex=.25)

# residual plots versus x1, x2, and predicted values
summary(residuals(fit.Pyth))
str(residuals(fit.Pyth))

plot(Pyth$x1[1:40], residuals(fit.Pyth), cex=.25)
abline(h=0)
abline(h=c(-1,1)*sd(residuals(fit.Pyth)), lty=2)  # +/- 1 residual sd
# errors are bigger at both ends of x1

plot(Pyth$x2[1:40], residuals(fit.Pyth), cex=.25)
abline(h=0)
abline(h=c(-1,1)*sd(residuals(fit.Pyth)), lty=2)  # +/- 1 residual sd
# the residuals are not well centered across the range of x2

plot(fitted.values(fit.Pyth)[1:40], residuals(fit.Pyth), cex=.25)
abline(h=0)
abline(h=c(-1,1)*sd(residuals(fit.Pyth)), lty=2)  # +/- 1 residual sd
# quite ok
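
# the three residual plots above repeat the same plot/abline calls;
# a small helper function (hypothetical, not part of the original code)
# would avoid the repetition:
resid.plot <- function(x, res) {
  plot(x, res, cex=.25)
  abline(h=0)
  abline(h=c(-1,1)*sd(res), lty=2)
}
# e.g. resid.plot(Pyth$x1[1:40], residuals(fit.Pyth))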

pred.Pyth <- predict(fit.Pyth, Pyth[41:60,], interval="prediction", level=.95)
pred.Pyth
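
# predict() on an lm fit with interval="prediction" returns a matrix
# with columns "fit", "lwr" and "upr", so the bounds can be used directly,
# e.g. the widths of the 95% prediction intervals:
pred.Pyth[, "upr"] - pred.Pyth[, "lwr"]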

### choice of the intercepts for the two separate graphs of y against x1 and x2
# the graph of x1 against x2, as well as a regression of x1 on x2, suggests that they are independent,
# so it makes sense to use the coefficients of the regression of y on x1 and x2
# to draw the lines on the two separate graphs of y against x1 and y against x2
# the slopes are given by the regression but the intercepts have to be recalculated:
# for each graph we want the line to be centered.
#
# the fitted model is:
# y = \beta_0 + \beta_1*x1 + \beta_2*x2 + error
#
# the estimated coefficients are \hat\beta_0, \hat\beta_1, \hat\beta_2
#
# graph of y against x1:
# the line is of the form:
# y = c_1 + \hat\beta_1*x1
#
# we define
# \bar{y}, \bar{x}_1, \bar{x}_2 as the sample means of y, x1, x2
#
# we want the line to be centered in the sense that
# it passes through the point (\bar{x}_1, \bar{y})
#
# since there is a constant term in the regression, we know that:
# \bar{y} = \hat\beta_0 + \hat\beta_1*\bar{x}_1 + \hat\beta_2*\bar{x}_2
#
# therefore:
# \bar{y} - \hat\beta_1*\bar{x}_1 = \hat\beta_0 + \hat\beta_2*\bar{x}_2
#
# and we want to find c_1 such that
# \bar{y} = c_1 + \hat\beta_1*\bar{x}_1
#
# therefore we take
# c_1 = \hat\beta_0 + \hat\beta_2*\bar{x}_2
#
# which is exactly the intercept used in the curve() call for x1 above
#
# for the graph of y against x2, it's symmetrical:
# c_2 = \hat\beta_0 + \hat\beta_1*\bar{x}_1
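
# quick numeric check of the derivation (a sanity check, not part of
# the original exercise): the line drawn for x1 should pass through
# (mean(x1), mean(y))
b <- coef(fit.Pyth)
c1 <- b[1] + b[3]*mean(Pyth$x2[1:40])
c1 + b[2]*mean(Pyth$x1[1:40])  # should equal...
mean(Pyth$y[1:40])             # ...the mean of y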



###########
### 3.2 ###
###########

#(a)
# the model is log(earnings) = \alpha + \beta*log(height) + error
# given: alpha_hat + beta_hat*log(66) = log(30000) and beta_hat = .8
log(30000)-.8*log(66)
# so alpha_hat = 6.96
# given: 2*sigma_hat = 1.1, so sigma_hat = .55
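
# quick check of the numbers above (a sanity check, not part of the exercise):
exp(6.96 + .8*log(66))  # predicted earnings at 66 inches, back near 30000
# with sigma_hat = .55, about 95% of earnings would fall within a factor
# of exp(2*.55) (roughly 3) of the predicted value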

###########
### 3.3 ###
###########

# one simulation: regress one randomly generated variable on another;
# the slope is typically not statistically significant
var1 <- rnorm(1000, 0, 1)
var2 <- rnorm(1000, 0, 1)
display(lm(var2 ~ var1))

z.scores <- rep(NA, 1000)
for (k in 1:1000){
  var1 <- rnorm(1000, 0, 1)
  var2 <- rnorm(1000, 0, 1)
  fit <- lm(var2 ~ var1)
  z.scores[k] <- coef(fit)[2]/se.coef(fit)[2]  # slope / its s.e. (se.coef is from arm)
}



length(which(abs(z.scores)>2))
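
# as a proportion: under the null we expect roughly 5% of the z-scores
# to exceed 2 in absolute value
mean(abs(z.scores) > 2)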
