Tuesday, October 6, 2009

pieces of R code for conference #3

########################
##### Exercise 4.1 #####
########################

### a
exp(-.25)
exp(.25)

### b
a <- -3.5
b <- 2
error.sd <- .25
earnings.n <- 1000

height.in <- rnorm(earnings.n, mean=69.1, sd=2.9)

l.height.in <- log(height.in)

l.weight.error <- rnorm(earnings.n, mean=0, sd=error.sd)

l.weight.lbs <- a + b*l.height.in + l.weight.error

display(lm(l.weight.lbs ~ l.height.in))

plot(l.height.in, l.weight.lbs, cex=.25,
xlab="log(height) in inches",
ylab="log(weight) in pounds")
curve(cbind(1,x) %*% c(a,b), add=T)
curve(cbind(1,x) %*% c(a+error.sd,b), add=T, lty=2)
curve(cbind(1,x) %*% c(a-error.sd,b), add=T, lty=2)

########################
##### Exercise 4.2 #####
########################
library ("foreign")
library("arm")

### a
Heights <- read.dta ("heights.dta")
str(Heights)
summary(Heights)

##### piece of code taken and adapted from earnings_setup.R #####

attach(Heights)

# create variables for age and ethnicity categories

age <- 90 - yearbn # survey was conducted in 1990
age[age<18] <- NA
age.category <- ifelse (age<35, 1, ifelse (age<50, 2, 3))
eth <- ifelse (race==2, 1, ifelse (hisp==1, 2, ifelse (race==1, 3, 4)))
male <- 2 - sex

# (for simplicity) remove cases with missing data
# and restrict to people with positive earnings born after 1925

ok <- !is.na (earn+height+sex+age) & earn>0 & yearbn>25
heights.clean <- as.data.frame (cbind (earn, height, sex, race, hisp, ed, age, age.category, eth, male)[ok,])
n <- nrow (heights.clean)

###############################
detach(Heights)
remove(age, age.category, eth, male)
attach(heights.clean)

### b
earnings.fit1 <- lm(earn ~ height)
display(earnings.fit1)

c.height <- height - mean(height)
earnings.fit2 <- lm(earn ~ c.height)
display(earnings.fit2)

### c
l.earn=log(earn)
l.height=log(height)
age2=age*age
earnings.fit3 <- lm(l.earn ~ male + height + age)
display(earnings.fit3, digits=4)

earnings.fit4 <- lm(l.earn ~ male + height + age + male:height)
display(earnings.fit4, digits=4)

earnings.fit5 <- lm(l.earn ~ male + c.height + age + male:c.height)
display(earnings.fit5, digits=4)

z.height=c.height/sd(height)
age.25 <- age-25
earnings.fit6 <- lm(l.earn ~ male + z.height + age.25 + male:z.height + male:age.25)
display(earnings.fit6, digits=4)

earnings.fit7 <- lm(l.earn ~ male + z.height + age.25 + male:z.height + male:age.25 + age.25:z.height)
display(earnings.fit7, digits=4)


########################
##### Exercice 4.3 #####
########################


plot(c(1.8,8), c(100,200),
xlab="age",
ylab="weight (in pounds)",
type='n'
)
curve(cbind(1,x) %*% c(161,2.6), add=T)
curve(cbind(1,x,x^2) %*% c(96.2,33.6,-3.2), add=T, col="red")
curve(cbind(1,(x>=3.0 & x<4.5),(x>=4.5 & x<6.5),(x>=6.5) ) %*% c(157.2,19.1, 27.2, 8.5), add=T, col="green")

No comments:

Post a Comment