Andrew Gelman's Sciences Po Course: Some R code for conference #6

#####################
### conference #6 ###
#####################

library("arm")

###########
### 7.1 ###
###########

### a

f.shots <- function(p.success=.6, hits.stop=2){
shots <- 0
missed <- 0
success <- 0
while (missed < hits.stop){
if (rbinom(1,1,p.success)==1) {
shots <- shots+1
success <- success + 1
missed <- 0
} else {
shots <- shots+1
missed <- missed + 1
}
}
c(shots=shots,success=success)
}
f.shots()

### b
sim.shots <- rep(NA,1000)
mytime <- proc.time()
for (i in 1:1000){
sim.shots[i] <- f.shots()["shots"]
}
proc.time() - mytime

mean(sim.shots)
sd(sim.shots)

# slightly faster with "replicate"
mytime <- proc.time()
sim.shots2 <- replicate(1000, f.shots()["shots"])
proc.time() - mytime

mean(sim.shots2)
sd(sim.shots2)

# theoretical results:

$\alpha=0.6$ is the probability of success
$m= 2 + \frac{\alpha(3-2\alpha)}{(1-\alpha)^2}$
$variance= 4 + \frac{m\alpha(6-4\alpha)}{(1-\alpha)^2} + \frac{\alpha(5-4\alpha)}{(1-\alpha)^2}$
$m=8.75, sd \approx 7.16$

sim.shots.success <- data.frame(shots=rep(NA,1000), success=rep(NA,1000))
for (i in 1:1000){
sim.shots.success[i,] <- f.shots()
}
str(sim.shots.success)
sim.shots.success[1:10,]
sim.shots.success$prop.success <- sim.shots.success$succes/sim.shots.success$shots

mean(sim.shots.success$shots)
mean(sim.shots.success$success)
sd(sim.shots.success$shots)
sd(sim.shots.success$success)

### c
plot(sim.shots.success$shots, sim.shots.success$prop.success)

#############
### BONUS ###
#############

### if what we want is the total number of non missed shots
f.hits <- function(p.success=.6, hits.stop=2){
hits <- 0
missed <- 0
while (missed < hits.stop){
if (rbinom(1,1,p.success)==1) {
hits <- hits+1
missed <- 0
} else {
missed <- missed + 1
}
}
hits
}
f.hits()

sim.hits <- rep(NA,10000)
mytime <- proc.time()
for (i in 1:10000){
sim.hits[i] <- f.hits()
}
proc.time() - mytime

mean(sim.hits)
sd(sim.hits)

## mat10000 <- matrix(c(.6,2), nrow=2, ncol=10000)
## mytime <- proc.time()
## sim.hits2 <- apply(mat10000, 2, f.hits)
## proc.time() - mytime

## mean(sim.hits2)
## sd(sim.hits2)

# theoretical results in that case:
# compute the probability to take n hits by induction
# p(0)=.4*.4
# a sequence leading to n hits starts either by 1 or by 0
# the set of sequences leading to n hits and starting with a 0 is the same as the set of sequences leading to n hits and starting with a 1, but adding a 0 at the beginning of the sequence
# n -> n+1
# the set of sequences leading to n+1 hits and starting with a 1 is the same as the set of sequences leading to n hits and starting with a 1, but adding a 1 at the beginning plus the set of sequences starting with a 1 but adding 10 at the beginning

$p_{n+1}=p_n*(0.6 + 0.6 \times 0.4)$
$p_n=p_0*\alpha^n$
$with ~~ p_0=0.4 \times 0.4 = 0.16$
$and ~~ \alpha=0.6 + 0.6 \times 0.4 = 0.84$

# let H be the random variable equal to the number of hits

$\mathbb{E}(H)=\sum_{n=0}^{+\infty} np(n)=p(0)\sum_{n=0}^{+\infty} n\alpha^n$
$\mathbb{E}(H)=p(0)\frac{\alpha}{(1-\alpha)^2}$
$\mathbb{E}(H)=5.25$

$\mathbb{V}(H)=\mathbb{E}(H^2)-\mathbb{E}(H)^2$
$\mathbb{E}(H^2)=p(0)\sum_{n=0}^{+\infty} n^2\alpha^n$
$\mathbb{E}(H^2)=p(0)\frac{\alpha(1+\alpha)}{(1-\alpha)^3}$
$\sigma(H)=5.73$

###########
### 7.3 ###
###########

# let's assume all random variables are independent, continuous and normally distributed

f.savings <- function(m.unit=5, sd.unit=4, m.market=40000, s.market=10000){
market.size <- round(rnorm(1, mean=m.market, sd=s.market),0)
if (market.size<0) {0 <- market.size}
savings <- rnorm(market.size, mean=m.unit, sd=sd.unit)
sum(savings)
}
f.savings()

total.savings <- rep(NA,1000)
for (i in 1:1000){
total.savings[i] <- f.savings()
}
mean(total.savings)
sd(total.savings)

# the order of magnitude should be about s.market*m.unit=10000*5

# another story:
# let's assume that there are 40000 firms
# each firm will independently enter the market with probability 1/2501
# each firm which enters the market will buy 2501 widgets
# otherwise it will buy 0 widgets

f.savings2 <- function(m.unit=5, sd.unit=4){
market.size <- 2501*rbinom(1,40000,1/2501)
savings <- rnorm(market.size, mean=m.unit, sd=sd.unit)
sum(savings)
}
f.savings2()
total.savings2 <- rep(NA,1000)
for (i in 1:1000){
total.savings2[i] <- f.savings2()
}
mean(total.savings2)
sd(total.savings2)

###########
### 7.8 ###
###########

unidim.sim <- function(estimate, sd.estimate, df, n){
X <- rchisq(n, df=df)
rnorm(n, mean=estimate, sd=sd.estimate*sqrt(df/X))
}
unidim.sim(estimate=600, sd.estimate=400, df=50, n=5)

plot(unidim.sim(estimate=600, sd.estimate=400, df=50, n=1000), unidim.sim(estimate=3, sd.estimate=1, df=100, n=1000))

cost.eff <- rep(NA,1000)
for (i in 1:1000){
cost.eff[i] <- mean(unidim.sim(estimate=600, sd.estimate=400, df=50, n=1000))/mean(unidim.sim(estimate=3, sd.estimate=1, df=100, n=1000))
}

hist(cost.eff)
quantile(cost.eff,probs=c(.025, .25, .75, .975))

cost.eff2 <- rep(NA,1000)
for (i in 1:1000){
cost.eff2[i] <- mean(unidim.sim(estimate=600, sd.estimate=400, df=50, n=1000))/mean(unidim.sim(estimate=3, sd.estimate=2, df=100, n=1000))
}

hist(cost.eff2)
quantile(cost.eff2,probs=c(.025, .25, .75, .975))

Andrew Gelman's Sciences Po Course

Friday, November 6, 2009

Some R code for conference #6

No comments:

Post a Comment

Followers

Blog Archive

Contributors