4 Finding the Variance and Weights

mobility <- read.csv("http://www.stat.cmu.edu/~cshalizi/mreg/15/lectures/24--25/mobility2.csv")
plot(Mobility ~ Population, data=mobility, log="x", ylim=c(0,0.5))

mobility$MobSE <- with(mobility, sqrt(Mobility*(1-Mobility)/Population))
plot(Mobility ~ Commute, data=mobility,
xlab="Fraction of workers with short commutes",
ylab="Rate of economic mobility", pch=19, cex=0.2)
with(mobility, segments(x0=Commute, y0=Mobility+2*MobSE,
x1=Commute, y1=Mobility-2*MobSE, col="blue"))
mob.lm <- lm(Mobility ~ Commute, data=mobility)
mob.wlm <- lm(Mobility ~ Commute, data=mobility, weight=1/MobSE^2)
abline(mob.lm)
abline(mob.wlm, col="blue")

Figure 10: Mobility versus the fraction of workers with short commute, with ±2standard deviation error bars (vertical blue bars), and the OLS linear fit (black line) andweighted least squares (blue line). Note that the error bars for some larger communitiesare smaller than the diameter of the dots.

plot(x,residuals(fit.ols)^2,ylab="squared residuals")
curve((1+x^2/2)^2,col="grey",add=TRUE)
var1 <- smooth.spline(x=x, y=log(residuals(fit.ols)^2), cv=TRUE)
grid.x <- seq(from=min(x),to=max(x),length.out=300)
lines(grid.x, exp(predict(var1,x=grid.x)$y))


fit.wls1 <- lm(y~x,weights=1/exp(var1$y))
coefficients(fit.wls1)
var2 <- smooth.spline(x=x, y=log(residuals(fit.wls1)^2), cv=TRUE)

fit.wls2 <- lm(y~x,weights=1/exp(var2$y))
coefficients(fit.wls2)
var3 <- smooth.spline(x=x, y=log(residuals(fit.wls2)^2), cv=TRUE)

fit.wls3 <- lm(y~x,weights=1/exp(var3$y))
coefficients(fit.wls3)

var4 <- smooth.spline(x=x, y=log(residuals(fit.wls3)^2), cv=TRUE)
fit.wls4 <- lm(y~x,weights=1/exp(var4$y))
coefficients(fit.wls4)



fit.wls1 <- lm(y~x,weights=1/exp(var1$y))
par(mfrow=c(1,2))
plot(x,y)
abline(a=3,b=-2,col="grey")
abline(fit.ols,lty="dashed")
abline(fit.wls1,lty="dotted")
plot(x,(residuals(fit.ols))^2,ylab="squared residuals")
points(x,residuals(fit.wls1)^2,pch=15)
lines(grid.x, exp(predict(var1,x=grid.x)$y))
var2 <- smooth.spline(x=x, y=log(residuals(fit.wls1)^2), cv=TRUE)
curve((1+x^2/2)^2,col="grey",add=TRUE)
lines(grid.x, exp(predict(var2,x=grid.x)$y),lty="dotted")
par(mfrow=c(1,1))

iterative.wls <- function(x,y,tol=0.01,max.iter=100) {
iteration <- 1
old.coefs <- NA
regression <- lm(y~x)
coefs <- coefficients(regression)
while (is.na(old.coefs) ||
((max(abs(coefs - old.coefs)) > tol) && (iteration < max.iter))) {
variance <- smooth.spline(x=x, y=log(residuals(regression)^2), cv=TRUE)
old.coefs <- coefs
iteration <- iteration+1
regression <- lm(y~x,weights=1/exp(variance$y))
coefs <- coefficients(regression)
}
return(list(regression=regression,variance=variance,iterations=iteration))

}

4 Finding the Variance and Weights

猜你喜欢