4 Finding the Variance and Weights

 mobility <- read.csv("http://www.stat.cmu.edu/~cshalizi/mreg/15/lectures/24--25/mobility2.csv")
      plot(Mobility ~ Population, data=mobility, log="x", ylim=c(0,0.5))
      
      mobility$MobSE <- with(mobility, sqrt(Mobility*(1-Mobility)/Population))
      plot(Mobility ~ Commute, data=mobility,
           xlab="Fraction of workers with short commutes",
           ylab="Rate of economic mobility", pch=19, cex=0.2)
      with(mobility, segments(x0=Commute, y0=Mobility+2*MobSE,
                              x1=Commute, y1=Mobility-2*MobSE, col="blue"))
      mob.lm <- lm(Mobility ~ Commute, data=mobility)
      mob.wlm <- lm(Mobility ~ Commute, data=mobility, weight=1/MobSE^2)
      abline(mob.lm)
      abline(mob.wlm, col="blue")
      

      





Figure 10: Mobility versus the fraction of workers with short commute, with ±2standard deviation error bars (vertical blue bars), and the OLS linear fit (black line) andweighted least squares (blue line). Note that the error bars for some larger communitiesare smaller than the diameter of the dots.







 plot(x,residuals(fit.ols)^2,ylab="squared residuals")
      curve((1+x^2/2)^2,col="grey",add=TRUE)
      var1 <- smooth.spline(x=x, y=log(residuals(fit.ols)^2), cv=TRUE)
      grid.x <- seq(from=min(x),to=max(x),length.out=300)
      lines(grid.x, exp(predict(var1,x=grid.x)$y))
      
      
      fit.wls1 <- lm(y~x,weights=1/exp(var1$y))
      coefficients(fit.wls1)
      var2 <- smooth.spline(x=x, y=log(residuals(fit.wls1)^2), cv=TRUE)
      
      fit.wls2 <- lm(y~x,weights=1/exp(var2$y))
      coefficients(fit.wls2)
      var3 <- smooth.spline(x=x, y=log(residuals(fit.wls2)^2), cv=TRUE)
      
      fit.wls3 <- lm(y~x,weights=1/exp(var3$y))
      coefficients(fit.wls3)
      
      var4 <- smooth.spline(x=x, y=log(residuals(fit.wls3)^2), cv=TRUE)
      fit.wls4 <- lm(y~x,weights=1/exp(var4$y))
      coefficients(fit.wls4)
      
      
      
      fit.wls1 <- lm(y~x,weights=1/exp(var1$y))
      par(mfrow=c(1,2))
      plot(x,y)
      abline(a=3,b=-2,col="grey")
      abline(fit.ols,lty="dashed")
      abline(fit.wls1,lty="dotted")
      plot(x,(residuals(fit.ols))^2,ylab="squared residuals")
      points(x,residuals(fit.wls1)^2,pch=15)
      lines(grid.x, exp(predict(var1,x=grid.x)$y))
      var2 <- smooth.spline(x=x, y=log(residuals(fit.wls1)^2), cv=TRUE)
      curve((1+x^2/2)^2,col="grey",add=TRUE)
      lines(grid.x, exp(predict(var2,x=grid.x)$y),lty="dotted")
      par(mfrow=c(1,1))
      
      
   




iterative.wls <- function(x,y,tol=0.01,max.iter=100) {
iteration <- 1
old.coefs <- NA
regression <- lm(y~x)
coefs <- coefficients(regression)
while (is.na(old.coefs) ||
((max(abs(coefs - old.coefs)) > tol) && (iteration < max.iter))) {
variance <- smooth.spline(x=x, y=log(residuals(regression)^2), cv=TRUE)
old.coefs <- coefs
iteration <- iteration+1
regression <- lm(y~x,weights=1/exp(variance$y))
coefs <- coefficients(regression)
}
return(list(regression=regression,variance=variance,iterations=iteration))

}


猜你喜欢

转载自blog.csdn.net/taojiea1014/article/details/80530699