EM,SEM算法操作实例:《Statistical Analysis with Missing Data》习题9.1 & 9.2

一、题目

Example 9.1 & 9.2

重现书中Example 9.1与9.2。

先贴出SEM算法:

SEM

SEM

下面是Example 9.1与Example 9.2原例:

Example 9.1



Example 9.2



二、解答

a)Example 9.1

赋一些初值:

y1 <- 38
y2 <- 34

eps <- 1e-30     # 当新旧两次\theta的差距小于eps时,迭代停止
theta_t <- 0     # 参数\theta的初值
theta_diff <- 1  # 由于下面使用了while语句,所以需要给两个\theta的差赋初值

下面开始进行第一次迭代循环,估计参数并实时输出,直至新旧两次 θ \theta 的差距小于eps时,迭代停止。

while(theta_diff > eps) {
  y3_t <- 125 * (theta_t / 4) / (theta_t / 4 + 0.5)
  theta_t_new <- (34 + y3_t) / (38 + 34 + y3_t)
  theta_diff <- abs(theta_t_new - theta_t)
  theta_t <- theta_t_new
  print(theta_t)
}

经过EM算法迭代,估计出的 θ \theta

(theta_hat <- theta_t)

下面进行DM的估计(注意:这里设置的eps误差是 1 0 8 10^{-8} ,而前面是 1 0 30 10^{-30} 。因为必须要前面EM的精度要远高于后面SEM的精度,后面估计DM时,才能稳定收敛,不然SEM只有中间一部分稳定。同样的在做EX9.2时,也用了相同的处理方法):

# 设置初值
eps <- 1e-8
theta_t <- 0
theta_diff <- 1

while(theta_diff > eps) {
  y3_t <- 125 * (theta_t / 4) / (theta_t / 4 + 0.5)
  theta_t_new <- (34 + y3_t) / (38 + 34 + y3_t)
  theta_diff <- abs(theta_t_new - theta_t)
  
  DM <- (theta_t_new - theta_hat) / (theta_t - theta_hat) # 这一步是相对于前面EM多出来的一步
  theta_t <- theta_t_new
  print(DM) # 看DM的迭代过程
}

最后我们进行 V c o m V_{com} 的估计,结果与书上一致。

扫描二维码关注公众号,回复: 4304849 查看本文章
y3_hat <- 125 * (theta_hat / 4) / (theta_hat / 4 + 0.5)
V_com <- theta_hat * (1 - theta_hat) / (y1 + y2 + y3_hat)
(V_obs <- V_com / (1 - DM))

下面计算进行logit变换后,重新估计参数与 V o b s V_{obs}

# 定义变换函数
logit <- function(x) return(log(x / (1 - x)))

# 重新估计参数,直接进行变换
(logit(theta_hat))

重新进行SEM迭代与估计:

# 参数初始化
eps <- 1e-8
theta_t <- 0
theta_diff <- 1

while(theta_diff > eps) {
  y3_t <- 125 * (theta_t / 4) / (theta_t / 4 + 0.5)
  theta_t_new <- (34 + y3_t) / (38 + 34 + y3_t)
  theta_diff <- abs(theta_t_new - theta_t)
  
  DM_logit <- (logit(theta_t_new) - logit(theta_hat)) / (logit(theta_t) - logit(theta_hat)) # 注意,这里全部进行变换
  theta_t <- theta_t_new
  print(DM_logit)
}

y3_hat <- 125 * (theta_hat / 4) / (theta_hat / 4 + 0.5)

# 要格外注意,这里有Fisher信息量的变换公式,是需要除以变换函数导数的平方
V_com <- 1 / (y1 + y2 + y3_hat) / (theta_hat * (1 - theta_hat)) 
(V_obs <- V_com / (1 - DM_logit))

最后的估计结果与书上相差一个量级,感觉可能是书上写错了。

b)Example 9.2

首先加载两个在计算过程中需要用到的两个包:

library(matlib) # sweep operator 需要用到
library(gtools) # permutations 需要用到

初始化一些参数,与EM里面涉及到的变换需要用到的函数。

# 定义y1, y2
y1 <- c(8, 6, 11, 22, 14, 17, 18, 24, 19, 23, 26, 40, 4, 4, 5, 6, 8, 10)
y2 <- c(59, 58, 56, 53, 50, 45, 43, 42, 39, 38, 30, 27, rep(NA, 6))
ind_na <- which(is.na(y2))

# 初始化参数
n <- length(y1)
p <- 2
eps <- 1e-20

# 定义转换函数
Z <- function(rho) {
  return(0.5 * log((1 + rho) / (1 - rho)))
}

# 定义逆变换函数
Z_inv <- function(z) {
  return((exp(2 * z) - 1) / (exp(2 * z) + 1))
}

EM计算构建函数(按照课本第十一章高维正态的公式进行编写):

EM1 <- function(param) {
  # 转化参数,主要需要注意\rho,\sigma_1,\sigma_2之间的关系
  param_trans <- param
  param_trans[3] <- exp(param[3])
  param_trans[5] <- exp(param[4])
  param_trans[4] <- Z_inv(param[5]) * (param_trans[3] * param_trans[5]) ^ (1/2)

  # 输入参数
  param_mat <- matrix(nrow = p + 1, ncol = p + 1)
  param_mat[lower.tri(param_mat, diag = T)] <- c(-1, param_trans)
  param_mat[upper.tri(param_mat)] <- param_mat[lower.tri(param_mat)]
  
  # sweep operator
  swp_result <- swp(param_mat, 2)
  
  y2_new <- y2
  y2_new[ind_na] <- swp_result[1, 3] + swp_result[2, 3] * y1[ind_na] 
  
  mu1 <- mean(y1)
  mu2 <- mean(y2_new)
  
  sigma11 <- cov(y1, y1) * (n - 1) / n
  sigma12 <- cov(y1, y2_new) * (n - 1) / n
  sigma22 <- cov(y2_new, y2_new) * (n - 1) / n + swp_result[3, 3] * 6 / n # 后面那块儿很关键,之前忘记加了,结果一直输出错误
  
  mu2_diff <- abs(param_trans[2] - mu2)
  
  # 参数转换
  param <- c(mu1, mu2, log(sigma11), log(sigma22), Z(sigma12 * (sigma11 * sigma22) ^ (-1/2)))
  
  return(list(param = param, mu2_diff = mu2_diff))
}

进行参数估计(如果中间加个print显示迭代输出过程,可以发现 μ 1 \mu_1 log ( σ 11 ) \text{log}(\sigma_{11}) 一步就会收敛):

param <- 1:5
mu2_diff <- 1
while(mu2_diff > eps) {
  EM_result <- EM1(param)
  param <- EM_result$param
  
  # print(param)
  mu2_diff <- EM_result$mu2_diff
}

(final_param <- param)

输出结果与书上一致。下面进行DM的估计以及 Δ V \Delta V^* 的估计:

SEM <- function(param, i, j) {
  # 转化参数
  param_trans <- param
  param_trans[3] <- exp(param[3])
  param_trans[5] <- exp(param[4])
  param_trans[4] <- Z_inv(param[5]) * (param_trans[3] * param_trans[5]) ^ (1/2)
  
  # 输入参数
  param_mat <- matrix(nrow = p + 1, ncol = p + 1)
  param_mat[lower.tri(param_mat, diag = T)] <- c(-1, param_trans)
  param_mat[upper.tri(param_mat)] <- param_mat[lower.tri(param_mat)]
  
  # sweep operator
  swp_result <- swp(param_mat, 2)
  
  y2_new <- y2
  y2_new[ind_na] <- swp_result[1, 3] + swp_result[2, 3] * y1[ind_na]
  
  mu1 <- mean(y1)
  mu2 <- mean(y2_new)
  
  sigma11 <- cov(y1, y1) * (n - 1) / n
  sigma12 <- cov(y1, y2_new) * (n - 1) / n
  sigma22 <- cov(y2_new, y2_new) * (n - 1) / n + swp_result[3, 3] * 6 / n
  
  mu2_diff <- abs(param_trans[2] - mu2)
  param <- c(mu1, mu2, log(sigma11), log(sigma22), Z(sigma12 * (sigma11 * sigma22) ^ (-1/2)))
  
  # SEM核心添加的步骤
  theta_t_i <- final_param
  theta_t_i[i] <- param[i]
  theta_t1_i_j <- EM1(theta_t_i)$param[j]
  r_ij <- (theta_t1_i_j - final_param[j]) / (param[i] - final_param[i])
  
  return(list(param = param, mu2_diff = mu2_diff, r_ij = r_ij))
}


DM <- function(x) {
  i <- x[1]
  j <- x[2]
  
  param_all <- param <- 1:5
  mu2_diff <- 1
  eps <- 1e-5
  
  while(mu2_diff > eps) {
    EM_result <- SEM(param, i, j)
    param <- EM_result$param
    
    # print(EM_result$r_ij)
    mu2_diff <- EM_result$mu2_diff
  }
  return(EM_result$r_ij)
}

查看最终输出的DM矩阵:

ind_param <- permutations(5, 2, 1:5, repeats = TRUE)
ind3 <- c(2, 4, 5)
(DM_mat <- matrix(apply(ind_param, 1, DM), 5, byrow = TRUE)[ind3, ind3])

找到SEM的原始论文,核对后发现DM估计与论文上的结果一致。

G1 <- matrix(c(4.9741, 0, 0, 0.1111), 2)
G2 <- matrix(c(-5.0387, 0, 0, 0.0890, 0, -0.0497), 2)
G3 <- matrix(c(6.3719, 0, 0, 0, 0.1111, -0.0497, 0, -0.0497, 0.0556), 3)

(Delta_V_star <- (G3 - t(G2) %*% solve(G1) %*% G2) %*% DM_mat %*% solve(diag(p + 1) - DM_mat))
# round(Delta_V_star, 3)

最后计算 Δ V \Delta V^* ,与书上结果几乎一致。(理论上应该是对称矩阵,显示有一点点不对称应该是由于计算精度造成,保留小数点后3位看起来就是对称的了)

猜你喜欢

转载自blog.csdn.net/weixin_41929524/article/details/84644542
9.2