def inflate_conv2d_weight(weight_2d, depth, mode="center"):
    """Inflate a 2D convolution kernel into a 3D one along a new time axis.

    Kernel layout is (num_kernels, num_filters_per_kernel, (Time,) Height,
    Width); the time axis is inserted at dim 2.

    Args:
        weight_2d: 4-D tensor ``(out_channels, in_channels, kH, kW)``.
        depth: Temporal kernel size ``kT`` of the resulting 3D kernel.
        mode: One of
            ``"average"`` - every temporal slice holds ``weight_2d / depth``,
            so the slices sum back to the 2D kernel;
            ``"tail"``    - zeros everywhere except the last temporal slice;
            ``"center"``  - zeros everywhere except slice ``depth // 2``.

    Returns:
        A freshly allocated, contiguous tensor of shape
        ``(out_channels, in_channels, depth, kH, kW)`` with the dtype and
        device of ``weight_2d``. It never shares storage with ``weight_2d``.

    Raises:
        ValueError: If ``weight_2d`` is not 4-D, ``depth < 1``, or ``mode``
            is unknown.
    """
    if weight_2d.dim() != 4:
        raise ValueError(
            f"expected a 4-D conv2d weight, got {weight_2d.dim()} dims"
        )
    if depth < 1:
        raise ValueError(f"depth must be >= 1, got {depth}")

    if mode == "average":
        # repeat() materialises real copies; expand() would only create a
        # stride-0 view that aliases the 2D weight's memory.
        return (weight_2d / depth).unsqueeze(2).repeat(1, 1, depth, 1, 1)

    slice_for_mode = {"tail": depth - 1, "center": depth // 2}
    if mode not in slice_for_mode:
        raise ValueError(
            f"unknown mode {mode!r}; expected 'average', 'tail' or 'center'"
        )

    out_channels, in_channels, k_h, k_w = weight_2d.shape
    weight_3d = weight_2d.new_zeros((out_channels, in_channels, depth, k_h, k_w))
    weight_3d[:, :, slice_for_mode[mode]] = weight_2d
    return weight_3d


# NOTE: in real use the 2D weight presumably comes from a checkpoint, e.g.
# vae_2d_ckpt["state_dict"][key_2d]; a freshly initialised layer stands in here.
conv2d = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=0)
conv3d = nn.Conv3d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=0)

# The three strategies are mutually exclusive, so exactly one is selected.
# NOTE(review): "center" is used because it was the last strategy applied in
# the previous version of this script - confirm this is the intended one.
INIT_MODE = "center"

# conv2d weight
w_2d = conv2d.weight.data
print(w_2d.shape)
print(conv3d.weight.shape)

# Temporal kernel size is read from the target layer (dim 2 of its weight).
w_3d = inflate_conv2d_weight(w_2d, conv3d.weight.shape[2], mode=INIT_MODE)
print(w_3d.shape)

# Copy into the existing parameter so conv3d keeps its own contiguous storage
# and does not alias conv2d's weight.
with torch.no_grad():
    conv3d.weight.copy_(w_3d)