API_Net官方代码之创建模型

导入包：

import torch
from torch import nn
from torchvision import models
import numpy as np
from skimage import io

设置device：

# Select the CUDA device when torch reports it as available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

1）计算批量样本通过resnet101后输出的批量向量的距离：

计算方法是两个向量对应元素相减、平方、再求和。利用完全平方公式 (a-b)² = a² - 2ab + b² 展开后即可批量计算：输入为 batch_size 个 vector，输出为 batch_size×batch_size 的矩阵，表示两两向量之间的（平方）距离。
并且下面的式子在计算时用到了broadcast：第一项计算后是 b×b 大小，后面的两项均为一维向量，通过view增加一维后与第一项相加，就可以广播到增加的维度上。view将其中一个变为行向量、另一个变为列向量，从而实现对应相加。

def pdist(vectors):
    """Return the matrix of pairwise squared Euclidean distances.

    Uses the expansion ``|a - b|^2 = |a|^2 - 2 a.b + |b|^2`` so that every
    pair is handled by one matrix product plus broadcasting.

    Args:
        vectors: 2-D tensor with one feature vector per row.

    Returns:
        Square tensor whose entry ``[i, j]`` is the squared distance between
        rows ``i`` and ``j`` of ``vectors``.
    """
    squared_norms = vectors.pow(2).sum(dim=1)
    cross_term = vectors.mm(torch.t(vectors))
    # The row view spreads the norms across columns, the column view across rows.
    return -2 * cross_term + squared_norms.view(1, -1) + squared_norms.view(-1, 1)

2）建立模型：

此模型包含三个函数：__init__()、forward()、get_pairs()，接下来依次介绍：

__init__()函数：

class API_Net(nn.Module):
    """API-Net layers: ResNet-101 trunk, pooling, mutual-vector MLP, classifier."""

    def __init__(self):
        super().__init__()

        # Keep every child of the pretrained ResNet-101 except the last two
        # (its AdaptiveAvgPool2d and its 1000-way Linear), so the trunk ends
        # on the 2048-channel feature map.
        backbone = models.resnet101(pretrained=True)
        trunk = list(backbone.children())[:-2]
        self.conv = nn.Sequential(*trunk)
        # 14x14 average pooling turns each feature map into one 2048-d vector.
        self.avg = nn.AvgPool2d(kernel_size=14, stride=1)

        # Two-layer mapping that builds the mutual vector of a feature pair:
        # concatenated pair (2 * 2048) -> 512 -> 2048.
        self.map1 = nn.Linear(2048 * 2, 512)
        self.map2 = nn.Linear(512, 2048)

        # 200-way classifier head plus the shared dropout and gate activation.
        self.fc = nn.Linear(2048, 200)
        self.drop = nn.Dropout(p=0.5)
        self.sigmoid = nn.Sigmoid()

forward()函数：

    def forward(self, images, targets=None, flag='train'):
        conv_out = self.conv(images) #(b, 2048, 14, 14)
        pool_out = self.avg(conv_out).squeeze() #vector (b,2048)

        if flag == 'train':
            intra_pairs, inter_pairs, \
                    intra_labels, inter_labels = self.get_pairs(pool_out, targets)
        #返回的是intra_pairs (4,2)，第一列为原向量的索引，1234，第二列为类内与之对应的最相近的向量的索引，
		#intra_labels为(4,2)，其第一列为intra_pairs的第一列向量对应的labels，其第二列为intra_pairs的第二列向量对应的labels
'''
下方是将同类和不同类的放在一块，同类的第一列与不同类的第一列叠加起来，luo起来，形成批次，也就是4张图，生成4个向量，这四个向量对应与之同类的有4
个，可能在这里解释不通，因为样本少，但是在每类不止一个样本的时候就能找到每个向量对应的类内距离最近的向量，同样也能找到其对应的不同类4个向量
'''
            features1 = torch.cat([pool_out[intra_pairs[:, 0]], pool_out[inter_pairs[:, 0]]], dim=0)
            #主向量，也就是intra_pairs以及inter_pairs的左边一列索引对应的向量
            features2 = torch.cat([pool_out[intra_pairs[:, 1]], pool_out[inter_pairs[:, 1]]], dim=0)
            #与主向量对应的向量，也就是intra_pairs以及inter_pairs的右边一列索引对应的向量
            #如此操作是为了批量计算，一行内有两个向量，分别来自feature1中的与feature2中的对应的向量
            labels1 = torch.cat([intra_labels[:, 0], inter_labels[:, 0]], dim=0)#其对应的是feature1的向量的标签（8，1）
            labels2 = torch.cat([intra_labels[:, 1], inter_labels[:, 1]], dim=0)#其对应的是feature2的向量的标签（8，1）


            mutual_features = torch.cat([features1, features2], dim=1) #（8，）
            map1_out = self.map1(mutual_features)
            map2_out = self.drop(map1_out)
            map2_out = self.map2(map2_out)#（8，2048）

			#上面每对形成一个mutual_feature，下面一个mutual_feature形成两个gate
            gate1 = torch.mul(map2_out, features1)#点cheng
            gate1 = self.sigmoid(gate1)

            gate2 = torch.mul(map2_out, features2)
            gate2 = self.sigmoid(gate2)

            features1_self = torch.mul(gate1, features1) + features1 
            features1_other = torch.mul(gate2, features1) + features1

            features2_self = torch.mul(gate2, features2) + features2
            features2_other = torch.mul(gate1, features2) + features2

            logit1_self = self.fc(self.drop(features1_self))
            logit1_other = self.fc(self.drop(features1_other))
            logit2_self = self.fc(self.drop(features2_self))
            logit2_other = self.fc(self.drop(features2_other))

            return logit1_self, logit1_other, logit2_self, logit2_other, labels1, labels2
            #其中feature1对应着labels1，同时feature2对应着labels2
        elif flag == 'val':
            return self.fc(pool_out)

get_pairs()函数：

此函数的重点是：一批次图像经过网络得到batch个vectors，计算它们两两之间的距离，每个vector分别与类内距离最近、类间距离最近的vector组成pair，因此每个vector都对应两个vector。若类别数为n_class，每类样本数为n_sampler，则总样本数为n_class×n_sampler，一共得到2×n_class×n_sampler对。

此处如果不懂的话，可以通过画图来理解（原文此处配有一张示意图）。
其中每张图与类内以及类间都有比较。以batch=4为例，最后得到的logit1_self是(8,200)：前四行来自类内配对得到的特征，后四行来自类间配对得到的特征。

    @staticmethod
    def get_pairs(embeddings, labels):
        """Pair every sample with its nearest same-class and other-class sample.

        Declared as a static method so that both ``self.get_pairs(e, l)`` and
        ``API_Net.get_pairs(e, l)`` work with this two-argument signature.

        Args:
            embeddings: 2-D tensor with one feature vector per sample.
            labels: tensor with one class label per sample, in the same order.

        Returns:
            ``(intra_pairs, inter_pairs, intra_labels, inter_labels)``, four
            long tensors of shape ``(num, 2)`` on the device of ``embeddings``.
            Column 0 of ``*_pairs`` is the sample's own index and column 1 is
            the index of its nearest same-class (intra) or different-class
            (inter) sample. ``*_labels`` hold the labels of those two columns.
            When a sample has no candidate of the required kind in the batch,
            its distance row is all ``inf`` and ``argmin`` picks index 0.
        """
        # Pair mining only selects indices, so it is detached from the
        # autograd graph and carried out with NumPy on the CPU.
        distance_matrix = pdist(embeddings).detach().cpu().numpy()

        labels = labels.detach().cpu().numpy().reshape(-1, 1)
        num = labels.shape[0]  # number of samples in the batch

        # same_class[i, j] is True when samples i and j share a label
        # (the diagonal is therefore True as well).
        same_class = labels == labels.T
        not_self = ~np.eye(num, dtype=bool)

        # Intra-class partner: hide other classes and the sample itself, then
        # take the closest remaining sample in each row.
        dist_same = distance_matrix.copy()
        dist_same[~(same_class & not_self)] = np.inf
        intra_idxs = np.argmin(dist_same, axis=1)

        # Inter-class partner: hide every same-class entry (diagonal included).
        dist_diff = distance_matrix.copy()
        dist_diff[same_class] = np.inf
        inter_idxs = np.argmin(dist_diff, axis=1)

        # Assemble (anchor, partner) index pairs and their labels in one shot.
        anchors = np.arange(num)
        flat_labels = labels.reshape(-1)
        intra_pairs = np.stack([anchors, intra_idxs], axis=1)
        inter_pairs = np.stack([anchors, inter_idxs], axis=1)
        intra_labels = np.stack([flat_labels, flat_labels[intra_idxs]], axis=1)
        inter_labels = np.stack([flat_labels, flat_labels[inter_idxs]], axis=1)

        # Put the results on the same device as the embeddings they index.
        target_device = embeddings.device
        intra_labels = torch.from_numpy(intra_labels).long().to(target_device)
        intra_pairs = torch.from_numpy(intra_pairs).long().to(target_device)
        inter_labels = torch.from_numpy(inter_labels).long().to(target_device)
        inter_pairs = torch.from_numpy(inter_pairs).long().to(target_device)

        return intra_pairs, inter_pairs, intra_labels, inter_labels