2021SC@SDUSC 项目实训-算法关键内容的实现

2021SC@SDUSC


一、算法代码实现(关键部分)

上篇文章介绍了两个算法,它们的实现都有一定难度。亲和图的存储结构我个人选用的是 pandas.Panel:它在新版本的 pandas 中已被弃用(自 0.25 版起已被移除),但作为多维存储结构使用起来很方便。下面展示几个核心部分的代码。

def AffinityFunPanelInit(nodeList,
                         labeledFeatureMatrix,
                         unlabeledFeatureMatrix,
                         labeledAQIDict):
    """Create the AQI affinity graph: one affinity matrix per feature.

    For every column of ``labeledFeatureMatrix`` the pairwise difference
    matrix over labeled and unlabeled values is assembled from four blocks,
    passed element-wise through ``linearizeFun`` together with the
    coefficient/intercept of a no-intercept linear regression (labeled
    feature differences -> labeled AQI differences), and min-max rescaled to
    ``[0, NORMALIZE_FACTOR]``.

    Args:
        nodeList: labels used as both index and columns of every per-feature
            matrix.  Presumably the labeled nodes followed by the unlabeled
            nodes, matching the block layout below -- verify against caller.
        labeledFeatureMatrix: DataFrame of features of the labeled nodes; its
            columns define the set of features.
        unlabeledFeatureMatrix: DataFrame of features of the unlabeled nodes,
            indexed by the same column names.
        labeledAQIDict: mapping whose values are the AQI readings of the
            labeled nodes.

    Returns:
        A ``pd.Panel`` built from a ``{feature: DataFrame}`` dict.

    NOTE(review): ``pd.Panel`` no longer exists in current pandas releases,
    so this function needs a legacy pandas; the return type is kept because
    callers consume a Panel.
    """
    # Materialise the values: on Python 3 ``dict.values()`` is a view, not a
    # sequence, and the helper receives it twice.
    AQIList = list(labeledAQIDict.values())
    # Labeled AQI differences do not depend on the feature, so compute once.
    labeledAQIDiffArray = commonDiffMatrixInit(AQIList, AQIList).ravel()

    # Element-wise wrapper around linearizeFun.  ``np.float`` was removed in
    # NumPy 1.24; the builtin ``float`` is the equivalent output type.
    entityLinearizeFun = np.vectorize(linearizeFun, otypes=[float])

    def _featureAffinity(feature):
        """Return the normalised affinity DataFrame of a single feature."""
        lList = list(labeledFeatureMatrix[feature])
        uList = list(unlabeledFeatureMatrix[feature])

        # 'rowCol' goes through the geographic difference function; every
        # other feature uses the common one.
        if feature == 'rowCol':
            diffFun = geoDiffMatrixInit
            # SECURITY NOTE: eval() executes whatever the cell contains.  It
            # is kept for compatibility; only feed trusted feature tables.
            lList = list(map(eval, lList))
            uList = list(map(eval, uList))
        else:
            diffFun = commonDiffMatrixInit

        # The labeled-vs-labeled block is needed both for the full matrix and
        # for the regression, so build it a single time.
        labeledDiff = diffFun(lList, lList)

        tempMatrix = np.vstack((np.hstack([labeledDiff,
                                           diffFun(uList, lList)]),
                                np.hstack([diffFun(lList, uList),
                                           diffFun(uList, uList)])))

        # get (slope, intercept) from linear regression
        labeledFeatureDiffArray = labeledDiff.ravel()
        model = LinearRegression(fit_intercept=False).fit(
            labeledFeatureDiffArray.reshape((-1, 1)), labeledAQIDiffArray)

        tempMatrix = entityLinearizeFun(tempMatrix, model.coef_, model.intercept_)

        # Min-max rescale to [0, NORMALIZE_FACTOR].
        # NOTE(review): a constant matrix makes the denominator zero and
        # yields NaN; behaviour is unchanged here -- confirm whether that
        # case can occur.
        tempMatrix = NORMALIZE_FACTOR * (tempMatrix - tempMatrix.min()) / (tempMatrix.max() - tempMatrix.min())

        return pd.DataFrame(tempMatrix,
                            index=nodeList,
                            columns=nodeList,
                            dtype=float)

    # A plain dict is used on purpose, exactly as before.
    # NOTE(review): legacy pd.Panel may order its items differently for dict
    # and OrderedDict input -- confirm before changing the mapping type.
    tempMatrixDict = {feature: _featureAffinity(feature)
                      for feature in list(labeledFeatureMatrix.columns)}

    tempMatrixPanel = pd.Panel(tempMatrixDict)
    return tempMatrixPanel

#!/usr/bin/python

# libs
import pandas as pd
from collections import OrderedDict
    # absolute function
import math
import time


def AQInf(labeledList,
          unlabeledList,
          timeStamp,
          labeledAQIDict,
          labeledFeatureMatrix,
          unlabeledFeatureMatrix):
    """AQInf inference model.

    Starting from initial distribution matrices for the unlabeled (Pu) and
    labeled (Pv) nodes, the function alternates between updating the
    per-feature weight panel, recomputing and filtering the weight matrix,
    and re-estimating Pu with ``harmonicFun``.  Iteration stops once the
    absolute change of the entropy of Pu is no longer greater than
    ``CONV_THRESHOLD``.

    Args:
        labeledList: labeled nodes.
        unlabeledList: unlabeled nodes.
        timeStamp: accepted for interface compatibility; not read here.
        labeledAQIDict: mapping used to build the labeled distribution matrix
            and forwarded to ``AffinityFunPanelInit``.
        labeledFeatureMatrix: features of the labeled nodes; its columns are
            the feature names.
        unlabeledFeatureMatrix: features of the unlabeled nodes.

    Returns:
        The last unlabeled distribution matrix (the initial one if the loop
        body never runs).
    """
    # Pu (unlabeled) and Pv (labeled) distribution matrices
    puMatrix = unlabeledDistriMatrixInit(unlabeledList, MAX_AQI + 1)
    pvMatrix = labeledDistriMatrixInit(labeledAQIDict, MAX_AQI + 1)

    # every node: labeled first, then unlabeled
    allNodes = labeledList + unlabeledList

    # per-feature affinity matrices
    affinityPanel = AffinityFunPanelInit(allNodes,
                                         labeledFeatureMatrix,
                                         unlabeledFeatureMatrix,
                                         labeledAQIDict)

    # one weight matrix per feature, every entry starting at 1.0
    initialWeights = OrderedDict(
        (feature, stringIndexMatrixInit(allNodes, allNodes, 1.0))
        for feature in labeledFeatureMatrix.columns
    )
    weightPanel = pd.Panel(initialWeights)

    # first weight matrix (not filtered before the loop)
    edgeWeights = weightMatrixUpdate(weightPanel, affinityPanel)

    # H(Pu) of the initial matrix doubles as the first "difference"
    previousEntropy = matrixEntropyFun(puMatrix)
    entropyDelta = previousEntropy

    while entropyDelta > CONV_THRESHOLD:
        # refresh the feature weights (return value is not used),
        # then rebuild and filter the weight matrix
        featureWeightPanelUpdate(weightPanel, edgeWeights, affinityPanel)
        edgeWeights = weightMatrixUpdate(weightPanel, affinityPanel)
        weightMatrixFilter(edgeWeights, labeledList)

        # new Pu from the harmonic function, keeping the current columns
        unlabeledColumns = list(puMatrix.columns)
        puMatrix = harmonicFun(edgeWeights, pvMatrix, unlabeledColumns)

        # entropy change drives the loop condition
        currentEntropy = matrixEntropyFun(puMatrix)
        entropyDelta = math.fabs(currentEntropy - previousEntropy)
        previousEntropy = currentEntropy

    return puMatrix



    # GEM loop: for each time stamp, run `numToBeRecommend` rounds; every round
    # labels the unlabeled nodes one by one with AQInf (lowest entropy first)
    # and records the node labeled last in `rankTable` / `rankedList`.
    # NOTE(review): this is a fragment -- the enclosing function and the
    # initialisation of timeStampList, labeledList, unlabeledList, the two
    # feature time-stamp panels, labeledAQITable, superLabeledFeatureMatrix,
    # superLabeledAQIDict, numToBeRecommend, rankTable and rankedList are not
    # visible here.
    for currentTimeStamp in timeStampList:  # GEM algorithm

        # update the 2 node list each time stamp
        # (element[0] of a node is compared with the time stamp)
        tempLabeledList = [ element for element in labeledList if element[0] == currentTimeStamp ]
        #currentLabeledList += tempLabeledList
        # same list object as tempLabeledList (no copy is made)
        superCurrentLabeledList = tempLabeledList

        superLeftUnlabeledList = [ element for element in unlabeledList if element[0] == currentTimeStamp ]

        # update the 2 feature DataFrame
        # labeled features are re-indexed by node and appended to the running
        # superLabeledFeatureMatrix
        tempLabeledFeatureMatrix = labeledFeatureTimeStampPanel[currentTimeStamp].copy()
        tempLabeledFeatureMatrix.index = tempLabeledList
        superLabeledFeatureMatrix = superLabeledFeatureMatrix.append(tempLabeledFeatureMatrix)

        # unlabeled features are replaced (not accumulated) each time stamp
        superUnlabeledFeatureMatrix = unlabeledFeatureTimeStampPanel[currentTimeStamp].copy()
        superUnlabeledFeatureMatrix.index = superLeftUnlabeledList
        print(superUnlabeledFeatureMatrix)

        # update the labeled AQI dict each time stamp
        # presumably the slice selects the single row of this time stamp -- TODO confirm
        tempLabeledAQIDict = OrderedDict( zip( tempLabeledList,
                                               labeledAQITable[ currentTimeStamp : currentTimeStamp ].
                                               values.ravel().tolist() ) )
        #print(labeledAQITable[ currentTimeStamp : currentTimeStamp ].
        #                                       values.ravel().tolist())
        superLabeledAQIDict.update(tempLabeledAQIDict)
        funcstarttime = time.time()
        # for each unlabeled nodes: do GEM
        count_iter = 0
        # placeholders, overwritten by the inner loop below
        # NOTE(review): if the inner loop never runs (no unlabeled node left),
        # minEntropyUnlabeled stays 0 and the indexing after the loop fails.
        minEntropyUnlabeled = 0
        minEntropyUnlabeledAQI = 0

        for superRank in range(numToBeRecommend):

            # work on copies so each round starts from the "super" state
            currentLabeledList = superCurrentLabeledList.copy()
            labeledAQIDict = superLabeledAQIDict.copy()
            leftUnlabeledList = superLeftUnlabeledList.copy()
            labeledFeatureMatrix = superLabeledFeatureMatrix.copy()
            unlabeledFeatureMatrix = superUnlabeledFeatureMatrix.copy()
            count_iter = 0
            # one iteration per unlabeled node; currentRank itself is only
            # referenced by the commented-out rankTable line below
            for currentRank in range(len(leftUnlabeledList), 0, -1):
                count_iter = count_iter +1
                print(count_iter)
                print("------------------------start time: %d------------------------" % funcstarttime)
                unlabeledDistriMatrix = AQInf(currentLabeledList,
                                              leftUnlabeledList,
                                              currentTimeStamp,
                                              labeledAQIDict,
                                              labeledFeatureMatrix,
                                              unlabeledFeatureMatrix)

                # elapsed seconds since the previous measurement, then restart the timer
                nowtime = time.time() - funcstarttime
                funcstarttime = time.time()
                print("------------------------AQInf time: %d------------------------" % nowtime)
                # select the unlabedled node with the min entropy
                (minEntropyUnlabeled, minEntropyUnlabeledAQI) = minEntropyNodeInfer(unlabeledDistriMatrix)
                print("minimum entropy unlabeled: {}".format(minEntropyUnlabeled))
                # give the rank value reversely
                #rankTable[ minEntropyUnlabeled[1] ][currentTimeStamp] = currentRank

                # turn unlabeled to labeled
                currentLabeledList.append(minEntropyUnlabeled)

                # update the labeled AQI dict
                labeledAQIDict[minEntropyUnlabeled] = minEntropyUnlabeledAQI
                #print(minEntropyUnlabeledAQI)

                # exclude the labeled node from the unlabeled list
                leftUnlabeledList.remove(minEntropyUnlabeled)

                # update the 2 feature panels
                # (the a:a slice presumably yields the node's single row -- TODO confirm)
                labeledFeatureMatrix = labeledFeatureMatrix.append(unlabeledFeatureMatrix[minEntropyUnlabeled : minEntropyUnlabeled])
                unlabeledFeatureMatrix.drop(minEntropyUnlabeled, inplace = True)

            # add the least rank node to final rank table
            # (minEntropyUnlabeled is the node labeled in the last inner iteration)
            rankTable[minEntropyUnlabeled[1]][currentTimeStamp] = superRank + 1

            rankedList.append(minEntropyUnlabeled[1])

            # promote that node to the "super" labeled state for the next round
            superCurrentLabeledList.append(minEntropyUnlabeled)
            superLabeledAQIDict[minEntropyUnlabeled] = minEntropyUnlabeledAQI
            superLeftUnlabeledList.remove(minEntropyUnlabeled)
            # its feature row is taken from the round's labeledFeatureMatrix
            superLabeledFeatureMatrix = superLabeledFeatureMatrix.append(
                labeledFeatureMatrix[minEntropyUnlabeled: minEntropyUnlabeled])
            superUnlabeledFeatureMatrix.drop(minEntropyUnlabeled, inplace=True)


猜你喜欢

转载自blog.csdn.net/m0_46306466/article/details/125231565