2021SC@SDUSC
文章目录
一、算法代码实现(关键部分)
上篇文章介绍了两个算法,这些算法的实现还是比较有难度的。亲和图的存储结构我个人选用的是 pandas.Panel,这个存储结构在新版本的 pandas 中已被弃用(自 0.25 版本起已被移除),但使用起来还不错,作为多维存储结构很方便。下面展示几个核心部分的代码。
def AffinityFunPanelInit(nodeList,
                         labeledFeatureMatrix,
                         unlabeledFeatureMatrix,
                         labeledAQIDict):
    """Create the AQI affinity graph: one node-by-node matrix per feature.

    For every column of ``labeledFeatureMatrix`` a pairwise difference matrix
    over labeled and unlabeled nodes is built, mapped through
    ``linearizeFun`` using the coefficient of a no-intercept linear regression
    (labeled feature differences -> labeled AQI differences), and then
    min-max rescaled and multiplied by ``NORMALIZE_FACTOR``.

    Args:
        nodeList: Labels used as both index and columns of every matrix.
        labeledFeatureMatrix: Feature table of the labeled nodes; its columns
            define the set of features.
        unlabeledFeatureMatrix: Feature table of the unlabeled nodes, indexed
            by the same feature names.
        labeledAQIDict: Mapping whose values are the AQI readings of the
            labeled nodes.

    Returns:
        A ``pd.Panel`` holding one ``DataFrame`` per feature name.
    """
    # dict.values() is only a view on Python 3; hand the helper a real list.
    AQIList = list(labeledAQIDict.values())
    # Depends only on the labels, so it is computed once for all features.
    labeledAQIDiffArray = commonDiffMatrixInit(AQIList, AQIList).ravel()

    # ``np.float`` was removed from NumPy; the builtin ``float`` is the same dtype.
    entityLinearizeFun = np.vectorize(linearizeFun, otypes=[float])

    def fff(feature):
        """Return the rescaled affinity DataFrame for a single feature."""
        lList = list(labeledFeatureMatrix[feature])
        uList = list(unlabeledFeatureMatrix[feature])

        if feature == 'rowCol':
            diffFun = geoDiffMatrixInit
            # NOTE(review): eval() executes arbitrary expressions stored in the
            # feature table; confirm the data source is trusted or switch to
            # ast.literal_eval.
            lList = list(map(eval, lList))
            uList = list(map(eval, uList))
        else:
            diffFun = commonDiffMatrixInit

        # Reused for the top-left block and for the regression input
        # (assumes the diff helpers are pure; previously called twice with
        # identical arguments).
        labeledDiff = diffFun(lList, lList)

        tempMatrix = np.vstack((
            np.hstack([labeledDiff, diffFun(uList, lList)]),
            np.hstack([diffFun(lList, uList), diffFun(uList, uList)]),
        ))

        # Linear regression without intercept on the labeled differences.
        labeledFeatureDiffArray = labeledDiff.ravel()
        model = LinearRegression(fit_intercept=False).fit(
            labeledFeatureDiffArray.reshape((-1, 1)), labeledAQIDiffArray)

        tempMatrix = entityLinearizeFun(tempMatrix, model.coef_, model.intercept_)
        # NOTE(review): a constant matrix makes the denominator zero here.
        tempMatrix = NORMALIZE_FACTOR * (tempMatrix - tempMatrix.min()) / (tempMatrix.max() - tempMatrix.min())

        return pd.DataFrame(tempMatrix,
                            index=nodeList,
                            columns=nodeList,
                            dtype=float)

    # A plain dict is kept deliberately so the mapping type handed to pd.Panel
    # is unchanged.
    tempMatrixDict = {feature: fff(feature)
                      for feature in labeledFeatureMatrix.columns}
    return pd.Panel(tempMatrixDict)
#!/usr/bin/python
# libs
import pandas as pd
from collections import OrderedDict
# absolute function
import math
import time
def AQInf(labeledList,
          unlabeledList,
          timeStamp,
          labeledAQIDict,
          labeledFeatureMatrix,
          unlabeledFeatureMatrix):
    """AQInf inference model: iterate until the entropy of Pu stops changing.

    Args:
        labeledList: Labeled nodes.
        unlabeledList: Unlabeled nodes.
        timeStamp: Accepted for interface compatibility; not read in the body.
        labeledAQIDict: AQI values of the labeled nodes.
        labeledFeatureMatrix: Feature table of the labeled nodes; its columns
            define the features.
        unlabeledFeatureMatrix: Feature table of the unlabeled nodes.

    Returns:
        The unlabeled distribution matrix (Pu) produced by the last
        ``harmonicFun`` call, or the initial one if the loop never runs.
    """
    # Pu (unlabeled) and Pv (labeled) distribution matrices.
    puMatrix = unlabeledDistriMatrixInit(unlabeledList, MAX_AQI + 1)
    pvMatrix = labeledDistriMatrixInit(labeledAQIDict, MAX_AQI + 1)

    # Labeled nodes first, then unlabeled ones.
    allNodes = labeledList + unlabeledList

    affinityPanel = AffinityFunPanelInit(allNodes,
                                         labeledFeatureMatrix,
                                         unlabeledFeatureMatrix,
                                         labeledAQIDict)

    # One node-by-node weight matrix per feature, built with fill value 1.0.
    perFeatureWeights = OrderedDict(
        (feature, stringIndexMatrixInit(allNodes, allNodes, 1.0))
        for feature in labeledFeatureMatrix.columns
    )
    featureWeights = pd.Panel(perFeatureWeights)

    edgeWeights = weightMatrixUpdate(featureWeights, affinityPanel)

    # Seed the entropy difference with H(Pu) itself so the first comparison
    # against the threshold uses the initial entropy.
    previousEntropy = matrixEntropyFun(puMatrix)
    entropyDelta = previousEntropy

    while entropyDelta > CONV_THRESHOLD:
        # Refresh the per-feature weights, then the combined weight matrix.
        featureWeightPanelUpdate(featureWeights, edgeWeights, affinityPanel)
        edgeWeights = weightMatrixUpdate(featureWeights, affinityPanel)
        weightMatrixFilter(edgeWeights, labeledList)

        # New Pu from the harmonic function.
        puMatrix = harmonicFun(edgeWeights,
                               pvMatrix,
                               list(puMatrix.columns))

        currentEntropy = matrixEntropyFun(puMatrix)
        entropyDelta = math.fabs(currentEntropy - previousEntropy)
        previousEntropy = currentEntropy

    return puMatrix
# GEM algorithm: for every time stamp, repeatedly run AQInf, moving the
# minimum-entropy unlabeled node into the labeled set until none is left; the
# node picked last in each pass is recorded in the rank table.
# NOTE(review): loop nesting was reconstructed from an indentation-stripped
# listing; confirm it against the original script.
for currentTimeStamp in timeStampList:
    # update the 2 node lists each time stamp
    tempLabeledList = [element for element in labeledList
                       if element[0] == currentTimeStamp]
    superCurrentLabeledList = tempLabeledList
    superLeftUnlabeledList = [element for element in unlabeledList
                              if element[0] == currentTimeStamp]

    # update the 2 feature DataFrames
    tempLabeledFeatureMatrix = labeledFeatureTimeStampPanel[currentTimeStamp].copy()
    tempLabeledFeatureMatrix.index = tempLabeledList
    # pd.concat replaces DataFrame.append, which newer pandas no longer has.
    # NOTE(review): this matrix and superLabeledAQIDict keep growing across
    # time stamps while superCurrentLabeledList is reset; confirm intended.
    superLabeledFeatureMatrix = pd.concat(
        [superLabeledFeatureMatrix, tempLabeledFeatureMatrix])
    superUnlabeledFeatureMatrix = unlabeledFeatureTimeStampPanel[currentTimeStamp].copy()
    superUnlabeledFeatureMatrix.index = superLeftUnlabeledList
    print(superUnlabeledFeatureMatrix)

    # update the labeled AQI dict each time stamp
    tempLabeledAQIDict = OrderedDict(
        zip(tempLabeledList,
            labeledAQITable[currentTimeStamp:currentTimeStamp].values.ravel().tolist()))
    superLabeledAQIDict.update(tempLabeledAQIDict)

    funcstarttime = time.time()
    count_iter = 0
    minEntropyUnlabeled = 0
    minEntropyUnlabeledAQI = 0

    for superRank in range(numToBeRecommend):
        # Nothing left to recommend: without this guard the inner loop is
        # skipped and the stale node from the previous pass would be ranked
        # and removed a second time.
        if not superLeftUnlabeledList:
            break

        # Work on copies so one pass does not disturb the per-time-stamp state.
        currentLabeledList = superCurrentLabeledList.copy()
        labeledAQIDict = superLabeledAQIDict.copy()
        leftUnlabeledList = superLeftUnlabeledList.copy()
        labeledFeatureMatrix = superLabeledFeatureMatrix.copy()
        unlabeledFeatureMatrix = superUnlabeledFeatureMatrix.copy()
        count_iter = 0

        # for each unlabeled node: do GEM
        for currentRank in range(len(leftUnlabeledList), 0, -1):
            count_iter = count_iter + 1
            print(count_iter)
            print("------------------------start time: %d------------------------" % funcstarttime)
            unlabeledDistriMatrix = AQInf(currentLabeledList,
                                          leftUnlabeledList,
                                          currentTimeStamp,
                                          labeledAQIDict,
                                          labeledFeatureMatrix,
                                          unlabeledFeatureMatrix)
            nowtime = time.time() - funcstarttime
            funcstarttime = time.time()
            print("------------------------AQInf time: %d------------------------" % nowtime)

            # select the unlabeled node with the min entropy
            (minEntropyUnlabeled, minEntropyUnlabeledAQI) = minEntropyNodeInfer(unlabeledDistriMatrix)
            print("minimum entropy unlabeled: {}".format(minEntropyUnlabeled))

            # turn unlabeled to labeled
            currentLabeledList.append(minEntropyUnlabeled)
            labeledAQIDict[minEntropyUnlabeled] = minEntropyUnlabeledAQI
            leftUnlabeledList.remove(minEntropyUnlabeled)

            # move the node's feature row from the unlabeled to the labeled table
            labeledFeatureMatrix = pd.concat(
                [labeledFeatureMatrix,
                 unlabeledFeatureMatrix[minEntropyUnlabeled:minEntropyUnlabeled]])
            unlabeledFeatureMatrix.drop(minEntropyUnlabeled, inplace=True)

        # add the node selected last in this pass to the final rank table
        rankTable[minEntropyUnlabeled[1]][currentTimeStamp] = superRank + 1
        rankedList.append(minEntropyUnlabeled[1])

        # promote that node in the per-time-stamp state as well
        superCurrentLabeledList.append(minEntropyUnlabeled)
        superLabeledAQIDict[minEntropyUnlabeled] = minEntropyUnlabeledAQI
        superLeftUnlabeledList.remove(minEntropyUnlabeled)
        superLabeledFeatureMatrix = pd.concat(
            [superLabeledFeatureMatrix,
             labeledFeatureMatrix[minEntropyUnlabeled:minEntropyUnlabeled]])
        superUnlabeledFeatureMatrix.drop(minEntropyUnlabeled, inplace=True)