把之前的计算信息熵的代码给打包为一个class,创建实例也成功了,但是在将dataframe对象传入方法时却报错了
代码如下:
class entropy:
def __init__(self,datafr):
self.datafr=datafr
def quchonghanshu(one_list):
return list(set(one_list))
def lisan_NO_imformation_S(self,jieguo,string,attributes_num):
all_sum=np.array(self.datafr[string]).shape[0]#参数在这里修改,要求得到这一columns的
temp_dic={}
for i in jieguo:
count=0
temp_dic[i]=count
for s in range(all_sum):
if self.datafr.ix[s,string]==i:
count=count+1
if count==0:
count=1
temp_dic[i]=count
countlog=0
for i in jieguo:
countlog=countlog+temp_dic[i]/all_sum*math.log(temp_dic[i]/all_sum,
attributes_num)
countlog=-countlog
return countlog
def lisan_condition_imformation_S(self,attributes_value,condition_string,
result_string,result_count):
temp_list=[]
for k in range(len(self.datafr.index)):
if self.datafr.ix[k,condition_string]==attributes_value:
temp_list.append(self.datafr.ix[k,result_string])
series=pd.Series(temp_list)
dataframe=pd.DataFrame(series,columns=[result_string])
temp_list=quchonghanshu(temp_list)
iu=lisan_NO_imformation_S(temp_list,dataframe,result_string,result_count)
return iu
def lisan_imformation_S(self,condition_string,result_string,result1_count):
all_sum1=np.array(self.datafr[condition_string]).shape[0]
temp_dic1={}
for i in quchonghanshu(list(self.datafr[condition_string])):
count1=0
for s in range(all_sum1):
if self.datafr.ix[s,condition_string]==i:
count1=count1+1
temp_dic1[i]=count1
count1_average=0
for i in quchonghanshu(list(self.datafr[condition_string])):
count1_average=count1_average+temp_dic1[i]/all_sum1*lisan_condition_imformation_S(self.datafr,i,condition_string,result_string,result1_count)
return count1_average