版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/eye_water/article/details/80552121
阅读本文之前,需要先了解基尼指数的计算公式。如果已经了解,可以直接往下看;如果还不了解,可以先看我之前写的文章《决策树的生成–Python代码实现》,只需看完其中“用Python代码计算基尼指数”这一部分即可。
直接上代码吧
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
float Gini(int train_data[2][2][2], int *target_data, int total_size, int index);

/*
 * Demo driver: builds a small data set that is already split into two
 * subsets, sums the weighted Gini index returned by Gini() for each subset,
 * and prints the total.
 */
int main(void)
{
    /* Data set divided into two subsets:
     * {{1, 1}, {1, 0}} and {{1, 1}, {1, 0}}. */
    int train_data[2][2][2] = {{{1, 1}, {1, 0}}, {{1, 1}, {1, 0}}};
    int target_data[2] = {0, 1}; /* class labels 0 and 1 */
    int total_size, index;
    float gini = 0.0f; /* accumulated with += below, so it must start at zero */

    /* Number of examples in the whole data set:
     * size of the full array divided by the size of one example. */
    total_size = (int)(sizeof(train_data) / sizeof(train_data[0][0]));
    printf("total_size = %d\n", total_size);

    /* Each call returns one subset's Gini index already multiplied by its
     * weight, so the overall value is simply the sum over the subsets. */
    for (index = 0; index < 2; index++) {
        gini += Gini(train_data, target_data, total_size, index);
    }
    printf("gini = %f\n", gini);
    return 0;
}
/*
 * Weighted Gini index of one subset of the data set.
 *
 * train_data  - data set split into subsets; element [1] of every example is
 *               compared against the class labels.
 * target_data - array of class labels; exactly two labels are read.
 * total_size  - number of examples in the whole data set.
 * index       - which subset of train_data to evaluate.
 *
 * Returns sum_i p_i * (1 - p_i) over the two classes, multiplied by
 * (examples in the subset / total_size). Returns 0 when total_size is not
 * positive, because the weight cannot be computed in that case.
 * Intermediate values are printed to stdout.
 */
float Gini(int train_data[2][2][2], int *target_data, int total_size, int index)
{
    /* target_data is a pointer here, so sizeof cannot recover how many labels
     * the caller's array holds; the class count is stated explicitly. */
    enum { TARGET_CLASSES = 2 };
    int i, j;
    int count, group_size;
    float probability[TARGET_CLASSES], local_probability, ratio, local_gini = 0;

    if (total_size <= 0) {
        return 0.0f; /* avoid dividing by zero when computing the weight */
    }

    /* train_data[index] still has array type int[2][2], so this sizeof ratio
     * is valid and yields the number of examples in one subset. */
    group_size = (int)(sizeof(train_data[index]) / sizeof(train_data[index][0]));

    /* Compute the probability of each class within the subset and store it
     * for the Gini formula below. */
    for (i = 0; i < TARGET_CLASSES; i++) {
        count = 0;
        for (j = 0; j < group_size; j++) {
            if (train_data[index][j][1] == target_data[i]) {
                count++;
            }
        }
        printf("count = %d\n", count);
        local_probability = (float)count / (float)group_size;
        probability[i] = local_probability;
        printf("probablity = %f\n", probability[i]);
    }

    /* Gini index formula: sum of p * (1 - p) over all classes. */
    for (i = 0; i < TARGET_CLASSES; i++) {
        local_gini += probability[i] * (1.0 - probability[i]);
    }
    printf("local_gini = %f\n", local_gini);

    /* Weight: share of the whole data set that falls into this subset. */
    ratio = (float)group_size / (float)total_size;
    local_gini = ratio * local_gini;
    printf("ratio * local_gini = %f\n", local_gini);
    return local_gini;
}
运行结果(注意:上面的代码中没有打印 class = … 的语句,下面输出中的 class = 0 / class = 1 两行不会由这段代码产生)
total_size = 4
class = 0
count = 1
probablity = 0.500000
class = 1
count = 1
probablity = 0.500000
local_gini = 0.500000
ratio * local_gini = 0.250000
class = 0
count = 1
probablity = 0.500000
class = 1
count = 1
probablity = 0.500000
local_gini = 0.500000
ratio * local_gini = 0.250000
gini = 0.500000
验证:可以把判断条件 if(train_data[index][j][1] == target_data[i]) 中的 train_data[index][j][1] 更换为 train_data[index][j][0]。由于每个例子的第 0 个元素都是 1,每个子集中只出现一个类别(概率分别为 0 和 1),此时基尼指数的计算结果为 0。