c语言计算基尼指数

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/eye_water/article/details/80552121

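阅读本文之前需要先了解基尼指数的计算公式。如果已经了解,可以直接往下看;如果还不了解,可以先看我之前写的文章《决策树的生成–Python代码实现》,只需看完其中"用Python代码计算基尼指数"这一部分即可。简单回顾:对含有 $K$ 个类别的集合 $D$,$\mathrm{Gini}(D) = \sum_{k=1}^{K} p_k (1 - p_k)$;把 $D$ 划分为若干子集 $D_i$ 后,划分的基尼指数为 $\sum_i \frac{|D_i|}{|D|}\,\mathrm{Gini}(D_i)$。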

直接上代码吧

#include<stdio.h>
#include<stdlib.h>
#include<string.h>

float Gini(int train_data[2][2][2], int* target_data, int total_size, int index);

/*
 * Demo driver: computes the Gini index of a data set that has been split
 * into two subsets by summing the weighted contribution of each subset.
 */
int main(void)
{
    /*
     * The data set, partitioned into two subsets:
     * {{1, 1}, {1, 0}} and {{1, 1}, {1, 0}}.
     * Gini() compares the second value of every example against the
     * class labels below.
     */
    int train_data[2][2][2] = {{{1, 1}, {1, 0}}, {{1, 1}, {1, 0}}};
    int target_data[2] = {0, 1}; /* class labels: 0 and 1 */

    /* Total number of examples across all subsets (2 subsets * 2 examples). */
    int total_size = (int)(sizeof(train_data) / sizeof(train_data[0][0]));
    printf("total_size = %d\n", total_size);

    /* Must start at zero: it is accumulated with += in the loop below. */
    float gini = 0.0f;
    for (int index = 0; index < 2; index++) {
        /* Each call returns the subset's Gini index already multiplied by
         * its weight, so the overall index is simply the sum. */
        gini += Gini(train_data, target_data, total_size, index);
    }
    printf("gini = %f\n", gini);
    return 0;
}

/**
 * Compute the weighted Gini index of one subset of the data set.
 *
 * For subset `index`, the share p_k of examples whose second value equals
 * each class label is computed, the subset's Gini index is
 * sum_k p_k * (1 - p_k), and that value is scaled by
 * group_size / total_size (the fraction of all examples in this subset).
 * Intermediate values are printed to stdout.
 *
 * @param train_data  data set split into 2 subsets of 2 examples each;
 *                    train_data[index][j][1] is compared with the labels
 * @param target_data array holding the 2 class labels
 * @param total_size  total number of examples over all subsets
 * @param index       which subset to evaluate (0 or 1)
 * @return the weighted Gini index of the subset, or 0.0f when an argument
 *         is invalid (NULL labels, non-positive total_size, bad index)
 */
float Gini(int train_data[2][2][2], int* target_data, int total_size, int index)
{
    /*
     * Both parameters are pointers inside this function, so their element
     * counts cannot be recovered with sizeof (sizeof(target_data) is the
     * size of a pointer). The counts are fixed by the interface instead.
     */
    enum { NUM_GROUPS = 2, TARGET_CLASSES = 2 };

    /* Reject arguments that would cause out-of-bounds reads or a division
     * by zero. */
    if (train_data == NULL || target_data == NULL || total_size <= 0 ||
        index < 0 || index >= NUM_GROUPS) {
        return 0.0f;
    }

    /* train_data[index] still has the real array type int[2][2], so this
     * sizeof ratio is the number of examples in the subset. */
    int group_size = (int)(sizeof(train_data[index]) / sizeof(train_data[index][0]));

    /* Share of each class inside the subset, kept for the formula below. */
    float probablity[TARGET_CLASSES];
    for (int i = 0; i < TARGET_CLASSES; i++) {
        int count = 0;
        for (int j = 0; j < group_size; j++) {
            if (train_data[index][j][1] == target_data[i]) {
                count++;
            }
        }
        /* Printed so the program output matches the documented sample run. */
        printf("class = %d\n", target_data[i]);
        printf("count = %d\n", count);
        probablity[i] = (float)count / (float)group_size;
        printf("probablity = %f\n", probablity[i]);
    }

    /* Gini index of the subset: sum over classes of p * (1 - p). */
    float local_gini = 0.0f;
    for (int i = 0; i < TARGET_CLASSES; i++) {
        local_gini += probablity[i] * (1.0f - probablity[i]);
    }
    printf("local_gini = %f\n", local_gini);

    /* Weight: fraction of all examples that belong to this subset. */
    float ratio = (float)group_size / (float)total_size;
    local_gini = ratio * local_gini;
    printf("ratio * local_gini = %f\n", local_gini);
    return local_gini;
}

运行结果

total_size = 4
class = 0
count = 1
probablity = 0.500000
class = 1
count = 1
probablity = 0.500000
local_gini = 0.500000
ratio * local_gini = 0.250000
class = 0
count = 1
probablity = 0.500000
class = 1
count = 1
probablity = 0.500000
local_gini = 0.500000
ratio * local_gini = 0.250000
gini = 0.500000

验证:把 if(train_data[index][j][1] == target_data[i]) 中的 train_data[index][j][1] 更换为 train_data[index][j][0]。此时每个子集中所有例子在该位置的取值都是1(类别0的概率为0,类别1的概率为1),因此计算结果为0。

猜你喜欢

转载自blog.csdn.net/eye_water/article/details/80552121