将原FCBO算法计算新的形式概念的函数,即Compute Closure函数并行化。但是由于CPU与GPU的数据交换开销太大,并行化没有带来加速效果。
fcbo.h文件
//fcbo.h文件,包括串行fcbo算法所需的函数与变量
//在Visual Studio 2015编译通过
#pragma once
#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <windows.h>
#define BIT ((unsigned long)1) // the value 1 typed as unsigned long (single-bit mask base)
#define NULL_LONG ((unsigned long)0) // the value 0 typed as unsigned long
#define INT_SIZE (sizeof(int)) // size of an int in bytes
#define LONG_SIZE (sizeof(unsigned long)) // size of an unsigned long in bytes
#define ARCHBIT ((LONG_SIZE * 8) - 1) // index of the highest bit of an unsigned long
#define BYTE_COUNT_A (LONG_SIZE * int_count_a) // bytes occupied by the unsigned longs holding all attribute bits of one object
#define BYTE_COUNT_O (LONG_SIZE * int_count_o) // bytes occupied by the unsigned longs holding all object bits of one attribute
#define BUFFER_BLOCK 1024 // initial capacity of buff and its growth step, counted in ints
int attributes = 0; // number of attributes
int objects = 0; // number of objects
int int_count_a = 0; // how many unsigned longs are needed to hold one bit per attribute
int int_count_o = 0; // how many unsigned longs are needed to hold one bit per object
int table_entries = 0; // number of crosses (set cells) in the formal context
int min_support = 0; // support threshold (parsed from -S on the command line)
unsigned long *context; // bit matrix of the formal context: int_count_a unsigned longs per object
unsigned long **cols; // cols[k] points to the int_count_o unsigned longs holding the object bits of attribute k
int *supps; // number of objects having each attribute, e.g. supps[5]=10 means 10 objects have attribute 5
int *attrib_numbers; // initialised so that attrib_numbers[i]=i; reordered by sort_context
unsigned long upto_bit[ARCHBIT + 1]; // per-bit masks filled in by initialize_algorithm
int attr_offset = 0; // attribute offset, 0 by default; use 1 when attributes in the input are numbered from 1
FILE *in_file; // input stream
FILE *out_file; // output stream
int verbosity_level = 3; // controls how much is printed (concepts, statistics, timing)
// Run-time counters reported at the end of main.
struct stats_t
{
int total; // number of concepts printed
int closures; // number of closure computations performed
int fail_canon; // number of candidates rejected by the original CbO canonicity test
int fail_fcbo; // number of candidates rejected by the additional FCbO test
int fail_support; // not updated anywhere in the visible code
} stats = { 0, 0, 0, 0, 0 };
LARGE_INTEGER nFreq; // performance-counter frequency, filled by QueryPerformanceFrequency in main
LARGE_INTEGER time_start, time_inner, time_end; // performance-counter samples taken in main
int *buff = NULL; // growable buffer of parsed integers; rows are terminated by -1
int buff_index = 0; // number of integers currently stored in buff
size_t buff_size = BUFFER_BLOCK; // current capacity of buff, counted in ints
/*
 * Reads the next token from `file`.
 *
 * Non-digit characters are skipped. Three outcomes are possible:
 *   - a newline is met before any digit: returns 1 with *value == -1
 *     (this is how the caller detects the end of a row);
 *   - end of file is met before any digit: returns 0 with *value == -1;
 *   - a run of decimal digits is read: returns 1 with *value set to the
 *     number minus the global attr_offset. The character that ended the
 *     run is pushed back onto the stream.
 */
int get_next_integer(FILE *file, int *value)
{
    int c;
    int number = 0;

    *value = -1;

    /* Skip everything up to the first digit, stopping at newline or EOF. */
    for (;;)
    {
        c = fgetc(file);
        if (c == '\n')
            return 1;
        if (c == EOF)
            return 0;
        if ((c >= '0') && (c <= '9'))
            break;
    }

    /* Accumulate the (possibly multi-digit) decimal number. */
    do
    {
        number = number * 10 + (c - '0');
        c = fgetc(file);
    } while ((c >= '0') && (c <= '9'));

    ungetc(c, file);
    *value = number - attr_offset;
    return 1;
}
/*
 * Makes *buffer point to storage for `size` ints.
 *
 * An existing buffer is resized with realloc (its contents are kept); a NULL
 * buffer is freshly allocated with malloc. On allocation failure a message is
 * written to stderr and the process exits with status 3.
 */
void allocate_buffer(int **buffer, int size)
{
    int *block = (*buffer != NULL)
                     ? (int *)realloc(*buffer, INT_SIZE * size)
                     : (int *)malloc(INT_SIZE * size);

    *buffer = block;
    if (block == NULL)
    {
        fprintf(stderr, "Cannot reallocate buffer, quitting.");
        exit(3);
    }
}
/*
 * Appends __value to the global buff, growing it by BUFFER_BLOCK ints when it
 * is full. The body is wrapped in do { ... } while (0) so that the macro
 * behaves like a single statement: "PUSH_NEW_INTEGER(x);" is then safe inside
 * an unbraced if/else, which a bare { ... } block followed by ';' is not.
 */
#define PUSH_NEW_INTEGER(__value) \
    do \
    { \
        if ((size_t)buff_index >= buff_size) \
        { \
            buff_size += BUFFER_BLOCK; \
            allocate_buffer(&buff, (int)buff_size); \
        } \
        buff[buff_index] = (__value); \
        buff_index++; \
    } while (0)
//read_file结束后,buff数组用-1分割两行,某行的数据代表第x个object具有哪些属性
void read_file(FILE *file)
{
int last_value = -1, value = 0, last_attribute = -1, last_object = -1;
allocate_buffer(&buff, buff_size); //buff是一维数组,初始大小1024
while (get_next_integer(file, &value)) //取所有整数,每一行用-1分隔
{
if ((value < 0) && (last_value < 0)) //一行到下一行的过渡,last_value此时为-1
continue;
if (value < 0) //一行结束
{
last_object++;
PUSH_NEW_INTEGER(-1);
}
else
{
if (value > last_attribute)
last_attribute = value;
PUSH_NEW_INTEGER(value);
}
last_value = value;
}
if (last_value >= 0) //当文末没有换行符,最后一个数字直接接EOF时,运行该段代码
{
last_object++;
PUSH_NEW_INTEGER(-1);
}
objects = last_object + 1;
attributes = last_attribute + 1;
}
/*
 * Builds the bit-matrix form of the formal context from the parsed buff.
 *
 * Sets int_count_a / int_count_o (unsigned longs needed per object / per
 * attribute), allocates and fills `context` (int_count_a words per object,
 * attribute a stored at bit ARCHBIT - a % word_bits of word a / word_bits,
 * i.e. most significant bit first), counts the support of every attribute in
 * `supps`, and counts the filled cells in `table_entries`.
 *
 * Both allocations are zero-initialised and checked; at least one element is
 * always requested so that an empty input cannot produce a NULL result that
 * would be mistaken for an out-of-memory condition or be dereferenced.
 * On allocation failure the process exits with status 5.
 */
void create_context()
{
    const int word_bits = (int)(ARCHBIT + 1);
    size_t context_words;
    int i, row = 0;

    /* One extra word is always reserved, so the last word keeps spare bits. */
    int_count_a = (attributes / word_bits) + 1;
    int_count_o = (objects / word_bits) + 1;

    context_words = (size_t)int_count_a * (size_t)objects;
    context = (unsigned long *)calloc(context_words ? context_words : 1, LONG_SIZE);
    if (!context)
    {
        fprintf(stderr, "Cannot allocate bitcontext, quitting.");
        exit(5);
    }

    supps = (int *)calloc(attributes > 0 ? (size_t)attributes : 1, sizeof(int));
    if (!supps)
    {
        fprintf(stderr, "Cannot allocate attribute supports, quitting.");
        exit(5);
    }

    for (i = 0; i < buff_index; i++)
    {
        const int attr = buff[i];

        if (attr < 0) /* every -1 terminates one row */
        {
            row++;
            continue;
        }
        /* Set the bit of attribute `attr` in the row of the current object. */
        context[(size_t)row * int_count_a + (attr / word_bits)] |= (BIT << (ARCHBIT - (attr % word_bits)));
        supps[attr]++;
        table_entries++;
    }

    if (verbosity_level >= 2)
        fprintf(stderr, "objects: %6i\nattributes: %4i\nentries: %8i\n", objects, attributes, table_entries);
}
/*
 * Allocates attrib_numbers with one entry per attribute and fills it with the
 * identity mapping (entry i holds i).
 */
void initialize_output()
{
    int next = 0;

    attrib_numbers = (int *)malloc(sizeof(int) * attributes);
    while (next < attributes)
    {
        attrib_numbers[next] = next;
        next++;
    }
}
void print_attributes(unsigned long *set)
{
int i, j, c;
int first = 1;
if (verbosity_level <= 0)
return;
for (c = j = 0; j < int_count_a; j++)
{
for (i = ARCHBIT; i >= 0; i--)
{
if (set[j] & (BIT << i))
{
if (!first)
fprintf(out_file, " ");
fprintf(out_file, "%i", attrib_numbers[c]);
first = 0;
}
c++;
if (c >= attributes)
goto out;
}
}
out:
fprintf(out_file, "\n");
}
/*
 * qsort comparator over attribute numbers (ints).
 *
 * Orders attributes by ascending support: returns -1 when attribute *a is
 * held by fewer objects than attribute *b, 1 when it is held by more, and 0
 * when the supports are equal.
 */
int cols_compar(const void *a, const void *b)
{
    const int support_a = supps[*(const int *)a];
    const int support_b = supps[*(const int *)b];

    return (support_a > support_b) - (support_a < support_b);
}
/*
 * qsort comparator over context rows.
 *
 * Each row is int_count_a unsigned longs; rows are compared word by word from
 * the first word on, and the first differing word decides the order
 * (smaller word first). Returns 0 when all words are equal.
 */
int rows_compar(const void *a, const void *b)
{
    const unsigned long *lhs = (const unsigned long *)a;
    const unsigned long *rhs = (const unsigned long *)b;
    int w;

    for (w = 0; w < int_count_a; w++)
    {
        if (lhs[w] != rhs[w])
            return (lhs[w] < rhs[w]) ? -1 : 1;
    }
    return 0;
}
void sort_context()
{
int i, j, k, x, y, z, ii, jj, a, aa;
unsigned long *new_context;
qsort(attrib_numbers, attributes, sizeof(int), cols_compar); //按属性对应的对象数降序排列属性
qsort(context, objects, BYTE_COUNT_A, rows_compar); //具有更多属性的object排在前面
}
/*
 * Prepares the lookup structures used by the search.
 *
 * - upto_bit[i] gets every bit strictly above bit i set and bits 0..i clear.
 * - cols is the column-wise view of the context: cols[k] points to
 *   int_count_o unsigned longs in which bit (x % word_bits) of word
 *   (x / word_bits) is set when object x has the attribute at bit position k.
 *   All columns live in one contiguous zero-initialised buffer.
 */
void initialize_algorithm()
{
    const int word_bits = (int)(ARCHBIT + 1);
    unsigned long *cols_buff;
    int bit, attr, obj;

    /* Build the masks from the top bit downwards: each adds one more bit. */
    upto_bit[ARCHBIT] = NULL_LONG;
    for (bit = (int)ARCHBIT - 1; bit >= 0; bit--)
        upto_bit[bit] = upto_bit[bit + 1] | (BIT << (bit + 1));

    cols_buff = (unsigned long *)malloc(LONG_SIZE * int_count_o * attributes);
    memset(cols_buff, 0, LONG_SIZE * int_count_o * attributes);
    cols = (unsigned long **)malloc(sizeof(unsigned long *) * attributes);

    for (attr = 0; attr < attributes; attr++)
    {
        /* Location of this attribute inside a context row (highest bit first). */
        const int word = attr / word_bits;
        const unsigned long mask = BIT << (ARCHBIT - (attr % word_bits));
        unsigned long *column = cols_buff + (size_t)attr * int_count_o;

        cols[attr] = column;
        for (obj = 0; obj < objects; obj++)
        {
            if (context[obj * int_count_a + word] & mask)
                column[obj / word_bits] |= BIT << (obj % word_bits);
        }
    }
}
/*
 * Computes a concept on the host.
 *
 * With attr_extent == NULL: extent is set to all ones and intent becomes the
 * intersection of the rows of every object (the attributes shared by all
 * objects).
 *
 * Otherwise: extent = prev_extent & attr_extent, and intent becomes the
 * intersection of the context rows of exactly the objects whose bit is set in
 * extent (bit l of word k stands for object k * word_bits + l).
 *
 * intent has int_count_a words, the extents have int_count_o words.
 * stats.closures is incremented on every call.
 */
void compute_closure(unsigned long *intent, unsigned long *extent, unsigned long *prev_extent, unsigned long *attr_extent)
{
    int word, bit, w, obj;

    stats.closures++;
    memset(intent, 0xFF, BYTE_COUNT_A);

    if (!attr_extent)
    {
        /* Top concept: all objects, attributes common to every row. */
        memset(extent, 0xFF, BYTE_COUNT_O);
        for (obj = 0; obj < objects; obj++)
        {
            const unsigned long *row = context + int_count_a * obj;

            for (w = 0; w < int_count_a; w++)
                intent[w] &= row[w];
        }
        return;
    }

    for (word = 0; word < int_count_o; word++)
    {
        /* Objects of the previous extent that also have the new attribute. */
        unsigned long pending = prev_extent[word] & attr_extent[word];

        extent[word] = pending;

        /* Walk the set bits from the lowest one; stop once none are left. */
        for (bit = 0; pending != 0; bit++, pending >>= 1)
        {
            if (pending & BIT)
            {
                const unsigned long *row = context + int_count_a * (word * (ARCHBIT + 1) + bit);

                for (w = 0; w < int_count_a; w++)
                    intent[w] &= row[w];
            }
        }
    }
}
kernel.cu文件
//包括GPU运行的3个核函数,GPU端变量的声明与初始化
//对fcbo算法的改动主要在generate_from_node函数中
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "fcbo.h"
#define THREAD_NUM 8 // number of threads init_attr is launched with; also the number of partial results kept per intent word in d_temp
__device__ unsigned long *d_context; // device-side pointer to the formal context (set from d_context_tmp in init_cuda)
__constant__ int d_int_count_o; // device-side copy of int_count_o (unsigned longs per extent)
__constant__ int d_int_count_a; // device-side copy of int_count_a (unsigned longs per intent)
unsigned long *d_context_tmp; // host-held device pointer used to allocate the context and initialise d_context
unsigned long *d_intent, *d_extent; // device-side intent and extent of the concept being expanded
unsigned long *d_new_extent, *d_new_intent; // device-side extent and intent of the candidate concept
unsigned long *d_attr_extent; // device-side copy of one attribute's object column
unsigned long *d_temp; // device buffer holding the per-thread partial results of init_attr
/*
 * Final reduction step of the closure computation.
 *
 * Thread threadIdx.x owns word threadIdx.x of `intent` and ANDs into it the
 * THREAD_NUM consecutive partial results temp[word * THREAD_NUM + 0 ..
 * THREAD_NUM - 1]. Only threadIdx.x is used for indexing and there is no
 * bounds check, so the launch must use exactly one thread per intent word.
 */
__global__ void kernel(unsigned long *intent, unsigned long *temp)
{
    const int word = threadIdx.x;
    const unsigned long *partials = temp + word * THREAD_NUM;
    unsigned long combined = intent[word];

    for (int slot = 0; slot < THREAD_NUM; ++slot)
    {
        combined &= partials[slot];
    }
    intent[word] = combined;
}
/*
 * Computes the candidate extent: extent = prev_extent & attr_extent.
 *
 * Thread threadIdx.x handles word threadIdx.x of the three arrays. Only
 * threadIdx.x is used for indexing and there is no bounds check, so the
 * launch must use exactly one thread per extent word.
 */
__global__ void init_extent(unsigned long *extent, unsigned long *prev_extent, unsigned long *attr_extent)
{
    const int word = threadIdx.x;
    const unsigned long kept = prev_extent[word];
    const unsigned long column = attr_extent[word];

    extent[word] = kept & column;
}
/*
 * First step of the closure computation: per-thread partial intents.
 *
 * The bits of every extent word are split into THREAD_NUM equal slices and
 * thread threadIdx.x processes slice threadIdx.x of each word. For every set
 * bit (object) in its slices the thread ANDs that object's context row into
 * its private column of `temp`: temp[w * THREAD_NUM + threadIdx.x] for intent
 * word w. `temp` must be preset by the caller (all ones for an empty
 * intersection); the per-thread results are combined afterwards by `kernel`.
 *
 * Reads d_context, d_int_count_o and d_int_count_a. Only threadIdx.x is used
 * for indexing, so the kernel expects THREAD_NUM threads in one block.
 */
__global__ void init_attr(unsigned long *extent, unsigned long *temp)
{
    const int lane = threadIdx.x;
    const int bits_per_lane = (ARCHBIT + 1) / THREAD_NUM;
    const int first_bit = bits_per_lane * lane;

    for (int word = 0; word < d_int_count_o; word++)
    {
        const unsigned long chunk = extent[word];

        for (int offset = 0; offset < bits_per_lane; offset++)
        {
            const int bit = first_bit + offset;

            if (!((chunk >> bit) & BIT))
                continue;

            /* Row of object word * word_bits + bit in the device context. */
            const unsigned long *row = d_context + d_int_count_a * (word * (ARCHBIT + 1) + bit);

            for (int w = 0; w < d_int_count_a; w++)
                temp[w * THREAD_NUM + lane] &= row[w];
        }
    }
}
/* Aborts with a diagnostic when a CUDA runtime call made by generate_from_node fails. */
static void gfn_cuda_check(cudaError_t status, const char *what)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "CUDA error in generate_from_node (%s): %s\n", what, cudaGetErrorString(status));
        exit(6);
    }
}

/*
 * Recursive FCbO step: expands the concept (intent, extent) by every attribute
 * from the start position on, prints the new canonical concepts and recurses
 * into them. The closure of each candidate is computed on the GPU.
 *
 * Typical top-level call:
 *   generate_from_node(intent, extent, 0, ARCHBIT, starts, implied, implied_stack);
 *
 * Parameters
 *   intent, extent  Concept to expand. They must be stored contiguously
 *                   (extent == intent + int_count_a) because both are copied
 *                   to the device with a single transfer.
 *   start_int,
 *   start_bit       Together they name the first attribute to try (the "y" of
 *                   the algorithm): start_int is the word, start_bit the bit
 *                   inside that word (ARCHBIT is the first attribute of a word).
 *   starts          Stack of (start_int, start_bit) pairs for the recursive
 *                   calls. Owned by the caller; this function only writes into
 *                   it. With n attributes at most n(n+1)/2 pairs are needed
 *                   over all recursion levels, i.e. n(n+1) ints.
 *   implied         One slot per attribute holding the intent (the "Ny" of the
 *                   algorithm) produced by the last failed canonicity test for
 *                   that attribute, or NULL. Owned by the caller.
 *   implied_stack   Undo log for `implied`: pairs of (address of the changed
 *                   slot, previous value). Same size bound as `starts`. On
 *                   return `implied` is restored to its state at entry.
 *
 * Changes with respect to the previous version:
 *   - stats.fail_canon is incremented once per rejected candidate (the prefix
 *     comparison now stops at the first differing word);
 *   - every CUDA call and kernel launch is checked; a failure aborts instead
 *     of silently producing wrong concepts;
 *   - the GPU extent path is used only while one thread per extent word fits
 *     into a single block; larger extents use the host path;
 *   - the candidate buffer allocation is checked;
 *   - the per-kernel cudaDeviceSynchronize calls were dropped: all work is
 *     issued to the default stream, so it executes in order, and the blocking
 *     cudaMemcpy that fetches the result waits for it.
 */
void generate_from_node(unsigned long *intent, unsigned long *extent, int start_int, int start_bit,
                        int *starts, unsigned long **implied, unsigned long ***implied_stack)
{
    const int max_block_threads = 1024; /* upper bound for a <<<1, N>>> launch */
    int i, w, total;
    size_t candidates;
    unsigned long *new_extent, *new_intent, *new_intent_i, *new_intents_head;
    unsigned long ***implied_stack_i = implied_stack;
    int *start_i = starts;

    /* Position of the current attribute = number of attributes already passed;
       attributes are stored from the highest bit of each word downwards. */
    total = start_int * (ARCHBIT + 1) + (ARCHBIT - start_bit);

    /* One (intent, extent) slot for every attribute that is still to be tried. */
    candidates = (attributes > total) ? (size_t)(attributes - total) : 0;
    new_intent = new_intent_i = new_intents_head =
        (unsigned long *)malloc((BYTE_COUNT_A + BYTE_COUNT_O) * (candidates ? candidates : 1));
    if (!new_intents_head)
    {
        fprintf(stderr, "Cannot allocate candidate concepts, quitting.\n");
        exit(5);
    }
    new_extent = new_intent + int_count_a; /* the extent follows the intent in each slot */

    gfn_cuda_check(cudaMemcpy(d_intent, intent, BYTE_COUNT_A + BYTE_COUNT_O, cudaMemcpyHostToDevice),
                   "copy concept to device");
    d_extent = d_intent + int_count_a;
    d_new_extent = d_new_intent + int_count_a;

    for (; start_int < int_count_a; start_int++)
    {
        int endflag = 0; /* set when every real attribute has been visited */

        for (; start_bit >= 0; start_bit--)
        { /* the two loops together iterate over the remaining attributes */
            int newtestflag = 0; /* the additional FCbO test failed */
            int oldtestflag = 0; /* the original CbO canonicity test failed */

            /* The intent words hold more bits than there are attributes, so
               the loops cannot simply run to their natural end. */
            if (total >= attributes)
            {
                endflag = 1;
                break;
            }

            if (intent[start_int] & (BIT << start_bit)) /* attribute already in the intent */
            {
                total++;
                continue;
            }

            if (implied[total] != NULL) /* additional FCbO test */
            {
                /* implied[total] must be contained in intent on all positions
                   before the current attribute; only the current word needs
                   the upto_bit mask. */
                if (implied[total][start_int] & ~(intent[start_int]) & upto_bit[start_bit])
                {
                    stats.fail_fcbo++;
                    total++;
                    continue;
                }
                for (i = 0; i < start_int; i++) /* full words before the current one */
                {
                    if (implied[total][i] & ~(intent[i]))
                    {
                        stats.fail_fcbo++;
                        total++;
                        newtestflag = 1;
                        break;
                    }
                }
                if (newtestflag == 1)
                    continue;
            }

            memset(new_intent, 0xFF, BYTE_COUNT_A);
            gfn_cuda_check(cudaMemset(d_new_intent, 0xFF, BYTE_COUNT_A), "reset candidate intent");
            gfn_cuda_check(cudaMemset(d_temp, 0xFF, LONG_SIZE * int_count_a * THREAD_NUM), "reset partial intents");

            if (int_count_o > 5 && int_count_o <= max_block_threads)
            {
                /* Large extent: intersect on the device, one thread per word. */
                gfn_cuda_check(cudaMemcpy(d_attr_extent, cols[total], BYTE_COUNT_O, cudaMemcpyHostToDevice),
                               "copy attribute column");
                init_extent<<<1, int_count_o>>>(d_new_extent, d_extent, d_attr_extent);
                gfn_cuda_check(cudaGetLastError(), "launch init_extent");
            }
            else
            {
                /* Small (or too large for one block) extent: intersect on the host. */
                for (w = 0; w < int_count_o; w++)
                    new_extent[w] = extent[w] & cols[total][w];
                gfn_cuda_check(cudaMemcpy(d_new_extent, new_extent, BYTE_COUNT_O, cudaMemcpyHostToDevice),
                               "copy candidate extent");
            }

            stats.closures++;
            init_attr<<<1, THREAD_NUM>>>(d_new_extent, d_temp);
            gfn_cuda_check(cudaGetLastError(), "launch init_attr");
            kernel<<<1, int_count_a>>>(d_new_intent, d_temp);
            gfn_cuda_check(cudaGetLastError(), "launch kernel");

            /* Blocking copy: waits for the kernels above and fetches intent + extent. */
            gfn_cuda_check(cudaMemcpy(new_intent, d_new_intent, BYTE_COUNT_A + BYTE_COUNT_O, cudaMemcpyDeviceToHost),
                           "copy candidate concept to host");

            /* Original CbO canonicity test: the candidate must agree with the
               parent intent on every position before the current attribute. */
            if ((new_intent[start_int] ^ intent[start_int]) & upto_bit[start_bit])
            {
                oldtestflag = 1;
            }
            else
            {
                for (i = 0; i < start_int; i++) /* full words need no mask */
                {
                    if (new_intent[i] ^ intent[i])
                    {
                        oldtestflag = 1;
                        break;
                    }
                }
            }

            if (oldtestflag == 0)
            {
                /* Both tests passed: report the concept and remember where its
                   own expansion has to start. */
                print_attributes(new_intent);
                stats.total++;
                *start_i = start_int;
                start_i++;
                *start_i = start_bit;
                start_i++;
            }
            else
            {
                stats.fail_canon++;
                /* Log the old implied entry (address, then value) and replace it. */
                *implied_stack_i = &(implied[total]);
                implied_stack_i++;
                *implied_stack_i = (unsigned long **)implied[total];
                implied_stack_i++;
                implied[total] = new_intent;
                /* The lowest bit of the last word marks the slot as a failed
                   candidate that must not be expanded. */
                new_intent[int_count_a - 1] |= BIT;
            }

            /* Advance to the next free slot. */
            new_intent = new_extent + int_count_o;
            new_extent = new_intent + int_count_a;
            total++;
        }
        if (endflag == 1)
            break;
        start_bit = ARCHBIT; /* the next word starts again at its highest bit */
    }

    /* Recurse into every candidate that passed the tests. */
    for (; new_intent_i != new_intent; new_intent_i = new_extent + int_count_o)
    {
        new_extent = new_intent_i + int_count_a;
        if (new_intent_i[int_count_a - 1] & BIT) /* failed candidate: do not expand */
            continue;
        if (*(starts + 1) == 0) /* next attribute is the highest bit of the next word */
            generate_from_node(new_intent_i, new_extent, *starts + 1, ARCHBIT, start_i, implied, implied_stack_i);
        else /* next attribute is one bit lower in the same word */
            generate_from_node(new_intent_i, new_extent, *starts, *(starts + 1) - 1, start_i, implied, implied_stack_i);
        starts += 2;
    }

    /* Restore implied to the state it had when this call was entered. */
    for (; implied_stack != implied_stack_i; implied_stack += 2)
        **implied_stack = (unsigned long *)*(implied_stack + 1);

    free(new_intents_head);
    return;
}
/* Aborts with a diagnostic when a CUDA runtime call made by init_cuda fails. */
static void init_cuda_check(cudaError_t status, const char *what)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "CUDA error in init_cuda (%s): %s\n", what, cudaGetErrorString(status));
        exit(6);
    }
}

/*
 * Allocates and initialises all device-side state.
 *
 * Copies the host context to the device and publishes its address through the
 * __device__ pointer d_context, copies int_count_o / int_count_a into constant
 * memory, and allocates the working buffers d_attr_extent, d_intent,
 * d_new_intent (each concept buffer holds an intent followed by an extent)
 * and d_temp (THREAD_NUM partial results per intent word).
 *
 * Must be called after create_context, which sets the sizes used here.
 * Every CUDA call is checked; on failure the process exits with status 6,
 * because continuing with missing device buffers would only fail later and
 * less understandably.
 */
void init_cuda()
{
    const size_t context_bytes = LONG_SIZE * int_count_a * objects;
    const size_t concept_bytes = BYTE_COUNT_A + BYTE_COUNT_O;

    init_cuda_check(cudaMalloc((void **)&d_context_tmp, context_bytes), "allocate context");
    init_cuda_check(cudaMemcpy(d_context_tmp, context, context_bytes, cudaMemcpyHostToDevice), "copy context");
    /* d_context is a device symbol: store the device address into it. */
    init_cuda_check(cudaMemcpyToSymbol(d_context, &d_context_tmp, sizeof(unsigned long *), 0, cudaMemcpyHostToDevice),
                    "publish context pointer");
    init_cuda_check(cudaMemcpyToSymbol(d_int_count_o, &int_count_o, sizeof(int), 0, cudaMemcpyHostToDevice),
                    "copy int_count_o");
    init_cuda_check(cudaMemcpyToSymbol(d_int_count_a, &int_count_a, sizeof(int), 0, cudaMemcpyHostToDevice),
                    "copy int_count_a");

    init_cuda_check(cudaMalloc((void **)&d_attr_extent, BYTE_COUNT_O), "allocate attribute column");
    init_cuda_check(cudaMalloc((void **)&d_intent, concept_bytes), "allocate concept");
    init_cuda_check(cudaMalloc((void **)&d_new_intent, concept_bytes), "allocate candidate concept");
    init_cuda_check(cudaMalloc((void **)&d_temp, LONG_SIZE * int_count_a * THREAD_NUM), "allocate partial intents");
}
/*
 * Entry point of the search: prints the top concept and then enumerates all
 * remaining concepts with generate_from_node.
 *
 * The top concept (all objects, attributes shared by every object) is
 * computed on the host. If the lowest bit of its last intent word is set the
 * search is skipped, as in the original code.
 *
 * All host work buffers allocated here are released before returning, and the
 * device buffers created by init_cuda (including the device context) are
 * released on every path, not only when the search ran. The function is
 * therefore meant to be called once, after init_cuda.
 * On allocation failure the process exits with status 5.
 */
void find_all_intents()
{
    unsigned long *extent;
    unsigned long *intent;
    int *starts = NULL;
    unsigned long **implied = NULL;
    unsigned long ***implied_stack = NULL;

    /* One concept: intent followed by extent in a single allocation. */
    intent = (unsigned long *)malloc(BYTE_COUNT_A + BYTE_COUNT_O);
    if (!intent)
    {
        fprintf(stderr, "Cannot allocate top concept, quitting.\n");
        exit(5);
    }
    extent = intent + int_count_a;

    compute_closure(intent, extent, NULL, NULL);
    print_attributes(intent);
    stats.total++;

    if (!(intent[int_count_a - 1] & BIT))
    {
        /* n(n+1) entries cover the worst case over all recursion levels. */
        const size_t n = (attributes > 0) ? (size_t)attributes : 0;
        const size_t stack_slots = (n + 1) * n;

        starts = (int *)malloc(sizeof(int) * (stack_slots ? stack_slots : 1));
        implied = (unsigned long **)calloc(n ? n : 1, sizeof(unsigned long *));
        implied_stack = (unsigned long ***)malloc(sizeof(unsigned long **) * (stack_slots ? stack_slots : 1));
        if (!starts || !implied || !implied_stack)
        {
            fprintf(stderr, "Cannot allocate search stacks, quitting.\n");
            exit(5);
        }

        generate_from_node(intent, extent, 0, ARCHBIT, starts, implied, implied_stack);
    }

    free(implied_stack);
    free(implied);
    free(starts);
    free(intent);

    cudaFree(d_new_intent);
    cudaFree(d_attr_extent);
    cudaFree(d_intent);
    cudaFree(d_temp);
    cudaFree(d_context_tmp);
}
/*
 * Program entry point.
 *
 * Usage: prog [-h] [-index] [-Smin-support] [-Vlevel] [INPUT-FILE] [OUTPUT-FILE]
 *   -S<n>   sets min_support
 *   -V<n>   sets verbosity_level
 *   -h      prints the synopsis and exits
 *   -<n>    sets attr_offset (number of the first attribute in the input);
 *           negative values are clamped to 0
 * Input defaults to stdin and output to stdout.
 *
 * Returns 0 on success, 1 when the input cannot be opened and 2 when the
 * output cannot be opened.
 */
int main(int argc, char **argv)
{
    in_file = stdin;
    out_file = stdout;
    if (argc > 1)
    {
        int index = 1;
        for (; (index < argc && argv[index][0] == '-' && argv[index][1] != 0); index++)
        {
            switch (argv[index][1])
            {
            case 'S':
                min_support = atoi(argv[index] + 2); /* skip "-S" and parse the number */
                break;
            case 'V':
                verbosity_level = atoi(argv[index] + 2); /* skip "-V" and parse the number */
                break;
            case 'h':
                fprintf(stderr, "synopsis: %s [-h] [-index] [-Smin-support] [-Vlevel] [INPUT-FILE] [OUTPUT-FILE]\n", argv[0]);
                return 0;
            default:
                attr_offset = atoi(argv[index] + 1);
                if (attr_offset < 0)
                    attr_offset = 0;
                break;
            }
        }
        if ((argc > index) && (argv[index][0] != '-')) /* input file */
            in_file = fopen(argv[index], "rb");
        if ((argc > index + 1) && (argv[index + 1][0] != '-')) /* output file */
            out_file = fopen(argv[index + 1], "wb");
    }
    if (!in_file)
    {
        fprintf(stderr, "%s: cannot open input data stream\n", argv[0]);
        if (out_file && out_file != stdout)
            fclose(out_file);
        return 1;
    }
    if (!out_file)
    {
        fprintf(stderr, "%s: cannot open output data stream\n", argv[0]);
        if (in_file != stdin)
            fclose(in_file);
        return 2;
    }

    QueryPerformanceFrequency(&nFreq);
    if (verbosity_level >= 3) /* timing is only reported from -V3 on */
        QueryPerformanceCounter(&time_start);

    read_file(in_file);
    create_context();
    free(buff);
    fclose(in_file);

    if (verbosity_level >= 3)
        QueryPerformanceCounter(&time_inner);

    initialize_output();
    sort_context();
    initialize_algorithm();
    init_cuda();
    find_all_intents();

    if (verbosity_level >= 3)
    {
        QueryPerformanceCounter(&time_end);
        fprintf(stderr, "inner time: %f s\n", (time_end.QuadPart - time_inner.QuadPart) / (double)nFreq.QuadPart);
        fprintf(stderr, "total time: %f s\n", (time_end.QuadPart - time_start.QuadPart) / (double)nFreq.QuadPart);
    }
    if (verbosity_level >= 2)
        fprintf(stderr, "total: %i\nclosures: %i\nfail_canon: %i\nfail_fcbo: %i\n", stats.total, stats.closures, stats.fail_canon, stats.fail_fcbo);

    fclose(out_file);
    return 0;
}
设编译生成的文件为a.exe,执行方式为:
a input.dat output.dat,output.dat缺省时默认在控制台输出。