基于CUDA的FCBO算法的并行实现(概念格的并行构造)

将原FCBO算法计算新的形式概念的函数,即Compute Closure函数并行化。但是由于CPU与GPU的数据交换开销太大,并行化没有带来加速效果。

fcbo.h文件

//fcbo.h文件,包括串行fcbo算法所需的函数与变量
//在Visual Studio 2015编译通过

#pragma once
#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <windows.h>

#define BIT ((unsigned long)1) // the value 1 as an unsigned long; shifted to build single-bit masks
#define NULL_LONG ((unsigned long)0) // the value 0 as an unsigned long (an empty bit word)
#define INT_SIZE (sizeof(int))
#define LONG_SIZE (sizeof(unsigned long))
#define ARCHBIT ((LONG_SIZE * 8) - 1)          // index of the most significant bit of an unsigned long
#define BYTE_COUNT_A (LONG_SIZE * int_count_a) // bytes used by the attribute bit words of one object (one intent)
#define BYTE_COUNT_O (LONG_SIZE * int_count_o) // bytes used by the object bit words of one attribute (one extent)
#define BUFFER_BLOCK 1024

int attributes = 0; // number of attributes (set by read_file)
int objects = 0;    // number of objects (set by read_file)
int int_count_a = 0;   // how many unsigned longs hold one bit per attribute
int int_count_o = 0;   // how many unsigned longs hold one bit per object
int table_entries = 0;  // number of crosses (object/attribute pairs) in the formal context
int min_support = 0;  // minimum-support threshold, parsed from the -S option
unsigned long *context; // bit matrix of the context: int_count_a words per object, rows stored back to back
unsigned long **cols; // cols[a] points at the int_count_o words holding the objects that have attribute a
int *supps;   // per-attribute object count; supps[5] == 10 means 10 objects have attribute 5
int *attrib_numbers; // attrib_numbers[i] == i after initialize_output; reordered by sort_context
unsigned long upto_bit[ARCHBIT + 1]; // upto_bit[i]: mask with every bit above position i set (filled by initialize_algorithm)
int attr_offset = 0; // subtracted from every number read; 0 by default, use 1 when attributes are numbered from 1
FILE *in_file; // input stream
FILE *out_file; // output stream

int verbosity_level = 3;
// Run counters, reported by main on stderr when verbosity_level >= 2.
struct stats_t
{
	int total;
	int closures;
	int fail_canon;
	int fail_fcbo;
	int fail_support;
} stats = { 0, 0, 0, 0, 0 };
LARGE_INTEGER nFreq; // performance-counter frequency used to convert ticks to seconds
LARGE_INTEGER time_start, time_inner, time_end; // timestamps taken in main
int *buff = NULL; // growable buffer of the integers parsed from the input
int buff_index = 0; // number of integers currently stored in buff
size_t buff_size = BUFFER_BLOCK; // capacity of buff, counted in ints

/*
 * Read the next token from `file`.
 *
 * Non-digit characters are skipped. Three outcomes are possible:
 *   - a newline is met first:   *value = -1, returns 1 (end-of-row marker);
 *   - end of file is met first: *value = -1, returns 0;
 *   - a run of decimal digits:  *value = parsed number - attr_offset, returns 1.
 * The character that terminates a number is pushed back so that a newline
 * directly after a number is reported by the following call.
 */
int get_next_integer(FILE *file, int *value)
{
	int c;
	int number = 0;

	*value = -1;

	/* Skip ahead to the first digit, bailing out on newline or EOF. */
	for (;;)
	{
		c = fgetc(file);
		if (c == '\n')
			return 1;
		if (c == EOF)
			return 0;
		if (c >= '0' && c <= '9')
			break;
	}

	/* Accumulate the (possibly multi-digit) decimal number. */
	do
	{
		number = number * 10 + (c - '0');
		c = fgetc(file);
	} while (c >= '0' && c <= '9');

	ungetc(c, file);
	*value = number - attr_offset;
	return 1;
}

/*
 * Make *buffer large enough for `size` ints, allocating it on first use and
 * resizing it afterwards. Terminates the program with exit code 3 when the
 * memory cannot be obtained.
 */
void allocate_buffer(int **buffer, int size)
{
	/* realloc on a NULL pointer acts as malloc, so one call covers both the
	   first allocation and every later growth. */
	int *resized = (int *)realloc(*buffer, INT_SIZE * size);

	*buffer = resized;
	if (resized == NULL)
	{
		fprintf(stderr, "Cannot reallocate buffer, quitting.");
		exit(3);
	}
}

/*
 * Append one integer to the global parse buffer `buff`, growing it by
 * BUFFER_BLOCK ints through allocate_buffer() when it is full.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement (safe in an unbraced if/else), and the parameter no longer uses
 * a reserved double-underscore name. The argument is evaluated exactly once.
 */
#define PUSH_NEW_INTEGER(value_)                \
  do                                            \
  {                                             \
    if ((size_t)buff_index >= buff_size)        \
    {                                           \
      buff_size += BUFFER_BLOCK;                \
      allocate_buffer(&buff, (int)buff_size);   \
    }                                           \
    buff[buff_index] = (value_);                \
    buff_index++;                               \
  } while (0)
//read_file结束后,buff数组用-1分割两行,某行的数据代表第x个object具有哪些属性
void read_file(FILE *file)
{
	int last_value = -1, value = 0, last_attribute = -1, last_object = -1;
	allocate_buffer(&buff, buff_size);     //buff是一维数组,初始大小1024
	while (get_next_integer(file, &value)) //取所有整数,每一行用-1分隔
	{
		if ((value < 0) && (last_value < 0))  //一行到下一行的过渡,last_value此时为-1
			continue;
		if (value < 0)   //一行结束
		{
			last_object++;
			PUSH_NEW_INTEGER(-1);
		}
		else
		{
			if (value > last_attribute)
				last_attribute = value;
			PUSH_NEW_INTEGER(value);
		}
		last_value = value;
	}
	if (last_value >= 0)     //当文末没有换行符,最后一个数字直接接EOF时,运行该段代码
	{
		last_object++;
		PUSH_NEW_INTEGER(-1);
	}
	objects = last_object + 1;
	attributes = last_attribute + 1;
}

/*
 * Build the bit-matrix form of the formal context from the parsed buffer.
 *
 * Sets int_count_a / int_count_o, allocates and fills `context` (one row of
 * int_count_a words per object, attribute 0 stored in the most significant
 * bit of the first word), counts per-attribute support in `supps`, and
 * counts the crosses in `table_entries`.
 *
 * Exits with code 5 when memory cannot be allocated.
 */
void create_context()
{
	int i = 0, row = 0;

	/* Words needed when each attribute / object takes one bit. The "+ 1"
	   always leaves at least one unused bit in the last word. */
	int_count_a = (attributes / (ARCHBIT + 1)) + 1;
	int_count_o = (objects / (ARCHBIT + 1)) + 1;

	context = (unsigned long *)malloc(LONG_SIZE * int_count_a * objects);
	if (!context)
	{
		fprintf(stderr, "Cannot allocate bitcontext, quitting.");
		exit(5);
	}
	memset(context, 0, LONG_SIZE * int_count_a * objects);

	/* calloc zero-fills the counters; a NULL result is only an error when a
	   non-empty array was requested. */
	supps = (int *)calloc((size_t)attributes, sizeof(int));
	if (!supps && attributes > 0)
	{
		fprintf(stderr, "Cannot allocate support counters, quitting.");
		exit(5);
	}

	for (i = 0; i < buff_index; i++)
	{
		if (buff[i] < 0)  /* every -1 closes one row (object) */
		{
			row++;
			continue;
		}
		/* Set the bit of attribute buff[i] in the row of the current object.
		   Attributes are laid out left to right: attribute 0 is the top bit
		   of word 0. */
		context[row * int_count_a + (buff[i] / (ARCHBIT + 1))] |= (BIT << (ARCHBIT - (buff[i] % (ARCHBIT + 1))));
		supps[buff[i]]++;
		table_entries++;
	}
	if (verbosity_level >= 2)
		fprintf(stderr, "objects: %6i\nattributes: %4i\nentries: %8i\n", objects, attributes, table_entries);
}

/*
 * Allocate `attrib_numbers` and fill it with the identity mapping
 * (attrib_numbers[i] == i). Exits with code 5 if the allocation fails.
 */
void initialize_output()
{
	int i;

	attrib_numbers = (int *)malloc(sizeof(int) * attributes);
	/* A NULL result is only an error when a non-empty array was requested. */
	if (!attrib_numbers && attributes > 0)
	{
		fprintf(stderr, "Cannot allocate attribute numbers, quitting.");
		exit(5);
	}
	for (i = 0; i < attributes; i++)
		attrib_numbers[i] = i;
}

/*
 * Write one attribute set to out_file as a line of space-separated numbers.
 *
 * `set` is scanned from the most significant bit of word 0 onwards; the
 * bit at running position p is reported as attrib_numbers[p]. Scanning stops
 * once `attributes` positions have been visited. Nothing is written when
 * verbosity_level <= 0.
 */
void print_attributes(unsigned long *set)
{
	int word, bit;
	int position = 0;      /* running bit position across all words */
	int printed_any = 0;   /* becomes 1 once a number has been written */
	int finished = 0;

	if (verbosity_level <= 0)
		return;

	for (word = 0; !finished && word < int_count_a; word++)
	{
		for (bit = ARCHBIT; bit >= 0; bit--)
		{
			if (set[word] & (BIT << bit))
			{
				if (printed_any)
					fprintf(out_file, " ");
				fprintf(out_file, "%i", attrib_numbers[position]);
				printed_any = 1;
			}
			position++;
			if (position >= attributes)
			{
				/* The remaining bits of the last word are padding. */
				finished = 1;
				break;
			}
		}
	}
	fprintf(out_file, "\n");
}
/*
 * qsort comparator for attribute indices: orders them by support
 * (supps[index]) in ascending order, so attributes owned by fewer objects
 * come first.
 */
int cols_compar(const void *a, const void *b)
{
	const int support_a = supps[*(const int *)a];
	const int support_b = supps[*(const int *)b];

	/* Yields -1, 0 or 1 without risking overflow from a subtraction. */
	return (support_a > support_b) - (support_a < support_b);
}

/*
 * qsort comparator for context rows: compares two rows of int_count_a
 * unsigned long words lexicographically, word 0 being the most significant.
 */
int rows_compar(const void *a, const void *b)
{
	const unsigned long *lhs = (const unsigned long *)a;
	const unsigned long *rhs = (const unsigned long *)b;
	int word = 0;

	/* Skip the common prefix of equal words. */
	while (word < int_count_a && lhs[word] == rhs[word])
		word++;

	if (word >= int_count_a)
		return 0;
	return (lhs[word] < rhs[word]) ? -1 : 1;
}

void sort_context()
{
	int i, j, k, x, y, z, ii, jj, a, aa;
	unsigned long *new_context;
	qsort(attrib_numbers, attributes, sizeof(int), cols_compar); //按属性对应的对象数降序排列属性
	qsort(context, objects, BYTE_COUNT_A, rows_compar);   //具有更多属性的object排在前面
}
/*
 * Prepare the lookup structures used by the search:
 *
 *  - upto_bit[i]: mask in which every bit above position i is set and bits
 *    0..i are clear;
 *  - cols[a]: for every attribute a, a bitmap of int_count_o words over the
 *    objects (the transposed context). Object x is stored in word
 *    x / (ARCHBIT + 1) at bit x % (ARCHBIT + 1), i.e. least significant bit
 *    first, which is the layout compute_closure walks.
 *
 * Exits with code 5 when memory cannot be allocated.
 */
void initialize_algorithm()
{
	int i, j, k, x, y;
	unsigned long *ptr, mask, *cols_buff;
	const size_t col_words = (size_t)int_count_o * (size_t)attributes;

	for (i = 0; i <= ARCHBIT; i++)
	{
		upto_bit[i] = NULL_LONG;
		for (j = ARCHBIT; j > i; j--)
			upto_bit[i] |= (BIT << j);
	}

	/* One flat, zero-filled array: int_count_o words per attribute. */
	cols_buff = (unsigned long *)calloc(col_words, LONG_SIZE);
	if (!cols_buff && col_words > 0)
	{
		fprintf(stderr, "Cannot allocate column bitmaps, quitting.");
		exit(5);
	}
	/* One pointer per attribute, each aimed at its slice of cols_buff. */
	cols = (unsigned long **)malloc(sizeof(unsigned long *) * attributes);
	if (!cols && attributes > 0)
	{
		fprintf(stderr, "Cannot allocate column bitmaps, quitting.");
		exit(5);
	}

	ptr = cols_buff;
	for (k = j = 0; j < int_count_a; j++)        /* j: word inside a context row */
		for (i = ARCHBIT; i >= 0; i--, k++)      /* i: bit inside that word; k: attribute position */
		{
			if (k >= attributes)
				return;                          /* remaining bits are padding */
			mask = (BIT << i);
			cols[k] = ptr;
			/* Walk word j of every object row and record who has attribute k. */
			for (x = 0, y = j; x < objects; x++, y += int_count_a)
				if (context[y] & mask)
					ptr[x / (ARCHBIT + 1)] |= BIT << (x % (ARCHBIT + 1));
			ptr += int_count_o;                  /* slice of the next attribute */
		}
}

void compute_closure(unsigned long *intent, unsigned long *extent, unsigned long *prev_extent, unsigned long *attr_extent)
{
	int i, j, k, l;
	stats.closures++;
	memset(intent, 0xFF, BYTE_COUNT_A);
	if (attr_extent)
	{
		for (k = 0; k < int_count_o; k++)
		{
			extent[k] = prev_extent[k] & attr_extent[k];		 //即算法中把A和{j}'的交集赋值给C,'代表闭包推导符
			if (extent[k])
				for (l = 0; l <= ARCHBIT; l++)
				{
					if (extent[k] >> l)			//属性没处理完,extent中还有1
					{
						if ((extent[k] >> l) & BIT)
						{
							for (i = 0, j = int_count_a * (k * (ARCHBIT + 1) + l); i < int_count_a; i++, j++)
								intent[i] &= context[j];	//将C'赋值给D(每次循环&一个对象,多次循环求所有object公共属性)
						}
					}
					else
						break;
				}
		}
	}
	else
	{
		memset(extent, 0xFF, BYTE_COUNT_O);
		for (j = 0; j < objects; j++)			//与所有object相与,求出公共属性
		{
			for (i = 0; i < int_count_a; i++)
				intent[i] &= context[int_count_a * j + i];	//intent包含所有属性,概念没有的用0表示,有的用1表示
		}
	}
}

kernel.cu文件

//包括GPU运行的3个核函数,GPU端变量的声明与初始化
//对fcbo算法的改动主要在generate_from_node函数中

#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "fcbo.h"

#define THREAD_NUM 8           // threads used by init_attr; also the number of partial results per intent word in d_temp

__device__ unsigned long *d_context;   // device-side pointer to the formal context (set in init_cuda)
__constant__ int d_int_count_o;        // device copy of int_count_o (words per extent)
__constant__ int d_int_count_a;        // device copy of int_count_a (words per intent)
unsigned long *d_context_tmp;          // host-side handle of the device context buffer, used to initialise d_context
unsigned long *d_intent, *d_extent;    // device intent and extent of the current node (d_extent points inside d_intent's buffer)
unsigned long *d_new_extent, *d_new_intent; // device intent and extent of the concept being computed (same single-buffer layout)
unsigned long *d_attr_extent;          // device copy of cols[attribute] for the attribute being added
unsigned long *d_temp;                 // per-thread partial intents produced by init_attr and reduced by kernel

/*
 * Reduction step: thread i folds the THREAD_NUM partial results stored at
 * temp[i * THREAD_NUM .. i * THREAD_NUM + THREAD_NUM - 1] into intent[i]
 * with bitwise AND. Indexing uses threadIdx.x only.
 */
__global__ void kernel(unsigned long *intent, unsigned long *temp)
{
	const int word = threadIdx.x;
	const unsigned long *partials = temp + word * THREAD_NUM;
	unsigned long acc = intent[word];

	for (int slot = 0; slot < THREAD_NUM; slot++)
		acc &= partials[slot];

	intent[word] = acc;
}
/*
 * Intersect two extents word by word: thread k writes
 * extent[k] = prev_extent[k] & attr_extent[k] (the algorithm's C = A ∩ {j}').
 * Indexing uses threadIdx.x only.
 */
__global__ void init_extent(unsigned long *extent, unsigned long *prev_extent, unsigned long *attr_extent)
{
	const int word = threadIdx.x;
	const unsigned long parent_objects = prev_extent[word];
	const unsigned long attribute_objects = attr_extent[word];

	extent[word] = parent_objects & attribute_objects;
}
/*
 * Partial intent computation. Thread t owns a contiguous slice of
 * (ARCHBIT + 1) / THREAD_NUM bit positions in every extent word. For each
 * object whose bit lies in that slice and is set, the object's context row
 * is ANDed into the thread's own column of temp (temp[i * THREAD_NUM + t]
 * for intent word i). The `kernel` function later combines the THREAD_NUM
 * columns.
 */
__global__ void init_attr(unsigned long *extent, unsigned long *temp)
{
	const int lane = threadIdx.x;
	const int bits_per_lane = (ARCHBIT + 1) / THREAD_NUM;
	const int first_bit = bits_per_lane * lane;
	const int end_bit = first_bit + bits_per_lane;   /* exclusive */

	for (int word = 0; word < d_int_count_o; word++)
	{
		const unsigned long members = extent[word];

		for (int bit = first_bit; bit < end_bit; bit++)
		{
			if (!((members >> bit) & BIT))
				continue;

			/* Context row of object number word * (ARCHBIT + 1) + bit. */
			const unsigned long *row = d_context + d_int_count_a * (word * (ARCHBIT + 1) + bit);
			for (int a = 0; a < d_int_count_a; a++)
				temp[a * THREAD_NUM + lane] &= row[a];
		}
	}
}

void generate_from_node(unsigned long *intent, unsigned long *extent, int start_int, int start_bit,
	int *starts, unsigned long **implied, unsigned long ***implied_stack)
	//generate_from_node(intent, extent, 0, ARCHBIT, starts, implied, implied_stack);
	// int start_int, int start_bit 二者组合在一起表示本次调用的起始属性,即算法中的y。start_int是y所在单元(一个unsigned long),start_bit是y的位序号
	// int *starts 这是一个堆栈,存储下次调用要使用的start_int与start_bit。所用空间在本函数之外分配,本函数不改变其大小,会改变其内容。
	//设属性个数为n,第一层调用最多需要n组(一组2个int),第二层调用最多需要n-1组,最后一层调用最多需要1组,总空间为n(n+1)/2组,即n(n+1)个int。 
	// implied存储指向算法中的Ny(即新生成且重复的内涵D)的地址。所用空间在本函数之外分配,只有一次调用所需空间,一个属性对应一个地址,共计n个单元。
	//上层调用的地址信息保存在implied_stack中,在递归返回时通过implied_stack恢复 
	// implied_stack这是一个堆栈,用于递归返回时恢复implied。逻辑结构域大小同starts。共有n+...+1=n(n+1)/2组,每组2个单元;第一单元存储当前属性对应的implied单元 的地址,第二单元存储其值
{
	int i, total;
	unsigned long *new_extent, *new_intent, *new_intent_i, *new_intents_head;
	unsigned long ***implied_stack_i = implied_stack;
	int *start_i = starts;
	total = start_int * (ARCHBIT + 1) + (ARCHBIT - start_bit);
	// total为当前属性的序号(也表示前面已经处理了total个属性),属性是按照自身序号顺序在bit数组中从左至右存放
	new_intent = new_intent_i = new_intents_head = (unsigned long *)malloc((BYTE_COUNT_A + BYTE_COUNT_O) * (attributes - total));
	//分配未处理属性对应的概念所占用的空间,包括内涵与外延
	new_extent = new_intent + int_count_a; // 每个概念外延的起始位置
	cudaMemcpy(d_intent, intent, BYTE_COUNT_A + BYTE_COUNT_O, cudaMemcpyHostToDevice);
	d_extent = d_intent + int_count_a;
	d_new_extent = d_new_intent + int_count_a;
	for (; start_int < int_count_a; start_int++)
	{
		int endflag = 0;        //是否结束总循环的flag
		for (; start_bit >= 0; start_bit--)
		{                          /* 两个for循环结合在一起,实现了对未处理属性的循环,即算法中的for循环 */
			int newtestflag = 0;    //FCBO新增的canoncity test是否失败的flag
			int oldtestflag = 0;    //CBO算法的canoncity test是否失败的flag
			if (total >= attributes) /* 算法中的for循环终止。不能等到两层for循环自然结束,因为存储内涵的空间占用的bit数比属性个数多 */
			{
				endflag = 1;
				break;
			}
			if (intent[start_int] & (BIT << start_bit)) /* 算法中的j属于B */
			{
				total++;
				continue;
			}
			if (implied[total] != NULL)     //FCBO新增的canoncity test part
			{
				// implied[total]指算法中的Ny,intent[start_int]指B,upto_bit[start_bit]用于屏蔽start_bit及start_bit右边的bit
				//如果前者包含于后者,与后者取反再相与,结果应该为0。例如10000和11100,11100取反00011,再与10000,结果为0,结果不为0,说明前者不包含于后者
				if (implied[total][start_int] & ~(intent[start_int]) & upto_bit[start_bit])  //仅最后一个需要用uptobit屏蔽某些位
				{
					stats.fail_fcbo++;
					total++;
					continue;               //new canoncity test failed
				}
				for (i = 0; i < start_int; i++)          //不需要uptobit屏蔽的属性组
				{
					if (implied[total][i] & ~(intent[i]))       //有一个失败就继续下一个y
					{
						stats.fail_fcbo++;
						total++;
						newtestflag = 1;      //new canoncity test failed
						break;
					}
				}
				if (newtestflag == 1)
					continue;
			}
			memset(new_intent, 0xFF, BYTE_COUNT_A);
			cudaMemset(d_new_intent, 0xFF, BYTE_COUNT_A);
			cudaMemset(d_temp, 0xFF, LONG_SIZE * int_count_a * THREAD_NUM);

			if (int_count_o > 5)
			{
				cudaMemcpy(d_attr_extent, cols[total], BYTE_COUNT_O, cudaMemcpyHostToDevice);
				init_extent << <1, int_count_o >> > (d_new_extent, d_extent, d_attr_extent);
				cudaDeviceSynchronize();
			}
			else
			{
				for(int i=0;i<int_count_o;i++)
					new_extent[i] = extent[i] & cols[total][i];
				cudaMemcpy(d_new_extent, new_extent, BYTE_COUNT_O, cudaMemcpyHostToDevice);
			}
			stats.closures++;

			init_attr << <1, THREAD_NUM >> > (d_new_extent, d_temp);
			cudaDeviceSynchronize();

			kernel << <1, int_count_a >> >(d_new_intent, d_temp);
			cudaDeviceSynchronize();

			cudaMemcpy(new_intent, d_new_intent, BYTE_COUNT_A + BYTE_COUNT_O, cudaMemcpyDeviceToHost);
			if ((new_intent[start_int] ^ intent[start_int]) & upto_bit[start_bit])   //CBO算法的原始canoncity test
			{
				stats.fail_canon++;
				oldtestflag = 1;				//CBO算法的原始canoncity test失败
			}
			if (oldtestflag == 0)
			{
				for (i = 0; i < start_int; i++)        //start_int 前的属性组不需要uptobit屏蔽某些位,对这些组进行canoncity test
					if (new_intent[i] ^ intent[i])
					{
						stats.fail_canon++;
						oldtestflag = 1;         //CBO算法的原始canoncity test失败
					}
			}
			if (oldtestflag == 0)        //当两个canoncity test均通过时(不需要判断newtestflag,因为其为1时会continue)
			{
				print_attributes(new_intent);
				stats.total++;
				*start_i = start_int;       //向starts中存入start_int
				start_i++;
				*start_i = start_bit;		//向starts中存入start_bit
				start_i++;
			}
			if (oldtestflag == 1)       //canoncity test失败时
			{
				*implied_stack_i = &(implied[total]);    //地址
				implied_stack_i++;
				*implied_stack_i = (unsigned long **)implied[total];   //值
				implied_stack_i++;
				implied[total] = new_intent;
				new_intent[int_count_a - 1] |= BIT; //最右边的bit是标志位,表示此概念是重复的,作为Ny
			}
			new_intent = new_extent + int_count_o;
			new_extent = new_intent + int_count_a;
			total++;
		}
		if (endflag == 1)
			break;
		start_bit = ARCHBIT; /* 新的一个属性单元的循环从最高位(最左端)开始 */
	}
	for (; new_intent_i != new_intent; new_intent_i = new_extent + int_count_o)
	{
		new_extent = new_intent_i + int_count_a;
		if (new_intent_i[int_count_a - 1] & BIT) /* 是重复概念,不递归 */
			continue;
		if (*(starts + 1) == 0) /* 此时的下一个属性在下一个属性单元的最高位,即j+1由*starts+1与ARCHBIT的组合表示 */
			generate_from_node(new_intent_i, new_extent, *starts + 1, ARCHBIT, start_i, implied, implied_stack_i);
		else /* j+1由*starts与*(starts+1)-1的组合表示 */
			generate_from_node(new_intent_i, new_extent, *starts, *(starts + 1) - 1, start_i, implied, implied_stack_i);
		starts += 2;
	}
	for (; implied_stack != implied_stack_i; implied_stack += 2)
		**implied_stack = (unsigned long *)*(implied_stack + 1); /* 把implied恢复到刚进入本次调用时的状态 */
	free(new_intents_head);
	return;
}

void init_cuda()
{
	cudaMalloc((void**)&d_context_tmp, LONG_SIZE * int_count_a * objects);
	cudaMemcpy(d_context_tmp, context, LONG_SIZE * int_count_a * objects, cudaMemcpyHostToDevice);
	cudaMemcpyToSymbol(d_context, &d_context_tmp, sizeof(unsigned long*), 0, cudaMemcpyHostToDevice);
	cudaMemcpyToSymbol(d_int_count_o, &int_count_o, sizeof(int), 0, cudaMemcpyHostToDevice);
	cudaMemcpyToSymbol(d_int_count_a, &int_count_a, sizeof(int), 0, cudaMemcpyHostToDevice);
	cudaMalloc((void**)&d_attr_extent, BYTE_COUNT_O);
	cudaMalloc((void**)&d_intent, BYTE_COUNT_A + BYTE_COUNT_O);
	cudaMalloc((void**)&d_new_intent, BYTE_COUNT_A + BYTE_COUNT_O);
	cudaMalloc((void**)&d_temp, LONG_SIZE * int_count_a * THREAD_NUM);
}

void find_all_intents()
{
	unsigned long *extent;
	unsigned long *intent;
	int *starts;
	unsigned long **implied;
	unsigned long ***implied_stack;
	intent = (unsigned long *)malloc(BYTE_COUNT_A + BYTE_COUNT_O); // 分配一个概念占用的空间,包括内涵与外延
	extent = intent + int_count_a;                                 // 外延的起始位置
	compute_closure(intent, extent, NULL, NULL);                   // 计算最大概念,对象集为全集,属性集为所有对象都具有的属性集合
	print_attributes(intent);
	stats.total++;
	if (intent[int_count_a - 1] & BIT)
		return;
	starts = (int *)malloc(sizeof(int) * (attributes + 1) * attributes);
	implied = (unsigned long **)malloc(sizeof(unsigned long *) * attributes);
	memset(implied, 0, sizeof(unsigned long *) * attributes);
	implied_stack = (unsigned long ***)malloc(sizeof(unsigned long **) * (attributes + 1) * attributes);
	generate_from_node(intent, extent, 0, ARCHBIT, starts, implied, implied_stack);
	cudaFree(d_new_intent);
	cudaFree(d_attr_extent);
	cudaFree(d_intent);
	cudaFree(d_temp);
}

/*
 * Program entry point.
 *
 * Usage: prog [-h] [-index] [-Smin-support] [-Vlevel] [INPUT-FILE] [OUTPUT-FILE]
 *   -h          print the synopsis and exit
 *   -<number>   attribute offset subtracted from every number read
 *               (negative values are treated as 0)
 *   -S<number>  minimum support (stored in min_support)
 *   -V<number>  verbosity level
 * Input defaults to stdin and output to stdout.
 *
 * Returns 0 on success, 1 when the input cannot be opened, 2 when the output
 * cannot be opened.
 */
int main(int argc, char **argv)
{
	in_file = stdin;
	out_file = stdout;
	if (argc > 1)
	{
		int index = 1;
		/* Consume leading option arguments ("-x..."); a lone "-" is not an option. */
		for (; (index < argc && argv[index][0] == '-' && argv[index][1] != 0); index++)
		{
			switch (argv[index][1])
			{
			case 'S':
				min_support = atoi(argv[index] + 2); /* number after "-S" */
				break;
			case 'V':
				verbosity_level = atoi(argv[index] + 2); /* number after "-V" */
				break;
			case 'h':
				fprintf(stderr, "synopsis: %s [-h] [-index] [-Smin-support] [-Vlevel] [INPUT-FILE] [OUTPUT-FILE]\n", argv[0]);
				return 0;
			default:
				attr_offset = atoi(argv[index] + 1);
				if (attr_offset < 0)
					attr_offset = 0;
			}
		}
		if ((argc > index) && (argv[index][0] != '-')) /* input file */
			in_file = fopen(argv[index], "rb");
		if ((argc > index + 1) && (argv[index + 1][0] != '-')) /* output file */
			out_file = fopen(argv[index + 1], "wb");
	}
	if (!in_file)
	{
		fprintf(stderr, "%s: cannot open input data stream\n", argv[0]);
		return 1;
	}
	if (!out_file)
	{
		fprintf(stderr, "%s: cannot open output data stream\n", argv[0]);
		return 2;
	}
	QueryPerformanceFrequency(&nFreq);
	if (verbosity_level >= 3) /* timing is only collected from -V3 upwards */
		QueryPerformanceCounter(&time_start);
	read_file(in_file);
	create_context();
	free(buff);
	fclose(in_file);
	if (verbosity_level >= 3)
		QueryPerformanceCounter(&time_inner);
	initialize_output();
	sort_context();
	initialize_algorithm();
	init_cuda();
	find_all_intents();
	if (verbosity_level >= 3)
	{
		QueryPerformanceCounter(&time_end);
		{
			/* "inner" excludes reading the input and building the context. */
			fprintf(stderr, "inner time: %f s\n", (time_end.QuadPart - time_inner.QuadPart) / (double)nFreq.QuadPart);
			fprintf(stderr, "total time: %f s\n", (time_end.QuadPart - time_start.QuadPart) / (double)nFreq.QuadPart);
		}
	}
	if (verbosity_level >= 2)
		fprintf(stderr, "total: %i\nclosures: %i\nfail_canon: %i\nfail_fcbo: %i\n", stats.total, stats.closures, stats.fail_canon, stats.fail_fcbo);
	fclose(out_file);
	return 0;
}

设编译生成的文件为a.exe,执行方式为:

a input.dat output.dat(省略 output.dat 时,结果默认输出到控制台;省略 input.dat 时,从标准输入读取数据)。

猜你喜欢

转载自blog.csdn.net/liuqi_67676767/article/details/106673852
今日推荐