[C++11]std::atomic、boost::atomic、Interlocked三者的性能比较(benchmark)

原文作者:@玄冬Wong

好久没做过benchmark了。这次之所以想测试一下,是担心std::atomic的性能不如Windows的Interlocked系列函数;实测发现三者性能差不多,Interlocked微弱的领先优势可以忽略不计。

先公布结果:三者的性能几乎相同,Windows的Interlocked略好一点点。

 

测试代码:

#ifdef _WIN64
#ifndef _DEBUG
#pragma comment(lib, "libboost_atomic-vc140-mt-1_60.lib")
#endif
#endif

//#include "stdafx.h"
#include <windows.h>

#include <time.h>

#include <atomic>
#include <chrono>
#include <cstdio>
#include <iostream>
#include <list>
#include <thread>
#include <vector>

#include <boost/atomic.hpp>


// Number of worker threads each benchmark launches.
#define MAX_THREADS 16
// Number of increments each worker thread performs.
#define LOOP_COUNT 10000000

// Counter incremented through the Win32 InterlockedIncrement API.
volatile long g_CountWin = 0;
// Counter incremented through std::atomic. Direct-initialized on purpose:
// copy-initialization (`= 0`) requires an accessible copy constructor before
// C++17, and std::atomic's copy constructor is deleted.
std::atomic<long> g_CountStd(0);
// Counter incremented through boost::atomic.
boost::atomic_long g_CountBoost(0);

/// Worker body for the Interlocked benchmark: performs LOOP_COUNT atomic
/// increments of g_CountWin via InterlockedIncrement.
void Interlocked_fun()
{
	for (int i = 0; i < LOOP_COUNT; ++i)
	{
		// g_CountWin is declared `volatile long`, which already matches the
		// volatile pointee InterlockedIncrement takes, so no cast is needed
		// (the former C-style cast silently dropped the volatile qualifier).
		InterlockedIncrement(&g_CountWin);
	}
}

/// Worker body for the std::atomic benchmark: pre-increments g_CountStd
/// once per iteration, LOOP_COUNT times in total.
void std_atomic_fun()
{
	int remaining = LOOP_COUNT;
	while (remaining-- > 0)
	{
		++g_CountStd;
	}
}

/// Worker body for the boost::atomic benchmark: pre-increments g_CountBoost
/// once per iteration, LOOP_COUNT times in total.
void boost_atomic_fun()
{
	int remaining = LOOP_COUNT;
	while (remaining-- > 0)
	{
		++g_CountBoost;
	}
}



/// Runs the Interlocked benchmark: starts MAX_THREADS threads that each execute
/// Interlocked_fun, joins them all, then prints the value of g_CountWin and the
/// elapsed time in milliseconds. Thread start-up is included in the timed span.
void test_Interlocked()
{
	// The vector owns the std::thread objects directly, so there is nothing to
	// delete by hand once the run is over.
	std::vector<std::thread> workers;
	workers.reserve(MAX_THREADS);

	// Benchmark Interlocked
	printf("testing Interlocked...\n");
	// steady_clock measures elapsed wall time in a known unit; clock() ticks are
	// CLOCKS_PER_SEC-dependent and count CPU time on some platforms.
	const auto start = std::chrono::steady_clock::now();
	for (int i = 0; i < MAX_THREADS; ++i)
	{
		workers.emplace_back(&Interlocked_fun);
	}
	for (std::thread& worker : workers)
	{
		worker.join();
	}
	const auto finish = std::chrono::steady_clock::now();
	const auto elapsedMs =
		std::chrono::duration_cast<std::chrono::milliseconds>(finish - start).count();

	// The counter is a long, so it must be printed with %ld rather than %d.
	printf("result:%ld\n", static_cast<long>(g_CountWin));
	printf("cost:%lldms\n", static_cast<long long>(elapsedMs));
}

/// Runs the std::atomic benchmark: starts MAX_THREADS threads that each execute
/// std_atomic_fun, joins them all, then prints the value of g_CountStd and the
/// elapsed time in milliseconds. Thread start-up is included in the timed span.
void test_std_atomic()
{
	// The vector owns the std::thread objects directly, so there is nothing to
	// delete by hand once the run is over.
	std::vector<std::thread> workers;
	workers.reserve(MAX_THREADS);

	// Benchmark std::atomic
	printf("testing std::atomic...\n");
	// steady_clock measures elapsed wall time in a known unit; clock() ticks are
	// CLOCKS_PER_SEC-dependent and count CPU time on some platforms.
	const auto start = std::chrono::steady_clock::now();
	for (int i = 0; i < MAX_THREADS; ++i)
	{
		workers.emplace_back(&std_atomic_fun);
	}
	for (std::thread& worker : workers)
	{
		worker.join();
	}
	const auto finish = std::chrono::steady_clock::now();
	const auto elapsedMs =
		std::chrono::duration_cast<std::chrono::milliseconds>(finish - start).count();

	// A std::atomic object cannot be passed through printf's varargs (it is not
	// copyable); load the plain long value and print it with %ld.
	printf("result:%ld\n", g_CountStd.load());
	printf("cost:%lldms\n", static_cast<long long>(elapsedMs));
}

/// Runs the boost::atomic benchmark: starts MAX_THREADS threads that each execute
/// boost_atomic_fun, joins them all, then prints the value of g_CountBoost and the
/// elapsed time in milliseconds. Thread start-up is included in the timed span.
void test_boost_atomic()
{
	// The vector owns the std::thread objects directly, so there is nothing to
	// delete by hand once the run is over.
	std::vector<std::thread> workers;
	workers.reserve(MAX_THREADS);

	// Benchmark boost::atomic
	printf("testing boost::atomic...\n");
	// steady_clock measures elapsed wall time in a known unit; clock() ticks are
	// CLOCKS_PER_SEC-dependent and count CPU time on some platforms.
	const auto start = std::chrono::steady_clock::now();
	for (int i = 0; i < MAX_THREADS; ++i)
	{
		workers.emplace_back(&boost_atomic_fun);
	}
	for (std::thread& worker : workers)
	{
		worker.join();
	}
	const auto finish = std::chrono::steady_clock::now();
	const auto elapsedMs =
		std::chrono::duration_cast<std::chrono::milliseconds>(finish - start).count();

	// Print the loaded long value, not the atomic object itself: a class object
	// must not be passed through printf's varargs.
	printf("result:%ld\n", static_cast<long>(g_CountBoost.load()));
	printf("cost:%lldms\n", static_cast<long long>(elapsedMs));
}

/// Entry point. Only the Interlocked benchmark is enabled; the other two calls
/// are left commented out and can be switched on to measure them instead.
/// The previous `(char*, int)` parameter list was not a valid signature for
/// main and neither parameter was used, so the standard no-argument form is used.
int main()
{
	test_Interlocked();
	//test_std_atomic();
	//test_boost_atomic();
	return 0;
}

 

 

三种API的测试都使用16个并发线程,测试输出结果如下(各跑了5次,取平均值):

testing Interlocked...

result:160000000

cost:4926ms

 

testing std::atomic...

result:160000000

cost:4952ms

 

testing boost::atomic...

result:160000000 

cost:4949ms

 

测试环境:

boost 1.60

windows 10 pro x64

VS2015企业版 update2,release x64

CPU:i7二代移动版

 

 

猜你喜欢

转载自aigo.iteye.com/blog/2291402