[C++11]std::atomic、boost::atomic、Interlocked三者的性能比较(benchmark)

原文作者：@玄冬Wong

好久没做过benchmark了。这次之所以想测试一下，是担心std::atomic的性能不如Windows的Interlocked系列函数。实测发现三者性能差不多，Interlocked微弱的领先优势可以忽略不计。

先公布结果：三者的性能几乎相同，windows的Interlocked略好一点点。

测试代码：

#ifdef _WIN64
#ifndef _DEBUG
#pragma comment(lib, "libboost_atomic-vc140-mt-1_60.lib")
#endif
#endif

//#include "stdafx.h"
#include <windows.h>
#include <iostream>
#include <atomic>
#include <boost/atomic.hpp> 
#include <time.h>  
#include <thread>
#include <list> 


// Number of worker threads started by each benchmark.
#define MAX_THREADS 16
// Number of increments performed by every worker thread.
#define LOOP_COUNT 10000000

// Shared counters, one per API under test.
// Incremented through InterlockedIncrement.
volatile long g_CountWin = 0;
// Direct-initialized: std::atomic is not copyable, so the copy-initialization
// form `= 0` is rejected by conforming compilers before C++17.
std::atomic<long> g_CountStd(0);
boost::atomic_long g_CountBoost(0);

// Worker for the Interlocked benchmark: performs LOOP_COUNT atomic
// increments on g_CountWin through InterlockedIncrement.
void Interlocked_fun()
{
	int remaining = LOOP_COUNT;
	while (remaining-- > 0)
	{
		// InterlockedIncrement takes a pointer to a volatile LONG, so the
		// address of the volatile counter can be passed as-is.
		InterlockedIncrement(&g_CountWin);
	}
}

// Worker for the std::atomic benchmark: performs LOOP_COUNT atomic
// increments on g_CountStd.
void std_atomic_fun()
{
	for (int remaining = LOOP_COUNT; remaining > 0; --remaining)
	{
		// Same operation as a pre-increment whose result is discarded.
		g_CountStd.fetch_add(1);
	}
}

// Worker for the boost::atomic benchmark: performs LOOP_COUNT atomic
// increments on g_CountBoost.
void boost_atomic_fun()
{
	int remaining = LOOP_COUNT;
	while (remaining > 0)
	{
		++g_CountBoost;
		--remaining;
	}
}



// Benchmarks InterlockedIncrement: starts MAX_THREADS threads that each run
// Interlocked_fun, joins all of them, then prints the final value of
// g_CountWin and the time measured with clock(), converted to milliseconds.
// NOTE(review): clock() is documented as elapsed wall-clock time in
// Microsoft's CRT but as processor time elsewhere -- confirm before porting.
void test_Interlocked()
{
	// Threads are stored by value, so no manual new/delete is needed.
	std::list<std::thread> threadlist;

	// Test Interlocked
	printf("testing Interlocked...\n");
	clock_t start = clock();
	for (int i = 0; i < MAX_THREADS; ++i)
	{
		threadlist.emplace_back(&Interlocked_fun);
	}
	for (std::thread &worker : threadlist)
	{
		worker.join();
	}
	clock_t finish = clock();
	// The counter is a long, so it must be printed with %ld rather than %d.
	printf("result:%ld\n", g_CountWin);
	// clock() counts ticks; scale by CLOCKS_PER_SEC so the printed value really
	// is milliseconds, and widen first so the multiplication cannot overflow.
	printf("cost:%lldms\n", static_cast<long long>(finish - start) * 1000 / CLOCKS_PER_SEC);
}

// Benchmarks std::atomic: starts MAX_THREADS threads that each run
// std_atomic_fun, joins all of them, then prints the final value of
// g_CountStd and the time measured with clock(), converted to milliseconds.
// NOTE(review): clock() is documented as elapsed wall-clock time in
// Microsoft's CRT but as processor time elsewhere -- confirm before porting.
void test_std_atomic()
{
	// Threads are stored by value, so no manual new/delete is needed.
	std::list<std::thread> threadlist;

	// Test std::atomic
	printf("testing std::atomic...\n");
	clock_t start = clock();
	for (int i = 0; i < MAX_THREADS; ++i)
	{
		threadlist.emplace_back(&std_atomic_fun);
	}
	for (std::thread &worker : threadlist)
	{
		worker.join();
	}
	clock_t finish = clock();
	// Read the value out with load(): a std::atomic object is not copyable and
	// must not be passed through printf's variadic arguments.
	printf("result:%ld\n", g_CountStd.load());
	// clock() counts ticks; scale by CLOCKS_PER_SEC so the printed value really
	// is milliseconds, and widen first so the multiplication cannot overflow.
	printf("cost:%lldms\n", static_cast<long long>(finish - start) * 1000 / CLOCKS_PER_SEC);
}

// Benchmarks boost::atomic: starts MAX_THREADS threads that each run
// boost_atomic_fun, joins all of them, then prints the final value of
// g_CountBoost and the time measured with clock(), converted to milliseconds.
// NOTE(review): clock() is documented as elapsed wall-clock time in
// Microsoft's CRT but as processor time elsewhere -- confirm before porting.
void test_boost_atomic()
{
	// Threads are stored by value, so no manual new/delete is needed.
	std::list<std::thread> threadlist;

	// Test boost::atomic
	printf("testing boost::atomic...\n");
	clock_t start = clock();
	for (int i = 0; i < MAX_THREADS; ++i)
	{
		threadlist.emplace_back(&boost_atomic_fun);
	}
	for (std::thread &worker : threadlist)
	{
		worker.join();
	}
	clock_t finish = clock();
	// Read the value out with load(): the atomic object itself must not be
	// passed through printf's variadic arguments.
	printf("result:%ld\n", g_CountBoost.load());
	// clock() counts ticks; scale by CLOCKS_PER_SEC so the printed value really
	// is milliseconds, and widen first so the multiplication cannot overflow.
	printf("cost:%lldms\n", static_cast<long long>(finish - start) * 1000 / CLOCKS_PER_SEC);
}

// Program entry point. Only the Interlocked benchmark is enabled here; the
// other two benchmark calls are left commented out.
// Uses the standard parameterless signature: the previous (char*, int)
// parameter list is not a valid form of main and its arguments were unused.
int main()
{
	test_Interlocked();
	//test_std_atomic();
	//test_boost_atomic();
}

三种API的测试都使用16个并发线程，测试输出结果如下（各跑了5次，取平均值）：

testing Interlocked...

result:160000000

cost:4926ms

testing std::atomic...

result:160000000

cost:4952ms

testing boost::atomic...

result:160000000

cost:4949ms

测试环境：

boost 1.60

windows 10 pro x64

VS2015企业版 update2，release x64

CPU：i7二代移动版

[C++11]std::atomic、boost::atomic、Interlocked三者的性能比较(benchmark)

猜你喜欢