map和hash_map都是C++里面提供的关联容器，它们都支持高性能的插入、删除、查找操作。map内部是基于红黑树来实现的，而hash_map是基于线性同余哈希+开链解决冲突来实现的。
注意，hash_map并未纳入C++标准之中，因此不同厂商的STL库的hash_map的接口、性能保障可能会有出入。

在C++11中，hash_map被正式纳入到STL里面，但是换了个名字：unordered_map，取这个名字是因为hash_map不能提供元素的排序功能，而基于红黑树的map是可以通过不断地摘除最小元素来进行排序。

那么map和hash_map之间性能相差怎么样呢？我们来模拟一个场景：执行n次的随机插入、删除、find操作，统计他们的时长和内存使用情况。代码如下：

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <map>
#include <utility>
#include <unordered_map>
#include <windows.h>
#include <Psapi.h>

#pragma comment(lib,"Psapi.lib")
using namespace std;
template <typename Map,typename Key,typename Value>
void insert(Map & map_obj, Key& k,Value & v)
{
	map_obj.insert(pair<Key,Value>(k,v));
}

template <typename Map,typename Key>
void erase(Map & map_obj, Key & key)
{
	map_obj.erase(key);
}

template <typename Map,typename Key>
void find(Map &map_obj, Key & k)
{
	map_obj.find(k);
}

int main()
{
	map<int, int> rb_maps;
	unordered_map<int, int> hash_maps;
	int n = 1e5;
	void* handle = GetCurrentProcess();
	PROCESS_MEMORY_COUNTERS pmc;
	GetProcessMemoryInfo(handle, &pmc, sizeof(pmc));//获取内存使用量
	int base_work_size = pmc.WorkingSetSize;
	srand(time(0));
	//rb_maps
	int start = clock();
	for (int i = 0; i < n; i++)
	{
		int key = rand() % n;
		int value = rand() % n;
		switch (rand()%3)
		{
		case 0:
			insert(rb_maps, key,value);
			break;
		case 1:
			erase(rb_maps, key);
			break;
		case 2:
			find(rb_maps, key);
			break;
		}
	}
	int end = clock();
	printf("rb_tree based map:%f\n", float(end - start)/CLOCKS_PER_SEC);
	GetProcessMemoryInfo(handle, &pmc, sizeof(pmc));//获取内存使用量
	int rb_work_size = pmc.WorkingSetSize-base_work_size;
	printf("memory used: %d bytes\n", rb_work_size);

	//hash_maps
	start = clock();
	for (int i = 0; i < n; i++)
	{
		int key = rand() % n;
		int value = rand() % n;
		switch (rand() % 3)
		{
		case 0:
			insert(hash_maps, key, value);
			break;
		case 1:
			erase(hash_maps, key);
			break;
		case 2:
			find(hash_maps, key);
			break;
		}
	}
	end = clock();
	printf("hash_table based map:%f\n", float(end - start)/CLOCKS_PER_SEC);
	GetProcessMemoryInfo(handle, &pmc, sizeof(pmc));
	int hash_work_size = pmc.WorkingSetSize-rb_work_size-base_work_size;
	printf("memory used: %d bytes\n", hash_work_size);
	system("pause");
}

以下列出了当n取不同值时候的差异：

插入、删除、查找三种操作混合

n	map(时间)	hash_map(时间)	map(内存)	hash_map(内存)
$10^5$	1.028s	0.8s	1，351，680 bytes	1，265，664 bytes
$10^6$	11.439s	7.893s	1，564，672bytes	1，695，744bytes
$10^7$	115.0739s	77.593s	1，515，520bytes	1，691，648bytes

可以看出，在时间上，hash_map优于map；
但是hash_map却比map更耗空间。

对比插入操作

n	map(时间)	hash_map(时间)	map(内存)	hash_map(内存)
$10^5$	1.086s	1.276s	2，850，816 bytes	2，764，800 bytes
$10^6$	11.208s	10.06s	2，985，984bytes	2，887，680bytes
$10^7$	110.33s	78.3s	2，945，024bytes	2，887，680bytes

可以看出，如果只是插入操作，在时间上hash_map优于map。
而且是hash_map依然比map更耗空间。
但是可以看到一个很有意思的地方：n= $10^6$ 和 n = $10^7$ ，hash_map使用的空间的相同的。

对比查找操作

先插入，后查找。

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <map>
#include <utility>
#include <unordered_map>
#include <windows.h>
#include <Psapi.h>

#pragma comment(lib,"Psapi.lib")
using namespace std;
template <typename Map,typename Key,typename Value>
void insert(Map & map_obj, Key& k,Value & v)
{
	map_obj.insert(pair<Key,Value>(k,v));
}

template <typename Map,typename Key>
void erase(Map & map_obj, Key & key)
{
	map_obj.erase(key);
}

template <typename Map,typename Key>
void find(Map &map_obj, Key & k)
{
	map_obj.find(k);
}

int main()
{
	map<int, int> rb_maps;
	unordered_map<int, int> hash_maps;

	int n = 1e7;
	for (int i = 0; i < n; i++)
	{
		int key = rand() % n;
		int value = rand() % n;
		insert(rb_maps, key, value);
		insert(hash_maps, key, value);
	}

	void* handle = GetCurrentProcess();
	PROCESS_MEMORY_COUNTERS pmc;
	GetProcessMemoryInfo(handle, &pmc, sizeof(pmc));//获取内存使用量
	int base_work_size = pmc.WorkingSetSize;
	srand(time(0));
	//rb_maps

	int start = clock();
	for (int i = 0; i < n; i++)
	{
		int key = rand() % n;
		int value = rand() % n;
		switch (2)
		{
		case 0:
			insert(rb_maps, key,value);
			break;
		case 1:
			erase(rb_maps, key);
			break;
		case 2:
			find(rb_maps, key);
			break;
		}
	}
	int end = clock();
	printf("rb_tree based map:%f\n", float(end - start)/CLOCKS_PER_SEC);
	GetProcessMemoryInfo(handle, &pmc, sizeof(pmc));//获取内存使用量
	int rb_work_size = pmc.WorkingSetSize-base_work_size;
	printf("memory used: %d bytes\n", rb_work_size);

	//hash_maps
	start = clock();
	for (int i = 0; i < n; i++)
	{
		int key = rand() % n;
		int value = rand() % n;
		switch (2)
		{
		case 0:
			insert(hash_maps, key, value);
			break;
		case 1:
			erase(hash_maps, key);
			break;
		case 2:
			find(hash_maps, key);
			break;
		}
	}
	end = clock();
	printf("hash_table based map:%f\n", float(end - start)/CLOCKS_PER_SEC);
	GetProcessMemoryInfo(handle, &pmc, sizeof(pmc));
	int hash_work_size = pmc.WorkingSetSize-rb_work_size-base_work_size;
	printf("memory used: %d bytes\n", hash_work_size);
	system("pause");
}

结果：

n	map(时间)	hash_map(时间)	map(内存)	hash_map(内存)
$10^5$	0.746s	0.209s	102，400 bytes	4,096 bytes
$10^6$	7.183s	2.210s	102，400bytes	4，096bytes
$10^7$	72.33s	23.3s	2，945，024bytes	2，887，680bytes

key不随机

上面都还只是key是很随机的情况，如果key是递增或递减或怎么样呢？我们还是考察n=1e5,1e6时插入、删除、查找操作一起来。
实验代码：

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <map>
#include <utility>
#include <unordered_map>
#include <windows.h>
#include <Psapi.h>

#pragma comment(lib,"Psapi.lib")
using namespace std;
template <typename Map,typename Key,typename Value>
void insert(Map & map_obj, Key& k,Value & v)
{
	map_obj.insert(pair<Key,Value>(k,v));
}

template <typename Map,typename Key>
void erase(Map & map_obj, Key & key)
{
	map_obj.erase(key);
}

template <typename Map,typename Key>
void find(Map &map_obj, Key & k)
{
	map_obj.find(k);
}

int main()
{
	map<int, int> rb_maps;
	unordered_map<int, int> hash_maps;

	int n = 1e6;

	void* handle = GetCurrentProcess();
	PROCESS_MEMORY_COUNTERS pmc;
	GetProcessMemoryInfo(handle, &pmc, sizeof(pmc));//获取内存使用量
	int base_work_size = pmc.WorkingSetSize;
	srand(time(0));
	//rb_maps

	int start = clock();
	for (int i = 0; i < n; i++)
	{
		int key = i;
		int value = rand() % n;
		switch (rand()%3)
		{
		case 0:
			insert(rb_maps, key,value);
			break;
		case 1:
			erase(rb_maps, key);
			break;
		case 2:
			find(rb_maps, key);
			break;
		}
	}
	int end = clock();
	printf("rb_tree based map:%f\n", float(end - start)/CLOCKS_PER_SEC);
	GetProcessMemoryInfo(handle, &pmc, sizeof(pmc));//获取内存使用量
	int rb_work_size = pmc.WorkingSetSize-base_work_size;
	printf("memory used: %d bytes\n", rb_work_size);

	//hash_maps
	start = clock();
	for (int i = 0; i < n; i++)
	{
		int key = i;
		int value = rand() % n;
		switch (rand()%3)
		{
		case 0:
			insert(hash_maps, key, value);
			break;
		case 1:
			erase(hash_maps, key);
			break;
		case 2:
			find(hash_maps, key);
			break;
		}
	}
	end = clock();
	printf("hash_table based map:%f\n", float(end - start)/CLOCKS_PER_SEC);
	GetProcessMemoryInfo(handle, &pmc, sizeof(pmc));
	int hash_work_size = pmc.WorkingSetSize-rb_work_size-base_work_size;
	printf("memory used: %d bytes\n", hash_work_size);
	system("pause");
}

递增：

n	map(时间)	hash_map(时间)	map(内存)	hash_map(内存)
$10^5$	1.504s	0.965s	3，035，136bytes	3，416，064 bytes
$10^6$	17.9s	8.87s	29，356，032 bytes	30，892，032bytes

递减:

n	map(时间)	hash_map(时间)	map(内存)	hash_map(内存)
$10^5$	1.501s	0.965s	3，035，136bytes	3，416，064 bytes
$10^6$	18.06s	8.97s	29，356，032 bytes	30，892，032bytes

可以看出，key不随机后，map是受蛮大的影响，而hash_map受影响较小。这是因为hash_map在实现的时候就不依赖key的随机，它自己会对key进行一次线性同余的hash,这种哈希函数的hash结果是随机的。

结论

当数据量大了以后，hash_map在时间性能上是全面优于map的，在空间性能上会稍逊于map。
对于非随机的key,hash_map在时间性能上更是优于map。

Icoding_F2014 博客专家

发布了307 篇原创文章 · 获赞 268 · 访问量 56万+

他的留言板关注

C++ map和hash_map的性能对比

插入、删除、查找三种操作混合

对比插入操作

对比查找操作

key不随机

结论

猜你喜欢

C++ map和hash_map的性能对比

插入、删除、查找 三种操作混合

对比插入操作

对比查找操作

key不随机

结论

猜你喜欢

插入、删除、查找三种操作混合