【哈希表封装实现】—— 我与C++的不解之缘（二十九）

企业开发 2025-04-08 11:44:37 阅读次数: 0

前言

我们知道unordered_set和unordered_map的底层是哈希表，那现在我们就是使用我们自己实现的HashTable来封装实现出简单的unordered_set和unordered_map。

一、了解源码

在SGL-STL30版本之前源代码中是没有unordered_set和unordered_map的

unordered_set和unordered_map是C++11之后才更新的；SGL-STL30是C++11之前的版本

但是想SGL-STL30中实现了哈希表，但容器的名字叫做hash_map和hash_set（它是作为非标准的容器出现的）

源代码在hash_map/hash_set/stl_hash_map/stl_hash_map/stl_hash_set/stl_hashtable.h中

这里截取其中的一部分来看一下：

// stl_hash_set
template <class Value, class HashFcn = hash<Value>,
	class EqualKey = equal_to<Value>,
	class Alloc = alloc>
class hash_set
{
    
    
private:
	typedef hashtable<Value, Value, HashFcn, identity<Value>,
		EqualKey, Alloc> ht;
	ht rep;
public:
	typedef typename ht::key_type key_type;
	typedef typename ht::value_type value_type;
	typedef typename ht::hasher hasher;
	typedef typename ht::key_equal key_equal;
	typedef typename ht::const_iterator iterator;
	typedef typename ht::const_iterator const_iterator;
	hasher hash_funct() const {
    
     return rep.hash_funct(); }
	key_equal key_eq() const {
    
     return rep.key_eq(); }
};
// stl_hash_map
template <class Key, class T, class HashFcn = hash<Key>,
	class EqualKey = equal_to<Key>,
	class Alloc = alloc>
class hash_map
{
    
    
private:
	typedef hashtable<pair<const Key, T>, Key, HashFcn,
		select1st<pair<const Key, T> >, EqualKey, Alloc> ht;
	ht rep;
public:
	typedef typename ht::key_type key_type;
	typedef T data_type;
	typedef T mapped_type;
	typedef typename ht::value_type value_type;
	typedef typename ht::hasher hasher;
	typedef typename ht::key_equal key_equal;
	typedef typename ht::iterator iterator;
	typedef typename ht::const_iterator const_iterator;
};
// stl_hashtable.h
template <class Value, class Key, class HashFcn,
	class ExtractKey, class EqualKey,
	class Alloc>
class hashtable {
    
    
public:
	typedef Key key_type;
	typedef Value value_type;
	typedef HashFcn hasher;
	typedef EqualKey key_equal;
private:
	hasher hash;
	key_equal equals;
	ExtractKey get_key;
	typedef __hashtable_node<Value> node;
	vector<node*, Alloc> buckets;
	size_type num_elements;
public:
	typedef __hashtable_iterator<Value, Key, HashFcn, ExtractKey, EqualKey,
		Alloc> iterator;
	pair<iterator, bool> insert_unique(const value_type& obj);
	const_iterator find(const key_type& key) const;
};
template <class Value>
struct __hashtable_node
{
    
    
	__hashtable_node* next;
	Value val;
};

简单了解一下源码：（这里和map/set大致是一样的）

这里hashtable实现的是<V,K>结构，第一个参数V表示存储的数据类型，第二个参数K表示关键值key。

结构上依然是unordered_set和unordered_map使用同一个哈希表实现Key和Key/Value结构；unordered_set传<Key,Key>、unordered_map传<pair<Key,Value>,Key>。

这里呢它第一个参数是Value，感觉有点怪怪的；我们在实现的时候依然按照<Key,Value>来实现。

二、封装实现`unordered_set`和`unordered_map`

1. 复用哈希表框架，实现`insert`

根据我们上篇博客实现的哈希表，我们要复用这个框架，写出来unordered_set和unordered_map的大致框架并支持insert操作。
这里我们就按照红黑树封装实现set和map的逻辑，Key参数就用K，Value参数就用V来实现；对于哈希表存储的数据类型，就用T来表示。
这里我们将数据类型用T来表示，就导致了我们在HashTable中不知道T是K还是pair<K,V>，所以我们要像实现set和map那样，用一个模版参数KeyOfT来让我们在HashTable中能通过T取到K。
那这个KeyOfT就只能从unordered_set和unordered_map中传（因为在unordered_set和unordered_map中才知道数据类型是K还是pair<K,V>）

这里再重述一下为什么要用KeyOfT：因为我们不知道T到底是K还是pair<K,V>

如果是K那可以直接进行转换成整型然后取模操作。

但如果是pair<K,V>，pair<K,V>是不支持转换成整型的，所以我们要实现KeyOfT通过T取出K，然后对K进行转化整型然后取模操作。

逻辑理清楚了，现在来代码（这里有了KeyOfT，那每次使用Key转整型并取模操作之前都要套上一层取Key操作）。

先来看我们的HashTable.h

template<class K>
struct HashFunc
{
    
    
	size_t operator()(const K& key)
	{
    
    
		return (size_t)key;
	}
};
template<>
struct HashFunc<string>
{
    
    
	size_t operator()(const string& str)
	{
    
    
		size_t ret = 1;
		int i = 1;
		for (auto& ch : str)
		{
    
    
			ret += ch * i;
			i++;
		}
		return ret;
	}
};
template<class T>
struct HashNode
{
    
    
	HashNode(const T& data)
		:_data(data)
		, _next(nullptr)
	{
    
    }
	T _data;
	HashNode<T>* _next;
};

template<class K, class T, class KeyOfT, class Hash = HashFunc<K>>
class HashTable
{
    
    
	typedef HashNode<K, T> Node;
public:
	HashTable()
	{
    
    
		_table.resize(__stl_next_prime(0));
	}
	bool insert(const T& data)
	{
    
    
		KeyOfT kof;
		if (find(kof(data)) != nullptr)
			return false;
		Hash hs;
		//扩容
		if ((double)_n / (double)_table.size() >= 1)
		{
    
    
			size_t  newsize = __stl_next_prime(_table.size() + 1);
			vector<Node*> newtable;
			newtable.resize(newsize);
			for (int i = 0; i < _table.size(); i++)
			{
    
    
				Node* cur = _table[i];
				while (cur)
				{
    
    
					Node* next = cur->_next;
					size_t hash0 = hs(kof(cur->_data)) % newsize;
					//size_t hash0 = hs(cur->_kv.first) % newsize;
					//size_t hash0 = cur->_kv.first % newsize;
					cur->_next = newtable[hash0];
					newtable[hash0] = cur;
					cur = next;
				}
			}
			_table.swap(newtable);
		}
		//插入
		size_t hash0 = hs(kof(data)) % _table.size();
		//size_t hash0 = hs(kv.first) % _table.size();
		//size_t hash0 = kv.first % _table.size();
		//头插到当前位置
		Node* newnode = new Node(data);
		newnode->_next = _table[hash0];
		_table[hash0] = newnode;
		_n++;
		return true;
	}
	Node* find(const K& key)
	{
    
    
		Hash hs;
		KeyOfT kof;
		size_t hashi = hs(key) % _table.size();
		//size_t hashi = key % _table.size();
		Node* cur = _table[hashi];
		while (cur)
		{
    
    
			if (kof(cur->_data) == key)
				return cur;
			cur = cur->_next;
		}
		return nullptr;
	}
	bool erase(const K& key)
	{
    
    
		KeyOfT kof;
		for (int i = 0; i < _table.size(); i++)
		{
    
    
			Node* cur = _table[i];
			Node* prve = nullptr;
			while (cur)
			{
    
    
				if (key == kof(cur->_data))
				//if (key == cur->_kv.first)
				{
    
    
					if (prve == nullptr)
					{
    
    
						_table[i] = cur->_next;
					}
					else
					{
    
    
						prve->_next = cur->_next;
					}
					delete cur;
					--_n;
					return true;
				}
				prve = cur;
				cur = cur->_next;
			}
		}
	}
	~HashTable()
	{
    
    
		for (int i = 0; i < _table.size(); i++)
		{
    
    
			Node* cur = _table[i];
			while (cur)
			{
    
    
				Node* next = cur->_next;
				delete cur;
				cur = next;
			}
			_table[i] = nullptr;
		}
	}
private:
	vector<Node*> _table;
	size_t _n = 0;
};

再来看unordered_set.h

namespace HL
{
    
    
	template<class K>
	class unordered_set
	{
    
    
		struct SetKeyOfT
		{
    
    
			const K& operator()(const K& key)
			{
    
    
				return key;
			}
		};
	public:
		bool insert(const K& key)
		{
    
    
			return _hash.insert(key);
		}
	private:
		HashTable<K, const K, SetKeyOfT> _hash;
	};
	void test_set()
	{
    
    
		unordered_set<int> ust;
		for (int i = 0; i <= 54; i++)
		{
    
    
			int x = rand();
			ust.insert(x);
		}
	}
}

最后再来看unordered_map.h

namespace HL
{
    
    
	template<class K,class V>
	class unordered_map
	{
    
    
		struct MapKeyOfT
		{
    
    
			const K& operator()(const pair<K,V>& kv)
			{
    
    
				return kv.first;
			}
		};
	public:
		bool insert(const pair<K, V>& kv)
		{
    
    
			return _hash.insert(kv);
		}
	private:
		HashTable<K, pair<const K, V>, MapKeyOfT> _hash;
	};
	void test_map()
	{
    
    
		unordered_map<int, int> ump;
		for (int i = 0; i <= 54; i++)
		{
    
    
			int x = rand();
			ump.insert({
    
     x,i });
		}
	}
}

这里对于初步实现的unordered_set和unordered_map，博主还写了两个测试方法test_unordered_set和test_unordered_map

在测试方法中插入了55个数据，也涉及到了扩容（且没有出事初始化随机数生成器，每次生成的随机数都是一样的；方便测试）。

2. 实现`iterator`迭代器，完成`unordered_set`和`unordered_map`的迭代器遍历

`HashTable`的迭代器：

对于我们这里实现的链式结构的HashTable，它的迭代器显而易见是单向的；

unordered_set和unordered_map的迭代器也是单向的。

这里我们HashTable的迭代器，和list大致是一样的，都是封装节点的指针，再通过运算符重载实现；让迭代器像指针一种访问。

**对于HashTable**的迭代器中==、!=、*、->，实现这些运算符重载都好说；那operator++呢？

那operator++如何实现呢？

我们迭代器是对节点指针的封装，但是我们节点HashNode中只存放了数据_data和下一个位置的指针_next。

如果我们当前节点下一个位置不为空（当前桶（位置）还有节点，那就直接指向下一个节点即可。

那如果我们当前位置下一个位置为空（当前桶遍历完了），我们就要想办法找大下一个不为空的桶。

那如何去找下一个桶呢？

如果我们单纯的对节点的指针进行封装，显然是不能解决这个问题的；参照源码我们可以发现，在iterator中除了节点的指针，还存在一个哈希表对象的指针。

有了哈希表对象的指针，我们走完当前桶那就很容易找到下一个桶了。

这里我们找到下一个桶位置需要用到KeyOfT进行取Key操作，也要用到Hash对Key取模操作；

所以我们iterator的模版参数就不只是原来的template<class T,classRef,class Ptr>了。

而是template<class K,class T,classRef,class Ptr,class KeyOfT,class Hash>。

那现在问题又出现了：

我们HashIterator是定义在HashTable前面的，而我们HashIterator中还要使用HashTable，并且在HashIterator中还用访问HashTable的私有成员，那怎么办呢？

首先我们要对HashTable进行前置声明，让我们在HashIterator中知道有HashTable这个类。

然后就是友元，让HashIterator成为HashTable类的友元。

`const`迭代器

对于const迭代器实现起来就好很多了，只需要在HashIterator第二和第三个参数位置传const T&,const T*即可。

那这样我们大体就清楚了，现在来实现代码

//前置声明
template<class K,class T,class KeyOfT,class Hash>
class HashTable;
template<class K,class T,class Ref,class Ptr,class KeyOfT,class Hash>
class HashIterator
{
    
    
	typedef HashNode<T> Node;
	typedef HashTable<K, T, KeyOfT, Hash> HT;
	typedef HashIterator<K, T, Ref, Ptr, KeyOfT, Hash> Self;
public:
	bool operator== (const Self& it)
	{
    
    
		return _node == it._node;
	}
	bool operator!=(const Self& it)
	{
    
    
		return _node != it._node;
	}
	Ref operator*()
	{
    
    
		return _node->_data;
	}
	Ptr operator->()
	{
    
    
		return &_node->_data;
	}
	Self& operator++()
	{
    
    
		if (_node->_next != nullptr)
		{
    
    
			_node = _node->_next;
		}
		else
		{
    
    
			//当前桶走完了，找到下一个桶
			KeyOfT kof;
			Hash hs;
			size_t  hashi = hs(kof(_node->_data)) % _ht->_table.size();
			hashi++;
			while (hashi < _ht->_table.size())
			{
    
    
				if (_ht->_table[hashi])
				{
    
    
					_node = _ht->_table[hashi];
					break;
				}
				hashi++;
			}
			if (hashi == _ht->_table.size())
				_node = nullptr;
		}
		return *this;
	}
	HashIterator(Node* node, const HT* ht)
		:_node(node)
		,_ht(ht)
	{
    
    }
private:
	Node* _node;
	const HT* _ht;
};

当然不要忘了在HashTable中声明友元

这里就只展示出了HashTable中新增的部分。

template<class K, class T, class KeyOfT, class Hash = HashFunc<K>>
class HashTable
{
    
    
	//友元
	template<class K,class T,class Ref,class Ptr,class KeyOfT,class Hash>
	friend class HashIterator;
public:
	typedef HashIterator<K, T, T&, T*, KeyOfT, Hash> Iterator;
	typedef HashIterator<K, T, const T&, const T*, KeyOfT, Hash> ConstIterator;
	//迭代器部分
	Iterator begin()
	{
    
    
		//找到第一个迭代器的位置
		for (int i = 0; i < _table.size(); i++)
		{
    
    
			if (_table[i] != nullptr)
				return {
    
     _table[i],this };
		}
		return {
    
     nullptr,this };
	}
	Iterator end()
	{
    
    
		return {
    
     nullptr,this };
	}
	ConstIterator begin() const
	{
    
    
		//找到第一个迭代器的位置
		for (int i = 0; i < _table.size(); i++)
		{
    
    
			if (_table[i] != nullptr)
				return {
    
     _table[i],this };
		}
		return {
    
     nullptr,this };
	}
	ConstIterator end() const
	{
    
    
		return {
    
     nullptr,this };
	}
};

`unordered_set`和`unordered_map`迭代器：

对于unordered_set和unordered_map迭代器对HashTable的迭代器进行复用即可。

unordered_set迭代器

		typedef typename HashTable<K, const K, SetKeyOfT>::Iterator iterator;
		typedef typename HashTable<K, const K, SetKeyOfT>::ConstIterator const_iterator;
		iterator begin()
		{
    
    
			return _hash.begin();
		}
		iterator end()
		{
    
    
			return _hash.end();
		}
		const_iterator begin() const
		{
    
    
			return _hash.begin();
		}
		const_iterator end() const
		{
    
    
			return _hash.end();
		}

unordered_map迭代器

		typedef typename HashTable<K, pair<const K, V>, MapKeyOfT>::Iterator iterator;
		typedef typename HashTable<K, pair<const K, V>, MapKeyOfT>::ConstIterator const_iterator;
		iterator begin()
		{
    
    
			return _hash.begin();
		}
		iterator end()
		{
    
    
			return _hash.end();
		}
		const_iterator begin() const
		{
    
    
			return _hash.begin();
		}
		const_iterator end() const
		{
    
    
			return _hash.end();
		}

这里博主还写出了测试迭代器的代码（只有普通迭代器的）

	void test_set()
	{
    
    
		unordered_set<int> ust;
		for (int i = 0; i <= 54; i++)
		{
    
    
			int x = rand();
			ust.insert(x);
		}
		auto it = ust.begin();
		while (it != ust.end())
		{
    
    
			cout << *it << endl;
			++it;
		}
		cout << endl;
	}
	void test_map()
	{
    
    
		unordered_map<int, int> ump;
		for (int i = 0; i <= 54; i++)
		{
    
    
			int x = rand();
			ump.insert({
    
     x,i });
		}
		auto it = ump.begin();
		while (it != ump.end())
		{
    
    
			cout << it->first << ":" << it->second << endl;
			++it;
		}
		cout << endl;
	}

3. 实现`unordered_map`的`operator[]`

在这里插入图片描述

对于unordered_map的operator[]，其功能和map的operator[]一样，如果key值存在就返回key对应的value的引用；如果key不存在就先插入key（value是缺省值），再返回value的引用。

那我们要实现operator[]就要依赖于insert，所以我们要对insert进行修改（将其返回值修改为pair<Iterator,bool>。

这里我们insert又依赖于find判断数据是否存在，所以对find也要进行一点点修改。

HashTable的insert和find：

	//bool insert(const T& data)
	pair<Iterator,bool> insert(const T& data)
	{
    
    
		KeyOfT kof;
		//if (find(kof(data)) != nullptr)
		//	return false;
		Iterator it = find(kof(data));
		if (it != end())
			return {
    
     it,false };
		Hash hs;
		//扩容
		if ((double)_n / (double)_table.size() >= 1)
		{
    
    
			size_t  newsize = __stl_next_prime(_table.size() + 1);
			vector<Node*> newtable;
			newtable.resize(newsize);
			for (int i = 0; i < _table.size(); i++)
			{
    
    
				Node* cur = _table[i];
				while (cur)
				{
    
    
					Node* next = cur->_next;
					size_t hash0 = hs(kof(cur->_data)) % newsize;
					//size_t hash0 = hs(cur->_kv.first) % newsize;
					//size_t hash0 = cur->_kv.first % newsize;
					cur->_next = newtable[hash0];
					newtable[hash0] = cur;
					cur = next;
				}
			}
			_table.swap(newtable);
		}
		//插入
		size_t hash0 = hs(kof(data)) % _table.size();
		//size_t hash0 = hs(kv.first) % _table.size();
		//size_t hash0 = kv.first % _table.size();
		//头插到当前位置
		Node* newnode = new Node(data);
		newnode->_next = _table[hash0];
		_table[hash0] = newnode;
		_n++;
		//return true;
		return {
    
     {
    
    newnode,this},true };
	}
	//Node* find(const K& key)
	Iterator find(const K& key)
	{
    
    
		Hash hs;
		KeyOfT kof;
		size_t hashi = hs(key) % _table.size();
		//size_t hashi = key % _table.size();
		Node* cur = _table[hashi];
		while (cur)
		{
    
    
			if (kof(cur->_data) == key)
				return {
    
     cur,this };
				//return cur;
			cur = cur->_next;
		}
		//return nullptr;
		return {
    
     nullptr,this };
	}

unordered_set的insert：

		//bool insert(const K& key)
		pair<iterator,bool> insert(const K& key)
		{
    
    
			return _hash.insert(key);
		}

unordered_map的insert和operator[]：

		//bool insert(const pair<K, V>& kv)
		pair<iterator,bool> insert(const pair<K, V>& kv)
		{
    
    
			return _hash.insert(kv);
		}
		V& operator[](const K& key)
		{
    
    
			pair<iterator, bool> p = insert({
    
     key,V() });
			return p.first->second;
		}

这里提供了测试unordered_map的方法：

	void test_map()
	{
    
    
		HL::unordered_map<string, string> ump;
		ump.insert({
    
     "sort", "排序" });
		ump.insert({
    
     "left", "左边" });
		ump.insert({
    
     "right", "右边" });
		ump["left"] = "左边，剩余";
		ump["insert"] = "插入";
		ump["string"];
		HL::unordered_map<string, string>::iterator it = ump.begin();
		while (it != ump.end())
		{
    
    
			it->second += 'x';
			cout << it->first << ":" << it->second << endl;
			++it;
		}
		cout << endl;
	}

在这里插入图片描述

可以看到没有问题的好吧

4. 构造函数和析构函数

这里我们HashTable是写了构造函数和析构函数的，而unordered_set和unordered_map都只是对HashTable的封装复用，所以这里不需要写unordered_set和unordered_map的析构函数的。

三、简单总结

供了测试unordered_map的方法：

	void test_map()
	{
    
    
		HL::unordered_map<string, string> ump;
		ump.insert({
    
     "sort", "排序" });
		ump.insert({
    
     "left", "左边" });
		ump.insert({
    
     "right", "右边" });
		ump["left"] = "左边，剩余";
		ump["insert"] = "插入";
		ump["string"];
		HL::unordered_map<string, string>::iterator it = ump.begin();
		while (it != ump.end())
		{
    
    
			it->second += 'x';
			cout << it->first << ":" << it->second << endl;
			++it;
		}
		cout << endl;
	}

[外链图片转存中…(img-lLsjrj3p-1743398072126)]

可以看到没有问题的好吧

4. 构造函数和析构函数

这里我们HashTable是写了构造函数和析构函数的，而unordered_set和unordered_map都只是对HashTable的封装复用，所以这里不需要写unordered_set和unordered_map的析构函数的。

三、简单总结

这里erase函数也是直接复用HashTable的erase函数，这里就不演示了。

简单总结一下

这里我们通过复用HashTable，实现了unordered_set和unordered_map的insert、find和 迭代器以及unordered_map的operator[]。

首先就是复用HashTable，实现unordered_set和unordered_map的框架；

然后实现HashTable的迭代器Iterator，并完成复用实现unordered_set和unordered_map的iterator；

接着就是unordered_map的operator[]操作；

最后呢就是构造函数和析构函数这些（虽然不需要写unordered_set的unordered_map）。

这里对于size、empty等这些简单的方法这里并没有实现（也是直接复用的好吧）