目录
1. 前言
当前的存储结构本质上只有两种:数组、链表。数组支持随机访问,访问速度极快,哈希表就是利用这一点。
2. 哈希表原理
①哈希表的底层是数组,数组是通过下标索引访问的,那哈希表就要生成一个索引,为每一个元素分配唯一的索引,而生成这个索引的工具就叫做“哈希函数”,每一个存放数据的位置叫做“桶”。
②数组中的每一个元素以键值对key-value形式存在,索引就是通过用哈希函数将key转化为索引。至于怎么转化,这里举一个例子,假设数组容量为16,这里有一个key值为100的元素,那么哈希函数可以直接用 key % 16即100 % 16 = 4来计算,得到的结果4就是这个索引。当插入一个新的元素时,如果其索引位置上已有元素,那么就引入一个新的问题,哈希冲突。
③哈希冲突的解决办法,这里只列举我代码实现的方法,也是教科书上的办法:当哈希冲突时,后来元素的索引需要一直增加1,即一直往后移动,直到找到空的位置为止。当插入一个新的元素后,如果容量已满,需要即时扩充容量,否则在下一次哈希冲突寻找空位置时会造成死循环。
3. 源码实现
①为了增加遍历哈希容器元素的方便性,我这里为每一个元素增加双向链表作为存储容器,遍历时就遍历这双向链表。
②代码已与C++11中的std::unordered_map和std::unordered_set作一定的兼容。
③代码支持C++03标准,可不必使用C++11以上版本,代码已在VS2005和GCC 4.1中编译通过。
头文件:HashTable.h
#ifndef HASH_TABLE_H
#define HASH_TABLE_H
#include <utility>
#include <cmath>
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshift-count-overflow"
#elif defined(Clang)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wshift-count-overflow"
#elif defined(_MSC_VER)
#pragma warning(push)
#pragma warning(disable:4293)
#endif
#if __cplusplus >= 201703L
#define null nullptr
#define CONSTEXPR constexpr
#define CXX14_CONSTEXPR constexpr
#define CXX17_CONSTEXPR constexpr
#elif __cplusplus >= 201402L
#define null nullptr
#define CONSTEXPR constexpr
#define CXX14_CONSTEXPR constexpr
#define CXX17_CONSTEXPR
#elif __cplusplus >= 201103L
#define null nullptr
#define CONSTEXPR constexpr
#define CXX14_CONSTEXPR
#define CXX17_CONSTEXPR
#else
#define null 0
#define CONSTEXPR
#define CXX14_CONSTEXPR
#define CXX17_CONSTEXPR
#endif
typedef unsigned long HashType;
template<typename _Tp>
struct Hash
{
HashType operator()(const _Tp &_h) const
{ return _h.Hash(); }
};
#define HASH_SPECIALIZED(type) \
template<> \
struct Hash<type> \
{ \
HashType operator()(const type &c) const \
{ return static_cast<HashType>(c); } \
};
HASH_SPECIALIZED(char)
HASH_SPECIALIZED(unsigned char)
HASH_SPECIALIZED(short)
HASH_SPECIALIZED(unsigned short)
HASH_SPECIALIZED(int)
HASH_SPECIALIZED(unsigned int)
HASH_SPECIALIZED(long)
HASH_SPECIALIZED(unsigned long)
HASH_SPECIALIZED(long long)
HASH_SPECIALIZED(unsigned long long)
template<typename _Tp>
struct Hash<_Tp *>
{
HashType operator()(const _Tp *_h) const
{ return reinterpret_cast<HashType>(_h); }
};
template<typename _Tp>
struct Equal
{
bool operator()(const _Tp &_1, const _Tp &_2) const
{ return _1 == _2; }
};
template<typename _Key,
typename _Value,
typename _Hash = Hash<_Key>,
typename _EqualTo = Equal<_Key>
>
class HashTable
{
private:
struct NodeType
{
NodeType(const _Key &k, const _Value &v)
: key(k), value(v), next(null), prev(null)
{ }
const _Key key;
_Value value;
NodeType *next;
NodeType *prev;
};
public:
class const_iterator;
class iterator
{
private:
friend class HashTable;
friend class const_iterator;
NodeType *_M_data;
public:
iterator(NodeType *__v = null) : _M_data(__v) { }
iterator operator++()
{ return iterator(_M_data = _M_data->next); }
iterator operator++(int)
{
iterator __ret(_M_data);
_M_data = _M_data->next;
return __ret;
}
bool operator==(const iterator &__it) const
{ return _M_data == __it._M_data; }
bool operator!=(const iterator &__it) const
{ return _M_data != __it._M_data; }
_Value& operator*()
{ return _M_data->value; }
_Value* operator->()
{ return &_M_data->value; }
};
class const_iterator
{
private:
friend class HashTable;
const NodeType *_M_data;
public:
const_iterator(const NodeType *__v = null) : _M_data(__v) { }
const_iterator(const iterator &__i) : _M_data(__i._M_data) { }
const_iterator operator++()
{ return const_iterator(_M_data = _M_data->next); }
const_iterator operator++(int)
{
const_iterator __ret(_M_data);
_M_data = _M_data->next;
return __ret;
}
bool operator==(const const_iterator &__it) const
{ return _M_data == __it._M_data; }
bool operator!=(const const_iterator &__it) const
{ return _M_data != __it._M_data; }
const _Value& operator*()
{ return _M_data->value; }
const _Value* operator->()
{ return &_M_data->value; }
};
public:
typedef unsigned long size_type;
public:
HashTable()
: _M_capacity(0), _M_count(0), _M_head(null), _M_tail(null), _M_data(null)
{ _M_rehash(1, 0); }
HashTable(size_type __bkts)
: _M_capacity(0), _M_count(0), _M_head(null), _M_tail(null), _M_data(null)
{ _M_rehash(__bkts, 0); }
HashTable(const HashTable &__ht)
: _M_capacity(0), _M_count(0), _M_head(null), _M_tail(null), _M_data(null)
{ this->operator=(__ht); }
#if __cplusplus >= 201103L
HashTable(HashTable &&__ht)
: _M_capacity(0), _M_count(0), _M_head(null), _M_tail(null), _M_data(null)
{ swap(__ht); }
#endif
~HashTable()
{
clear();
delete[] _M_data;
}
void clear()
{
if(_M_count == 0)
{
return;
}
for(size_type __i = 0; __i < _M_capacity; ++__i)
{
if(_M_data[__i])
{
delete _M_data[__i];
_M_data[__i] = null;
}
}
_M_count = 0;
_M_head = _M_tail = null;
}
bool empty() const
{ return size() == 0; }
size_type size() const
{ return _M_count; }
size_type max_size() const
{ return _M_capacity; }
size_type bucket_count() const
{ return max_size(); }
size_type bucket_size(size_type) const
{ return 1; }
iterator begin()
{ return iterator(_M_head); }
const_iterator begin() const
{ return const_iterator(_M_head); }
const_iterator cbegin() const
{ return begin(); }
iterator end()
{ return iterator(null); }
const_iterator end() const
{ return const_iterator(null); }
const_iterator cend() const
{ return const_iterator(null); }
std::pair<iterator, bool> insert(const _Key &__k, const _Value &__v)
{ return _M_insert(__k, __v); }
std::pair<iterator, bool> insert(const std::pair<_Key, _Value> &__p)
{ return _M_insert(__p.first, __p.second); }
const_iterator find(const _Key &__k) const
{ return _M_find_node(__k); }
iterator find(const _Key &__k)
{ return iterator(const_cast<NodeType *>(_M_find_node(__k)._M_data)); }
void rehash(size_type __s)
{ _M_rehash(__s, 0); }
_Value& at(const _Key &__k)
{ return _M_find_and_insert(__k)._M_data->value; }
_Value& operator[](const _Key &__k)
{ return at(__k); }
void erase(const _Key &__k)
{
const_iterator __beg = _M_find_node(__k);
const_iterator __end = __beg;
_M_erase(__beg, ++__end);
}
void erase(const_iterator __beg, const_iterator __end)
{ _M_erase(__beg, __end); }
void swap(HashTable &__ht)
{
std::swap<size_type>(_M_capacity, __ht._M_capacity);
std::swap<size_type>(_M_count, __ht._M_count);
std::swap<NodeType *>(_M_head, __ht._M_head);
std::swap<NodeType *>(_M_tail, __ht._M_tail);
std::swap<NodeType **>(_M_data, __ht._M_data);
}
HashTable& operator=(const HashTable &__ht)
{
clear();
rehash(__ht._M_capacity);
for(size_type __i = 0; __i < __ht._M_capacity; ++__i)
{
NodeType *__node = __ht._M_data[__i];
if(__node)
{
_M_insert(_M_data[__i] = new NodeType(__node->key, __node->value));
}
}
return *this;
}
private:
std::pair<iterator, bool> _M_insert(const _Key &__k, const _Value &__v)
{
if(_M_capacity - _M_count <= 1)
{
_M_rehash(_M_capacity * 2);
}
_Hash __hash;
HashType __code = __hash(__k);
_EqualTo __e;
size_type __index = _S_bucket_index(__code, _M_capacity);
NodeType *__node = null;
while(_M_data[__index])
{
__node = _M_data[__index];
if(__e(__node->key, __k))
{
__node->value = __v;
return std::pair<iterator, bool>(iterator(__node), false);
}
__index = _M_inc(__index, _M_capacity);
}
__node = new NodeType(__k, __v);
_M_data[__index] = __node;
return std::pair<iterator, bool>(_M_insert(__node), true);
}
iterator _M_find_and_insert(const _Key &__k)
{
const_iterator __f = _M_find_node(__k);
return __f == cend()
? _M_insert(__k, _Value()).first
: iterator(const_cast<NodeType *>(__f._M_data));
}
static CXX14_CONSTEXPR size_type _M_inc(size_type __s, size_type __max)
{ return ++__s >= __max ? 0 : __s; }
static CXX14_CONSTEXPR size_type _S_bucket_index(size_type __code, size_type __bkt_count)
{ return __code % __bkt_count; }
static CXX14_CONSTEXPR size_type _S_clp2(size_type __n)
{
size_type __x = __n;
__x = __x - 1;
__x = __x | (__x >> 1);
__x = __x | (__x >> 2);
__x = __x | (__x >> 4);
__x = __x | (__x >> 8);
__x = __x | (__x >> 16);
if CXX17_CONSTEXPR (sizeof(size_type) >= 8)
{
__x = __x | (__x >> 32);
}
return __x + 1;
}
static CXX14_CONSTEXPR bool _S_is_prime(size_type __n)
{
if (__n <= 3)
{
return __n > 1;
}
if (__n % 6 != 1 && __n % 6 != 5)
{
return false;
}
size_type __s = static_cast<size_type>(::sqrt(double(__n)));
for (size_type __i = 5; __i <= __s; __i += 6)
{
if (__n % __i == 0 || __n % (__i + 2) == 0)
{
return false;
}
}
return true;
}
static CXX14_CONSTEXPR size_type _S_next_bucket(size_type __n)
{
if(__n <= 1)
{
return 1;
}
if(__n == 2 || __n == 3)
{
return __n;
}
__n = _S_clp2(__n) - 1;
while(!_S_is_prime(__n))
{ __n -= 2; }
return __n;
}
void _M_rehash(size_type __bkts, size_type __its = 1)
{
__bkts = _S_next_bucket(__bkts);
while(_M_count + __its > __bkts)
{
__bkts = _S_next_bucket(__bkts << 1);
}
NodeType **__tmp = new NodeType*[__bkts];
for(size_type i = 0; i < __bkts; ++i)
{
__tmp[i] = null;
}
_Hash __hash;
for(iterator __it = begin(); __it != end(); ++__it)
{
NodeType *__node = __it._M_data;
size_type __index = _S_bucket_index(__hash(__node->key), __bkts);
while(__tmp[__index])
{
__index = _M_inc(__index, __bkts);
}
__tmp[__index] = __node;
}
delete[] _M_data;
_M_data = __tmp;
_M_capacity = __bkts;
}
const_iterator _M_find_node(const _Key &key) const
{
_Hash __hash;
HashType __code = __hash(key);
_EqualTo __e;
NodeType *__node = null;
size_type __index = _S_bucket_index(__code, _M_capacity);
while(_M_data[__index])
{
__node = _M_data[__index];
if(__e(key, __node->key))
{
return const_iterator(__node);
}
__index = _M_inc(__index, _M_capacity);
}
return const_iterator(null);
}
iterator _M_insert(NodeType *__n)
{
if(!_M_head)
{
_M_head = _M_tail = __n;
}
else
{
_M_tail->next = __n;
__n->prev = _M_tail;
_M_tail = __n;
}
++_M_count;
return iterator(__n);
}
void _M_erase(const_iterator __beg, const_iterator __end)
{
while(__beg != __end)
{
_M_erase((__beg++)._M_data);
}
}
void _M_erase(const NodeType *__node)
{
if(!__node)
{
return;
}
_Hash __hash;
HashType __code = __hash(__node->key);
_EqualTo __e;
size_type __index = _S_bucket_index(__code, _M_capacity);
while(_M_data[__index])
{
const NodeType *__n = _M_data[__index];
if(__e(__n->key, __node->key))
{
_M_data[__index] = null;
break;
}
__index = _M_inc(__index, _M_capacity);
}
NodeType *__p = __node->prev;
NodeType *__n = __node->next;
if(__node == _M_head)
{
_M_head = __n;
_M_head->prev = null;
}
if(__node == _M_tail)
{
_M_tail = __p;
_M_tail->next = null;
}
else
{
__p->next = __n;
__n->prev = __p;
}
delete __node;
--_M_count;
}
private:
size_type _M_capacity;
size_type _M_count;
NodeType *_M_head;
NodeType *_M_tail;
NodeType **_M_data;
};
#ifdef __GNUC__
#pragma GCC diagnostic pop
#elif defined(Clang)
#pragma clang diagnostic pop
#elif defined(_MSC_VER)
#pragma warning(pop)
#endif
#endif // HASH_TABLE_H
头文件:HashMap.h
#ifndef HASH_MAP_H
#define HASH_MAP_H
#include "HashTable.h"
template<typename _Key,
typename _Value,
typename _Hash = Hash<_Key>,
typename _EqualTo = Equal<_Key>
>
class HashMap
{
private:
typedef HashTable<_Key, std::pair<_Key, _Value>, _Hash, _EqualTo> _HashTable;
public:
typedef typename _HashTable::iterator iterator;
typedef typename _HashTable::const_iterator const_iterator;
public:
typedef _Key key_type;
typedef _Value mapped_type;
typedef std::pair<key_type, mapped_type> value_type;
typedef typename _HashTable::size_type size_type;
public:
HashMap() {}
HashMap(const HashMap &__m)
: _M_impl(__m._M_impl) { }
#if __cplusplus >= 201103L
HashMap(HashMap &&__m)
: _M_impl(std::move(__m._M_impl)) { }
#endif
_Value& at(const _Key &__k)
{ return _M_impl.at(__k).second; }
_Value& operator[](const _Key &__k)
{ return _M_impl.at(__k).second; }
std::pair<iterator, bool> insert(const _Key &__k, const _Value &__v)
{ return _M_impl.insert(__k, std::pair<_Key, _Value>(__k, __v)); }
std::pair<iterator, bool> insert(const std::pair<_Key, _Value> &__p)
{ return _M_impl.insert(__p.first, __p); }
void erase(const_iterator __i)
{
const_iterator __e = __i;
_M_impl.erase(__i, ++__e);
}
void erase(iterator __i)
{
const_iterator __e = __i;
_M_impl.erase(__i, ++__e);
}
void erase(const_iterator __b, const_iterator __e)
{ _M_impl.erase(__b, __e); }
void erase(const _Key &__k)
{ _M_impl.erase(__k); }
size_type max_size() const
{ return _M_impl.max_size(); }
void rehash(size_type __s)
{ _M_impl.rehash(__s); }
size_type size() const
{ return _M_impl.size(); }
size_type count(const _Key &__k) const
{ return static_cast<size_type>(contains(__k)); }
size_type bucket_count() const
{ return _M_impl.bucket_count(); }
void swap(HashMap &__m)
{ _M_impl.swap(__m._M_impl); }
bool empty() const
{ return _M_impl.empty(); }
iterator find(const _Key &__k)
{ return _M_impl.find(__k); }
const_iterator find(const _Key &__k) const
{ return _M_impl.find(__k); }
bool contains(const _Key &__k) const
{ return find(__k) == end(); }
iterator begin()
{ return _M_impl.begin(); }
const_iterator begin() const
{ return _M_impl.begin(); }
const_iterator cbegin() const
{ return _M_impl.cbegin(); }
iterator end()
{ return _M_impl.end(); }
const_iterator end() const
{ return _M_impl.end(); }
const_iterator cend() const
{ return _M_impl.cend(); }
void clear()
{ _M_impl.clear(); }
private:
_HashTable _M_impl;
};
#endif // HASH_MAP_H
头文件:HashSet.h
#ifndef HASH_SET_H
#define HASH_SET_H
#include "HashTable.h"
template<typename _Key,
typename _Hash = Hash<_Key>,
typename _EqualTo = Equal<_Key>
>
class HashSet
{
private:
typedef HashTable<_Key, _Key, _Hash, _EqualTo> _HashTable;
public:
typedef typename _HashTable::const_iterator iterator;
typedef typename _HashTable::const_iterator const_iterator;
public:
typedef _Key key_type;
typedef _Key value_type;
typedef typename _HashTable::size_type size_type;
public:
std::pair<iterator, bool> insert(const _Key &__k)
{ return _M_impl.insert(__k, __k); }
void erase(const_iterator __i)
{
const_iterator __e = __i;
_M_impl.erase(__i, ++__e);
}
void erase(const_iterator __b, const_iterator __e)
{ _M_impl.erase(__b, __e); }
void erase(const _Key &__k)
{ _M_impl.erase(__k); }
size_type max_size() const
{ return _M_impl.max_size(); }
void rehash(size_type __s)
{ _M_impl.rehash(__s); }
size_type size() const
{ return _M_impl.size(); }
size_type count(const _Key &__k) const
{ return static_cast<size_type>(contains(__k)); }
size_type bucket_count() const
{ return _M_impl.bucket_count(); }
void swap(HashSet &__m)
{ _M_impl.swap(__m._M_impl); }
bool empty() const
{ return _M_impl.empty(); }
iterator find(const _Key &__k)
{ return _M_impl.find(__k); }
const_iterator find(const _Key &__k) const
{ return _M_impl.find(__k); }
bool contains(const _Key &__k) const
{ return find(__k) != end(); }
iterator begin()
{ return _M_impl.begin(); }
const_iterator begin() const
{ return _M_impl.begin(); }
const_iterator cbegin() const
{ return _M_impl.cbegin(); }
iterator end()
{ return _M_impl.end(); }
const_iterator end() const
{ return _M_impl.end(); }
const_iterator cend() const
{ return _M_impl.cend(); }
void clear()
{ _M_impl.clear(); }
private:
_HashTable _M_impl;
};
#endif // HASH_SET_H
4. 测试代码
源码:
#include <iostream>
#include "HashMap.h"
#include "HashSet.h"
struct A
{
int v;
A(int vv = 0) : v(vv)
{ }
friend std::ostream& operator<<(std::ostream &os, const A &a)
{
os << a.v;
return os;
}
bool operator==(const A &a) const
{ return v == a.v; }
};
struct A_Hash
{
HashType operator()(const A &a) const
{ return static_cast<HashType>(a.v); }
};
typedef HashMap<int, A, A_Hash> TestHashMap;
typedef HashSet<A, A_Hash> TestHashSet;
static void print(const TestHashMap &a)
{
for(typename TestHashMap::const_iterator it = a.begin(); it != a.end(); ++it)
{
std::cout << it->second.v << ' ';
}
std::cout << std::endl;
}
static void print(const TestHashSet &a)
{
for(typename TestHashSet::const_iterator it = a.begin(); it != a.end(); ++it)
{
std::cout << *it << ' ';
}
std::cout << std::endl;
}
static void print_insert(TestHashMap &a, int k, const A &v)
{
static int index = 0;
std::cout << "---------------insert node: " << ++index << "-------------------" << std::endl;
std::cout << "node: " << k << "<--->" << v << std::endl;
a.insert(std::make_pair(k, v));
std::cout << "bkt size : " << a.bucket_count() << std::endl;
print(a);
}
static void test_hashmap()
{
std::cout << "******************** test_hashmap ***********************" << std::endl;
TestHashMap a;
print_insert(a, 1, A(10));
print_insert(a, 3, A(30));
print_insert(a, 2, A(20));
print_insert(a, 7, A(70));
print_insert(a, 0, A(0));
print_insert(a, 4, A(4));
print_insert(a, 8, A(80));
a[20] = A(200);
std::cout << "------------------find node----------------" << std::endl;
TestHashMap::iterator found = a.find(0);
if(found != a.end())
{
std::cout << "0 is exists! its value: " << found->second << std::endl;
}
found = a.find(100);
if(found == a.end())
{
std::cout << "100 is not exists!!" << std::endl;
}
std::cout << "--------------remove node: 2--20" << std::endl;
a.erase(a.find(2));
print(a);
}
static void test_hashset()
{
std::cout << "******************** test_hashset ***********************" << std::endl;
TestHashSet a;
a.insert(A(10));
a.insert(A(-1));
a.insert(A(1));
a.insert(A(8));
a.insert(A(100));
std::cout << "node list: ";
print(a);
std::cout << "node 10 is exists? " << std::boolalpha << a.contains(10) << std::endl;
std::cout << "---------- remove node : -1 -----------------" << std::endl;
TestHashSet::const_iterator found = a.find(A(-1));
if(found != a.end())
{
a.erase(found);
}
std::cout << "node list: ";
print(a);
}
int main()
{
test_hashmap();
test_hashset();
return 0;
}
输出结果:
5. 结尾
如有问题,欢迎指出!