STL复习之——hashtable

1、二叉搜索树具有对数平均时间的表现，但这样的表现依赖于输入数据具有足够的随机性。hashtable在插入、删除、搜寻等操作上也具有“常数平均时间”的表现，而且这种表现是以统计为基础，不依赖于输入数据的随机性。

2、通过hash函数计算元素的存储位置时，不同的元素可能被映射到同一位置，从而引起冲突（碰撞）。解决冲突的方法主要有以下三种：

一、线性探测法；

二、二次探测法；

三、开链法。

下图是以开链法完成hashtable的图形描述，称hashtable表格内的元素为桶（bucket）。意思是表格内的每个单元并不是只用来存放一个元素，而是用来存放一桶（bucket）元素的。

<1>hashtable中结点的定义：

  // Node of the singly linked chain that makes up one hashtable bucket.
  template <typename _Val>
  struct _Hashtable_node
  {
    _Hashtable_node* _M_next;  // following node in the same chain
    _Val             _M_val;   // element stored in this node
  };

<2>hashtable迭代器定义。

  template<class _Val, class _Key, class _HashFcn,
	   class _ExtractKey, class _EqualKey, class _Alloc>
    struct _Hashtable_iterator
    {
      typedef hashtable<_Val, _Key, _HashFcn, _ExtractKey, _EqualKey, _Alloc>
        _Hashtable;
      typedef _Hashtable_iterator<_Val, _Key, _HashFcn,
				  _ExtractKey, _EqualKey, _Alloc>
        iterator;
      typedef _Hashtable_const_iterator<_Val, _Key, _HashFcn,
					_ExtractKey, _EqualKey, _Alloc>
        const_iterator;
      typedef _Hashtable_node<_Val> _Node;
      typedef forward_iterator_tag iterator_category;
      typedef _Val value_type;
      typedef ptrdiff_t difference_type;
      typedef size_t size_type;
      typedef _Val& reference;
      typedef _Val* pointer;
      
      _Node* _M_cur;       //迭代器目前所指向的结点
      _Hashtable* _M_ht;   //指向hashtable的指针，以保持与hashtable的联系，因为可能从一个“桶”调到另一个“桶”

      _Hashtable_iterator(_Node* __n, _Hashtable* __tab)
      : _M_cur(__n), _M_ht(__tab) { }

      _Hashtable_iterator() { }

      reference
      operator*() const
      { return _M_cur->_M_val; }

      pointer
      operator->() const
      { return &(operator*()); }

      iterator&
      operator++();

      iterator
      operator++(int);

      bool
      operator==(const iterator& __it) const
      { return _M_cur == __it._M_cur; }

      bool
      operator!=(const iterator& __it) const
      { return _M_cur != __it._M_cur; }
    };

<3>迭代器的自增操作：

  // Pre-increment: moves to the next node of the current chain, or, when the
  // chain is exhausted, to the head of the first later bucket that is not
  // empty. If no such bucket exists _M_cur ends up null.
  // Returns a reference to this iterator.
  template<class _Val, class _Key, class _HF, class _ExK, class _EqK,
           class _All>
    _Hashtable_iterator<_Val, _Key, _HF, _ExK, _EqK, _All>&
    _Hashtable_iterator<_Val, _Key, _HF, _ExK, _EqK, _All>::
    operator++()
    {
      const _Node* const __prev = _M_cur;   // node we are leaving
      _M_cur = __prev->_M_next;
      if (_M_cur)                           // still inside the same chain
        return *this;

      // The chain ended: start from the bucket that held the previous node
      // and scan the following buckets in order.
      const size_type __home = _M_ht->_M_bkt_num(__prev->_M_val);
      const size_type __count = _M_ht->_M_buckets.size();
      for (size_type __idx = __home + 1; __idx < __count; ++__idx)
        {
          _M_cur = _M_ht->_M_buckets[__idx];  // head node of bucket __idx
          if (_M_cur)
            break;
        }
      return *this;
    }

<4>hashtable的数据结构：

  // Hash table built on separate chaining: a vector of bucket head pointers,
  // each bucket being a chain of _Hashtable_node<_Val>.
  //   _Val        - type of the stored elements
  //   _Key        - type of the key
  //   _HashFcn    - hash function object type
  //   _ExtractKey - function object type stored in _M_get_key
  //   _EqualKey   - key equality predicate type
  //   _Alloc      - allocator, rebound below for nodes and node pointers
  template<class _Val, class _Key, class _HashFcn,
	   class _ExtractKey, class _EqualKey, class _Alloc>
    class hashtable
    {
    public:
      typedef _Key key_type;
      typedef _Val value_type;
      typedef _HashFcn hasher;
      typedef _EqualKey key_equal;

      typedef size_t            size_type;
      typedef ptrdiff_t         difference_type;
      typedef value_type*       pointer;
      typedef const value_type* const_pointer;
      typedef value_type&       reference;
      typedef const value_type& const_reference;

      // Returns a copy of the stored hash function object.
      hasher
      hash_funct() const
      { return _M_hash; }

      // Returns a copy of the stored key equality predicate.
      key_equal
      key_eq() const
      { return _M_equals; }

    private:
      typedef _Hashtable_node<_Val> _Node;

    public:
      typedef typename _Alloc::template rebind<value_type>::other allocator_type;
      // Returns an allocator_type constructed from the node allocator.
      allocator_type
      get_allocator() const
      { return _M_node_allocator; }

    private:
      typedef typename _Alloc::template rebind<_Node>::other _Node_Alloc;
      typedef typename _Alloc::template rebind<_Node*>::other _Nodeptr_Alloc;
      typedef vector<_Node*, _Nodeptr_Alloc> _Vector_type;     // a std::vector is used as the collection of buckets

      _Node_Alloc _M_node_allocator;

      // Obtains storage for exactly one node from the node allocator.
      _Node*
      _M_get_node()
      { return _M_node_allocator.allocate(1); }

      // Hands the storage of one node back to the node allocator.
      void
      _M_put_node(_Node* __p)
      { _M_node_allocator.deallocate(__p, 1); }

    private:
      hasher                _M_hash;          // hash function object
      key_equal             _M_equals;        // key equality predicate
      _ExtractKey           _M_get_key;       // function object of type _ExtractKey
      _Vector_type          _M_buckets;       // one chain head pointer per bucket
      size_type             _M_num_elements;  // element counter of type size_type

<5>虽然开链法并不要求表格的大小为质数，但g++ stl仍以质数来设计表格的大小，并且先将29个质数（后一个质数大约是前一个质数的两倍）存放在一个静态数组里面，以备随时访问；同时，提供一个以unsigned long类型为参数（记这个参数为n）的帮助函数，用来找出这29个质数中大于等于n的最小质数；若n比表中最大的质数还大，则返回表中最大的质数。

  // Note: assumes long is at least 32 bits.
  // Number of entries in the prime table defined below.
  enum { _S_num_primes = 29 };

  // Holder of the table of primes from which bucket counts are chosen.
  template <typename _PrimeType>
  struct _Hashtable_prime_list
  {
    // Returns a pointer to the first entry of the prime table.
    static const _PrimeType* _S_get_prime_list();

    // The table itself: _S_num_primes primes in ascending order.
    static const _PrimeType __stl_prime_list[_S_num_primes];
  };

  template <typename _PrimeType>
  const _PrimeType
  _Hashtable_prime_list<_PrimeType>::__stl_prime_list[_S_num_primes] =
  {
    5ul,         53ul,        97ul,         193ul,
    389ul,       769ul,       1543ul,       3079ul,
    6151ul,      12289ul,     24593ul,      49157ul,
    98317ul,     196613ul,    393241ul,     786433ul,
    1572869ul,   3145739ul,   6291469ul,    12582917ul,
    25165843ul,  50331653ul,  100663319ul,  201326611ul,
    402653189ul, 805306457ul, 1610612741ul, 3221225473ul,
    4294967291ul
  };

  template <typename _PrimeType>
  inline const _PrimeType*
  _Hashtable_prime_list<_PrimeType>::_S_get_prime_list()
  {
    return &__stl_prime_list[0];
  }

  // Returns the smallest prime of the table that is not less than __n.
  // When __n exceeds every entry, the last (largest) entry is returned.
  inline unsigned long
  __stl_next_prime(unsigned long __n)
  {
    const unsigned long* const __begin =
      _Hashtable_prime_list<unsigned long>::_S_get_prime_list();
    const unsigned long* const __end = __begin + (int)_S_num_primes;

    // First entry in [__begin, __end) that is >= __n.
    const unsigned long* __hit = std::lower_bound(__begin, __end, __n);
    if (__hit == __end)
      --__hit;                 // nothing large enough: fall back to the last entry
    return *__hit;
  }

      // Returns the last entry of the prime table, i.e. the largest prime
      // it contains, as the upper limit for the number of buckets.
      size_type
      max_bucket_count() const
      {
        const unsigned long* const __primes =
          _Hashtable_prime_list<unsigned long>::_S_get_prime_list();
        return __primes[(int)_S_num_primes - 1];
      }

<6>、插入操作与表格调整。

6-1、元素的插入操作：

      // Inserts __obj unless an element with an equal key is already stored.
      // resize() is called first with the element count the table would
      // have after the insertion; the insertion itself is delegated to
      // insert_unique_noresize(), whose result is returned unchanged.
      pair<iterator, bool> insert_unique(const value_type& __obj)
      {
        const size_type __wanted = _M_num_elements + 1;
        resize(__wanted);
        return insert_unique_noresize(__obj);
      }

Step1：尝试重置“桶”的个数：

  // Grows the bucket vector when __num_elements_hint exceeds the current
  // number of buckets. The new bucket count comes from _M_next_size(); every
  // existing node is relinked (not copied) into a temporary bucket vector,
  // which is then swapped with _M_buckets.
  // If an exception escapes while relinking, the nodes already moved into
  // the temporary vector are deleted and the exception is rethrown.
  template<class _Val, class _Key, class _HF, class _Ex, class _Eq, class _All>
    void
    hashtable<_Val, _Key, _HF, _Ex, _Eq, _All>::
    resize(size_type __num_elements_hint)
    {
      const size_type __old_n = _M_buckets.size();  // current number of buckets
      if (__num_elements_hint > __old_n)            // more elements requested than buckets
        {
          // Candidate bucket count for the requested number of elements.
          const size_type __n = _M_next_size(__num_elements_hint);
          if (__n > __old_n)                        // the table really has to grow
            {
              // New bucket vector with __n entries, all initially null.
              _Vector_type __tmp(__n, (_Node*)(0), _M_buckets.get_allocator());
              __try
                {
                  for (size_type __bucket = 0; __bucket < __old_n; ++__bucket)
                    {
                      _Node* __first = _M_buckets[__bucket];  // head of the old chain
                      while (__first)
                        {
                          // Bucket index of this node in a table of __n buckets.
                          size_type __new_bucket = _M_bkt_num(__first->_M_val,
                                                              __n);
                          // Unlink the node from the head of the old chain ...
                          _M_buckets[__bucket] = __first->_M_next;
                          // ... and push it onto the front of its new chain.
                          __first->_M_next = __tmp[__new_bucket];
                          __tmp[__new_bucket] = __first;
                          __first = _M_buckets[__bucket];
                        }
                    }
                  _M_buckets.swap(__tmp);  // install the new bucket vector
                }
              __catch(...)
                {
                  // Delete every node that was already moved into __tmp.
                  for (size_type __bucket = 0; __bucket < __tmp.size();
                       ++__bucket)
                    {
                      while (__tmp[__bucket])
                        {
                          _Node* __next = __tmp[__bucket]->_M_next;
                          _M_delete_node(__tmp[__bucket]);
                          __tmp[__bucket] = __next;
                        }
                    }
                  __throw_exception_again;
                }
            }
        }
    }

Step2:

在不需要调整hashtable格子的个数的情形下，插入元素：

情形1：不允许有相同的元素

  // Inserts __obj without changing the number of buckets, rejecting
  // duplicates. Returns a pair of an iterator and a flag:
  //   - (iterator to the existing element, false) when the bucket already
  //     holds an element whose key compares equal to that of __obj;
  //   - (iterator to the new element, true) otherwise.
  template<class _Val, class _Key, class _HF, class _Ex, class _Eq, class _All>
    pair<typename hashtable<_Val, _Key, _HF, _Ex, _Eq, _All>::iterator, bool>
    hashtable<_Val, _Key, _HF, _Ex, _Eq, _All>::
    insert_unique_noresize(const value_type& __obj)
    {
      const size_type __idx = _M_bkt_num(__obj);   // bucket that __obj maps to
      _Node* const __head = _M_buckets[__idx];     // first node of that bucket

      // Walk the chain; an element with an equal key makes the insertion fail.
      _Node* __scan = __head;
      while (__scan)
        {
          if (_M_equals(_M_get_key(__scan->_M_val), _M_get_key(__obj)))
            return pair<iterator, bool>(iterator(__scan, this), false);
          __scan = __scan->_M_next;
        }

      // No equal key found: create a node, link it in as the new chain head
      // and bump the element count.
      _Node* __fresh = _M_new_node(__obj);
      __fresh->_M_next = __head;
      _M_buckets[__idx] = __fresh;
      ++_M_num_elements;
      return pair<iterator, bool>(iterator(__fresh, this), true);
    }

情形2：允许有相同的元素

  // Inserts __obj without changing the number of buckets; duplicates are
  // allowed, so a new node is always created. Returns an iterator to it.
  // When the bucket already holds an element with an equal key, the new
  // node is linked directly after the first such element; otherwise it
  // becomes the new head of the bucket chain.
  template<class _Val, class _Key, class _HF, class _Ex, class _Eq, class _All>
    typename hashtable<_Val, _Key, _HF, _Ex, _Eq, _All>::iterator
    hashtable<_Val, _Key, _HF, _Ex, _Eq, _All>::
    insert_equal_noresize(const value_type& __obj)
    {
      const size_type __idx = _M_bkt_num(__obj);   // bucket that __obj maps to
      _Node* const __head = _M_buckets[__idx];     // first node of that bucket

      // Find the first node whose key equals that of __obj (null if none).
      _Node* __match = __head;
      while (__match
             && !_M_equals(_M_get_key(__match->_M_val), _M_get_key(__obj)))
        __match = __match->_M_next;

      _Node* __fresh = _M_new_node(__obj);
      if (__match)
        {
          // Splice the new node in right behind the matching element.
          __fresh->_M_next = __match->_M_next;
          __match->_M_next = __fresh;
        }
      else
        {
          // No equal key in this bucket: insert at the head of the chain.
          __fresh->_M_next = __head;
          _M_buckets[__idx] = __fresh;
        }
      ++_M_num_elements;
      return iterator(__fresh, this);
    }

STL复习之——hashtable

猜你喜欢