欢迎您访问 最编程 本站为您分享编程语言代码,编程技术文章!
您现在的位置是: 首页

std::unordered_map

最编程 2024-07-30 15:03:25
...

std::unordered_map

版本XcodeDefault.xctoolchain/usr/include/c++/v1

1:unorderd_map typedef

 1 template <class _Key, class _Tp, class _Hash = hash<_Key>, class _Pred = equal_to<_Key>,
 2           class _Alloc = allocator<pair<const _Key, _Tp> > >
 3 class _LIBCPP_TEMPLATE_VIS unordered_map
 4 {
 5 public:
 6     // types
 7     typedef _Key                                           key_type;
 8     typedef _Tp                                            mapped_type;
 9     typedef _Hash                                          hasher;
10     typedef _Pred                                          key_equal;
11     typedef _Alloc                                         allocator_type;
12     typedef pair<const key_type, mapped_type>              value_type;
13     typedef value_type&                                    reference;
14     typedef const value_type&                              const_reference;
15     static_assert((is_same<value_type, typename allocator_type::value_type>::value),
16                   "Invalid allocator::value_type");
17 
18 private:
19     typedef __hash_value_type<key_type, mapped_type>                 __value_type;
20     typedef __unordered_map_hasher<key_type, __value_type, hasher>   __hasher;
21     typedef __unordered_map_equal<key_type, __value_type, key_equal> __key_equal;
22     typedef typename __rebind_alloc_helper<allocator_traits<allocator_type>,
23                                                  __value_type>::type __allocator_type;
24 
25     typedef __hash_table<__value_type, __hasher,
26                          __key_equal,  __allocator_type>   __table;
27 
28     __table __table_;
29 
30     ......
31       
32 }

例子:typedef std::unordered_map<std::string, int>  UM_STR_INT;

    模板参数:

  • key_type -> _Key -> std::string
  • mapped_type -> _Tp -> int
  • hasher - > _Hash = hash<_Key>   -> hash<std::string> 
  • key_equal -> _Pred = equal_to<_Key>  -> equal_to<std::string>
  • _Alloc = allocator<pair<const _Key, _Tp> > > -> allocator<pair<const std::string, int> > 

unorderd_map内部持有__hash_table对象,std::unordered_map<std::string, int>特化模板的_hash_table类型应该是

__hash_table<

pair<const std::string, int>,

hash<std::string>,

equal_to<std::string>,

allocator<pair<const std::string, int> >

>

 1 template <class _Tp, class _Hash, class _Equal, class _Alloc>
 2 class __hash_table
 3 {
 4 public:
 5     typedef _Tp    value_type;
 6     typedef _Hash  hasher;
 7     typedef _Equal key_equal;
 8     typedef _Alloc allocator_type;
 9 
10 private:
11     typedef unique_ptr<__next_pointer[], __bucket_list_deleter> __bucket_list;
12     // --- Member data begin ---
13     __bucket_list                                         __bucket_list_;
14     __compressed_pair<__first_node, __node_allocator>     __p1_;
15     __compressed_pair<size_type, hasher>                  __p2_;
16     __compressed_pair<float, key_equal>                   __p3_;
17     // --- Member data end ---
18     
19    ......
20   
21 }

__hash_table内部持有4个局部变量,

  • __bucket_list_,__next_pointer数组,储存插入节点node,内部含有多个bucket(node节点的集合),以node节点的形式链式组织
  • __p1_,head node  --  node分配器;
  • __p2_,node总数量 -- hash key size_t计算器;每成功插入一个node,node总数量+1
  • __p3_,负载因子 -- 数据比较器;负载因子调整bucket的数量(rehash方法),数据比较器用于比较参数和bucket node中_Key是否相同(因为是bucket是链式储存,在hash key sizt_t到bucket index之后,会从bucket的头node开始,逐一比较node是否和参数相同)

  模板推导出类型后,就可以得知unorder map的几个关键要点

  • __p2_->second, hash<std::string>,提供string到hash key sizt_t的计算
  • __bucket_list_,unorderd_map的存储区
  • __p3_->first, 负载因子, rebase,决定
  • hash key sizt_t -> bucket index, __constrain_hash
  • __p3_ -> second, equal_to<std::string>,数据的比较器

2: hash<std::string>

在std::string实现,提供std::string到size_t的 转换 ?

提供operator()操作符,作为计算hash数值的入口方法

 1 template <class _CharT, class _Allocator>
 2 struct _LIBCPP_TEMPLATE_VIS
 3     hash<basic_string<_CharT, char_traits<_CharT>, _Allocator> >
 4     : public unary_function<
 5           basic_string<_CharT, char_traits<_CharT>, _Allocator>, size_t>
 6 {
 7     size_t
 8     operator()(const basic_string<_CharT, char_traits<_CharT>, _Allocator>& __val) const _NOEXCEPT
 9     { return __do_string_hash(__val.data(), __val.data() + __val.size()); }
10 };

hash<std::string>::operator()   调用 __do_string_hash

__do_string_hash 调用 __murmur2_or_cityhash<size_t>::operator(const void* __key, _Size __len)

__murmur2_or_cityhash<size_t>::operator(const void* __key, _Size __len) 按照字符串长度__len,分成若干计算

 1 template <class _Size>
 2 _Size
 3 __murmur2_or_cityhash<_Size, 64>::operator()(const void* __key, _Size __len)
 4 {
 5   const char* __s = static_cast<const char*>(__key);
 6   if (__len <= 32) {
 7     if (__len <= 16) {
 8       return __hash_len_0_to_16(__s, __len);
 9     } else {
10       return __hash_len_17_to_32(__s, __len);
11     }
12   } else if (__len <= 64) {
13     return __hash_len_33_to_64(__s, __len);
14   }
15 
16   // For strings over 64 bytes we hash the end first, and then as we
17   // loop we keep 56 bytes of state: v, w, x, y, and z.
18   _Size __x = __loadword<_Size>(__s + __len - 40);
19   _Size __y = __loadword<_Size>(__s + __len - 16) +
20               __loadword<_Size>(__s + __len - 56);
21   _Size __z = __hash_len_16(__loadword<_Size>(__s + __len - 48) + __len,
22                           __loadword<_Size>(__s + __len - 24));
23   pair<_Size, _Size> __v = __weak_hash_len_32_with_seeds(__s + __len - 64, __len, __z);
24   pair<_Size, _Size> __w = __weak_hash_len_32_with_seeds(__s + __len - 32, __y + __k1, __x);
25   __x = __x * __k1 + __loadword<_Size>(__s);
26 
27   // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
28   __len = (__len - 1) & ~static_cast<_Size>(63);
29   do {
30     __x = __rotate(__x + __y + __v.first + __loadword<_Size>(__s + 8), 37) * __k1;
31     __y = __rotate(__y + __v.second + __loadword<_Size>(__s + 48), 42) * __k1;
32     __x ^= __w.second;
33     __y += __v.first + __loadword<_Size>(__s + 40);
34     __z = __rotate(__z + __w.first, 33) * __k1;
35     __v = __weak_hash_len_32_with_seeds(__s, __v.second * __k1, __x + __w.first);
36     __w = __weak_hash_len_32_with_seeds(__s + 32, __z + __w.second,
37                                         __y + __loadword<_Size>(__s + 16));
38     std::swap(__z, __x);
39     __s += 64;
40     __len -= 64;
41   } while (__len != 0);
42   return __hash_len_16(
43       __hash_len_16(__v.first, __w.first) + __shift_mix(__y) * __k1 + __z,
44       __hash_len_16(__v.second, __w.second) + __x);
45 }

举例,__hash_len_0_to_16

 1 static _Size __hash_len_0_to_16(const char* __s, _Size __len)
 2      _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK
 3   {
 4     if (__len > 8) {
 5       const _Size __a = __loadword<_Size>(__s);
 6       const _Size __b = __loadword<_Size>(__s + __len - 8);
 7       return __hash_len_16(__a, __rotate_by_at_least_1(__b + __len, __len)) ^ __b;
 8     }
 9     if (__len >= 4) {
10       const uint32_t __a = __loadword<uint32_t>(__s);
11       const uint32_t __b = __loadword<uint32_t>(__s + __len - 4);
12       return __hash_len_16(__len + (__a << 3), __b);
13     }
14     if (__len > 0) {
15       const unsigned char __a = __s[0];
16       const unsigned char __b = __s[__len >> 1];
17       const unsigned char __c = __s[__len - 1];
18       const uint32_t __y = static_cast<uint32_t>(__a) +
19                            (static_cast<uint32_t>(__b) << 8);
20       const uint32_t __z = __len + (static_cast<uint32_t>(__c) << 2);
21       return __shift_mix(__y * __k2 ^ __z * __k3) * __k2;
22     }
23     return __k2;
24   }

hash key size_t定位到bucket index的计算方法

1 inline _LIBCPP_INLINE_VISIBILITY
2 size_t
3 __constrain_hash(size_t __h, size_t __bc)
4 {
5     return !(__bc & (__bc - 1)) ? __h & (__bc - 1) :
6         (__h < __bc ? __h : __h % __bc);
7 }

3:

原文地址:https://www.cnblogs.com/hgwang/p/13493045.html