std::unordered_map
std::unordered_map
版本XcodeDefault.xctoolchain/usr/include/c++/v1
1:unorderd_map typedef
1 template <class _Key, class _Tp, class _Hash = hash<_Key>, class _Pred = equal_to<_Key>, 2 class _Alloc = allocator<pair<const _Key, _Tp> > > 3 class _LIBCPP_TEMPLATE_VIS unordered_map 4 { 5 public: 6 // types 7 typedef _Key key_type; 8 typedef _Tp mapped_type; 9 typedef _Hash hasher; 10 typedef _Pred key_equal; 11 typedef _Alloc allocator_type; 12 typedef pair<const key_type, mapped_type> value_type; 13 typedef value_type& reference; 14 typedef const value_type& const_reference; 15 static_assert((is_same<value_type, typename allocator_type::value_type>::value), 16 "Invalid allocator::value_type"); 17 18 private: 19 typedef __hash_value_type<key_type, mapped_type> __value_type; 20 typedef __unordered_map_hasher<key_type, __value_type, hasher> __hasher; 21 typedef __unordered_map_equal<key_type, __value_type, key_equal> __key_equal; 22 typedef typename __rebind_alloc_helper<allocator_traits<allocator_type>, 23 __value_type>::type __allocator_type; 24 25 typedef __hash_table<__value_type, __hasher, 26 __key_equal, __allocator_type> __table; 27 28 __table __table_; 29 30 ...... 31 32 }
例子:typedef std::unordered_map<std::string, int> UM_STR_INT;
模板参数:
- key_type -> _Key -> std::string
- mapped_type -> _Tp -> int
- hasher - > _Hash = hash<_Key> -> hash<std::string>
- key_equal -> _Pred = equal_to<_Key> -> equal_to<std::string>
- _Alloc = allocator<pair<const _Key, _Tp> > > -> allocator<pair<const std::string, int> >
unorderd_map内部持有__hash_table对象,std::unordered_map<std::string, int>特化模板的_hash_table类型应该是
__hash_table<
pair<const std::string, int>,
hash<std::string>,
equal_to<std::string>,
allocator<pair<const std::string, int> >
>
1 template <class _Tp, class _Hash, class _Equal, class _Alloc> 2 class __hash_table 3 { 4 public: 5 typedef _Tp value_type; 6 typedef _Hash hasher; 7 typedef _Equal key_equal; 8 typedef _Alloc allocator_type; 9 10 private: 11 typedef unique_ptr<__next_pointer[], __bucket_list_deleter> __bucket_list; 12 // --- Member data begin --- 13 __bucket_list __bucket_list_; 14 __compressed_pair<__first_node, __node_allocator> __p1_; 15 __compressed_pair<size_type, hasher> __p2_; 16 __compressed_pair<float, key_equal> __p3_; 17 // --- Member data end --- 18 19 ...... 20 21 }
__hash_table内部持有4个局部变量,
- __bucket_list_,__next_pointer数组,储存插入节点node,内部含有多个bucket(node节点的集合),以node节点的形式链式组织
- __p1_,head node -- node分配器;
- __p2_,node总数量 -- hash key size_t计算器;每成功插入一个node,node总数量+1
- __p3_,负载因子 -- 数据比较器;负载因子调整bucket的数量(rehash方法),数据比较器用于比较参数和bucket node中_Key是否相同(因为是bucket是链式储存,在hash key sizt_t到bucket index之后,会从bucket的头node开始,逐一比较node是否和参数相同)
模板推导出类型后,就可以得知unorder map的几个关键要点
- __p2_->second, hash<std::string>,提供string到hash key sizt_t的计算
- __bucket_list_,unorderd_map的存储区
- __p3_->first, 负载因子, rebase,决定
- hash key sizt_t -> bucket index, __constrain_hash
- __p3_ -> second, equal_to<std::string>,数据的比较器
2: hash<std::string>
在std::string实现,提供std::string到size_t的 转换 ?
提供operator()操作符,作为计算hash数值的入口方法
1 template <class _CharT, class _Allocator> 2 struct _LIBCPP_TEMPLATE_VIS 3 hash<basic_string<_CharT, char_traits<_CharT>, _Allocator> > 4 : public unary_function< 5 basic_string<_CharT, char_traits<_CharT>, _Allocator>, size_t> 6 { 7 size_t 8 operator()(const basic_string<_CharT, char_traits<_CharT>, _Allocator>& __val) const _NOEXCEPT 9 { return __do_string_hash(__val.data(), __val.data() + __val.size()); } 10 };
hash<std::string>::operator() 调用 __do_string_hash
__do_string_hash 调用 __murmur2_or_cityhash<size_t>::operator(const void* __key, _Size __len)
__murmur2_or_cityhash<size_t>::operator(const void* __key, _Size __len) 按照字符串长度__len,分成若干计算
1 template <class _Size> 2 _Size 3 __murmur2_or_cityhash<_Size, 64>::operator()(const void* __key, _Size __len) 4 { 5 const char* __s = static_cast<const char*>(__key); 6 if (__len <= 32) { 7 if (__len <= 16) { 8 return __hash_len_0_to_16(__s, __len); 9 } else { 10 return __hash_len_17_to_32(__s, __len); 11 } 12 } else if (__len <= 64) { 13 return __hash_len_33_to_64(__s, __len); 14 } 15 16 // For strings over 64 bytes we hash the end first, and then as we 17 // loop we keep 56 bytes of state: v, w, x, y, and z. 18 _Size __x = __loadword<_Size>(__s + __len - 40); 19 _Size __y = __loadword<_Size>(__s + __len - 16) + 20 __loadword<_Size>(__s + __len - 56); 21 _Size __z = __hash_len_16(__loadword<_Size>(__s + __len - 48) + __len, 22 __loadword<_Size>(__s + __len - 24)); 23 pair<_Size, _Size> __v = __weak_hash_len_32_with_seeds(__s + __len - 64, __len, __z); 24 pair<_Size, _Size> __w = __weak_hash_len_32_with_seeds(__s + __len - 32, __y + __k1, __x); 25 __x = __x * __k1 + __loadword<_Size>(__s); 26 27 // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks. 28 __len = (__len - 1) & ~static_cast<_Size>(63); 29 do { 30 __x = __rotate(__x + __y + __v.first + __loadword<_Size>(__s + 8), 37) * __k1; 31 __y = __rotate(__y + __v.second + __loadword<_Size>(__s + 48), 42) * __k1; 32 __x ^= __w.second; 33 __y += __v.first + __loadword<_Size>(__s + 40); 34 __z = __rotate(__z + __w.first, 33) * __k1; 35 __v = __weak_hash_len_32_with_seeds(__s, __v.second * __k1, __x + __w.first); 36 __w = __weak_hash_len_32_with_seeds(__s + 32, __z + __w.second, 37 __y + __loadword<_Size>(__s + 16)); 38 std::swap(__z, __x); 39 __s += 64; 40 __len -= 64; 41 } while (__len != 0); 42 return __hash_len_16( 43 __hash_len_16(__v.first, __w.first) + __shift_mix(__y) * __k1 + __z, 44 __hash_len_16(__v.second, __w.second) + __x); 45 }
举例,__hash_len_0_to_16
1 static _Size __hash_len_0_to_16(const char* __s, _Size __len) 2 _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK 3 { 4 if (__len > 8) { 5 const _Size __a = __loadword<_Size>(__s); 6 const _Size __b = __loadword<_Size>(__s + __len - 8); 7 return __hash_len_16(__a, __rotate_by_at_least_1(__b + __len, __len)) ^ __b; 8 } 9 if (__len >= 4) { 10 const uint32_t __a = __loadword<uint32_t>(__s); 11 const uint32_t __b = __loadword<uint32_t>(__s + __len - 4); 12 return __hash_len_16(__len + (__a << 3), __b); 13 } 14 if (__len > 0) { 15 const unsigned char __a = __s[0]; 16 const unsigned char __b = __s[__len >> 1]; 17 const unsigned char __c = __s[__len - 1]; 18 const uint32_t __y = static_cast<uint32_t>(__a) + 19 (static_cast<uint32_t>(__b) << 8); 20 const uint32_t __z = __len + (static_cast<uint32_t>(__c) << 2); 21 return __shift_mix(__y * __k2 ^ __z * __k3) * __k2; 22 } 23 return __k2; 24 }
hash key size_t定位到bucket index的计算方法
1 inline _LIBCPP_INLINE_VISIBILITY 2 size_t 3 __constrain_hash(size_t __h, size_t __bc) 4 { 5 return !(__bc & (__bc - 1)) ? __h & (__bc - 1) : 6 (__h < __bc ? __h : __h % __bc); 7 }
3:
原文地址:https://www.cnblogs.com/hgwang/p/13493045.html
上一篇: 从零开始学习Netty:实战教程指南
推荐阅读
-
std::make_pair的Android C++常用功能
-
用 unordered_set 封装 unordered_map
-
C++ | STL | std::priority_queue | 大顶堆与小顶堆实现
-
C/C++ | STL | std::priority_queue for Max and Min Heap
-
C++如何使用std::chrono获取实时时间
-
std::unordered_map
-
[C++]简明讲解std::sort函数的用法与总结
-
在 C++ STL 中理解与运用 std::sort 函数
-
询问在C++中如何对数组运用std::sort进行排序的操作步骤
-
在C++中,找不到将 "std::string" 转换为 "LPCWSTR" 的合适转换方法