commit 38b7e47: [Minor] Update unordered_dense library
Vsevolod Stakhov
vsevolod at rspamd.com
Fri Dec 2 22:49:04 UTC 2022
Author: Vsevolod Stakhov
Date: 2022-12-02 21:19:22 +0000
URL: https://github.com/rspamd/rspamd/commit/38b7e477927c9fe765245eefa33292b9c4972d07
[Minor] Update unordered_dense library
---
contrib/DEPENDENCY_INFO.md | 2 +-
contrib/ankerl/unordered_dense.h | 853 +++++++++++++++++++++++++++------------
2 files changed, 586 insertions(+), 269 deletions(-)
diff --git a/contrib/DEPENDENCY_INFO.md b/contrib/DEPENDENCY_INFO.md
index d4fabd6b9..8220b7444 100644
--- a/contrib/DEPENDENCY_INFO.md
+++ b/contrib/DEPENDENCY_INFO.md
@@ -36,4 +36,4 @@
| doctest | 2.4.6 | MIT | NO | |
| function2 | 4.1.0 | Boost | NO | |
| ankerl/svector | 1.0.2 | MIT | NO | |
-| ankerl/unordered_dense | 1.0.2 | MIT | NO | |
+| ankerl/unordered_dense | 2.0.1 | MIT | NO | |
diff --git a/contrib/ankerl/unordered_dense.h b/contrib/ankerl/unordered_dense.h
index 9ae108173..737d12bf3 100644
--- a/contrib/ankerl/unordered_dense.h
+++ b/contrib/ankerl/unordered_dense.h
@@ -1,7 +1,7 @@
///////////////////////// ankerl::unordered_dense::{map, set} /////////////////////////
// A fast & densely stored hashmap and hashset based on robin-hood backward shift deletion.
-// Version 1.0.2
+// Version 2.0.1
// https://github.com/martinus/unordered_dense
//
// Licensed under the MIT License <http://opensource.org/licenses/MIT>.
@@ -30,14 +30,34 @@
#define ANKERL_UNORDERED_DENSE_H
// see https://semver.org/spec/v2.0.0.html
-#define ANKERL_UNORDERED_DENSE_VERSION_MAJOR 1 // incompatible API changes
-#define ANKERL_UNORDERED_DENSE_VERSION_MINOR 0 // add functionality in a backwards compatible manner
-#define ANKERL_UNORDERED_DENSE_VERSION_PATCH 2 // backwards compatible bug fixes
+#define ANKERL_UNORDERED_DENSE_VERSION_MAJOR 2 // NOLINT(cppcoreguidelines-macro-usage) incompatible API changes
+#define ANKERL_UNORDERED_DENSE_VERSION_MINOR 0 // NOLINT(cppcoreguidelines-macro-usage) backwards compatible functionality
+#define ANKERL_UNORDERED_DENSE_VERSION_PATCH 1 // NOLINT(cppcoreguidelines-macro-usage) backwards compatible bug fixes
+
+// API versioning with inline namespace, see https://www.foonathan.net/2018/11/inline-namespaces/
+#define ANKERL_UNORDERED_DENSE_VERSION_CONCAT1(major, minor, patch) v##major##_##minor##_##patch
+#define ANKERL_UNORDERED_DENSE_VERSION_CONCAT(major, minor, patch) ANKERL_UNORDERED_DENSE_VERSION_CONCAT1(major, minor, patch)
+#define ANKERL_UNORDERED_DENSE_NAMESPACE \
+ ANKERL_UNORDERED_DENSE_VERSION_CONCAT( \
+ ANKERL_UNORDERED_DENSE_VERSION_MAJOR, ANKERL_UNORDERED_DENSE_VERSION_MINOR, ANKERL_UNORDERED_DENSE_VERSION_PATCH)
+
+#if defined(_MSVC_LANG)
+# define ANKERL_UNORDERED_DENSE_CPP_VERSION _MSVC_LANG
+#else
+# define ANKERL_UNORDERED_DENSE_CPP_VERSION __cplusplus
+#endif
-#if __cplusplus < 201703L
+#if defined(__GNUC__)
+// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
+# define ANKERL_UNORDERED_DENSE_PACK(decl) decl __attribute__((__packed__))
+#elif defined(_MSC_VER)
+// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
+# define ANKERL_UNORDERED_DENSE_PACK(decl) __pragma(pack(push, 1)) decl __pragma(pack(pop))
+#endif
+
+#if ANKERL_UNORDERED_DENSE_CPP_VERSION < 201703L
# error ankerl::unordered_dense requires C++17 or higher
#else
-
# include <array> // for array
# include <cstdint> // for uint64_t, uint32_t, uint8_t, UINT64_C
# include <cstring> // for size_t, memcpy, memset
@@ -54,12 +74,12 @@
# include <utility> // for forward, exchange, pair, as_const, piece...
# include <vector> // for vector
-# define ANKERL_UNORDERED_DENSE_PMR 0
+# define ANKERL_UNORDERED_DENSE_PMR 0 // NOLINT(cppcoreguidelines-macro-usage)
# if defined(__has_include)
# if __has_include(<memory_resource>)
# undef ANKERL_UNORDERED_DENSE_PMR
-# define ANKERL_UNORDERED_DENSE_PMR 1
-# include <memory_resource> // for polymorphic_allocator
+# define ANKERL_UNORDERED_DENSE_PMR 1 // NOLINT(cppcoreguidelines-macro-usage)
+# include <memory_resource> // for polymorphic_allocator
# endif
# endif
@@ -69,14 +89,15 @@
# endif
# if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
-# define ANKERL_UNORDERED_DENSE_LIKELY(x) __builtin_expect(x, 1)
-# define ANKERL_UNORDERED_DENSE_UNLIKELY(x) __builtin_expect(x, 0)
+# define ANKERL_UNORDERED_DENSE_LIKELY(x) __builtin_expect(x, 1) // NOLINT(cppcoreguidelines-macro-usage)
+# define ANKERL_UNORDERED_DENSE_UNLIKELY(x) __builtin_expect(x, 0) // NOLINT(cppcoreguidelines-macro-usage)
# else
-# define ANKERL_UNORDERED_DENSE_LIKELY(x) (x)
-# define ANKERL_UNORDERED_DENSE_UNLIKELY(x) (x)
+# define ANKERL_UNORDERED_DENSE_LIKELY(x) (x) // NOLINT(cppcoreguidelines-macro-usage)
+# define ANKERL_UNORDERED_DENSE_UNLIKELY(x) (x) // NOLINT(cppcoreguidelines-macro-usage)
# endif
namespace ankerl::unordered_dense {
+inline namespace ANKERL_UNORDERED_DENSE_NAMESPACE {
// hash ///////////////////////////////////////////////////////////////////////
@@ -123,7 +144,7 @@ static inline void mum(uint64_t* a, uint64_t* b) {
// read functions. WARNING: we don't care about endianness, so results are different on big endian!
[[nodiscard]] static inline auto r8(const uint8_t* p) -> uint64_t {
uint64_t v{};
- std::memcpy(&v, p, 8);
+ std::memcpy(&v, p, 8U);
return v;
}
@@ -138,7 +159,7 @@ static inline void mum(uint64_t* a, uint64_t* b) {
return (static_cast<uint64_t>(p[0]) << 16U) | (static_cast<uint64_t>(p[k >> 1U]) << 8U) | p[k - 1];
}
-[[nodiscard]] static inline auto hash(void const* key, size_t len) -> uint64_t {
+[[maybe_unused]] [[nodiscard]] static inline auto hash(void const* key, size_t len) -> uint64_t {
static constexpr auto secret = std::array{UINT64_C(0xa0761d6478bd642f),
UINT64_C(0xe7037ed1a0b428db),
UINT64_C(0x8ebc6af09c88c6e3),
@@ -192,70 +213,73 @@ static inline void mum(uint64_t* a, uint64_t* b) {
} // namespace detail::wyhash
template <typename T, typename Enable = void>
-struct hash : public std::hash<T> {
- using is_avalanching = void;
+struct hash {
auto operator()(T const& obj) const noexcept(noexcept(std::declval<std::hash<T>>().operator()(std::declval<T const&>())))
- -> size_t {
- return static_cast<size_t>(detail::wyhash::hash(std::hash<T>::operator()(obj)));
+ -> uint64_t {
+ return std::hash<T>{}(obj);
}
};
template <typename CharT>
struct hash<std::basic_string<CharT>> {
using is_avalanching = void;
- auto operator()(std::basic_string<CharT> const& str) const noexcept -> size_t {
- return static_cast<size_t>(detail::wyhash::hash(str.data(), sizeof(CharT) * str.size()));
+ auto operator()(std::basic_string<CharT> const& str) const noexcept -> uint64_t {
+ return detail::wyhash::hash(str.data(), sizeof(CharT) * str.size());
}
};
template <typename CharT>
struct hash<std::basic_string_view<CharT>> {
using is_avalanching = void;
- auto operator()(std::basic_string_view<CharT> const& sv) const noexcept -> size_t {
- return static_cast<size_t>(detail::wyhash::hash(sv.data(), sizeof(CharT) * sv.size()));
+ auto operator()(std::basic_string_view<CharT> const& sv) const noexcept -> uint64_t {
+ return detail::wyhash::hash(sv.data(), sizeof(CharT) * sv.size());
}
};
template <class T>
struct hash<T*> {
using is_avalanching = void;
- auto operator()(T* ptr) const noexcept -> size_t {
- return static_cast<size_t>(detail::wyhash::hash(reinterpret_cast<uintptr_t>(ptr)));
+ auto operator()(T* ptr) const noexcept -> uint64_t {
+ // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
+ return detail::wyhash::hash(reinterpret_cast<uintptr_t>(ptr));
}
};
template <class T>
struct hash<std::unique_ptr<T>> {
using is_avalanching = void;
- auto operator()(std::unique_ptr<T> const& ptr) const noexcept -> size_t {
- return static_cast<size_t>(detail::wyhash::hash(reinterpret_cast<uintptr_t>(ptr.get())));
+ auto operator()(std::unique_ptr<T> const& ptr) const noexcept -> uint64_t {
+ // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
+ return detail::wyhash::hash(reinterpret_cast<uintptr_t>(ptr.get()));
}
};
template <class T>
struct hash<std::shared_ptr<T>> {
using is_avalanching = void;
- auto operator()(std::shared_ptr<T> const& ptr) const noexcept -> size_t {
- return static_cast<size_t>(detail::wyhash::hash(reinterpret_cast<uintptr_t>(ptr.get())));
+ auto operator()(std::shared_ptr<T> const& ptr) const noexcept -> uint64_t {
+ // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
+ return detail::wyhash::hash(reinterpret_cast<uintptr_t>(ptr.get()));
}
};
template <typename Enum>
struct hash<Enum, typename std::enable_if<std::is_enum<Enum>::value>::type> {
using is_avalanching = void;
- auto operator()(Enum e) const noexcept -> size_t {
- using Underlying = typename std::underlying_type_t<Enum>;
- return static_cast<size_t>(detail::wyhash::hash(static_cast<Underlying>(e)));
+ auto operator()(Enum e) const noexcept -> uint64_t {
+ using underlying = typename std::underlying_type_t<Enum>;
+ return detail::wyhash::hash(static_cast<underlying>(e));
}
};
-# define ANKERL_UNORDERED_DENSE_HASH_STATICCAST(T) \
- template <> \
- struct hash<T> { \
- using is_avalanching = void; \
- auto operator()(T const& obj) const noexcept -> size_t { \
- return static_cast<size_t>(detail::wyhash::hash(static_cast<uint64_t>(obj))); \
- } \
+// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
+# define ANKERL_UNORDERED_DENSE_HASH_STATICCAST(T) \
+ template <> \
+ struct hash<T> { \
+ using is_avalanching = void; \
+ auto operator()(T const& obj) const noexcept -> uint64_t { \
+ return detail::wyhash::hash(static_cast<uint64_t>(obj)); \
+ } \
}
# if defined(__GNUC__) && !defined(__clang__)
@@ -267,7 +291,7 @@ ANKERL_UNORDERED_DENSE_HASH_STATICCAST(bool);
ANKERL_UNORDERED_DENSE_HASH_STATICCAST(char);
ANKERL_UNORDERED_DENSE_HASH_STATICCAST(signed char);
ANKERL_UNORDERED_DENSE_HASH_STATICCAST(unsigned char);
-# if __cplusplus >= 202002L
+# if ANKERL_UNORDERED_DENSE_CPP_VERSION >= 202002L
ANKERL_UNORDERED_DENSE_HASH_STATICCAST(char8_t);
# endif
ANKERL_UNORDERED_DENSE_HASH_STATICCAST(char16_t);
@@ -286,6 +310,28 @@ ANKERL_UNORDERED_DENSE_HASH_STATICCAST(unsigned long long);
# pragma GCC diagnostic pop
# endif
+// bucket_type //////////////////////////////////////////////////////////
+
+namespace bucket_type {
+
+struct standard {
+ static constexpr uint32_t dist_inc = 1U << 8U; // skip 1 byte fingerprint
+ static constexpr uint32_t fingerprint_mask = dist_inc - 1; // mask for 1 byte of fingerprint
+
+ uint32_t m_dist_and_fingerprint; // upper 3 byte: distance to original bucket. lower byte: fingerprint from hash
+ uint32_t m_value_idx; // index into the m_values vector.
+};
+
+ANKERL_UNORDERED_DENSE_PACK(struct big {
+ static constexpr uint32_t dist_inc = 1U << 8U; // skip 1 byte fingerprint
+ static constexpr uint32_t fingerprint_mask = dist_inc - 1; // mask for 1 byte of fingerprint
+
+ uint32_t m_dist_and_fingerprint; // upper 3 byte: distance to original bucket. lower byte: fingerprint from hash
+ size_t m_value_idx; // index into the m_values vector.
+});
+
+} // namespace bucket_type
+
namespace detail {
struct nonesuch {};
@@ -314,88 +360,126 @@ using detect_avalanching = typename T::is_avalanching;
template <typename T>
using detect_is_transparent = typename T::is_transparent;
-template <typename H, typename KE>
-using is_transparent =
- std::enable_if_t<is_detected_v<detect_is_transparent, H> && is_detected_v<detect_is_transparent, KE>, bool>;
+template <typename T>
+using detect_iterator = typename T::iterator;
+
+template <typename T>
+using detect_reserve = decltype(std::declval<T&>().reserve(size_t{}));
+
+// enable_if helpers
+
+template <typename Mapped>
+constexpr bool is_map_v = !std::is_void_v<Mapped>;
+
+template <typename Hash, typename KeyEqual>
+constexpr bool is_transparent_v = is_detected_v<detect_is_transparent, Hash>&& is_detected_v<detect_is_transparent, KeyEqual>;
+
+template <typename From, typename To1, typename To2>
+constexpr bool is_neither_convertible_v = !std::is_convertible_v<From, To1> && !std::is_convertible_v<From, To2>;
+
+template <typename T>
+constexpr bool has_reserve = is_detected_v<detect_reserve, T>;
// This is it, the table. Doubles as map and set, and uses `void` for T when it's used as a set.
template <class Key,
class T, // when void, treat it as a set.
class Hash,
class KeyEqual,
- class Allocator>
+ class AllocatorOrContainer,
+ class Bucket>
class table {
- struct Bucket;
- using ValueContainer =
- typename std::vector<typename std::conditional_t<std::is_void_v<T>, Key, std::pair<Key, T>>, Allocator>;
- using BucketAlloc = typename std::allocator_traits<Allocator>::template rebind_alloc<Bucket>;
- using BucketAllocTraits = std::allocator_traits<BucketAlloc>;
+public:
+ using value_container_type = std::conditional_t<
+ is_detected_v<detect_iterator, AllocatorOrContainer>,
+ AllocatorOrContainer,
+ typename std::vector<typename std::conditional_t<std::is_void_v<T>, Key, std::pair<Key, T>>, AllocatorOrContainer>>;
- static constexpr uint32_t BUCKET_DIST_INC = 1U << 8U; // skip 1 byte fingerprint
- static constexpr uint32_t BUCKET_FINGERPRINT_MASK = BUCKET_DIST_INC - 1; // mask for 1 byte of fingerprint
- static constexpr uint8_t INITIAL_SHIFTS = 64 - 3; // 2^(64-m_shift) number of buckets
- static constexpr float DEFAULT_MAX_LOAD_FACTOR = 0.8F;
+private:
+ using bucket_alloc =
+ typename std::allocator_traits<typename value_container_type::allocator_type>::template rebind_alloc<Bucket>;
+ using bucket_alloc_traits = std::allocator_traits<bucket_alloc>;
+
+ static constexpr uint8_t initial_shifts = 64 - 3; // 2^(64-m_shift) number of buckets
+ static constexpr float default_max_load_factor = 0.8F;
public:
using key_type = Key;
using mapped_type = T;
- using value_type = typename ValueContainer::value_type;
- using size_type = typename ValueContainer::size_type;
- using difference_type = typename ValueContainer::difference_type;
+ using value_type = typename value_container_type::value_type;
+ using size_type = typename value_container_type::size_type;
+ using difference_type = typename value_container_type::difference_type;
using hasher = Hash;
using key_equal = KeyEqual;
- using allocator_type = typename ValueContainer::allocator_type;
- using reference = typename ValueContainer::reference;
- using const_reference = typename ValueContainer::const_reference;
- using pointer = typename ValueContainer::pointer;
- using const_pointer = typename ValueContainer::const_pointer;
- using iterator = typename ValueContainer::iterator;
- using const_iterator = typename ValueContainer::const_iterator;
+ using allocator_type = typename value_container_type::allocator_type;
+ using reference = typename value_container_type::reference;
+ using const_reference = typename value_container_type::const_reference;
+ using pointer = typename value_container_type::pointer;
+ using const_pointer = typename value_container_type::const_pointer;
+ using iterator = typename value_container_type::iterator;
+ using const_iterator = typename value_container_type::const_iterator;
+ using bucket_type = Bucket;
private:
- struct Bucket {
- uint32_t dist_and_fingerprint; // upper 3 byte: distance to original bucket. lower byte: fingerprint from hash
- uint32_t value_idx; // index into the m_values vector.
- };
+ using value_idx_type = decltype(Bucket::m_value_idx);
+ using dist_and_fingerprint_type = decltype(Bucket::m_dist_and_fingerprint);
+
static_assert(std::is_trivially_destructible_v<Bucket>, "assert there's no need to call destructor / std::destroy");
static_assert(std::is_trivially_copyable_v<Bucket>, "assert we can just memset / memcpy");
- ValueContainer m_values{}; // Contains all the key-value pairs in one densely stored container. No holes.
- Bucket* m_buckets_start = nullptr;
- Bucket* m_buckets_end = nullptr;
- uint32_t m_max_bucket_capacity = 0;
- float m_max_load_factor = DEFAULT_MAX_LOAD_FACTOR;
+ value_container_type m_values{}; // Contains all the key-value pairs in one densely stored container. No holes.
+ typename std::allocator_traits<bucket_alloc>::pointer m_buckets{};
+ size_t m_num_buckets = 0;
+ size_t m_max_bucket_capacity = 0;
+ float m_max_load_factor = default_max_load_factor;
Hash m_hash{};
KeyEqual m_equal{};
- uint8_t m_shifts = INITIAL_SHIFTS;
+ uint8_t m_shifts = initial_shifts;
+
+ [[nodiscard]] auto next(value_idx_type bucket_idx) const -> value_idx_type {
+ return ANKERL_UNORDERED_DENSE_UNLIKELY(bucket_idx + 1U == m_num_buckets)
+ ? 0
+ : static_cast<value_idx_type>(bucket_idx + 1U);
+ }
+
+ // Helper to access bucket through pointer types
+ [[nodiscard]] static constexpr auto at(typename std::allocator_traits<bucket_alloc>::pointer bucket_ptr, size_t offset)
+ -> Bucket& {
+ return *(bucket_ptr + static_cast<typename std::allocator_traits<bucket_alloc>::difference_type>(offset));
+ }
- [[nodiscard]] auto next(Bucket const* bucket) const -> Bucket const* {
- return ANKERL_UNORDERED_DENSE_UNLIKELY(bucket + 1 == m_buckets_end) ? m_buckets_start : bucket + 1;
+ // use the dist_inc and dist_dec functions so that uint16_t types work without warning
+ [[nodiscard]] static constexpr auto dist_inc(dist_and_fingerprint_type x) -> dist_and_fingerprint_type {
+ return static_cast<dist_and_fingerprint_type>(x + Bucket::dist_inc);
}
- [[nodiscard]] auto next(Bucket* bucket) -> Bucket* {
- return ANKERL_UNORDERED_DENSE_UNLIKELY(bucket + 1 == m_buckets_end) ? m_buckets_start : bucket + 1;
+ [[nodiscard]] static constexpr auto dist_dec(dist_and_fingerprint_type x) -> dist_and_fingerprint_type {
+ return static_cast<dist_and_fingerprint_type>(x - Bucket::dist_inc);
}
+ // The goal of mixed_hash is to always produce a high quality 64bit hash.
template <typename K>
[[nodiscard]] constexpr auto mixed_hash(K const& key) const -> uint64_t {
if constexpr (is_detected_v<detect_avalanching, Hash>) {
- return m_hash(key);
+ // we know that the hash is good because is_avalanching.
+ if constexpr (sizeof(decltype(m_hash(key))) < sizeof(uint64_t)) {
+ // 32bit hash and is_avalanching => multiply with a constant to avalanche bits upwards
+ return m_hash(key) * UINT64_C(0x9ddfea08eb382d69);
+ } else {
+ // 64bit and is_avalanching => only use the hash itself.
+ return m_hash(key);
+ }
} else {
+ // not is_avalanching => apply wyhash
return wyhash::hash(m_hash(key));
}
}
- [[nodiscard]] constexpr auto dist_and_fingerprint_from_hash(uint64_t hash) const -> uint32_t {
- return BUCKET_DIST_INC | (hash & BUCKET_FINGERPRINT_MASK);
+ [[nodiscard]] constexpr auto dist_and_fingerprint_from_hash(uint64_t hash) const -> dist_and_fingerprint_type {
+ return Bucket::dist_inc | (static_cast<dist_and_fingerprint_type>(hash) & Bucket::fingerprint_mask);
}
- [[nodiscard]] constexpr auto bucket_from_hash(uint64_t hash) const -> Bucket const* {
- return m_buckets_start + (hash >> m_shifts);
- }
-
- [[nodiscard]] constexpr auto bucket_from_hash(uint64_t hash) -> Bucket* {
- return m_buckets_start + (hash >> m_shifts);
+ [[nodiscard]] constexpr auto bucket_idx_from_hash(uint64_t hash) const -> value_idx_type {
+ return static_cast<value_idx_type>(hash >> m_shifts);
}
[[nodiscard]] static constexpr auto get_key(value_type const& vt) -> key_type const& {
@@ -407,51 +491,45 @@ private:
}
template <typename K>
- [[nodiscard]] auto next_while_less(K const& key) -> std::pair<uint32_t, Bucket*> {
- auto const& pair = std::as_const(*this).next_while_less(key);
- return {pair.first, const_cast<Bucket*>(pair.second)}; // NOLINT(cppcoreguidelines-pro-type-const-cast)
- }
-
- template <typename K>
- [[nodiscard]] auto next_while_less(K const& key) const -> std::pair<uint32_t, Bucket const*> {
+ [[nodiscard]] auto next_while_less(K const& key) const -> Bucket {
auto hash = mixed_hash(key);
auto dist_and_fingerprint = dist_and_fingerprint_from_hash(hash);
- auto const* bucket = bucket_from_hash(hash);
+ auto bucket_idx = bucket_idx_from_hash(hash);
- while (dist_and_fingerprint < bucket->dist_and_fingerprint) {
- dist_and_fingerprint += BUCKET_DIST_INC;
- bucket = next(bucket);
+ while (dist_and_fingerprint < at(m_buckets, bucket_idx).m_dist_and_fingerprint) {
+ dist_and_fingerprint = dist_inc(dist_and_fingerprint);
+ bucket_idx = next(bucket_idx);
}
- return {dist_and_fingerprint, bucket};
+ return {dist_and_fingerprint, bucket_idx};
}
- void place_and_shift_up(Bucket bucket, Bucket* place) {
- while (0 != place->dist_and_fingerprint) {
- bucket = std::exchange(*place, bucket);
- bucket.dist_and_fingerprint += BUCKET_DIST_INC;
+ void place_and_shift_up(Bucket bucket, value_idx_type place) {
+ while (0 != at(m_buckets, place).m_dist_and_fingerprint) {
+ bucket = std::exchange(at(m_buckets, place), bucket);
+ bucket.m_dist_and_fingerprint = dist_inc(bucket.m_dist_and_fingerprint);
place = next(place);
}
- *place = bucket;
+ at(m_buckets, place) = bucket;
}
- [[nodiscard]] static constexpr auto calc_num_buckets(uint8_t shifts) -> uint64_t {
- return UINT64_C(1) << (64U - shifts);
+ [[nodiscard]] static constexpr auto calc_num_buckets(uint8_t shifts) -> size_t {
+ return std::min(max_bucket_count(), size_t{1} << (64U - shifts));
}
[[nodiscard]] constexpr auto calc_shifts_for_size(size_t s) const -> uint8_t {
- auto shifts = INITIAL_SHIFTS;
- while (shifts > 0 && static_cast<uint64_t>(calc_num_buckets(shifts) * max_load_factor()) < s) {
+ auto shifts = initial_shifts;
+ while (shifts > 0 && static_cast<size_t>(static_cast<float>(calc_num_buckets(shifts)) * max_load_factor()) < s) {
--shifts;
}
return shifts;
}
- // assumes m_values has data, m_buckets_start=m_buckets_end=nullptr, m_shifts is INITIAL_SHIFTS
+ // assumes m_values has data, m_buckets is nullptr, m_shifts is initial_shifts
void copy_buckets(table const& other) {
if (!empty()) {
m_shifts = other.m_shifts;
allocate_buckets_from_shift();
- std::memcpy(m_buckets_start, other.m_buckets_start, sizeof(Bucket) * bucket_count());
+ std::memcpy(m_buckets, other.m_buckets, sizeof(Bucket) * bucket_count());
}
}
@@ -463,30 +541,37 @@ private:
}
void deallocate_buckets() {
- auto bucket_alloc = BucketAlloc(m_values.get_allocator());
- BucketAllocTraits::deallocate(bucket_alloc, m_buckets_start, bucket_count());
- m_buckets_start = nullptr;
- m_buckets_end = nullptr;
+ auto ba = bucket_alloc(m_values.get_allocator());
+ if (nullptr != m_buckets) {
+ bucket_alloc_traits::deallocate(ba, m_buckets, bucket_count());
+ }
+ m_buckets = nullptr;
+ m_num_buckets = 0;
m_max_bucket_capacity = 0;
}
void allocate_buckets_from_shift() {
- auto bucket_alloc = BucketAlloc(m_values.get_allocator());
- auto num_buckets = calc_num_buckets(m_shifts);
- m_buckets_start = BucketAllocTraits::allocate(bucket_alloc, num_buckets);
- m_buckets_end = m_buckets_start + num_buckets;
- m_max_bucket_capacity = static_cast<uint64_t>(num_buckets * max_load_factor());
+ auto ba = bucket_alloc(m_values.get_allocator());
+ m_num_buckets = calc_num_buckets(m_shifts);
+ m_buckets = bucket_alloc_traits::allocate(ba, m_num_buckets);
+ if (m_num_buckets == max_bucket_count()) {
+ // reached the maximum, make sure we can use each bucket
+ m_max_bucket_capacity = max_bucket_count();
+ } else {
+ m_max_bucket_capacity = static_cast<value_idx_type>(static_cast<float>(m_num_buckets) * max_load_factor());
+ }
}
void clear_buckets() {
- if (m_buckets_start != nullptr) {
- std::memset(m_buckets_start, 0, sizeof(Bucket) * bucket_count());
+ if (m_buckets != nullptr) {
+ std::memset(&*m_buckets, 0, sizeof(Bucket) * bucket_count());
}
}
void clear_and_fill_buckets_from_values() {
clear_buckets();
- for (uint32_t value_idx = 0, end_idx = static_cast<uint32_t>(m_values.size()); value_idx < end_idx; ++value_idx) {
+ for (value_idx_type value_idx = 0, end_idx = static_cast<value_idx_type>(m_values.size()); value_idx < end_idx;
+ ++value_idx) {
auto const& key = get_key(m_values[value_idx]);
auto [dist_and_fingerprint, bucket] = next_while_less(key);
@@ -496,22 +581,26 @@ private:
}
void increase_size() {
+ if (ANKERL_UNORDERED_DENSE_UNLIKELY(m_max_bucket_capacity == max_bucket_count())) {
+ throw std::overflow_error("ankerl::unordered_dense: reached max bucket size, cannot increase size");
+ }
--m_shifts;
deallocate_buckets();
allocate_buckets_from_shift();
clear_and_fill_buckets_from_values();
}
- void do_erase(Bucket* bucket) {
- auto const value_idx_to_remove = bucket->value_idx;
+ void do_erase(value_idx_type bucket_idx) {
+ auto const value_idx_to_remove = at(m_buckets, bucket_idx).m_value_idx;
// shift down until either empty or an element with correct spot is found
- auto* next_bucket = next(bucket);
- while (next_bucket->dist_and_fingerprint >= BUCKET_DIST_INC * 2) {
- *bucket = {next_bucket->dist_and_fingerprint - BUCKET_DIST_INC, next_bucket->value_idx};
- bucket = std::exchange(next_bucket, next(next_bucket));
+ auto next_bucket_idx = next(bucket_idx);
+ while (at(m_buckets, next_bucket_idx).m_dist_and_fingerprint >= Bucket::dist_inc * 2) {
+ at(m_buckets, bucket_idx) = {dist_dec(at(m_buckets, next_bucket_idx).m_dist_and_fingerprint),
+ at(m_buckets, next_bucket_idx).m_value_idx};
+ bucket_idx = std::exchange(next_bucket_idx, next(next_bucket_idx));
}
- *bucket = {};
+ at(m_buckets, bucket_idx) = {};
// update m_values
if (value_idx_to_remove != m_values.size() - 1) {
@@ -521,13 +610,13 @@ private:
// update the values_idx of the moved entry. No need to play the info game, just look until we find the values_idx
auto mh = mixed_hash(get_key(val));
- bucket = bucket_from_hash(mh);
+ bucket_idx = bucket_idx_from_hash(mh);
- auto const values_idx_back = static_cast<uint32_t>(m_values.size() - 1);
- while (values_idx_back != bucket->value_idx) {
- bucket = next(bucket);
+ auto const values_idx_back = static_cast<value_idx_type>(m_values.size() - 1);
+ while (values_idx_back != at(m_buckets, bucket_idx).m_value_idx) {
+ bucket_idx = next(bucket_idx);
}
- bucket->value_idx = value_idx_to_remove;
+ at(m_buckets, bucket_idx).m_value_idx = value_idx_to_remove;
}
m_values.pop_back();
}
@@ -538,17 +627,18 @@ private:
return 0;
}
- auto [dist_and_fingerprint, bucket] = next_while_less(key);
+ auto [dist_and_fingerprint, bucket_idx] = next_while_less(key);
- while (dist_and_fingerprint == bucket->dist_and_fingerprint && !m_equal(key, get_key(m_values[bucket->value_idx]))) {
- dist_and_fingerprint += BUCKET_DIST_INC;
- bucket = next(bucket);
+ while (dist_and_fingerprint == at(m_buckets, bucket_idx).m_dist_and_fingerprint &&
+ !m_equal(key, get_key(m_values[at(m_buckets, bucket_idx).m_value_idx]))) {
+ dist_and_fingerprint = dist_inc(dist_and_fingerprint);
+ bucket_idx = next(bucket_idx);
}
- if (dist_and_fingerprint != bucket->dist_and_fingerprint) {
+ if (dist_and_fingerprint != at(m_buckets, bucket_idx).m_dist_and_fingerprint) {
return 0;
}
- do_erase(bucket);
+ do_erase(bucket_idx);
return 1;
}
@@ -561,66 +651,83 @@ private:
return it_isinserted;
}
+ template <typename K, typename... Args>
+ auto do_place_element(dist_and_fingerprint_type dist_and_fingerprint, value_idx_type bucket_idx, K&& key, Args&&... args)
+ -> std::pair<iterator, bool> {
+
+ // emplace the new value. If that throws an exception, no harm done; index is still in a valid state
+ m_values.emplace_back(std::piecewise_construct,
+ std::forward_as_tuple(std::forward<K>(key)),
+ std::forward_as_tuple(std::forward<Args>(args)...));
+
+ // place element and shift up until we find an empty spot
+ auto value_idx = static_cast<value_idx_type>(m_values.size() - 1);
+ place_and_shift_up({dist_and_fingerprint, value_idx}, bucket_idx);
+ return {begin() + static_cast<difference_type>(value_idx), true};
+ }
+
template <typename K, typename... Args>
auto do_try_emplace(K&& key, Args&&... args) -> std::pair<iterator, bool> {
- if (is_full()) {
+ if (ANKERL_UNORDERED_DENSE_UNLIKELY(is_full())) {
increase_size();
}
auto hash = mixed_hash(key);
auto dist_and_fingerprint = dist_and_fingerprint_from_hash(hash);
- auto* bucket = bucket_from_hash(hash);
+ auto bucket_idx = bucket_idx_from_hash(hash);
- while (dist_and_fingerprint <= bucket->dist_and_fingerprint) {
- if (dist_and_fingerprint == bucket->dist_and_fingerprint && m_equal(key, m_values[bucket->value_idx].first)) {
- return {begin() + bucket->value_idx, false};
+ while (true) {
+ auto* bucket = &at(m_buckets, bucket_idx);
+ if (dist_and_fingerprint == bucket->m_dist_and_fingerprint) {
+ if (m_equal(key, m_values[bucket->m_value_idx].first)) {
+ return {begin() + static_cast<difference_type>(bucket->m_value_idx), false};
+ }
+ } else if (dist_and_fingerprint > bucket->m_dist_and_fingerprint) {
+ return do_place_element(dist_and_fingerprint, bucket_idx, std::forward<K>(key), std::forward<Args>(args)...);
}
- dist_and_fingerprint += BUCKET_DIST_INC;
- bucket = next(bucket);
+ dist_and_fingerprint = dist_inc(dist_and_fingerprint);
+ bucket_idx = next(bucket_idx);
}
-
- // emplace the new value. If that throws an exception, no harm done; index is still in a valid state
- m_values.emplace_back(std::piecewise_construct,
- std::forward_as_tuple(std::forward<K>(key)),
- std::forward_as_tuple(std::forward<Args>(args)...));
-
- // place element and shift up until we find an empty spot
- uint32_t value_idx = static_cast<uint32_t>(m_values.size()) - 1;
- place_and_shift_up({dist_and_fingerprint, value_idx}, bucket);
- return {begin() + value_idx, true};
}
template <typename K>
auto do_find(K const& key) -> iterator {
- if (empty()) {
+ if (ANKERL_UNORDERED_DENSE_UNLIKELY(empty())) {
return end();
}
*** OUTPUT TRUNCATED, 744 LINES SKIPPED ***
More information about the Commits mailing list