commit 38b7e47: [Minor] Update unordered_dense library

Fri Dec 2 22:49:04 UTC 2022

Author: Vsevolod Stakhov
Date: 2022-12-02 21:19:22 +0000
URL: https://github.com/rspamd/rspamd/commit/38b7e477927c9fe765245eefa33292b9c4972d07

[Minor] Update unordered_dense library

---
 contrib/DEPENDENCY_INFO.md       |   2 +-
 contrib/ankerl/unordered_dense.h | 853 +++++++++++++++++++++++++++------------
 2 files changed, 586 insertions(+), 269 deletions(-)

diff --git a/contrib/DEPENDENCY_INFO.md b/contrib/DEPENDENCY_INFO.md
index d4fabd6b9..8220b7444 100644
--- a/contrib/DEPENDENCY_INFO.md
+++ b/contrib/DEPENDENCY_INFO.md
@@ -36,4 +36,4 @@
 | doctest                | 2.4.6   | MIT                 | NO      |                    |
 | function2              | 4.1.0   | Boost               | NO      |                    |
 | ankerl/svector         | 1.0.2   | MIT                 | NO      |                    |
-| ankerl/unordered_dense | 1.0.2   | MIT                 | NO      |                    |
+| ankerl/unordered_dense | 2.0.1   | MIT                 | NO      |                    |
diff --git a/contrib/ankerl/unordered_dense.h b/contrib/ankerl/unordered_dense.h
index 9ae108173..737d12bf3 100644
--- a/contrib/ankerl/unordered_dense.h
+++ b/contrib/ankerl/unordered_dense.h
@@ -1,7 +1,7 @@
 ///////////////////////// ankerl::unordered_dense::{map, set} /////////////////////////
 
 // A fast & densely stored hashmap and hashset based on robin-hood backward shift deletion.
-// Version 1.0.2
+// Version 2.0.1
 // https://github.com/martinus/unordered_dense
 //
 // Licensed under the MIT License <http://opensource.org/licenses/MIT>.
@@ -30,14 +30,34 @@
 #define ANKERL_UNORDERED_DENSE_H
 
 // see https://semver.org/spec/v2.0.0.html
-#define ANKERL_UNORDERED_DENSE_VERSION_MAJOR 1 // incompatible API changes
-#define ANKERL_UNORDERED_DENSE_VERSION_MINOR 0 // add functionality in a backwards compatible manner
-#define ANKERL_UNORDERED_DENSE_VERSION_PATCH 2 // backwards compatible bug fixes
+#define ANKERL_UNORDERED_DENSE_VERSION_MAJOR 2 // NOLINT(cppcoreguidelines-macro-usage) incompatible API changes
+#define ANKERL_UNORDERED_DENSE_VERSION_MINOR 0 // NOLINT(cppcoreguidelines-macro-usage) backwards compatible functionality
+#define ANKERL_UNORDERED_DENSE_VERSION_PATCH 1 // NOLINT(cppcoreguidelines-macro-usage) backwards compatible bug fixes
+
+// API versioning with inline namespace, see https://www.foonathan.net/2018/11/inline-namespaces/
+#define ANKERL_UNORDERED_DENSE_VERSION_CONCAT1(major, minor, patch) v##major##_##minor##_##patch
+#define ANKERL_UNORDERED_DENSE_VERSION_CONCAT(major, minor, patch) ANKERL_UNORDERED_DENSE_VERSION_CONCAT1(major, minor, patch)
+#define ANKERL_UNORDERED_DENSE_NAMESPACE   \
+    ANKERL_UNORDERED_DENSE_VERSION_CONCAT( \
+        ANKERL_UNORDERED_DENSE_VERSION_MAJOR, ANKERL_UNORDERED_DENSE_VERSION_MINOR, ANKERL_UNORDERED_DENSE_VERSION_PATCH)
+
+#if defined(_MSVC_LANG)
+#    define ANKERL_UNORDERED_DENSE_CPP_VERSION _MSVC_LANG
+#else
+#    define ANKERL_UNORDERED_DENSE_CPP_VERSION __cplusplus
+#endif
 
-#if __cplusplus < 201703L
+#if defined(__GNUC__)
+// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
+#    define ANKERL_UNORDERED_DENSE_PACK(decl) decl __attribute__((__packed__))
+#elif defined(_MSC_VER)
+// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
+#    define ANKERL_UNORDERED_DENSE_PACK(decl) __pragma(pack(push, 1)) decl __pragma(pack(pop))
+#endif
+
+#if ANKERL_UNORDERED_DENSE_CPP_VERSION < 201703L
 #    error ankerl::unordered_dense requires C++17 or higher
 #else
-
 #    include <array>            // for array
 #    include <cstdint>          // for uint64_t, uint32_t, uint8_t, UINT64_C
 #    include <cstring>          // for size_t, memcpy, memset
@@ -54,12 +74,12 @@
 #    include <utility>          // for forward, exchange, pair, as_const, piece...
 #    include <vector>           // for vector
 
-#    define ANKERL_UNORDERED_DENSE_PMR 0
+#    define ANKERL_UNORDERED_DENSE_PMR 0 // NOLINT(cppcoreguidelines-macro-usage)
 #    if defined(__has_include)
 #        if __has_include(<memory_resource>)
 #            undef ANKERL_UNORDERED_DENSE_PMR
-#            define ANKERL_UNORDERED_DENSE_PMR 1
-#            include <memory_resource> // for polymorphic_allocator
+#            define ANKERL_UNORDERED_DENSE_PMR 1 // NOLINT(cppcoreguidelines-macro-usage)
+#            include <memory_resource>           // for polymorphic_allocator
 #        endif
 #    endif
 
@@ -69,14 +89,15 @@
 #    endif
 
 #    if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
-#        define ANKERL_UNORDERED_DENSE_LIKELY(x) __builtin_expect(x, 1)
-#        define ANKERL_UNORDERED_DENSE_UNLIKELY(x) __builtin_expect(x, 0)
+#        define ANKERL_UNORDERED_DENSE_LIKELY(x) __builtin_expect(x, 1)   // NOLINT(cppcoreguidelines-macro-usage)
+#        define ANKERL_UNORDERED_DENSE_UNLIKELY(x) __builtin_expect(x, 0) // NOLINT(cppcoreguidelines-macro-usage)
 #    else
-#        define ANKERL_UNORDERED_DENSE_LIKELY(x) (x)
-#        define ANKERL_UNORDERED_DENSE_UNLIKELY(x) (x)
+#        define ANKERL_UNORDERED_DENSE_LIKELY(x) (x)   // NOLINT(cppcoreguidelines-macro-usage)
+#        define ANKERL_UNORDERED_DENSE_UNLIKELY(x) (x) // NOLINT(cppcoreguidelines-macro-usage)
 #    endif
 
 namespace ankerl::unordered_dense {
+inline namespace ANKERL_UNORDERED_DENSE_NAMESPACE {
 
 // hash ///////////////////////////////////////////////////////////////////////
 
@@ -123,7 +144,7 @@ static inline void mum(uint64_t* a, uint64_t* b) {
 // read functions. WARNING: we don't care about endianness, so results are different on big endian!
 [[nodiscard]] static inline auto r8(const uint8_t* p) -> uint64_t {
     uint64_t v{};
-    std::memcpy(&v, p, 8);
+    std::memcpy(&v, p, 8U);
     return v;
 }
 
@@ -138,7 +159,7 @@ static inline void mum(uint64_t* a, uint64_t* b) {
     return (static_cast<uint64_t>(p[0]) << 16U) | (static_cast<uint64_t>(p[k >> 1U]) << 8U) | p[k - 1];
 }
 
-[[nodiscard]] static inline auto hash(void const* key, size_t len) -> uint64_t {
+[[maybe_unused]] [[nodiscard]] static inline auto hash(void const* key, size_t len) -> uint64_t {
     static constexpr auto secret = std::array{UINT64_C(0xa0761d6478bd642f),
                                               UINT64_C(0xe7037ed1a0b428db),
                                               UINT64_C(0x8ebc6af09c88c6e3),
@@ -192,70 +213,73 @@ static inline void mum(uint64_t* a, uint64_t* b) {
 } // namespace detail::wyhash
 
 template <typename T, typename Enable = void>
-struct hash : public std::hash<T> {
-    using is_avalanching = void;
+struct hash {
     auto operator()(T const& obj) const noexcept(noexcept(std::declval<std::hash<T>>().operator()(std::declval<T const&>())))
-        -> size_t {
-        return static_cast<size_t>(detail::wyhash::hash(std::hash<T>::operator()(obj)));
+        -> uint64_t {
+        return std::hash<T>{}(obj);
     }
 };
 
 template <typename CharT>
 struct hash<std::basic_string<CharT>> {
     using is_avalanching = void;
-    auto operator()(std::basic_string<CharT> const& str) const noexcept -> size_t {
-        return static_cast<size_t>(detail::wyhash::hash(str.data(), sizeof(CharT) * str.size()));
+    auto operator()(std::basic_string<CharT> const& str) const noexcept -> uint64_t {
+        return detail::wyhash::hash(str.data(), sizeof(CharT) * str.size());
     }
 };
 
 template <typename CharT>
 struct hash<std::basic_string_view<CharT>> {
     using is_avalanching = void;
-    auto operator()(std::basic_string_view<CharT> const& sv) const noexcept -> size_t {
-        return static_cast<size_t>(detail::wyhash::hash(sv.data(), sizeof(CharT) * sv.size()));
+    auto operator()(std::basic_string_view<CharT> const& sv) const noexcept -> uint64_t {
+        return detail::wyhash::hash(sv.data(), sizeof(CharT) * sv.size());
     }
 };
 
 template <class T>
 struct hash<T*> {
     using is_avalanching = void;
-    auto operator()(T* ptr) const noexcept -> size_t {
-        return static_cast<size_t>(detail::wyhash::hash(reinterpret_cast<uintptr_t>(ptr)));
+    auto operator()(T* ptr) const noexcept -> uint64_t {
+        // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
+        return detail::wyhash::hash(reinterpret_cast<uintptr_t>(ptr));
     }
 };
 
 template <class T>
 struct hash<std::unique_ptr<T>> {
     using is_avalanching = void;
-    auto operator()(std::unique_ptr<T> const& ptr) const noexcept -> size_t {
-        return static_cast<size_t>(detail::wyhash::hash(reinterpret_cast<uintptr_t>(ptr.get())));
+    auto operator()(std::unique_ptr<T> const& ptr) const noexcept -> uint64_t {
+        // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
+        return detail::wyhash::hash(reinterpret_cast<uintptr_t>(ptr.get()));
     }
 };
 
 template <class T>
 struct hash<std::shared_ptr<T>> {
     using is_avalanching = void;
-    auto operator()(std::shared_ptr<T> const& ptr) const noexcept -> size_t {
-        return static_cast<size_t>(detail::wyhash::hash(reinterpret_cast<uintptr_t>(ptr.get())));
+    auto operator()(std::shared_ptr<T> const& ptr) const noexcept -> uint64_t {
+        // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
+        return detail::wyhash::hash(reinterpret_cast<uintptr_t>(ptr.get()));
     }
 };
 
 template <typename Enum>
 struct hash<Enum, typename std::enable_if<std::is_enum<Enum>::value>::type> {
     using is_avalanching = void;
-    auto operator()(Enum e) const noexcept -> size_t {
-        using Underlying = typename std::underlying_type_t<Enum>;
-        return static_cast<size_t>(detail::wyhash::hash(static_cast<Underlying>(e)));
+    auto operator()(Enum e) const noexcept -> uint64_t {
+        using underlying = typename std::underlying_type_t<Enum>;
+        return detail::wyhash::hash(static_cast<underlying>(e));
     }
 };
 
-#    define ANKERL_UNORDERED_DENSE_HASH_STATICCAST(T)                                         \
-        template <>                                                                           \
-        struct hash<T> {                                                                      \
-            using is_avalanching = void;                                                      \
-            auto operator()(T const& obj) const noexcept -> size_t {                          \
-                return static_cast<size_t>(detail::wyhash::hash(static_cast<uint64_t>(obj))); \
-            }                                                                                 \
+// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
+#    define ANKERL_UNORDERED_DENSE_HASH_STATICCAST(T)                    \
+        template <>                                                      \
+        struct hash<T> {                                                 \
+            using is_avalanching = void;                                 \
+            auto operator()(T const& obj) const noexcept -> uint64_t {   \
+                return detail::wyhash::hash(static_cast<uint64_t>(obj)); \
+            }                                                            \
         }
 
 #    if defined(__GNUC__) && !defined(__clang__)
@@ -267,7 +291,7 @@ ANKERL_UNORDERED_DENSE_HASH_STATICCAST(bool);
 ANKERL_UNORDERED_DENSE_HASH_STATICCAST(char);
 ANKERL_UNORDERED_DENSE_HASH_STATICCAST(signed char);
 ANKERL_UNORDERED_DENSE_HASH_STATICCAST(unsigned char);
-#    if __cplusplus >= 202002L
+#    if ANKERL_UNORDERED_DENSE_CPP_VERSION >= 202002L
 ANKERL_UNORDERED_DENSE_HASH_STATICCAST(char8_t);
 #    endif
 ANKERL_UNORDERED_DENSE_HASH_STATICCAST(char16_t);
@@ -286,6 +310,28 @@ ANKERL_UNORDERED_DENSE_HASH_STATICCAST(unsigned long long);
 #        pragma GCC diagnostic pop
 #    endif
 
+// bucket_type //////////////////////////////////////////////////////////
+
+namespace bucket_type {
+
+struct standard {
+    static constexpr uint32_t dist_inc = 1U << 8U;             // skip 1 byte fingerprint
+    static constexpr uint32_t fingerprint_mask = dist_inc - 1; // mask for 1 byte of fingerprint
+
+    uint32_t m_dist_and_fingerprint; // upper 3 bytes: distance to original bucket. lower byte: fingerprint from hash
+    uint32_t m_value_idx;            // index into the m_values vector.
+};
+
+ANKERL_UNORDERED_DENSE_PACK(struct big {
+    static constexpr uint32_t dist_inc = 1U << 8U;             // skip 1 byte fingerprint
+    static constexpr uint32_t fingerprint_mask = dist_inc - 1; // mask for 1 byte of fingerprint
+
+    uint32_t m_dist_and_fingerprint; // upper 3 bytes: distance to original bucket. lower byte: fingerprint from hash
+    size_t m_value_idx;              // index into the m_values vector.
+});
+
+} // namespace bucket_type
+
 namespace detail {
 
 struct nonesuch {};
@@ -314,88 +360,126 @@ using detect_avalanching = typename T::is_avalanching;
 template <typename T>
 using detect_is_transparent = typename T::is_transparent;
 
-template <typename H, typename KE>
-using is_transparent =
-    std::enable_if_t<is_detected_v<detect_is_transparent, H> && is_detected_v<detect_is_transparent, KE>, bool>;
+template <typename T>
+using detect_iterator = typename T::iterator;
+
+template <typename T>
+using detect_reserve = decltype(std::declval<T&>().reserve(size_t{}));
+
+// enable_if helpers
+
+template <typename Mapped>
+constexpr bool is_map_v = !std::is_void_v<Mapped>;
+
+template <typename Hash, typename KeyEqual>
+constexpr bool is_transparent_v = is_detected_v<detect_is_transparent, Hash>&& is_detected_v<detect_is_transparent, KeyEqual>;
+
+template <typename From, typename To1, typename To2>
+constexpr bool is_neither_convertible_v = !std::is_convertible_v<From, To1> && !std::is_convertible_v<From, To2>;
+
+template <typename T>
+constexpr bool has_reserve = is_detected_v<detect_reserve, T>;
 
 // This is it, the table. Doubles as map and set, and uses `void` for T when its used as a set.
 template <class Key,
           class T, // when void, treat it as a set.
           class Hash,
           class KeyEqual,
-          class Allocator>
+          class AllocatorOrContainer,
+          class Bucket>
 class table {
-    struct Bucket;
-    using ValueContainer =
-        typename std::vector<typename std::conditional_t<std::is_void_v<T>, Key, std::pair<Key, T>>, Allocator>;
-    using BucketAlloc = typename std::allocator_traits<Allocator>::template rebind_alloc<Bucket>;
-    using BucketAllocTraits = std::allocator_traits<BucketAlloc>;
+public:
+    using value_container_type = std::conditional_t<
+        is_detected_v<detect_iterator, AllocatorOrContainer>,
+        AllocatorOrContainer,
+        typename std::vector<typename std::conditional_t<std::is_void_v<T>, Key, std::pair<Key, T>>, AllocatorOrContainer>>;
 
-    static constexpr uint32_t BUCKET_DIST_INC = 1U << 8U;                    // skip 1 byte fingerprint
-    static constexpr uint32_t BUCKET_FINGERPRINT_MASK = BUCKET_DIST_INC - 1; // mask for 1 byte of fingerprint
-    static constexpr uint8_t INITIAL_SHIFTS = 64 - 3;                        // 2^(64-m_shift) number of buckets
-    static constexpr float DEFAULT_MAX_LOAD_FACTOR = 0.8F;
+private:
+    using bucket_alloc =
+        typename std::allocator_traits<typename value_container_type::allocator_type>::template rebind_alloc<Bucket>;
+    using bucket_alloc_traits = std::allocator_traits<bucket_alloc>;
+
+    static constexpr uint8_t initial_shifts = 64 - 3; // 2^(64-m_shifts) number of buckets
+    static constexpr float default_max_load_factor = 0.8F;
 
 public:
     using key_type = Key;
     using mapped_type = T;
-    using value_type = typename ValueContainer::value_type;
-    using size_type = typename ValueContainer::size_type;
-    using difference_type = typename ValueContainer::difference_type;
+    using value_type = typename value_container_type::value_type;
+    using size_type = typename value_container_type::size_type;
+    using difference_type = typename value_container_type::difference_type;
     using hasher = Hash;
     using key_equal = KeyEqual;
-    using allocator_type = typename ValueContainer::allocator_type;
-    using reference = typename ValueContainer::reference;
-    using const_reference = typename ValueContainer::const_reference;
-    using pointer = typename ValueContainer::pointer;
-    using const_pointer = typename ValueContainer::const_pointer;
-    using iterator = typename ValueContainer::iterator;
-    using const_iterator = typename ValueContainer::const_iterator;
+    using allocator_type = typename value_container_type::allocator_type;
+    using reference = typename value_container_type::reference;
+    using const_reference = typename value_container_type::const_reference;
+    using pointer = typename value_container_type::pointer;
+    using const_pointer = typename value_container_type::const_pointer;
+    using iterator = typename value_container_type::iterator;
+    using const_iterator = typename value_container_type::const_iterator;
+    using bucket_type = Bucket;
 
 private:
-    struct Bucket {
-        uint32_t dist_and_fingerprint; // upper 3 byte: distance to original bucket. lower byte: fingerprint from hash
-        uint32_t value_idx;            // index into the m_values vector.
-    };
+    using value_idx_type = decltype(Bucket::m_value_idx);
+    using dist_and_fingerprint_type = decltype(Bucket::m_dist_and_fingerprint);
+
     static_assert(std::is_trivially_destructible_v<Bucket>, "assert there's no need to call destructor / std::destroy");
     static_assert(std::is_trivially_copyable_v<Bucket>, "assert we can just memset / memcpy");
 
-    ValueContainer m_values{}; // Contains all the key-value pairs in one densely stored container. No holes.
-    Bucket* m_buckets_start = nullptr;
-    Bucket* m_buckets_end = nullptr;
-    uint32_t m_max_bucket_capacity = 0;
-    float m_max_load_factor = DEFAULT_MAX_LOAD_FACTOR;
+    value_container_type m_values{}; // Contains all the key-value pairs in one densely stored container. No holes.
+    typename std::allocator_traits<bucket_alloc>::pointer m_buckets{};
+    size_t m_num_buckets = 0;
+    size_t m_max_bucket_capacity = 0;
+    float m_max_load_factor = default_max_load_factor;
     Hash m_hash{};
     KeyEqual m_equal{};
-    uint8_t m_shifts = INITIAL_SHIFTS;
+    uint8_t m_shifts = initial_shifts;
+
+    [[nodiscard]] auto next(value_idx_type bucket_idx) const -> value_idx_type {
+        return ANKERL_UNORDERED_DENSE_UNLIKELY(bucket_idx + 1U == m_num_buckets)
+                   ? 0
+                   : static_cast<value_idx_type>(bucket_idx + 1U);
+    }
+
+    // Helper to access bucket through pointer types
+    [[nodiscard]] static constexpr auto at(typename std::allocator_traits<bucket_alloc>::pointer bucket_ptr, size_t offset)
+        -> Bucket& {
+        return *(bucket_ptr + static_cast<typename std::allocator_traits<bucket_alloc>::difference_type>(offset));
+    }
 
-    [[nodiscard]] auto next(Bucket const* bucket) const -> Bucket const* {
-        return ANKERL_UNORDERED_DENSE_UNLIKELY(bucket + 1 == m_buckets_end) ? m_buckets_start : bucket + 1;
+    // use the dist_inc and dist_dec functions so that uint16_t types work without warning
+    [[nodiscard]] static constexpr auto dist_inc(dist_and_fingerprint_type x) -> dist_and_fingerprint_type {
+        return static_cast<dist_and_fingerprint_type>(x + Bucket::dist_inc);
     }
 
-    [[nodiscard]] auto next(Bucket* bucket) -> Bucket* {
-        return ANKERL_UNORDERED_DENSE_UNLIKELY(bucket + 1 == m_buckets_end) ? m_buckets_start : bucket + 1;
+    [[nodiscard]] static constexpr auto dist_dec(dist_and_fingerprint_type x) -> dist_and_fingerprint_type {
+        return static_cast<dist_and_fingerprint_type>(x - Bucket::dist_inc);
     }
 
+    // The goal of mixed_hash is to always produce a high quality 64bit hash.
     template <typename K>
     [[nodiscard]] constexpr auto mixed_hash(K const& key) const -> uint64_t {
         if constexpr (is_detected_v<detect_avalanching, Hash>) {
-            return m_hash(key);
+            // we know that the hash is good because is_avalanching.
+            if constexpr (sizeof(decltype(m_hash(key))) < sizeof(uint64_t)) {
+                // 32bit hash and is_avalanching => multiply with a constant to avalanche bits upwards
+                return m_hash(key) * UINT64_C(0x9ddfea08eb382d69);
+            } else {
+                // 64bit and is_avalanching => only use the hash itself.
+                return m_hash(key);
+            }
         } else {
+            // not is_avalanching => apply wyhash
             return wyhash::hash(m_hash(key));
         }
     }
 
-    [[nodiscard]] constexpr auto dist_and_fingerprint_from_hash(uint64_t hash) const -> uint32_t {
-        return BUCKET_DIST_INC | (hash & BUCKET_FINGERPRINT_MASK);
+    [[nodiscard]] constexpr auto dist_and_fingerprint_from_hash(uint64_t hash) const -> dist_and_fingerprint_type {
+        return Bucket::dist_inc | (static_cast<dist_and_fingerprint_type>(hash) & Bucket::fingerprint_mask);
     }
 
-    [[nodiscard]] constexpr auto bucket_from_hash(uint64_t hash) const -> Bucket const* {
-        return m_buckets_start + (hash >> m_shifts);
-    }
-
-    [[nodiscard]] constexpr auto bucket_from_hash(uint64_t hash) -> Bucket* {
-        return m_buckets_start + (hash >> m_shifts);
+    [[nodiscard]] constexpr auto bucket_idx_from_hash(uint64_t hash) const -> value_idx_type {
+        return static_cast<value_idx_type>(hash >> m_shifts);
     }
 
     [[nodiscard]] static constexpr auto get_key(value_type const& vt) -> key_type const& {
@@ -407,51 +491,45 @@ private:
     }
 
     template <typename K>
-    [[nodiscard]] auto next_while_less(K const& key) -> std::pair<uint32_t, Bucket*> {
-        auto const& pair = std::as_const(*this).next_while_less(key);
-        return {pair.first, const_cast<Bucket*>(pair.second)}; // NOLINT(cppcoreguidelines-pro-type-const-cast)
-    }
-
-    template <typename K>
-    [[nodiscard]] auto next_while_less(K const& key) const -> std::pair<uint32_t, Bucket const*> {
+    [[nodiscard]] auto next_while_less(K const& key) const -> Bucket {
         auto hash = mixed_hash(key);
         auto dist_and_fingerprint = dist_and_fingerprint_from_hash(hash);
-        auto const* bucket = bucket_from_hash(hash);
+        auto bucket_idx = bucket_idx_from_hash(hash);
 
-        while (dist_and_fingerprint < bucket->dist_and_fingerprint) {
-            dist_and_fingerprint += BUCKET_DIST_INC;
-            bucket = next(bucket);
+        while (dist_and_fingerprint < at(m_buckets, bucket_idx).m_dist_and_fingerprint) {
+            dist_and_fingerprint = dist_inc(dist_and_fingerprint);
+            bucket_idx = next(bucket_idx);
         }
-        return {dist_and_fingerprint, bucket};
+        return {dist_and_fingerprint, bucket_idx};
     }
 
-    void place_and_shift_up(Bucket bucket, Bucket* place) {
-        while (0 != place->dist_and_fingerprint) {
-            bucket = std::exchange(*place, bucket);
-            bucket.dist_and_fingerprint += BUCKET_DIST_INC;
+    void place_and_shift_up(Bucket bucket, value_idx_type place) {
+        while (0 != at(m_buckets, place).m_dist_and_fingerprint) {
+            bucket = std::exchange(at(m_buckets, place), bucket);
+            bucket.m_dist_and_fingerprint = dist_inc(bucket.m_dist_and_fingerprint);
             place = next(place);
         }
-        *place = bucket;
+        at(m_buckets, place) = bucket;
     }
 
-    [[nodiscard]] static constexpr auto calc_num_buckets(uint8_t shifts) -> uint64_t {
-        return UINT64_C(1) << (64U - shifts);
+    [[nodiscard]] static constexpr auto calc_num_buckets(uint8_t shifts) -> size_t {
+        return std::min(max_bucket_count(), size_t{1} << (64U - shifts));
     }
 
     [[nodiscard]] constexpr auto calc_shifts_for_size(size_t s) const -> uint8_t {
-        auto shifts = INITIAL_SHIFTS;
-        while (shifts > 0 && static_cast<uint64_t>(calc_num_buckets(shifts) * max_load_factor()) < s) {
+        auto shifts = initial_shifts;
+        while (shifts > 0 && static_cast<size_t>(static_cast<float>(calc_num_buckets(shifts)) * max_load_factor()) < s) {
             --shifts;
         }
         return shifts;
     }
 
-    // assumes m_values has data, m_buckets_start=m_buckets_end=nullptr, m_shifts is INITIAL_SHIFTS
+    // assumes m_values has data, m_buckets is nullptr, m_shifts is initial_shifts
     void copy_buckets(table const& other) {
         if (!empty()) {
             m_shifts = other.m_shifts;
             allocate_buckets_from_shift();
-            std::memcpy(m_buckets_start, other.m_buckets_start, sizeof(Bucket) * bucket_count());
+            std::memcpy(m_buckets, other.m_buckets, sizeof(Bucket) * bucket_count());
         }
     }
 
@@ -463,30 +541,37 @@ private:
     }
 
     void deallocate_buckets() {
-        auto bucket_alloc = BucketAlloc(m_values.get_allocator());
-        BucketAllocTraits::deallocate(bucket_alloc, m_buckets_start, bucket_count());
-        m_buckets_start = nullptr;
-        m_buckets_end = nullptr;
+        auto ba = bucket_alloc(m_values.get_allocator());
+        if (nullptr != m_buckets) {
+            bucket_alloc_traits::deallocate(ba, m_buckets, bucket_count());
+        }
+        m_buckets = nullptr;
+        m_num_buckets = 0;
         m_max_bucket_capacity = 0;
     }
 
     void allocate_buckets_from_shift() {
-        auto bucket_alloc = BucketAlloc(m_values.get_allocator());
-        auto num_buckets = calc_num_buckets(m_shifts);
-        m_buckets_start = BucketAllocTraits::allocate(bucket_alloc, num_buckets);
-        m_buckets_end = m_buckets_start + num_buckets;
-        m_max_bucket_capacity = static_cast<uint64_t>(num_buckets * max_load_factor());
+        auto ba = bucket_alloc(m_values.get_allocator());
+        m_num_buckets = calc_num_buckets(m_shifts);
+        m_buckets = bucket_alloc_traits::allocate(ba, m_num_buckets);
+        if (m_num_buckets == max_bucket_count()) {
+            // reached the maximum, make sure we can use each bucket
+            m_max_bucket_capacity = max_bucket_count();
+        } else {
+            m_max_bucket_capacity = static_cast<value_idx_type>(static_cast<float>(m_num_buckets) * max_load_factor());
+        }
     }
 
     void clear_buckets() {
-        if (m_buckets_start != nullptr) {
-            std::memset(m_buckets_start, 0, sizeof(Bucket) * bucket_count());
+        if (m_buckets != nullptr) {
+            std::memset(&*m_buckets, 0, sizeof(Bucket) * bucket_count());
         }
     }
 
     void clear_and_fill_buckets_from_values() {
         clear_buckets();
-        for (uint32_t value_idx = 0, end_idx = static_cast<uint32_t>(m_values.size()); value_idx < end_idx; ++value_idx) {
+        for (value_idx_type value_idx = 0, end_idx = static_cast<value_idx_type>(m_values.size()); value_idx < end_idx;
+             ++value_idx) {
             auto const& key = get_key(m_values[value_idx]);
             auto [dist_and_fingerprint, bucket] = next_while_less(key);
 
@@ -496,22 +581,26 @@ private:
     }
 
     void increase_size() {
+        if (ANKERL_UNORDERED_DENSE_UNLIKELY(m_max_bucket_capacity == max_bucket_count())) {
+            throw std::overflow_error("ankerl::unordered_dense: reached max bucket size, cannot increase size");
+        }
         --m_shifts;
         deallocate_buckets();
         allocate_buckets_from_shift();
         clear_and_fill_buckets_from_values();
     }
 
-    void do_erase(Bucket* bucket) {
-        auto const value_idx_to_remove = bucket->value_idx;
+    void do_erase(value_idx_type bucket_idx) {
+        auto const value_idx_to_remove = at(m_buckets, bucket_idx).m_value_idx;
 
         // shift down until either empty or an element with correct spot is found
-        auto* next_bucket = next(bucket);
-        while (next_bucket->dist_and_fingerprint >= BUCKET_DIST_INC * 2) {
-            *bucket = {next_bucket->dist_and_fingerprint - BUCKET_DIST_INC, next_bucket->value_idx};
-            bucket = std::exchange(next_bucket, next(next_bucket));
+        auto next_bucket_idx = next(bucket_idx);
+        while (at(m_buckets, next_bucket_idx).m_dist_and_fingerprint >= Bucket::dist_inc * 2) {
+            at(m_buckets, bucket_idx) = {dist_dec(at(m_buckets, next_bucket_idx).m_dist_and_fingerprint),
+                                         at(m_buckets, next_bucket_idx).m_value_idx};
+            bucket_idx = std::exchange(next_bucket_idx, next(next_bucket_idx));
         }
-        *bucket = {};
+        at(m_buckets, bucket_idx) = {};
 
         // update m_values
         if (value_idx_to_remove != m_values.size() - 1) {
@@ -521,13 +610,13 @@ private:
 
             // update the values_idx of the moved entry. No need to play the info game, just look until we find the values_idx
             auto mh = mixed_hash(get_key(val));
-            bucket = bucket_from_hash(mh);
+            bucket_idx = bucket_idx_from_hash(mh);
 
-            auto const values_idx_back = static_cast<uint32_t>(m_values.size() - 1);
-            while (values_idx_back != bucket->value_idx) {
-                bucket = next(bucket);
+            auto const values_idx_back = static_cast<value_idx_type>(m_values.size() - 1);
+            while (values_idx_back != at(m_buckets, bucket_idx).m_value_idx) {
+                bucket_idx = next(bucket_idx);
             }
-            bucket->value_idx = value_idx_to_remove;
+            at(m_buckets, bucket_idx).m_value_idx = value_idx_to_remove;
         }
         m_values.pop_back();
     }
@@ -538,17 +627,18 @@ private:
             return 0;
         }
 
-        auto [dist_and_fingerprint, bucket] = next_while_less(key);
+        auto [dist_and_fingerprint, bucket_idx] = next_while_less(key);
 
-        while (dist_and_fingerprint == bucket->dist_and_fingerprint && !m_equal(key, get_key(m_values[bucket->value_idx]))) {
-            dist_and_fingerprint += BUCKET_DIST_INC;
-            bucket = next(bucket);
+        while (dist_and_fingerprint == at(m_buckets, bucket_idx).m_dist_and_fingerprint &&
+               !m_equal(key, get_key(m_values[at(m_buckets, bucket_idx).m_value_idx]))) {
+            dist_and_fingerprint = dist_inc(dist_and_fingerprint);
+            bucket_idx = next(bucket_idx);
         }
 
-        if (dist_and_fingerprint != bucket->dist_and_fingerprint) {
+        if (dist_and_fingerprint != at(m_buckets, bucket_idx).m_dist_and_fingerprint) {
             return 0;
         }
-        do_erase(bucket);
+        do_erase(bucket_idx);
         return 1;
     }
 
@@ -561,66 +651,83 @@ private:
         return it_isinserted;
     }
 
+    template <typename K, typename... Args>
+    auto do_place_element(dist_and_fingerprint_type dist_and_fingerprint, value_idx_type bucket_idx, K&& key, Args&&... args)
+        -> std::pair<iterator, bool> {
+
+        // emplace the new value. If that throws an exception, no harm done; index is still in a valid state
+        m_values.emplace_back(std::piecewise_construct,
+                              std::forward_as_tuple(std::forward<K>(key)),
+                              std::forward_as_tuple(std::forward<Args>(args)...));
+
+        // place element and shift up until we find an empty spot
+        auto value_idx = static_cast<value_idx_type>(m_values.size() - 1);
+        place_and_shift_up({dist_and_fingerprint, value_idx}, bucket_idx);
+        return {begin() + static_cast<difference_type>(value_idx), true};
+    }
+
     template <typename K, typename... Args>
     auto do_try_emplace(K&& key, Args&&... args) -> std::pair<iterator, bool> {
-        if (is_full()) {
+        if (ANKERL_UNORDERED_DENSE_UNLIKELY(is_full())) {
             increase_size();
         }
 
         auto hash = mixed_hash(key);
         auto dist_and_fingerprint = dist_and_fingerprint_from_hash(hash);
-        auto* bucket = bucket_from_hash(hash);
+        auto bucket_idx = bucket_idx_from_hash(hash);
 
-        while (dist_and_fingerprint <= bucket->dist_and_fingerprint) {
-            if (dist_and_fingerprint == bucket->dist_and_fingerprint && m_equal(key, m_values[bucket->value_idx].first)) {
-                return {begin() + bucket->value_idx, false};
+        while (true) {
+            auto* bucket = &at(m_buckets, bucket_idx);
+            if (dist_and_fingerprint == bucket->m_dist_and_fingerprint) {
+                if (m_equal(key, m_values[bucket->m_value_idx].first)) {
+                    return {begin() + static_cast<difference_type>(bucket->m_value_idx), false};
+                }
+            } else if (dist_and_fingerprint > bucket->m_dist_and_fingerprint) {
+                return do_place_element(dist_and_fingerprint, bucket_idx, std::forward<K>(key), std::forward<Args>(args)...);
             }
-            dist_and_fingerprint += BUCKET_DIST_INC;
-            bucket = next(bucket);
+            dist_and_fingerprint = dist_inc(dist_and_fingerprint);
+            bucket_idx = next(bucket_idx);
         }
-
-        // emplace the new value. If that throws an exception, no harm done; index is still in a valid state
-        m_values.emplace_back(std::piecewise_construct,
-                              std::forward_as_tuple(std::forward<K>(key)),
-                              std::forward_as_tuple(std::forward<Args>(args)...));
-
-        // place element and shift up until we find an empty spot
-        uint32_t value_idx = static_cast<uint32_t>(m_values.size()) - 1;
-        place_and_shift_up({dist_and_fingerprint, value_idx}, bucket);
-        return {begin() + value_idx, true};
     }
 
     template <typename K>
     auto do_find(K const& key) -> iterator {
-        if (empty()) {
+        if (ANKERL_UNORDERED_DENSE_UNLIKELY(empty())) {
             return end();
         }
*** OUTPUT TRUNCATED, 744 LINES SKIPPED ***