IOSS 2.0
Loading...
Searching...
No Matches
hopscotch_growth_policy.h
Go to the documentation of this file.
1/**
2 * MIT License
3 *
4 * Copyright (c) 2018, 2023 Thibaut Goetghebuer-Planchon <tessil@gmx.com>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#ifndef TSL_HOPSCOTCH_GROWTH_POLICY_H
25#define TSL_HOPSCOTCH_GROWTH_POLICY_H
26
27#include <algorithm>
28#include <array>
29#include <climits>
30#include <cmath>
31#include <cstddef>
32#include <cstdint>
33#include <iterator>
34#include <limits>
35#include <ratio>
36#include <stdexcept>
37
38/**
39 * Only activate tsl_hh_assert if TSL_DEBUG is defined.
40 * This way we avoid the performance hit when NDEBUG is not defined with assert
41 * as tsl_hh_assert is used a lot (people usually compile with "-O3" and not
42 * "-O3 -DNDEBUG").
43 */
44#ifdef TSL_DEBUG
45#define tsl_hh_assert(expr) assert(expr)
46#else
47#define tsl_hh_assert(expr) (static_cast<void>(0))
48#endif
49
50/**
51 * If exceptions are enabled, throw the exception passed in parameter, otherwise
52 * call std::terminate.
53 */
54#if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || \
55 (defined(_MSC_VER) && defined(_CPPUNWIND))) && \
56 !defined(TSL_NO_EXCEPTIONS)
57#define TSL_HH_THROW_OR_TERMINATE(ex, msg) throw ex(msg)
58#else
59#define TSL_HH_NO_EXCEPTIONS
60#ifdef NDEBUG
61#define TSL_HH_THROW_OR_TERMINATE(ex, msg) std::terminate()
62#else
63#include <iostream>
64#define TSL_HH_THROW_OR_TERMINATE(ex, msg) \
65 do { \
66 std::cerr << msg << std::endl; \
67 std::terminate(); \
68 } while (0)
69#endif
70#endif
71
72namespace tsl::hh {
73
74 /**
75 * Grow the hash table by a factor of GrowthFactor keeping the bucket count to a
76 * power of two. It allows the table to use a mask operation instead of a modulo
77 * operation to map a hash to a bucket.
78 *
79 * GrowthFactor must be a power of two >= 2.
80 */
81 template <std::size_t GrowthFactor> class power_of_two_growth_policy
82 {
83 public:
84 /**
85 * Called on the hash table creation and on rehash. The number of buckets for
86 * the table is passed in parameter. This number is a minimum, the policy may
87 * update this value with a higher value if needed (but not lower).
88 *
89 * If 0 is given, min_bucket_count_in_out must still be 0 after the policy
90 * creation and bucket_for_hash must always return 0 in this case.
91 */
92 explicit power_of_two_growth_policy(std::size_t &min_bucket_count_in_out)
93 {
94 if (min_bucket_count_in_out > max_bucket_count()) {
95 TSL_HH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
96 }
97
98 if (min_bucket_count_in_out > 0) {
99 min_bucket_count_in_out = round_up_to_power_of_two(min_bucket_count_in_out);
100 m_mask = min_bucket_count_in_out - 1;
101 }
102 else {
103 m_mask = 0;
104 }
105 }
106
107 /**
108 * Return the bucket [0, bucket_count()) to which the hash belongs.
109 * If bucket_count() is 0, it must always return 0.
110 */
111 std::size_t bucket_for_hash(std::size_t hash) const noexcept { return hash & m_mask; }
112
113 /**
114 * Return the bucket count to use when the bucket array grows on rehash.
115 */
116 std::size_t next_bucket_count() const
117 {
118 if ((m_mask + 1) > max_bucket_count() / GrowthFactor) {
119 TSL_HH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
120 }
121
122 return (m_mask + 1) * GrowthFactor;
123 }
124
125 /**
126 * Return the maximum number of buckets supported by the policy.
127 */
128 std::size_t max_bucket_count() const
129 {
130 // Largest power of two.
131 return (std::numeric_limits<std::size_t>::max() / 2) + 1;
132 }
133
134 /**
135 * Reset the growth policy as if it was created with a bucket count of 0.
136 * After a clear, the policy must always return 0 when bucket_for_hash is
137 * called.
138 */
139 void clear() noexcept { m_mask = 0; }
140
141 private:
142 static std::size_t round_up_to_power_of_two(std::size_t value)
143 {
144 if (is_power_of_two(value)) {
145 return value;
146 }
147
148 if (value == 0) {
149 return 1;
150 }
151
152 --value;
153 for (std::size_t i = 1; i < sizeof(std::size_t) * CHAR_BIT; i *= 2) {
154 value |= value >> i;
155 }
156
157 return value + 1;
158 }
159
160 static constexpr bool is_power_of_two(std::size_t value)
161 {
162 return value != 0 && (value & (value - 1)) == 0;
163 }
164
165 private:
166 static_assert(is_power_of_two(GrowthFactor) && GrowthFactor >= 2,
167 "GrowthFactor must be a power of two >= 2.");
168
169 std::size_t m_mask;
170 };
171
172 /**
173 * Grow the hash table by GrowthFactor::num / GrowthFactor::den and use a modulo
174 * to map a hash to a bucket. Slower but it can be useful if you want a slower
175 * growth.
176 */
177 template <class GrowthFactor = std::ratio<3, 2>> class mod_growth_policy
178 {
179 public:
180 explicit mod_growth_policy(std::size_t &min_bucket_count_in_out)
181 {
182 if (min_bucket_count_in_out > max_bucket_count()) {
183 TSL_HH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
184 }
185
186 if (min_bucket_count_in_out > 0) {
187 m_mod = min_bucket_count_in_out;
188 }
189 else {
190 m_mod = 1;
191 }
192 }
193
194 std::size_t bucket_for_hash(std::size_t hash) const noexcept { return hash % m_mod; }
195
196 std::size_t next_bucket_count() const
197 {
198 if (m_mod == max_bucket_count()) {
199 TSL_HH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
200 }
201
202 const double next_bucket_count = std::ceil(double(m_mod) * REHASH_SIZE_MULTIPLICATION_FACTOR);
203 if (!std::isnormal(next_bucket_count)) {
204 TSL_HH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
205 }
206
207 if (next_bucket_count > double(max_bucket_count())) {
208 return max_bucket_count();
209 }
210 else {
211 return std::size_t(next_bucket_count);
212 }
213 }
214
215 std::size_t max_bucket_count() const { return MAX_BUCKET_COUNT; }
216
217 void clear() noexcept { m_mod = 1; }
218
219 private:
220 static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR =
221 1.0 * GrowthFactor::num / GrowthFactor::den;
222 static const std::size_t MAX_BUCKET_COUNT = std::size_t(
223 double(std::numeric_limits<std::size_t>::max() / REHASH_SIZE_MULTIPLICATION_FACTOR));
224
225 static_assert(REHASH_SIZE_MULTIPLICATION_FACTOR >= 1.1, "Growth factor should be >= 1.1.");
226
227 std::size_t m_mod;
228 };
229
230 namespace detail {
231
232#if SIZE_MAX >= ULLONG_MAX
233#define TSL_HH_NB_PRIMES 51
234#elif SIZE_MAX >= ULONG_MAX
235#define TSL_HH_NB_PRIMES 40
236#else
237#define TSL_HH_NB_PRIMES 23
238#endif
239
240 static constexpr const std::array<std::size_t, TSL_HH_NB_PRIMES> PRIMES = {{
241 1u,
242 5u,
243 17u,
244 29u,
245 37u,
246 53u,
247 67u,
248 79u,
249 97u,
250 131u,
251 193u,
252 257u,
253 389u,
254 521u,
255 769u,
256 1031u,
257 1543u,
258 2053u,
259 3079u,
260 6151u,
261 12289u,
262 24593u,
263 49157u,
264#if SIZE_MAX >= ULONG_MAX
265 98317ul,
266 196613ul,
267 393241ul,
268 786433ul,
269 1572869ul,
270 3145739ul,
271 6291469ul,
272 12582917ul,
273 25165843ul,
274 50331653ul,
275 100663319ul,
276 201326611ul,
277 402653189ul,
278 805306457ul,
279 1610612741ul,
280 3221225473ul,
281 4294967291ul,
282#endif
283#if SIZE_MAX >= ULLONG_MAX
284 6442450939ull,
285 12884901893ull,
286 25769803751ull,
287 51539607551ull,
288 103079215111ull,
289 206158430209ull,
290 412316860441ull,
291 824633720831ull,
292 1649267441651ull,
293 3298534883309ull,
294 6597069766657ull,
295#endif
296 }};
297
298 template <unsigned int IPrime> static constexpr std::size_t mod(std::size_t hash)
299 {
300 return hash % PRIMES[IPrime];
301 }
302
303 // MOD_PRIME[iprime](hash) returns hash % PRIMES[iprime]. This table allows for
304 // faster modulo as the compiler can optimize the modulo code better with a
305 // constant known at the compilation.
306 static constexpr const std::array<std::size_t (*)(std::size_t), TSL_HH_NB_PRIMES> MOD_PRIME = {{
310#if SIZE_MAX >= ULONG_MAX
313#endif
314#if SIZE_MAX >= ULLONG_MAX
316 &mod<49>, &mod<50>,
317#endif
318 }};
319
320 } // namespace detail
321
322 /**
323 * Grow the hash table by using prime numbers as bucket count. Slower than
324 * tsl::hh::power_of_two_growth_policy in general but will probably distribute
325 * the values around better in the buckets with a poor hash function.
326 *
327 * To allow the compiler to optimize the modulo operation, a lookup table is
328 * used with constant primes numbers.
329 *
330 * With a switch the code would look like:
331 * \code
332 * switch(iprime) { // iprime is the current prime of the hash table
333 * case 0: hash % 5ul;
334 * break;
335 * case 1: hash % 17ul;
336 * break;
337 * case 2: hash % 29ul;
338 * break;
339 * ...
340 * }
341 * \endcode
342 *
343 * Due to the constant variable in the modulo the compiler is able to optimize
344 * the operation by a series of multiplications, substractions and shifts.
345 *
346 * The 'hash % 5' could become something like 'hash - (hash * 0xCCCCCCCD) >> 34)
347 * * 5' in a 64 bits environment.
348 */
350 {
351 public:
352 explicit prime_growth_policy(std::size_t &min_bucket_count_in_out)
353 {
354 auto it_prime =
355 std::lower_bound(detail::PRIMES.begin(), detail::PRIMES.end(), min_bucket_count_in_out);
356 if (it_prime == detail::PRIMES.end()) {
357 TSL_HH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
358 }
359
360 m_iprime = static_cast<unsigned int>(std::distance(detail::PRIMES.begin(), it_prime));
361 if (min_bucket_count_in_out > 0) {
362 min_bucket_count_in_out = *it_prime;
363 }
364 else {
365 min_bucket_count_in_out = 0;
366 }
367 }
368
369 std::size_t bucket_for_hash(std::size_t hash) const noexcept
370 {
371 return detail::MOD_PRIME[m_iprime](hash);
372 }
373
374 std::size_t next_bucket_count() const
375 {
376 if (m_iprime + 1 >= detail::PRIMES.size()) {
377 TSL_HH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
378 }
379
380 return detail::PRIMES[m_iprime + 1];
381 }
382
383 std::size_t max_bucket_count() const { return detail::PRIMES.back(); }
384
385 void clear() noexcept { m_iprime = 0; }
386
387 private:
388 unsigned int m_iprime;
389
390 static_assert(std::numeric_limits<decltype(m_iprime)>::max() >= detail::PRIMES.size(),
391 "The type of m_iprime is not big enough.");
392 };
393
394} // namespace tsl::hh
395
396#endif
Definition hopscotch_growth_policy.h:178
std::size_t bucket_for_hash(std::size_t hash) const noexcept
Definition hopscotch_growth_policy.h:194
std::size_t next_bucket_count() const
Definition hopscotch_growth_policy.h:196
std::size_t m_mod
Definition hopscotch_growth_policy.h:227
static const std::size_t MAX_BUCKET_COUNT
Definition hopscotch_growth_policy.h:222
std::size_t max_bucket_count() const
Definition hopscotch_growth_policy.h:215
void clear() noexcept
Definition hopscotch_growth_policy.h:217
static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR
Definition hopscotch_growth_policy.h:220
mod_growth_policy(std::size_t &min_bucket_count_in_out)
Definition hopscotch_growth_policy.h:180
Definition hopscotch_growth_policy.h:82
std::size_t bucket_for_hash(std::size_t hash) const noexcept
Definition hopscotch_growth_policy.h:111
void clear() noexcept
Definition hopscotch_growth_policy.h:139
std::size_t m_mask
Definition hopscotch_growth_policy.h:169
std::size_t max_bucket_count() const
Definition hopscotch_growth_policy.h:128
static std::size_t round_up_to_power_of_two(std::size_t value)
Definition hopscotch_growth_policy.h:142
power_of_two_growth_policy(std::size_t &min_bucket_count_in_out)
Definition hopscotch_growth_policy.h:92
static constexpr bool is_power_of_two(std::size_t value)
Definition hopscotch_growth_policy.h:160
std::size_t next_bucket_count() const
Definition hopscotch_growth_policy.h:116
Definition hopscotch_growth_policy.h:350
std::size_t next_bucket_count() const
Definition hopscotch_growth_policy.h:374
std::size_t max_bucket_count() const
Definition hopscotch_growth_policy.h:383
prime_growth_policy(std::size_t &min_bucket_count_in_out)
Definition hopscotch_growth_policy.h:352
void clear() noexcept
Definition hopscotch_growth_policy.h:385
unsigned int m_iprime
Definition hopscotch_growth_policy.h:388
std::size_t bucket_for_hash(std::size_t hash) const noexcept
Definition hopscotch_growth_policy.h:369
#define TSL_HH_NB_PRIMES
Definition hopscotch_growth_policy.h:233
#define TSL_HH_THROW_OR_TERMINATE(ex, msg)
Definition hopscotch_growth_policy.h:64
static constexpr const std::array< std::size_t(*)(std::size_t), TSL_HH_NB_PRIMES > MOD_PRIME
Definition hopscotch_growth_policy.h:306
static constexpr std::size_t mod(std::size_t hash)
Definition hopscotch_growth_policy.h:298
static constexpr const std::array< std::size_t, TSL_HH_NB_PRIMES > PRIMES
Definition hopscotch_growth_policy.h:240
Definition hopscotch_growth_policy.h:72