IOSS 2.0
Loading...
Searching...
No Matches
robin_growth_policy.h
Go to the documentation of this file.
1/**
2 * MIT License
3 *
4 * Copyright (c) 2017, 2023 Thibaut Goetghebuer-Planchon <tessil@gmx.com>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#ifndef TSL_ROBIN_GROWTH_POLICY_H
25#define TSL_ROBIN_GROWTH_POLICY_H
26
27#include <algorithm>
28#include <array>
29#include <climits>
30#include <cmath>
31#include <cstddef>
32#include <cstdint>
33#include <iterator>
34#include <limits>
35#include <ratio>
36#include <stdexcept>
37
38#ifdef TSL_DEBUG
39#define tsl_rh_assert(expr) assert(expr)
40#else
41#define tsl_rh_assert(expr) (static_cast<void>(0))
42#endif
43
44/**
45 * If exceptions are enabled, throw the exception passed in parameter, otherwise
46 * call std::terminate.
47 */
48#if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || \
49 (defined(_MSC_VER) && defined(_CPPUNWIND))) && \
50 !defined(TSL_NO_EXCEPTIONS)
51#define TSL_RH_THROW_OR_TERMINATE(ex, msg) throw ex(msg)
52#else
53#define TSL_RH_NO_EXCEPTIONS
54#ifdef TSL_DEBUG
55#include <iostream>
56#define TSL_RH_THROW_OR_TERMINATE(ex, msg) \
57 do { \
58 std::cerr << msg << std::endl; \
59 std::terminate(); \
60 } while (0)
61#else
62#define TSL_RH_THROW_OR_TERMINATE(ex, msg) std::terminate()
63#endif
64#endif
65
66#if defined(__GNUC__) || defined(__clang__)
67#define TSL_RH_LIKELY(exp) (__builtin_expect(!!(exp), true))
68#else
69#define TSL_RH_LIKELY(exp) (exp)
70#endif
71
72#define TSL_RH_UNUSED(x) static_cast<void>(x)
73
74namespace tsl::rh {
75
76 /**
77 * Grow the hash table by a factor of GrowthFactor keeping the bucket count to a
78 * power of two. It allows the table to use a mask operation instead of a modulo
79 * operation to map a hash to a bucket.
80 *
81 * GrowthFactor must be a power of two >= 2.
82 */
83 template <std::size_t GrowthFactor> class power_of_two_growth_policy
84 {
85 public:
86 /**
87 * Called on the hash table creation and on rehash. The number of buckets for
88 * the table is passed in parameter. This number is a minimum, the policy may
89 * update this value with a higher value if needed (but not lower).
90 *
91 * If 0 is given, min_bucket_count_in_out must still be 0 after the policy
92 * creation and bucket_for_hash must always return 0 in this case.
93 */
94 explicit power_of_two_growth_policy(std::size_t &min_bucket_count_in_out)
95 {
96 if (min_bucket_count_in_out > max_bucket_count()) {
97 TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
98 }
99
100 if (min_bucket_count_in_out > 0) {
101 min_bucket_count_in_out = round_up_to_power_of_two(min_bucket_count_in_out);
102 m_mask = min_bucket_count_in_out - 1;
103 }
104 else {
105 m_mask = 0;
106 }
107 }
108
109 /**
110 * Return the bucket [0, bucket_count()) to which the hash belongs.
111 * If bucket_count() is 0, it must always return 0.
112 */
113 std::size_t bucket_for_hash(std::size_t hash) const noexcept { return hash & m_mask; }
114
115 /**
116 * Return the number of buckets that should be used on next growth.
117 */
118 std::size_t next_bucket_count() const
119 {
120 if ((m_mask + 1) > max_bucket_count() / GrowthFactor) {
121 TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
122 }
123
124 return (m_mask + 1) * GrowthFactor;
125 }
126
127 /**
128 * Return the maximum number of buckets supported by the policy.
129 */
130 std::size_t max_bucket_count() const
131 {
132 // Largest power of two.
133 return (std::numeric_limits<std::size_t>::max() / 2) + 1;
134 }
135
136 /**
137 * Reset the growth policy as if it was created with a bucket count of 0.
138 * After a clear, the policy must always return 0 when bucket_for_hash is
139 * called.
140 */
141 void clear() noexcept { m_mask = 0; }
142
143 private:
144 static std::size_t round_up_to_power_of_two(std::size_t value)
145 {
146 if (is_power_of_two(value)) {
147 return value;
148 }
149
150 if (value == 0) {
151 return 1;
152 }
153
154 --value;
155 for (std::size_t i = 1; i < sizeof(std::size_t) * CHAR_BIT; i *= 2) {
156 value |= value >> i;
157 }
158
159 return value + 1;
160 }
161
162 static constexpr bool is_power_of_two(std::size_t value)
163 {
164 return value != 0 && (value & (value - 1)) == 0;
165 }
166
167 protected:
168 static_assert(is_power_of_two(GrowthFactor) && GrowthFactor >= 2,
169 "GrowthFactor must be a power of two >= 2.");
170
171 std::size_t m_mask;
172 };
173
174 /**
175 * Grow the hash table by GrowthFactor::num / GrowthFactor::den and use a modulo
176 * to map a hash to a bucket. Slower but it can be useful if you want a slower
177 * growth.
178 */
179 template <class GrowthFactor = std::ratio<3, 2>> class mod_growth_policy
180 {
181 public:
182 explicit mod_growth_policy(std::size_t &min_bucket_count_in_out)
183 {
184 if (min_bucket_count_in_out > max_bucket_count()) {
185 TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
186 }
187
188 if (min_bucket_count_in_out > 0) {
189 m_mod = min_bucket_count_in_out;
190 }
191 else {
192 m_mod = 1;
193 }
194 }
195
196 std::size_t bucket_for_hash(std::size_t hash) const noexcept { return hash % m_mod; }
197
198 std::size_t next_bucket_count() const
199 {
200 if (m_mod == max_bucket_count()) {
201 TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
202 }
203
204 const double next_bucket_count = std::ceil(double(m_mod) * REHASH_SIZE_MULTIPLICATION_FACTOR);
205 if (!std::isnormal(next_bucket_count)) {
206 TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
207 }
208
209 if (next_bucket_count > double(max_bucket_count())) {
210 return max_bucket_count();
211 }
212 else {
213 return std::size_t(next_bucket_count);
214 }
215 }
216
217 std::size_t max_bucket_count() const { return MAX_BUCKET_COUNT; }
218
219 void clear() noexcept { m_mod = 1; }
220
221 private:
222 static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR =
223 1.0 * GrowthFactor::num / GrowthFactor::den;
224 static const std::size_t MAX_BUCKET_COUNT = std::size_t(
225 double(std::numeric_limits<std::size_t>::max() / REHASH_SIZE_MULTIPLICATION_FACTOR));
226
227 static_assert(REHASH_SIZE_MULTIPLICATION_FACTOR >= 1.1, "Growth factor should be >= 1.1.");
228
229 std::size_t m_mod;
230 };
231
232 namespace detail {
233
234#if SIZE_MAX >= ULLONG_MAX
235#define TSL_RH_NB_PRIMES 51
236#elif SIZE_MAX >= ULONG_MAX
237#define TSL_RH_NB_PRIMES 40
238#else
239#define TSL_RH_NB_PRIMES 23
240#endif
241
242 static constexpr const std::array<std::size_t, TSL_RH_NB_PRIMES> PRIMES = {{
243 1u,
244 5u,
245 17u,
246 29u,
247 37u,
248 53u,
249 67u,
250 79u,
251 97u,
252 131u,
253 193u,
254 257u,
255 389u,
256 521u,
257 769u,
258 1031u,
259 1543u,
260 2053u,
261 3079u,
262 6151u,
263 12289u,
264 24593u,
265 49157u,
266#if SIZE_MAX >= ULONG_MAX
267 98317ul,
268 196613ul,
269 393241ul,
270 786433ul,
271 1572869ul,
272 3145739ul,
273 6291469ul,
274 12582917ul,
275 25165843ul,
276 50331653ul,
277 100663319ul,
278 201326611ul,
279 402653189ul,
280 805306457ul,
281 1610612741ul,
282 3221225473ul,
283 4294967291ul,
284#endif
285#if SIZE_MAX >= ULLONG_MAX
286 6442450939ull,
287 12884901893ull,
288 25769803751ull,
289 51539607551ull,
290 103079215111ull,
291 206158430209ull,
292 412316860441ull,
293 824633720831ull,
294 1649267441651ull,
295 3298534883309ull,
296 6597069766657ull,
297#endif
298 }};
299
300 template <unsigned int IPrime> static constexpr std::size_t mod(std::size_t hash)
301 {
302 return hash % PRIMES[IPrime];
303 }
304
305 // MOD_PRIME[iprime](hash) returns hash % PRIMES[iprime]. This table allows for
306 // faster modulo as the compiler can optimize the modulo code better with a
307 // constant known at the compilation.
308 static constexpr const std::array<std::size_t (*)(std::size_t), TSL_RH_NB_PRIMES> MOD_PRIME = {{
309 &mod<0>, &mod<1>, &mod<2>, &mod<3>, &mod<4>, &mod<5>, &mod<6>, &mod<7>, &mod<8>,
310 &mod<9>, &mod<10>, &mod<11>, &mod<12>, &mod<13>, &mod<14>, &mod<15>, &mod<16>, &mod<17>,
311 &mod<18>, &mod<19>, &mod<20>, &mod<21>, &mod<22>,
312#if SIZE_MAX >= ULONG_MAX
313 &mod<23>, &mod<24>, &mod<25>, &mod<26>, &mod<27>, &mod<28>, &mod<29>, &mod<30>, &mod<31>,
314 &mod<32>, &mod<33>, &mod<34>, &mod<35>, &mod<36>, &mod<37>, &mod<38>, &mod<39>,
315#endif
316#if SIZE_MAX >= ULLONG_MAX
317 &mod<40>, &mod<41>, &mod<42>, &mod<43>, &mod<44>, &mod<45>, &mod<46>, &mod<47>, &mod<48>,
318 &mod<49>, &mod<50>,
319#endif
320 }};
321
322 } // namespace detail
323
324 /**
325 * Grow the hash table by using prime numbers as bucket count. Slower than
326 * tsl::rh::power_of_two_growth_policy in general but will probably distribute
327 * the values around better in the buckets with a poor hash function.
328 *
329 * To allow the compiler to optimize the modulo operation, a lookup table is
330 * used with constant primes numbers.
331 *
332 * With a switch the code would look like:
333 * \code
334 * switch(iprime) { // iprime is the current prime of the hash table
335 * case 0: hash % 5ul;
336 * break;
337 * case 1: hash % 17ul;
338 * break;
339 * case 2: hash % 29ul;
340 * break;
341 * ...
342 * }
343 * \endcode
344 *
345 * Due to the constant variable in the modulo the compiler is able to optimize
346 * the operation by a series of multiplications, substractions and shifts.
347 *
348 * The 'hash % 5' could become something like 'hash - (hash * 0xCCCCCCCD) >> 34)
349 * * 5' in a 64 bits environment.
350 */
352 {
353 public:
354 explicit prime_growth_policy(std::size_t &min_bucket_count_in_out)
355 {
356 auto it_prime =
357 std::lower_bound(detail::PRIMES.begin(), detail::PRIMES.end(), min_bucket_count_in_out);
358 if (it_prime == detail::PRIMES.end()) {
359 TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
360 }
361
362 m_iprime = static_cast<unsigned int>(std::distance(detail::PRIMES.begin(), it_prime));
363 if (min_bucket_count_in_out > 0) {
364 min_bucket_count_in_out = *it_prime;
365 }
366 else {
367 min_bucket_count_in_out = 0;
368 }
369 }
370
371 std::size_t bucket_for_hash(std::size_t hash) const noexcept
372 {
373 return detail::MOD_PRIME[m_iprime](hash);
374 }
375
376 std::size_t next_bucket_count() const
377 {
378 if (m_iprime + 1 >= detail::PRIMES.size()) {
379 TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
380 }
381
382 return detail::PRIMES[m_iprime + 1];
383 }
384
385 std::size_t max_bucket_count() const { return detail::PRIMES.back(); }
386
387 void clear() noexcept { m_iprime = 0; }
388
389 private:
390 unsigned int m_iprime;
391
392 static_assert(std::numeric_limits<decltype(m_iprime)>::max() >= detail::PRIMES.size(),
393 "The type of m_iprime is not big enough.");
394 };
395
396} // namespace tsl::rh
397
398#endif
Definition robin_growth_policy.h:180
std::size_t m_mod
Definition robin_growth_policy.h:229
std::size_t next_bucket_count() const
Definition robin_growth_policy.h:198
std::size_t max_bucket_count() const
Definition robin_growth_policy.h:217
static const std::size_t MAX_BUCKET_COUNT
Definition robin_growth_policy.h:224
void clear() noexcept
Definition robin_growth_policy.h:219
std::size_t bucket_for_hash(std::size_t hash) const noexcept
Definition robin_growth_policy.h:196
mod_growth_policy(std::size_t &min_bucket_count_in_out)
Definition robin_growth_policy.h:182
static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR
Definition robin_growth_policy.h:222
Definition robin_growth_policy.h:84
std::size_t max_bucket_count() const
Definition robin_growth_policy.h:130
static std::size_t round_up_to_power_of_two(std::size_t value)
Definition robin_growth_policy.h:144
power_of_two_growth_policy(std::size_t &min_bucket_count_in_out)
Definition robin_growth_policy.h:94
std::size_t m_mask
Definition robin_growth_policy.h:171
std::size_t next_bucket_count() const
Definition robin_growth_policy.h:118
static constexpr bool is_power_of_two(std::size_t value)
Definition robin_growth_policy.h:162
std::size_t bucket_for_hash(std::size_t hash) const noexcept
Definition robin_growth_policy.h:113
void clear() noexcept
Definition robin_growth_policy.h:141
Definition robin_growth_policy.h:352
std::size_t bucket_for_hash(std::size_t hash) const noexcept
Definition robin_growth_policy.h:371
std::size_t next_bucket_count() const
Definition robin_growth_policy.h:376
unsigned int m_iprime
Definition robin_growth_policy.h:390
prime_growth_policy(std::size_t &min_bucket_count_in_out)
Definition robin_growth_policy.h:354
std::size_t max_bucket_count() const
Definition robin_growth_policy.h:385
void clear() noexcept
Definition robin_growth_policy.h:387
static constexpr std::size_t mod(std::size_t hash)
Definition robin_growth_policy.h:300
static constexpr const std::array< std::size_t(*)(std::size_t), TSL_RH_NB_PRIMES > MOD_PRIME
Definition robin_growth_policy.h:308
static constexpr const std::array< std::size_t, TSL_RH_NB_PRIMES > PRIMES
Definition robin_growth_policy.h:242
Definition robin_growth_policy.h:74
#define TSL_RH_NB_PRIMES
Definition robin_growth_policy.h:235
#define TSL_RH_THROW_OR_TERMINATE(ex, msg)
Definition robin_growth_policy.h:62