Aprepro 5.0x
Loading...
Searching...
No Matches
robin_growth_policy.h
Go to the documentation of this file.
1/**
2 * MIT License
3 *
4 * Copyright (c) 2017 Thibaut Goetghebuer-Planchon <tessil@gmx.com>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24#ifndef TSL_ROBIN_GROWTH_POLICY_H
25#define TSL_ROBIN_GROWTH_POLICY_H
26
27#include <algorithm>
28#include <array>
29#include <climits>
30#include <cmath>
31#include <cstddef>
32#include <cstdint>
33#include <iterator>
34#include <limits>
35#include <ratio>
36#include <stdexcept>
37
38#ifdef TSL_DEBUG
39#define tsl_rh_assert(expr) assert(expr)
40#else
41#define tsl_rh_assert(expr) (static_cast<void>(0))
42#endif
43
44/**
45 * If exceptions are enabled, throw the exception passed in parameter, otherwise
46 * call std::terminate.
47 */
48#if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || \
49 (defined(_MSC_VER) && defined(_CPPUNWIND))) && \
50 !defined(TSL_NO_EXCEPTIONS)
51#define TSL_RH_THROW_OR_TERMINATE(ex, msg) throw ex(msg)
52#else
53#define TSL_RH_NO_EXCEPTIONS
54#ifdef NDEBUG
55#define TSL_RH_THROW_OR_TERMINATE(ex, msg) std::terminate()
56#else
57#include <iostream>
58#define TSL_RH_THROW_OR_TERMINATE(ex, msg) \
59 do { \
60 std::cerr << msg << std::endl; \
61 std::terminate(); \
62 } while (0)
63#endif
64#endif
65
66#if defined(__GNUC__) || defined(__clang__)
67#define TSL_RH_LIKELY(exp) (__builtin_expect(!!(exp), true))
68#else
69#define TSL_RH_LIKELY(exp) (exp)
70#endif
71
72#define TSL_RH_UNUSED(x) static_cast<void>(x)
73
74namespace tsl {
75 namespace rh {
76
77 /**
78 * Grow the hash table by a factor of GrowthFactor keeping the bucket count to a
79 * power of two. It allows the table to use a mask operation instead of a modulo
80 * operation to map a hash to a bucket.
81 *
82 * GrowthFactor must be a power of two >= 2.
83 */
84 template <std::size_t GrowthFactor> class power_of_two_growth_policy
85 {
86 public:
87 /**
88 * Called on the hash table creation and on rehash. The number of buckets for
89 * the table is passed in parameter. This number is a minimum, the policy may
90 * update this value with a higher value if needed (but not lower).
91 *
92 * If 0 is given, min_bucket_count_in_out must still be 0 after the policy
93 * creation and bucket_for_hash must always return 0 in this case.
94 */
95 explicit power_of_two_growth_policy(std::size_t &min_bucket_count_in_out)
96 {
97 if (min_bucket_count_in_out > max_bucket_count()) {
98 TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
99 }
100
101 if (min_bucket_count_in_out > 0) {
102 min_bucket_count_in_out = round_up_to_power_of_two(min_bucket_count_in_out);
103 m_mask = min_bucket_count_in_out - 1;
104 }
105 else {
106 m_mask = 0;
107 }
108 }
109
110 /**
111 * Return the bucket [0, bucket_count()) to which the hash belongs.
112 * If bucket_count() is 0, it must always return 0.
113 */
114 std::size_t bucket_for_hash(std::size_t hash) const noexcept { return hash & m_mask; }
115
116 /**
117 * Return the number of buckets that should be used on next growth.
118 */
119 std::size_t next_bucket_count() const
120 {
121 if ((m_mask + 1) > max_bucket_count() / GrowthFactor) {
122 TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
123 }
124
125 return (m_mask + 1) * GrowthFactor;
126 }
127
128 /**
129 * Return the maximum number of buckets supported by the policy.
130 */
131 std::size_t max_bucket_count() const
132 {
133 // Largest power of two.
134 return (std::numeric_limits<std::size_t>::max() / 2) + 1;
135 }
136
137 /**
138 * Reset the growth policy as if it was created with a bucket count of 0.
139 * After a clear, the policy must always return 0 when bucket_for_hash is
140 * called.
141 */
142 void clear() noexcept { m_mask = 0; }
143
144 private:
145 static std::size_t round_up_to_power_of_two(std::size_t value)
146 {
147 if (is_power_of_two(value)) {
148 return value;
149 }
150
151 if (value == 0) {
152 return 1;
153 }
154
155 --value;
156 for (std::size_t i = 1; i < sizeof(std::size_t) * CHAR_BIT; i *= 2) {
157 value |= value >> i;
158 }
159
160 return value + 1;
161 }
162
163 static constexpr bool is_power_of_two(std::size_t value)
164 {
165 return value != 0 && (value & (value - 1)) == 0;
166 }
167
168 protected:
169 static_assert(is_power_of_two(GrowthFactor) && GrowthFactor >= 2,
170 "GrowthFactor must be a power of two >= 2.");
171
172 std::size_t m_mask;
173 };
174
175 /**
176 * Grow the hash table by GrowthFactor::num / GrowthFactor::den and use a modulo
177 * to map a hash to a bucket. Slower but it can be useful if you want a slower
178 * growth.
179 */
180 template <class GrowthFactor = std::ratio<3, 2>> class mod_growth_policy
181 {
182 public:
183 explicit mod_growth_policy(std::size_t &min_bucket_count_in_out)
184 {
185 if (min_bucket_count_in_out > max_bucket_count()) {
186 TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
187 }
188
189 if (min_bucket_count_in_out > 0) {
190 m_mod = min_bucket_count_in_out;
191 }
192 else {
193 m_mod = 1;
194 }
195 }
196
197 std::size_t bucket_for_hash(std::size_t hash) const noexcept { return hash % m_mod; }
198
199 std::size_t next_bucket_count() const
200 {
201 if (m_mod == max_bucket_count()) {
202 TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
203 }
204
205 const double next_bucket_count =
206 std::ceil(double(m_mod) * REHASH_SIZE_MULTIPLICATION_FACTOR);
207 if (!std::isnormal(next_bucket_count)) {
208 TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
209 }
210
211 if (next_bucket_count > double(max_bucket_count())) {
212 return max_bucket_count();
213 }
214 else {
215 return std::size_t(next_bucket_count);
216 }
217 }
218
219 std::size_t max_bucket_count() const { return MAX_BUCKET_COUNT; }
220
221 void clear() noexcept { m_mod = 1; }
222
223 private:
224 static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR =
225 1.0 * GrowthFactor::num / GrowthFactor::den;
226 static const std::size_t MAX_BUCKET_COUNT = std::size_t(
227 double(std::numeric_limits<std::size_t>::max() / REHASH_SIZE_MULTIPLICATION_FACTOR));
228
229 static_assert(REHASH_SIZE_MULTIPLICATION_FACTOR >= 1.1, "Growth factor should be >= 1.1.");
230
231 std::size_t m_mod;
232 };
233
234 namespace detail {
235
236#if SIZE_MAX >= ULLONG_MAX
237#define TSL_RH_NB_PRIMES 51
238#elif SIZE_MAX >= ULONG_MAX
239#define TSL_RH_NB_PRIMES 40
240#else
241#define TSL_RH_NB_PRIMES 23
242#endif
243
244 static constexpr const std::array<std::size_t, TSL_RH_NB_PRIMES> PRIMES = {{
245 1u,
246 5u,
247 17u,
248 29u,
249 37u,
250 53u,
251 67u,
252 79u,
253 97u,
254 131u,
255 193u,
256 257u,
257 389u,
258 521u,
259 769u,
260 1031u,
261 1543u,
262 2053u,
263 3079u,
264 6151u,
265 12289u,
266 24593u,
267 49157u,
268#if SIZE_MAX >= ULONG_MAX
269 98317ul,
270 196613ul,
271 393241ul,
272 786433ul,
273 1572869ul,
274 3145739ul,
275 6291469ul,
276 12582917ul,
277 25165843ul,
278 50331653ul,
279 100663319ul,
280 201326611ul,
281 402653189ul,
282 805306457ul,
283 1610612741ul,
284 3221225473ul,
285 4294967291ul,
286#endif
287#if SIZE_MAX >= ULLONG_MAX
288 6442450939ull,
289 12884901893ull,
290 25769803751ull,
291 51539607551ull,
292 103079215111ull,
293 206158430209ull,
294 412316860441ull,
295 824633720831ull,
296 1649267441651ull,
297 3298534883309ull,
298 6597069766657ull,
299#endif
300 }};
301
302 template <unsigned int IPrime> static constexpr std::size_t mod(std::size_t hash)
303 {
304 return hash % PRIMES[IPrime];
305 }
306
307 // MOD_PRIME[iprime](hash) returns hash % PRIMES[iprime]. This table allows for
308 // faster modulo as the compiler can optimize the modulo code better with a
309 // constant known at the compilation.
310 static constexpr const std::array<std::size_t (*)(std::size_t), TSL_RH_NB_PRIMES> MOD_PRIME =
311 {{
315#if SIZE_MAX >= ULONG_MAX
318 &mod<39>,
319#endif
320#if SIZE_MAX >= ULLONG_MAX
323#endif
324 }};
325
326 } // namespace detail
327
328 /**
329 * Grow the hash table by using prime numbers as bucket count. Slower than
330 * tsl::rh::power_of_two_growth_policy in general but will probably distribute
331 * the values around better in the buckets with a poor hash function.
332 *
333 * To allow the compiler to optimize the modulo operation, a lookup table is
334 * used with constant primes numbers.
335 *
336 * With a switch the code would look like:
337 * \code
338 * switch(iprime) { // iprime is the current prime of the hash table
339 * case 0: hash % 5ul;
340 * break;
341 * case 1: hash % 17ul;
342 * break;
343 * case 2: hash % 29ul;
344 * break;
345 * ...
346 * }
347 * \endcode
348 *
349 * Due to the constant variable in the modulo the compiler is able to optimize
350 * the operation by a series of multiplications, substractions and shifts.
351 *
352 * The 'hash % 5' could become something like 'hash - (hash * 0xCCCCCCCD) >> 34)
353 * * 5' in a 64 bits environment.
354 */
356 {
357 public:
358 explicit prime_growth_policy(std::size_t &min_bucket_count_in_out)
359 {
360 auto it_prime =
361 std::lower_bound(detail::PRIMES.begin(), detail::PRIMES.end(), min_bucket_count_in_out);
362 if (it_prime == detail::PRIMES.end()) {
363 TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
364 }
365
366 m_iprime = static_cast<unsigned int>(std::distance(detail::PRIMES.begin(), it_prime));
367 if (min_bucket_count_in_out > 0) {
368 min_bucket_count_in_out = *it_prime;
369 }
370 else {
371 min_bucket_count_in_out = 0;
372 }
373 }
374
375 std::size_t bucket_for_hash(std::size_t hash) const noexcept
376 {
377 return detail::MOD_PRIME[m_iprime](hash);
378 }
379
380 std::size_t next_bucket_count() const
381 {
382 if (m_iprime + 1 >= detail::PRIMES.size()) {
383 TSL_RH_THROW_OR_TERMINATE(std::length_error, "The hash table exceeds its maximum size.");
384 }
385
386 return detail::PRIMES[m_iprime + 1];
387 }
388
389 std::size_t max_bucket_count() const { return detail::PRIMES.back(); }
390
391 void clear() noexcept { m_iprime = 0; }
392
393 private:
394 unsigned int m_iprime;
395
396 static_assert(std::numeric_limits<decltype(m_iprime)>::max() >= detail::PRIMES.size(),
397 "The type of m_iprime is not big enough.");
398 };
399
400 } // namespace rh
401} // namespace tsl
402
403#endif
#define max(x, y)
Definition apr_aprepro.cc:898
Definition robin_growth_policy.h:181
std::size_t m_mod
Definition robin_growth_policy.h:231
std::size_t next_bucket_count() const
Definition robin_growth_policy.h:199
std::size_t max_bucket_count() const
Definition robin_growth_policy.h:219
static const std::size_t MAX_BUCKET_COUNT
Definition robin_growth_policy.h:226
void clear() noexcept
Definition robin_growth_policy.h:221
std::size_t bucket_for_hash(std::size_t hash) const noexcept
Definition robin_growth_policy.h:197
mod_growth_policy(std::size_t &min_bucket_count_in_out)
Definition robin_growth_policy.h:183
static constexpr double REHASH_SIZE_MULTIPLICATION_FACTOR
Definition robin_growth_policy.h:224
Definition robin_growth_policy.h:85
std::size_t max_bucket_count() const
Definition robin_growth_policy.h:131
static std::size_t round_up_to_power_of_two(std::size_t value)
Definition robin_growth_policy.h:145
power_of_two_growth_policy(std::size_t &min_bucket_count_in_out)
Definition robin_growth_policy.h:95
std::size_t m_mask
Definition robin_growth_policy.h:172
std::size_t next_bucket_count() const
Definition robin_growth_policy.h:119
static constexpr bool is_power_of_two(std::size_t value)
Definition robin_growth_policy.h:163
std::size_t bucket_for_hash(std::size_t hash) const noexcept
Definition robin_growth_policy.h:114
void clear() noexcept
Definition robin_growth_policy.h:142
Definition robin_growth_policy.h:356
std::size_t bucket_for_hash(std::size_t hash) const noexcept
Definition robin_growth_policy.h:375
std::size_t next_bucket_count() const
Definition robin_growth_policy.h:380
unsigned int m_iprime
Definition robin_growth_policy.h:394
prime_growth_policy(std::size_t &min_bucket_count_in_out)
Definition robin_growth_policy.h:358
std::size_t max_bucket_count() const
Definition robin_growth_policy.h:389
void clear() noexcept
Definition robin_growth_policy.h:391
static constexpr std::size_t mod(std::size_t hash)
Definition robin_growth_policy.h:302
static constexpr const std::array< std::size_t(*)(std::size_t), TSL_RH_NB_PRIMES > MOD_PRIME
Definition robin_growth_policy.h:310
static constexpr const std::array< std::size_t, TSL_RH_NB_PRIMES > PRIMES
Definition robin_growth_policy.h:244
Definition robin_growth_policy.h:74
#define TSL_RH_NB_PRIMES
Definition robin_growth_policy.h:237
#define TSL_RH_THROW_OR_TERMINATE(ex, msg)
Definition robin_growth_policy.h:58