// jasl_murmurhash3.hpp  (JASL 1.2.0, 2018-09-11)

// JASL: For more information see https://github.com/matepek/jasl
//
// Copyright (c) 2018 Mate Pek
//
// This code is licensed under the MIT License (MIT).

#pragma once

#include "jasl/jasl_internal/jasl_diagnostic.hpp"

// https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.h
// https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp

//-----------------------------------------------------------------------------
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.

// Note - The x86 and x64 versions do _not_ produce the same results, as the
// algorithms are optimized for their respective platforms. You can still
// compile and run any of them on any platform, but your performance with the
// non-native version will be less than optimal.

//-----------------------------------------------------------------------------
// Platform-specific functions and macros

// Microsoft Visual Studio

#if defined(_MSC_VER) && (_MSC_VER < 1600)

typedef unsigned char uint8_t;
typedef unsigned int uint32_t;
typedef unsigned __int64 uint64_t;

// Other compilers

#else // defined(_MSC_VER)

# include <stdint.h>

#endif // !defined(_MSC_VER)

// Microsoft Visual Studio

#if defined(_MSC_VER)

# define FORCE_INLINE __forceinline

# include <stdlib.h>

# define ROTL32(x, y) _rotl(x, y)
# define ROTL64(x, y) _rotl64(x, y)

# define BIG_CONSTANT(x) (x)

// Other compilers

#else // defined(_MSC_VER)

# define FORCE_INLINE inline __attribute__((always_inline))

namespace jasl {
namespace murmurhash3 {

inline uint32_t rotl32(uint32_t x, int8_t r) {
  return (x << r) | (x >> (32 - r));
}

inline uint64_t rotl64(uint64_t x, int8_t r) {
  return (x << r) | (x >> (64 - r));
}

} // namespace murmurhash3
} // namespace jasl

# define ROTL32(x, y) rotl32(x, y)
# define ROTL64(x, y) rotl64(x, y)

# define BIG_CONSTANT(x) (x##LLU)
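
// BIG_CONSTANT(x) pastes the LLU suffix onto its argument, e.g.
// BIG_CONSTANT(0xff51afd7ed558ccd) expands to 0xff51afd7ed558ccdLLU,
// guaranteeing an unsigned 64-bit literal; the MSVC branch above accepts
// the plain literal as-is.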

#endif // !defined(_MSC_VER)

namespace jasl {
namespace murmurhash3 {

JASL_DIAGNOSTIC_PUSH()
JASL_DIAGNOSTIC_IGNORED_CLANG(JASL_WARNING_OLD_STYLE_CAST)
JASL_DIAGNOSTIC_IGNORED_CLANG(JASL_WARNING_SIGN_CONVERSION)
JASL_DIAGNOSTIC_IGNORED_CLANG(JASL_WARNING_IMPLICIT_FALLTHROUGH)
JASL_DIAGNOSTIC_IGNORED_GCC_SINCE7(JASL_WARNING_IMPLICIT_FALLTHROUGH)
JASL_DIAGNOSTIC_IGNORED_CLANG(JASL_WARNING_CAST_ALIGN)

#if defined(__clang__)
# if __has_attribute(no_sanitize)
#  define JASL_USAN_IGNORE(type) __attribute__((no_sanitize(#type)))
# else
#  define JASL_USAN_IGNORE(type)
# endif
#else
# define JASL_USAN_IGNORE(type)
#endif

//-----------------------------------------------------------------------------
// Block read - if your platform needs to do endian-swapping or can only
// handle aligned reads, do the conversion here

FORCE_INLINE uint32_t getblock32(const uint32_t* p, int i) {
  return p[i];
}

FORCE_INLINE uint64_t getblock64(const uint64_t* p, int i) {
  return p[i];
}
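
// Illustrative sketch (not part of the original source): the hooks above are
// where a big-endian platform would byte-swap each block. A portable
// shift/mask version could look like this (getblock32_swapped is a
// hypothetical name):
//
//   FORCE_INLINE uint32_t getblock32_swapped(const uint32_t* p, int i) {
//     uint32_t v = p[i];
//     return (v >> 24) | ((v >> 8) & 0x0000ff00u) |
//            ((v << 8) & 0x00ff0000u) | (v << 24);
//   }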

//-----------------------------------------------------------------------------
// Finalization mix - force all bits of a hash block to avalanche

JASL_USAN_IGNORE(integer) FORCE_INLINE uint32_t fmix32(uint32_t h) {
  h ^= h >> 16;
  h *= 0x85ebca6b;
  h ^= h >> 13;
  h *= 0xc2b2ae35;
  h ^= h >> 16;

  return h;
}

//----------

JASL_USAN_IGNORE(integer) FORCE_INLINE uint64_t fmix64(uint64_t k) {
  k ^= k >> 33;
  k *= BIG_CONSTANT(0xff51afd7ed558ccd);
  k ^= k >> 33;
  k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
  k ^= k >> 33;

  return k;
}
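
// Illustrative sketch (not part of the original source): "avalanche" means a
// one-bit input change should flip about half of the output bits. A quick
// check of fmix64 (avalanche_demo and its constants are hypothetical):
//
//   #include <cstdio>
//
//   void avalanche_demo() {
//     uint64_t a = fmix64(0x1234ULL);
//     uint64_t b = fmix64(0x1234ULL ^ 1ULL);       // flip one input bit
//     int flipped = __builtin_popcountll(a ^ b);   // GCC/Clang builtin
//     std::printf("%d of 64 bits flipped\n", flipped);  // typically ~32
//   }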

//-----------------------------------------------------------------------------

JASL_USAN_IGNORE(integer)
inline void MurmurHash3_x86_128(const void* key,
                                const int len,
                                uint32_t seed,
                                void* out) {
  const uint8_t* data = (const uint8_t*)key;
  const int nblocks = len / 16;

  uint32_t h1 = seed;
  uint32_t h2 = seed;
  uint32_t h3 = seed;
  uint32_t h4 = seed;

  const uint32_t c1 = 0x239b961b;
  const uint32_t c2 = 0xab0e9789;
  const uint32_t c3 = 0x38b34ae5;
  const uint32_t c4 = 0xa1e38b93;

  //----------
  // body

  const uint32_t* blocks = (const uint32_t*)(data + nblocks * 16);

  for (int i = -nblocks; i; i++) {
    uint32_t k1 = getblock32(blocks, i * 4 + 0);
    uint32_t k2 = getblock32(blocks, i * 4 + 1);
    uint32_t k3 = getblock32(blocks, i * 4 + 2);
    uint32_t k4 = getblock32(blocks, i * 4 + 3);

    k1 *= c1;
    k1 = ROTL32(k1, 15);
    k1 *= c2;
    h1 ^= k1;

    h1 = ROTL32(h1, 19);
    h1 += h2;
    h1 = h1 * 5 + 0x561ccd1b;

    k2 *= c2;
    k2 = ROTL32(k2, 16);
    k2 *= c3;
    h2 ^= k2;

    h2 = ROTL32(h2, 17);
    h2 += h3;
    h2 = h2 * 5 + 0x0bcaa747;

    k3 *= c3;
    k3 = ROTL32(k3, 17);
    k3 *= c4;
    h3 ^= k3;

    h3 = ROTL32(h3, 15);
    h3 += h4;
    h3 = h3 * 5 + 0x96cd1c35;

    k4 *= c4;
    k4 = ROTL32(k4, 18);
    k4 *= c1;
    h4 ^= k4;

    h4 = ROTL32(h4, 13);
    h4 += h1;
    h4 = h4 * 5 + 0x32ac3b17;
  }

  //----------
  // tail

  const uint8_t* tail = (const uint8_t*)(data + nblocks * 16);

  uint32_t k1 = 0;
  uint32_t k2 = 0;
  uint32_t k3 = 0;
  uint32_t k4 = 0;

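  // Intentional fallthrough: each case mixes in one more tail byte, so a
  // tail of length n executes cases n, n-1, ..., 1. The implicit-fallthrough
  // warnings are suppressed above for exactly this pattern.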
  switch (len & 15) {
    case 15:
      k4 ^= tail[14] << 16;
    case 14:
      k4 ^= tail[13] << 8;
    case 13:
      k4 ^= tail[12] << 0;
      k4 *= c4;
      k4 = ROTL32(k4, 18);
      k4 *= c1;
      h4 ^= k4;

    case 12:
      k3 ^= tail[11] << 24;
    case 11:
      k3 ^= tail[10] << 16;
    case 10:
      k3 ^= tail[9] << 8;
    case 9:
      k3 ^= tail[8] << 0;
      k3 *= c3;
      k3 = ROTL32(k3, 17);
      k3 *= c4;
      h3 ^= k3;

    case 8:
      k2 ^= tail[7] << 24;
    case 7:
      k2 ^= tail[6] << 16;
    case 6:
      k2 ^= tail[5] << 8;
    case 5:
      k2 ^= tail[4] << 0;
      k2 *= c2;
      k2 = ROTL32(k2, 16);
      k2 *= c3;
      h2 ^= k2;

    case 4:
      k1 ^= tail[3] << 24;
    case 3:
      k1 ^= tail[2] << 16;
    case 2:
      k1 ^= tail[1] << 8;
    case 1:
      k1 ^= tail[0] << 0;
      k1 *= c1;
      k1 = ROTL32(k1, 15);
      k1 *= c2;
      h1 ^= k1;
  }

  //----------
  // finalization

  h1 ^= len;
  h2 ^= len;
  h3 ^= len;
  h4 ^= len;

  h1 += h2;
  h1 += h3;
  h1 += h4;
  h2 += h1;
  h3 += h1;
  h4 += h1;

  h1 = fmix32(h1);
  h2 = fmix32(h2);
  h3 = fmix32(h3);
  h4 = fmix32(h4);

  h1 += h2;
  h1 += h3;
  h1 += h4;
  h2 += h1;
  h3 += h1;
  h4 += h1;

  ((uint32_t*)out)[0] = h1;
  ((uint32_t*)out)[1] = h2;
  ((uint32_t*)out)[2] = h3;
  ((uint32_t*)out)[3] = h4;
}
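
// Illustrative usage sketch (not part of the original source): hashing a
// byte buffer with the x86 variant; the result is four 32-bit words
// (x86_128_demo and its inputs are hypothetical):
//
//   #include <cstdio>
//   #include <cstring>
//
//   void x86_128_demo() {
//     const char* msg = "hello world";
//     uint32_t out[4];
//     MurmurHash3_x86_128(msg, (int)std::strlen(msg), 0, out);
//     std::printf("%08x%08x%08x%08x\n", (unsigned)out[0], (unsigned)out[1],
//                 (unsigned)out[2], (unsigned)out[3]);
//   }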

//-----------------------------------------------------------------------------

JASL_USAN_IGNORE(integer)
inline void MurmurHash3_x64_128(const void* key,
                                const int len,
                                const uint32_t seed,
                                void* out) {
  const uint8_t* data = (const uint8_t*)key;
  const int nblocks = len / 16;

  uint64_t h1 = seed;
  uint64_t h2 = seed;

  const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
  const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);

  //----------
  // body

  const uint64_t* blocks = (const uint64_t*)(data);

  for (int i = 0; i < nblocks; i++) {
    uint64_t k1 = getblock64(blocks, i * 2 + 0);
    uint64_t k2 = getblock64(blocks, i * 2 + 1);

    k1 *= c1;
    k1 = ROTL64(k1, 31);
    k1 *= c2;
    h1 ^= k1;

    h1 = ROTL64(h1, 27);
    h1 += h2;
    h1 = h1 * 5 + 0x52dce729;

    k2 *= c2;
    k2 = ROTL64(k2, 33);
    k2 *= c1;
    h2 ^= k2;

    h2 = ROTL64(h2, 31);
    h2 += h1;
    h2 = h2 * 5 + 0x38495ab5;
  }

  //----------
  // tail

  const uint8_t* tail = (const uint8_t*)(data + nblocks * 16);

  uint64_t k1 = 0;
  uint64_t k2 = 0;

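  // Intentional fallthrough again: cases 15..9 accumulate the high block
  // (k2), cases 8..1 the low block (k1); see the suppressed warnings above.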
  switch (len & 15) {
    case 15:
      k2 ^= ((uint64_t)tail[14]) << 48;
    case 14:
      k2 ^= ((uint64_t)tail[13]) << 40;
    case 13:
      k2 ^= ((uint64_t)tail[12]) << 32;
    case 12:
      k2 ^= ((uint64_t)tail[11]) << 24;
    case 11:
      k2 ^= ((uint64_t)tail[10]) << 16;
    case 10:
      k2 ^= ((uint64_t)tail[9]) << 8;
    case 9:
      k2 ^= ((uint64_t)tail[8]) << 0;
      k2 *= c2;
      k2 = ROTL64(k2, 33);
      k2 *= c1;
      h2 ^= k2;

    case 8:
      k1 ^= ((uint64_t)tail[7]) << 56;
    case 7:
      k1 ^= ((uint64_t)tail[6]) << 48;
    case 6:
      k1 ^= ((uint64_t)tail[5]) << 40;
    case 5:
      k1 ^= ((uint64_t)tail[4]) << 32;
    case 4:
      k1 ^= ((uint64_t)tail[3]) << 24;
    case 3:
      k1 ^= ((uint64_t)tail[2]) << 16;
    case 2:
      k1 ^= ((uint64_t)tail[1]) << 8;
    case 1:
      k1 ^= ((uint64_t)tail[0]) << 0;
      k1 *= c1;
      k1 = ROTL64(k1, 31);
      k1 *= c2;
      h1 ^= k1;
  }

  //----------
  // finalization

  h1 ^= len;
  h2 ^= len;

  h1 += h2;
  h2 += h1;

  h1 = fmix64(h1);
  h2 = fmix64(h2);

  h1 += h2;
  h2 += h1;

  ((uint64_t*)out)[0] = h1;
  ((uint64_t*)out)[1] = h2;
}
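
// Illustrative usage sketch (not part of the original source): the x64
// variant fills two 64-bit words. Per the note at the top of this file, its
// output differs from MurmurHash3_x86_128 for the same input
// (x64_128_demo and its inputs are hypothetical):
//
//   #include <cstdio>
//   #include <cstring>
//
//   void x64_128_demo() {
//     const char* msg = "hello world";
//     uint64_t out[2];
//     MurmurHash3_x64_128(msg, (int)std::strlen(msg), 0, out);
//     std::printf("%016llx%016llx\n",
//                 (unsigned long long)out[0], (unsigned long long)out[1]);
//   }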

JASL_DIAGNOSTIC_POP()

} // namespace murmurhash3
} // namespace jasl

//-----------------------------------------------------------------------------