LAL  7.5.0.1-b72065a
LALCityHash.c
Go to the documentation of this file.
1 // Copyright (c) 2011 Google, Inc.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 //
21 // CityHash, by Geoff Pike and Jyrki Alakuijala
22 //
23 // This file provides CityHash64() and related functions.
24 //
25 // It's probably possible to create even faster hash functions by
26 // writing a program that systematically explores some of the space of
27 // possible hash functions, by using SIMD instructions, or by
28 // compromising on hash quality.
29 
30 // Converted to C by John Veitch
31 
32 #include <config.h>
33 #include <lal/LALHashFunc.h>
34 
35 #include <stdlib.h> // for size_t.
36 #include <stdint.h>
37 #include <string.h> // for memcpy and memset
38 
39 typedef struct tagUINT16 {UINT8 first,second;} UINT16;
40 
41 static inline UINT8 Uint128Low64(const UINT16 *x) { return x->first; }
42 static inline UINT8 Uint128High64(const UINT16 *x) { return x->second; }
43 
44 /** Hash 128 input bits down to 64 bits of output.
45  * This is intended to be a reasonably good hash function.
46  */
47 static inline UINT8 Hash128to64(const UINT16 *x) {
48  // Murmur-inspired hashing.
49  const UINT8 kMul = 0x9ddfea08eb382d69ULL;
50  UINT8 a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
51  a ^= (a >> 47);
52  UINT8 b = (Uint128High64(x) ^ a) * kMul;
53  b ^= (b >> 47);
54  b *= kMul;
55  return b;
56 }
57 
58 static UINT8 UNALIGNED_LOAD64(const char *p) {
59  UINT8 result;
60  memcpy(&result, p, sizeof(result));
61  return result;
62 }
63 
64 static UINT4 UNALIGNED_LOAD32(const char *p) {
65  UINT4 result;
66  memcpy(&result, p, sizeof(result));
67  return result;
68 }
69 
70 #if defined(_WIN32) && !defined(__CYGWIN__)
71 
72 #include <stdlib.h>
73 #define bswap_32(x) _byteswap_ulong(x)
74 #define bswap_64(x) _byteswap_UINT8(x)
75 
76 #elif defined(__APPLE__)
77 
78 // Mac OS X / Darwin features
79 #include <libkern/OSByteOrder.h>
80 #define bswap_32(x) OSSwapInt32(x)
81 #define bswap_64(x) OSSwapInt64(x)
82 
83 #elif defined(__sun) || defined(sun)
84 
85 #include <sys/byteorder.h>
86 #define bswap_32(x) BSWAP_32(x)
87 #define bswap_64(x) BSWAP_64(x)
88 
89 #elif defined(__FreeBSD__)
90 
91 #include <sys/endian.h>
92 #define bswap_32(x) bswap32(x)
93 #define bswap_64(x) bswap64(x)
94 
95 #elif defined(__OpenBSD__)
96 
97 #include <sys/types.h>
98 #define bswap_32(x) swap32(x)
99 #define bswap_64(x) swap64(x)
100 
101 #elif defined(__NetBSD__)
102 
103 #include <sys/types.h>
104 #include <machine/bswap.h>
105 #if defined(__BSWAP_RENAME) && !defined(__bswap_32)
106 #define bswap_32(x) bswap32(x)
107 #define bswap_64(x) bswap64(x)
108 #endif
109 
110 #else
111 
112 #include <byteswap.h>
113 
114 #endif
115 
116 #ifdef WORDS_BIGENDIAN
117 #define UINT4_in_expected_order(x) (bswap_32(x))
118 #define UINT8_in_expected_order(x) (bswap_64(x))
119 #else
120 #define UINT4_in_expected_order(x) (x)
121 #define UINT8_in_expected_order(x) (x)
122 #endif
123 
124 #if !defined(LIKELY)
125 #if HAVE_BUILTIN_EXPECT
126 #define LIKELY(x) (__builtin_expect(!!(x), 1))
127 #else
128 #define LIKELY(x) (x)
129 #endif
130 #endif
131 
132 static UINT8 Fetch64(const char *p) {
134 }
135 
136 static UINT4 Fetch32(const char *p) {
138 }
139 
140 // Some primes between 2^63 and 2^64 for various uses.
141 static const UINT8 k0 = 0xc3a5c85c97cb3127ULL;
142 static const UINT8 k1 = 0xb492b66fbe98f273ULL;
143 static const UINT8 k2 = 0x9ae16a3b2f90404fULL;
144 
145 // Magic numbers for 32-bit hashing. Copied from Murmur3.
146 static const UINT4 c1 = 0xcc9e2d51;
147 static const UINT4 c2 = 0x1b873593;
148 
149 // A 32-bit to 32-bit integer hash copied from Murmur3.
150 static UINT4 fmix(UINT4 h)
151 {
152  h ^= h >> 16;
153  h *= 0x85ebca6b;
154  h ^= h >> 13;
155  h *= 0xc2b2ae35;
156  h ^= h >> 16;
157  return h;
158 }
159 
160 static UINT4 Rotate32(UINT4 val, int shift) {
161  // Avoid shifting by 32: doing so yields an undefined result.
162  return shift == 0 ? val : ((val >> shift) | (val << (32 - shift)));
163 }
164 
165 #define SWAP(a,b) {a^=b; b^=a; a^=b;}
166 
167 #undef PERMUTE3
168 #define PERMUTE3(a, b, c) do { SWAP(a, b); SWAP(a, c); } while (0)
169 
170 static UINT4 Mur(UINT4 a, UINT4 h) {
171  // Helper from Murmur3 for combining two 32-bit values.
172  a *= c1;
173  a = Rotate32(a, 17);
174  a *= c2;
175  h ^= a;
176  h = Rotate32(h, 19);
177  return h * 5 + 0xe6546b64;
178 }
179 
180 static UINT4 Hash32Len13to24(const char *s, size_t len) {
181  UINT4 a = Fetch32(s - 4 + (len >> 1));
182  UINT4 b = Fetch32(s + 4);
183  UINT4 c = Fetch32(s + len - 8);
184  UINT4 d = Fetch32(s + (len >> 1));
185  UINT4 e = Fetch32(s);
186  UINT4 f = Fetch32(s + len - 4);
187  UINT4 h = len;
188 
189  return fmix(Mur(f, Mur(e, Mur(d, Mur(c, Mur(b, Mur(a, h)))))));
190 }
191 
192 static UINT4 Hash32Len0to4(const char *s, size_t len) {
193  UINT4 b = 0;
194  UINT4 c = 9;
195  for (size_t i = 0; i < len; i++) {
196  signed char v = s[i];
197  b = b * c1 + v;
198  c ^= b;
199  }
200  return fmix(Mur(b, Mur(len, c)));
201 }
202 
203 static UINT4 Hash32Len5to12(const char *s, size_t len) {
204  UINT4 a = len, b = len * 5, c = 9, d = b;
205  a += Fetch32(s);
206  b += Fetch32(s + len - 4);
207  c += Fetch32(s + ((len >> 1) & 4));
208  return fmix(Mur(c, Mur(b, Mur(a, d))));
209 }
210 
211 UINT4 XLALCityHash32(const char *s, size_t len) {
212  if (len <= 24) {
213  return len <= 12 ?
214  (len <= 4 ? Hash32Len0to4(s, len) : Hash32Len5to12(s, len)) :
215  Hash32Len13to24(s, len);
216  }
217 
218  // len > 24
219  UINT4 h = len, g = c1 * len, f = g;
220  UINT4 a0 = Rotate32(Fetch32(s + len - 4) * c1, 17) * c2;
221  UINT4 a1 = Rotate32(Fetch32(s + len - 8) * c1, 17) * c2;
222  UINT4 a2 = Rotate32(Fetch32(s + len - 16) * c1, 17) * c2;
223  UINT4 a3 = Rotate32(Fetch32(s + len - 12) * c1, 17) * c2;
224  UINT4 a4 = Rotate32(Fetch32(s + len - 20) * c1, 17) * c2;
225  h ^= a0;
226  h = Rotate32(h, 19);
227  h = h * 5 + 0xe6546b64;
228  h ^= a2;
229  h = Rotate32(h, 19);
230  h = h * 5 + 0xe6546b64;
231  g ^= a1;
232  g = Rotate32(g, 19);
233  g = g * 5 + 0xe6546b64;
234  g ^= a3;
235  g = Rotate32(g, 19);
236  g = g * 5 + 0xe6546b64;
237  f += a4;
238  f = Rotate32(f, 19);
239  f = f * 5 + 0xe6546b64;
240  size_t iters = (len - 1) / 20;
241  do {
242  a0 = Rotate32(Fetch32(s) * c1, 17) * c2;
243  a1 = Fetch32(s + 4);
244  a2 = Rotate32(Fetch32(s + 8) * c1, 17) * c2;
245  a3 = Rotate32(Fetch32(s + 12) * c1, 17) * c2;
246  a4 = Fetch32(s + 16);
247  h ^= a0;
248  h = Rotate32(h, 18);
249  h = h * 5 + 0xe6546b64;
250  f += a1;
251  f = Rotate32(f, 19);
252  f = f * c1;
253  g += a2;
254  g = Rotate32(g, 18);
255  g = g * 5 + 0xe6546b64;
256  h ^= a3 + a1;
257  h = Rotate32(h, 19);
258  h = h * 5 + 0xe6546b64;
259  g ^= a4;
260  g = bswap_32(g) * 5;
261  h += a4 * 5;
262  h = bswap_32(h);
263  f += a0;
264  PERMUTE3(f, h, g);
265  s += 20;
266  } while (--iters != 0);
267  g = Rotate32(g, 11) * c1;
268  g = Rotate32(g, 17) * c1;
269  f = Rotate32(f, 11) * c1;
270  f = Rotate32(f, 17) * c1;
271  h = Rotate32(h + g, 19);
272  h = h * 5 + 0xe6546b64;
273  h = Rotate32(h, 17) * c1;
274  h = Rotate32(h + f, 19);
275  h = h * 5 + 0xe6546b64;
276  h = Rotate32(h, 17) * c1;
277  return h;
278 }
279 
280 // Bitwise right rotate. Normally this will compile to a single
281 // instruction, especially if the shift is a manifest constant.
282 static UINT8 Rotate(UINT8 val, int shift) {
283  // Avoid shifting by 64: doing so yields an undefined result.
284  return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
285 }
286 
287 static UINT8 ShiftMix(UINT8 val) {
288  return val ^ (val >> 47);
289 }
290 
291 static UINT8 HashLen16(UINT8 u, UINT8 v) {
292  UINT16 uv={.first=u,.second=v};
293  return Hash128to64(&uv);
294 }
295 
296 static UINT8 HashLen16mul(UINT8 u, UINT8 v, UINT8 mul) {
297  // Murmur-inspired hashing.
298  UINT8 a = (u ^ v) * mul;
299  a ^= (a >> 47);
300  UINT8 b = (v ^ a) * mul;
301  b ^= (b >> 47);
302  b *= mul;
303  return b;
304 }
305 
306 static UINT8 HashLen0to16(const char *s, size_t len) {
307  if (len >= 8) {
308  UINT8 mul = k2 + len * 2;
309  UINT8 a = Fetch64(s) + k2;
310  UINT8 b = Fetch64(s + len - 8);
311  UINT8 c = Rotate(b, 37) * mul + a;
312  UINT8 d = (Rotate(a, 25) + b) * mul;
313  return HashLen16mul(c, d, mul);
314  }
315  if (len >= 4) {
316  UINT8 mul = k2 + len * 2;
317  UINT8 a = Fetch32(s);
318  return HashLen16mul(len + (a << 3), Fetch32(s + len - 4), mul);
319  }
320  if (len > 0) {
321  UCHAR a = s[0];
322  UCHAR b = s[len >> 1];
323  UCHAR c = s[len - 1];
324  UINT4 y = (UINT4)(a) + ((UINT4)(b) << 8);
325  UINT4 z = len + ((UINT4)(c) << 2);
326  return ShiftMix(y * k2 ^ z * k0) * k2;
327  }
328  return k2;
329 }
330 
331 // This probably works well for 16-byte strings as well, but it may be overkill
332 // in that case.
333 static UINT8 HashLen17to32(const char *s, size_t len) {
334  UINT8 mul = k2 + len * 2;
335  UINT8 a = Fetch64(s) * k1;
336  UINT8 b = Fetch64(s + 8);
337  UINT8 c = Fetch64(s + len - 8) * mul;
338  UINT8 d = Fetch64(s + len - 16) * k2;
339  return HashLen16mul(Rotate(a + b, 43) + Rotate(c, 30) + d,
340  a + Rotate(b + k2, 18) + c, mul);
341 }
342 
343 // Return a 16-byte hash for 48 bytes. Quick and dirty.
344 // Callers do best to use "random-looking" values for a and b.
346 {
347  a += w;
348  b = Rotate(b + a + z, 21);
349  UINT8 c = a;
350  a += x;
351  a += y;
352  b += Rotate(a, 44);
353  UINT16 pair={.first=a+z,.second=b+c};
354  return pair;
355 }
356 
357 // Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty.
358 static UINT16 WeakHashLen32WithSeeds(const char* s, UINT8 a, UINT8 b) {
360  Fetch64(s + 8),
361  Fetch64(s + 16),
362  Fetch64(s + 24),
363  a,
364  b);
365 }
366 
367 // Return an 8-byte hash for 33 to 64 bytes.
368 static UINT8 HashLen33to64(const char *s, size_t len) {
369  UINT8 mul = k2 + len * 2;
370  UINT8 a = Fetch64(s) * k2;
371  UINT8 b = Fetch64(s + 8);
372  UINT8 c = Fetch64(s + len - 24);
373  UINT8 d = Fetch64(s + len - 32);
374  UINT8 e = Fetch64(s + 16) * k2;
375  UINT8 f = Fetch64(s + 24) * 9;
376  UINT8 g = Fetch64(s + len - 8);
377  UINT8 h = Fetch64(s + len - 16) * mul;
378  UINT8 u = Rotate(a + g, 43) + (Rotate(b, 30) + c) * 9;
379  UINT8 v = ((a + g) ^ d) + f + 1;
380  UINT8 w = bswap_64((u + v) * mul) + h;
381  UINT8 x = Rotate(e + f, 42) + c;
382  UINT8 y = (bswap_64((v + w) * mul) + g) * mul;
383  UINT8 z = e + f + c;
384  a = bswap_64((x + z) * mul + y) + b;
385  b = ShiftMix((z + a) * mul + d + h) * mul;
386  return b + x;
387 }
388 
389 UINT8 XLALCityHash64(const char *s, size_t len) {
390  if (len <= 32) {
391  if (len <= 16) {
392  return HashLen0to16(s, len);
393  } else {
394  return HashLen17to32(s, len);
395  }
396  } else if (len <= 64) {
397  return HashLen33to64(s, len);
398  }
399 
400  // For strings over 64 bytes we hash the end first, and then as we
401  // loop we keep 56 bytes of state: v, w, x, y, and z.
402  UINT8 x = Fetch64(s + len - 40);
403  UINT8 y = Fetch64(s + len - 16) + Fetch64(s + len - 56);
404  UINT8 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24));
405  UINT16 v = WeakHashLen32WithSeeds(s + len - 64, len, z);
406  UINT16 w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x);
407  x = x * k1 + Fetch64(s);
408 
409  // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
410  len = (len - 1) & ~((size_t)63);
411  do {
412  x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
413  y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
414  x ^= w.second;
415  y += v.first + Fetch64(s + 40);
416  z = Rotate(z + w.first, 33) * k1;
417  v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
418  w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
419  SWAP(z, x);
420  s += 64;
421  len -= 64;
422  } while (len != 0);
423  return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z,
424  HashLen16(v.second, w.second) + x);
425 }
426 
427 UINT8 XLALCityHash64WithSeed(const char *s, size_t len, UINT8 seed) {
428  return XLALCityHash64WithSeeds(s, len, k2, seed);
429 }
430 
431 UINT8 XLALCityHash64WithSeeds(const char *s, size_t len,
432  UINT8 seed0, UINT8 seed1) {
433  return HashLen16(XLALCityHash64(s, len) - seed0, seed1);
434 }
static UINT8 Uint128Low64(const UINT16 *x)
Definition: LALCityHash.c:41
static UINT16 WeakHashLen32WithSeeds(const char *s, UINT8 a, UINT8 b)
Definition: LALCityHash.c:358
static UINT4 UNALIGNED_LOAD32(const char *p)
Definition: LALCityHash.c:64
static UINT16 WeakHashLen32WithSeeds_5(UINT8 w, UINT8 x, UINT8 y, UINT8 z, UINT8 a, UINT8 b)
Definition: LALCityHash.c:345
static UINT8 Fetch64(const char *p)
Definition: LALCityHash.c:132
static const UINT8 k1
Definition: LALCityHash.c:142
static UINT8 Hash128to64(const UINT16 *x)
Hash 128 input bits down to 64 bits of output.
Definition: LALCityHash.c:47
static const UINT8 k0
Definition: LALCityHash.c:141
static UINT4 Hash32Len5to12(const char *s, size_t len)
Definition: LALCityHash.c:203
static const UINT4 c1
Definition: LALCityHash.c:146
static UINT8 HashLen16(UINT8 u, UINT8 v)
Definition: LALCityHash.c:291
static UINT8 UNALIGNED_LOAD64(const char *p)
Definition: LALCityHash.c:58
static const UINT4 c2
Definition: LALCityHash.c:147
#define PERMUTE3(a, b, c)
Definition: LALCityHash.c:168
static UINT4 Hash32Len13to24(const char *s, size_t len)
Definition: LALCityHash.c:180
static UINT8 HashLen17to32(const char *s, size_t len)
Definition: LALCityHash.c:333
static UINT8 HashLen33to64(const char *s, size_t len)
Definition: LALCityHash.c:368
static UINT4 Mur(UINT4 a, UINT4 h)
Definition: LALCityHash.c:170
static UINT4 Hash32Len0to4(const char *s, size_t len)
Definition: LALCityHash.c:192
static UINT4 Fetch32(const char *p)
Definition: LALCityHash.c:136
static UINT8 ShiftMix(UINT8 val)
Definition: LALCityHash.c:287
static const UINT8 k2
Definition: LALCityHash.c:143
#define SWAP(a, b)
Definition: LALCityHash.c:165
static UINT8 Uint128High64(const UINT16 *x)
Definition: LALCityHash.c:42
#define UINT4_in_expected_order(x)
Definition: LALCityHash.c:120
static UINT8 HashLen0to16(const char *s, size_t len)
Definition: LALCityHash.c:306
#define UINT8_in_expected_order(x)
Definition: LALCityHash.c:121
static UINT4 fmix(UINT4 h)
Definition: LALCityHash.c:150
static UINT4 Rotate32(UINT4 val, int shift)
Definition: LALCityHash.c:160
static UINT8 Rotate(UINT8 val, int shift)
Definition: LALCityHash.c:282
static UINT8 HashLen16mul(UINT8 u, UINT8 v, UINT8 mul)
Definition: LALCityHash.c:296
static double f(double theta, double y, double xi)
Definition: XLALMarcumQ.c:258
unsigned char UCHAR
One-byte unsigned integer, see Headers LAL(Atomic)Datatypes.h for more details.
uint64_t UINT8
Eight-byte unsigned integer; on some platforms this is equivalent to unsigned long int instead.
uint32_t UINT4
Four-byte unsigned integer.
UINT8 XLALCityHash64WithSeeds(const char *s, size_t len, UINT8 seed0, UINT8 seed1)
Hash function for a byte array.
Definition: LALCityHash.c:431
UINT8 XLALCityHash64(const char *s, size_t len)
Hash function for a byte array.
Definition: LALCityHash.c:389
UINT4 XLALCityHash32(const char *s, size_t len)
Hash function for a byte array.
Definition: LALCityHash.c:211
UINT8 XLALCityHash64WithSeed(const char *s, size_t len, UINT8 seed)
Hash function for a byte array.
Definition: LALCityHash.c:427
static const INT4 a
Definition: Random.c:79
UINT8 first
Definition: LALCityHash.c:39
UINT8 second
Definition: LALCityHash.c:39