Loading [MathJax]/extensions/TeX/AMSsymbols.js
LAL 7.7.0.1-da3b9d3
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Modules Pages
LALCityHash.c
Go to the documentation of this file.
1// Copyright (c) 2011 Google, Inc.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20//
21// CityHash, by Geoff Pike and Jyrki Alakuijala
22//
23// This file provides CityHash64() and related functions.
24//
25// It's probably possible to create even faster hash functions by
26// writing a program that systematically explores some of the space of
27// possible hash functions, by using SIMD instructions, or by
28// compromising on hash quality.
29
30// Converted to C by John Veitch
31
32#include <config.h>
33#include <lal/LALHashFunc.h>
34
35#include <stdlib.h> // for size_t.
36#include <stdint.h>
37#include <string.h> // for memcpy and memset
38
39typedef struct tagUINT16 {UINT8 first,second;} UINT16;
40
41static inline UINT8 Uint128Low64(const UINT16 *x) { return x->first; }
42static inline UINT8 Uint128High64(const UINT16 *x) { return x->second; }
43
44/** Hash 128 input bits down to 64 bits of output.
45 * This is intended to be a reasonably good hash function.
46 */
47static inline UINT8 Hash128to64(const UINT16 *x) {
48 // Murmur-inspired hashing.
49 const UINT8 kMul = 0x9ddfea08eb382d69ULL;
50 UINT8 a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
51 a ^= (a >> 47);
52 UINT8 b = (Uint128High64(x) ^ a) * kMul;
53 b ^= (b >> 47);
54 b *= kMul;
55 return b;
56}
57
58static UINT8 UNALIGNED_LOAD64(const char *p) {
59 UINT8 result;
60 memcpy(&result, p, sizeof(result));
61 return result;
62}
63
64static UINT4 UNALIGNED_LOAD32(const char *p) {
65 UINT4 result;
66 memcpy(&result, p, sizeof(result));
67 return result;
68}
69
70#if defined(_WIN32) && !defined(__CYGWIN__)
71
72#include <stdlib.h>
73#define bswap_32(x) _byteswap_ulong(x)
74#define bswap_64(x) _byteswap_UINT8(x)
75
76#elif defined(__APPLE__)
77
78// Mac OS X / Darwin features
79#include <libkern/OSByteOrder.h>
80#define bswap_32(x) OSSwapInt32(x)
81#define bswap_64(x) OSSwapInt64(x)
82
83#elif defined(__sun) || defined(sun)
84
85#include <sys/byteorder.h>
86#define bswap_32(x) BSWAP_32(x)
87#define bswap_64(x) BSWAP_64(x)
88
89#elif defined(__FreeBSD__)
90
91#include <sys/endian.h>
92#define bswap_32(x) bswap32(x)
93#define bswap_64(x) bswap64(x)
94
95#elif defined(__OpenBSD__)
96
97#include <sys/types.h>
98#define bswap_32(x) swap32(x)
99#define bswap_64(x) swap64(x)
100
101#elif defined(__NetBSD__)
102
103#include <sys/types.h>
104#include <machine/bswap.h>
105#if defined(__BSWAP_RENAME) && !defined(__bswap_32)
106#define bswap_32(x) bswap32(x)
107#define bswap_64(x) bswap64(x)
108#endif
109
110#else
111
112#include <byteswap.h>
113
114#endif
115
116#ifdef WORDS_BIGENDIAN
117#define UINT4_in_expected_order(x) (bswap_32(x))
118#define UINT8_in_expected_order(x) (bswap_64(x))
119#else
120#define UINT4_in_expected_order(x) (x)
121#define UINT8_in_expected_order(x) (x)
122#endif
123
124#if !defined(LIKELY)
125#if HAVE_BUILTIN_EXPECT
126#define LIKELY(x) (__builtin_expect(!!(x), 1))
127#else
128#define LIKELY(x) (x)
129#endif
130#endif
131
132static UINT8 Fetch64(const char *p) {
134}
135
136static UINT4 Fetch32(const char *p) {
138}
139
140// Some primes between 2^63 and 2^64 for various uses.
141static const UINT8 k0 = 0xc3a5c85c97cb3127ULL;
142static const UINT8 k1 = 0xb492b66fbe98f273ULL;
143static const UINT8 k2 = 0x9ae16a3b2f90404fULL;
144
145// Magic numbers for 32-bit hashing. Copied from Murmur3.
146static const UINT4 c1 = 0xcc9e2d51;
147static const UINT4 c2 = 0x1b873593;
148
149// A 32-bit to 32-bit integer hash copied from Murmur3.
150static UINT4 fmix(UINT4 h)
151{
152 h ^= h >> 16;
153 h *= 0x85ebca6b;
154 h ^= h >> 13;
155 h *= 0xc2b2ae35;
156 h ^= h >> 16;
157 return h;
158}
159
160static UINT4 Rotate32(UINT4 val, int shift) {
161 // Avoid shifting by 32: doing so yields an undefined result.
162 return shift == 0 ? val : ((val >> shift) | (val << (32 - shift)));
163}
164
165#define SWAP(a,b) {a^=b; b^=a; a^=b;}
166
167#undef PERMUTE3
168#define PERMUTE3(a, b, c) do { SWAP(a, b); SWAP(a, c); } while (0)
169
170static UINT4 Mur(UINT4 a, UINT4 h) {
171 // Helper from Murmur3 for combining two 32-bit values.
172 a *= c1;
173 a = Rotate32(a, 17);
174 a *= c2;
175 h ^= a;
176 h = Rotate32(h, 19);
177 return h * 5 + 0xe6546b64;
178}
179
180static UINT4 Hash32Len13to24(const char *s, size_t len) {
181 UINT4 a = Fetch32(s - 4 + (len >> 1));
182 UINT4 b = Fetch32(s + 4);
183 UINT4 c = Fetch32(s + len - 8);
184 UINT4 d = Fetch32(s + (len >> 1));
185 UINT4 e = Fetch32(s);
186 UINT4 f = Fetch32(s + len - 4);
187 UINT4 h = len;
188
189 return fmix(Mur(f, Mur(e, Mur(d, Mur(c, Mur(b, Mur(a, h)))))));
190}
191
192static UINT4 Hash32Len0to4(const char *s, size_t len) {
193 UINT4 b = 0;
194 UINT4 c = 9;
195 for (size_t i = 0; i < len; i++) {
196 signed char v = s[i];
197 b = b * c1 + v;
198 c ^= b;
199 }
200 return fmix(Mur(b, Mur(len, c)));
201}
202
203static UINT4 Hash32Len5to12(const char *s, size_t len) {
204 UINT4 a = len, b = len * 5, c = 9, d = b;
205 a += Fetch32(s);
206 b += Fetch32(s + len - 4);
207 c += Fetch32(s + ((len >> 1) & 4));
208 return fmix(Mur(c, Mur(b, Mur(a, d))));
209}
210
211UINT4 XLALCityHash32(const char *s, size_t len) {
212 if (len <= 24) {
213 return len <= 12 ?
214 (len <= 4 ? Hash32Len0to4(s, len) : Hash32Len5to12(s, len)) :
215 Hash32Len13to24(s, len);
216 }
217
218 // len > 24
219 UINT4 h = len, g = c1 * len, f = g;
220 UINT4 a0 = Rotate32(Fetch32(s + len - 4) * c1, 17) * c2;
221 UINT4 a1 = Rotate32(Fetch32(s + len - 8) * c1, 17) * c2;
222 UINT4 a2 = Rotate32(Fetch32(s + len - 16) * c1, 17) * c2;
223 UINT4 a3 = Rotate32(Fetch32(s + len - 12) * c1, 17) * c2;
224 UINT4 a4 = Rotate32(Fetch32(s + len - 20) * c1, 17) * c2;
225 h ^= a0;
226 h = Rotate32(h, 19);
227 h = h * 5 + 0xe6546b64;
228 h ^= a2;
229 h = Rotate32(h, 19);
230 h = h * 5 + 0xe6546b64;
231 g ^= a1;
232 g = Rotate32(g, 19);
233 g = g * 5 + 0xe6546b64;
234 g ^= a3;
235 g = Rotate32(g, 19);
236 g = g * 5 + 0xe6546b64;
237 f += a4;
238 f = Rotate32(f, 19);
239 f = f * 5 + 0xe6546b64;
240 size_t iters = (len - 1) / 20;
241 do {
242 a0 = Rotate32(Fetch32(s) * c1, 17) * c2;
243 a1 = Fetch32(s + 4);
244 a2 = Rotate32(Fetch32(s + 8) * c1, 17) * c2;
245 a3 = Rotate32(Fetch32(s + 12) * c1, 17) * c2;
246 a4 = Fetch32(s + 16);
247 h ^= a0;
248 h = Rotate32(h, 18);
249 h = h * 5 + 0xe6546b64;
250 f += a1;
251 f = Rotate32(f, 19);
252 f = f * c1;
253 g += a2;
254 g = Rotate32(g, 18);
255 g = g * 5 + 0xe6546b64;
256 h ^= a3 + a1;
257 h = Rotate32(h, 19);
258 h = h * 5 + 0xe6546b64;
259 g ^= a4;
260 g = bswap_32(g) * 5;
261 h += a4 * 5;
262 h = bswap_32(h);
263 f += a0;
264 PERMUTE3(f, h, g);
265 s += 20;
266 } while (--iters != 0);
267 g = Rotate32(g, 11) * c1;
268 g = Rotate32(g, 17) * c1;
269 f = Rotate32(f, 11) * c1;
270 f = Rotate32(f, 17) * c1;
271 h = Rotate32(h + g, 19);
272 h = h * 5 + 0xe6546b64;
273 h = Rotate32(h, 17) * c1;
274 h = Rotate32(h + f, 19);
275 h = h * 5 + 0xe6546b64;
276 h = Rotate32(h, 17) * c1;
277 return h;
278}
279
280// Bitwise right rotate. Normally this will compile to a single
281// instruction, especially if the shift is a manifest constant.
282static UINT8 Rotate(UINT8 val, int shift) {
283 // Avoid shifting by 64: doing so yields an undefined result.
284 return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
285}
286
287static UINT8 ShiftMix(UINT8 val) {
288 return val ^ (val >> 47);
289}
290
292 UINT16 uv={.first=u,.second=v};
293 return Hash128to64(&uv);
294}
295
296static UINT8 HashLen16mul(UINT8 u, UINT8 v, UINT8 mul) {
297 // Murmur-inspired hashing.
298 UINT8 a = (u ^ v) * mul;
299 a ^= (a >> 47);
300 UINT8 b = (v ^ a) * mul;
301 b ^= (b >> 47);
302 b *= mul;
303 return b;
304}
305
306static UINT8 HashLen0to16(const char *s, size_t len) {
307 if (len >= 8) {
308 UINT8 mul = k2 + len * 2;
309 UINT8 a = Fetch64(s) + k2;
310 UINT8 b = Fetch64(s + len - 8);
311 UINT8 c = Rotate(b, 37) * mul + a;
312 UINT8 d = (Rotate(a, 25) + b) * mul;
313 return HashLen16mul(c, d, mul);
314 }
315 if (len >= 4) {
316 UINT8 mul = k2 + len * 2;
317 UINT8 a = Fetch32(s);
318 return HashLen16mul(len + (a << 3), Fetch32(s + len - 4), mul);
319 }
320 if (len > 0) {
321 UCHAR a = s[0];
322 UCHAR b = s[len >> 1];
323 UCHAR c = s[len - 1];
324 UINT4 y = (UINT4)(a) + ((UINT4)(b) << 8);
325 UINT4 z = len + ((UINT4)(c) << 2);
326 return ShiftMix(y * k2 ^ z * k0) * k2;
327 }
328 return k2;
329}
330
331// This probably works well for 16-byte strings as well, but it may be overkill
332// in that case.
333static UINT8 HashLen17to32(const char *s, size_t len) {
334 UINT8 mul = k2 + len * 2;
335 UINT8 a = Fetch64(s) * k1;
336 UINT8 b = Fetch64(s + 8);
337 UINT8 c = Fetch64(s + len - 8) * mul;
338 UINT8 d = Fetch64(s + len - 16) * k2;
339 return HashLen16mul(Rotate(a + b, 43) + Rotate(c, 30) + d,
340 a + Rotate(b + k2, 18) + c, mul);
341}
342
343// Return a 16-byte hash for 48 bytes. Quick and dirty.
344// Callers do best to use "random-looking" values for a and b.
346{
347 a += w;
348 b = Rotate(b + a + z, 21);
349 UINT8 c = a;
350 a += x;
351 a += y;
352 b += Rotate(a, 44);
353 UINT16 pair={.first=a+z,.second=b+c};
354 return pair;
355}
356
357// Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty.
358static UINT16 WeakHashLen32WithSeeds(const char* s, UINT8 a, UINT8 b) {
360 Fetch64(s + 8),
361 Fetch64(s + 16),
362 Fetch64(s + 24),
363 a,
364 b);
365}
366
367// Return an 8-byte hash for 33 to 64 bytes.
368static UINT8 HashLen33to64(const char *s, size_t len) {
369 UINT8 mul = k2 + len * 2;
370 UINT8 a = Fetch64(s) * k2;
371 UINT8 b = Fetch64(s + 8);
372 UINT8 c = Fetch64(s + len - 24);
373 UINT8 d = Fetch64(s + len - 32);
374 UINT8 e = Fetch64(s + 16) * k2;
375 UINT8 f = Fetch64(s + 24) * 9;
376 UINT8 g = Fetch64(s + len - 8);
377 UINT8 h = Fetch64(s + len - 16) * mul;
378 UINT8 u = Rotate(a + g, 43) + (Rotate(b, 30) + c) * 9;
379 UINT8 v = ((a + g) ^ d) + f + 1;
380 UINT8 w = bswap_64((u + v) * mul) + h;
381 UINT8 x = Rotate(e + f, 42) + c;
382 UINT8 y = (bswap_64((v + w) * mul) + g) * mul;
383 UINT8 z = e + f + c;
384 a = bswap_64((x + z) * mul + y) + b;
385 b = ShiftMix((z + a) * mul + d + h) * mul;
386 return b + x;
387}
388
389UINT8 XLALCityHash64(const char *s, size_t len) {
390 if (len <= 32) {
391 if (len <= 16) {
392 return HashLen0to16(s, len);
393 } else {
394 return HashLen17to32(s, len);
395 }
396 } else if (len <= 64) {
397 return HashLen33to64(s, len);
398 }
399
400 // For strings over 64 bytes we hash the end first, and then as we
401 // loop we keep 56 bytes of state: v, w, x, y, and z.
402 UINT8 x = Fetch64(s + len - 40);
403 UINT8 y = Fetch64(s + len - 16) + Fetch64(s + len - 56);
404 UINT8 z = HashLen16(Fetch64(s + len - 48) + len, Fetch64(s + len - 24));
405 UINT16 v = WeakHashLen32WithSeeds(s + len - 64, len, z);
406 UINT16 w = WeakHashLen32WithSeeds(s + len - 32, y + k1, x);
407 x = x * k1 + Fetch64(s);
408
409 // Decrease len to the nearest multiple of 64, and operate on 64-byte chunks.
410 len = (len - 1) & ~((size_t)63);
411 do {
412 x = Rotate(x + y + v.first + Fetch64(s + 8), 37) * k1;
413 y = Rotate(y + v.second + Fetch64(s + 48), 42) * k1;
414 x ^= w.second;
415 y += v.first + Fetch64(s + 40);
416 z = Rotate(z + w.first, 33) * k1;
417 v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
418 w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch64(s + 16));
419 SWAP(z, x);
420 s += 64;
421 len -= 64;
422 } while (len != 0);
423 return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z,
424 HashLen16(v.second, w.second) + x);
425}
426
427UINT8 XLALCityHash64WithSeed(const char *s, size_t len, UINT8 seed) {
428 return XLALCityHash64WithSeeds(s, len, k2, seed);
429}
430
431UINT8 XLALCityHash64WithSeeds(const char *s, size_t len,
432 UINT8 seed0, UINT8 seed1) {
433 return HashLen16(XLALCityHash64(s, len) - seed0, seed1);
434}
static UINT8 Uint128Low64(const UINT16 *x)
Definition: LALCityHash.c:41
static UINT16 WeakHashLen32WithSeeds(const char *s, UINT8 a, UINT8 b)
Definition: LALCityHash.c:358
static UINT4 UNALIGNED_LOAD32(const char *p)
Definition: LALCityHash.c:64
static UINT16 WeakHashLen32WithSeeds_5(UINT8 w, UINT8 x, UINT8 y, UINT8 z, UINT8 a, UINT8 b)
Definition: LALCityHash.c:345
static UINT8 Fetch64(const char *p)
Definition: LALCityHash.c:132
static const UINT8 k1
Definition: LALCityHash.c:142
static UINT8 Hash128to64(const UINT16 *x)
Hash 128 input bits down to 64 bits of output.
Definition: LALCityHash.c:47
static const UINT8 k0
Definition: LALCityHash.c:141
static UINT4 Hash32Len5to12(const char *s, size_t len)
Definition: LALCityHash.c:203
static const UINT4 c1
Definition: LALCityHash.c:146
static UINT8 HashLen16(UINT8 u, UINT8 v)
Definition: LALCityHash.c:291
static UINT8 UNALIGNED_LOAD64(const char *p)
Definition: LALCityHash.c:58
static const UINT4 c2
Definition: LALCityHash.c:147
#define PERMUTE3(a, b, c)
Definition: LALCityHash.c:168
static UINT4 Hash32Len13to24(const char *s, size_t len)
Definition: LALCityHash.c:180
static UINT8 HashLen17to32(const char *s, size_t len)
Definition: LALCityHash.c:333
static UINT8 HashLen33to64(const char *s, size_t len)
Definition: LALCityHash.c:368
static UINT4 Mur(UINT4 a, UINT4 h)
Definition: LALCityHash.c:170
static UINT4 Hash32Len0to4(const char *s, size_t len)
Definition: LALCityHash.c:192
static UINT4 Fetch32(const char *p)
Definition: LALCityHash.c:136
static UINT8 ShiftMix(UINT8 val)
Definition: LALCityHash.c:287
static const UINT8 k2
Definition: LALCityHash.c:143
#define SWAP(a, b)
Definition: LALCityHash.c:165
static UINT8 Uint128High64(const UINT16 *x)
Definition: LALCityHash.c:42
#define UINT4_in_expected_order(x)
Definition: LALCityHash.c:120
static UINT8 HashLen0to16(const char *s, size_t len)
Definition: LALCityHash.c:306
#define UINT8_in_expected_order(x)
Definition: LALCityHash.c:121
static UINT4 fmix(UINT4 h)
Definition: LALCityHash.c:150
static UINT4 Rotate32(UINT4 val, int shift)
Definition: LALCityHash.c:160
static UINT8 Rotate(UINT8 val, int shift)
Definition: LALCityHash.c:282
static UINT8 HashLen16mul(UINT8 u, UINT8 v, UINT8 mul)
Definition: LALCityHash.c:296
static double f(double theta, double y, double xi)
Definition: XLALMarcumQ.c:258
unsigned char UCHAR
One-byte unsigned integer, see Headers LAL(Atomic)Datatypes.h for more details.
uint64_t UINT8
Eight-byte unsigned integer; on some platforms this is equivalent to unsigned long int instead.
uint32_t UINT4
Four-byte unsigned integer.
UINT8 XLALCityHash64WithSeeds(const char *s, size_t len, UINT8 seed0, UINT8 seed1)
Hash function for a byte array.
Definition: LALCityHash.c:431
UINT8 XLALCityHash64(const char *s, size_t len)
Hash function for a byte array.
Definition: LALCityHash.c:389
UINT4 XLALCityHash32(const char *s, size_t len)
Hash function for a byte array.
Definition: LALCityHash.c:211
UINT8 XLALCityHash64WithSeed(const char *s, size_t len, UINT8 seed)
Hash function for a byte array.
Definition: LALCityHash.c:427
static const INT4 a
Definition: Random.c:79
UINT8 first
Definition: LALCityHash.c:39
UINT8 second
Definition: LALCityHash.c:39