• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1From eb130493c8042280a01e03c28bb89bd5ae0c5d18 Mon Sep 17 00:00:00 2001
2From: Kaixi Hou <kaixih@nvidia.com>
3Date: Tue, 23 Mar 2021 12:49:18 -0700
4Subject: [PATCH] Add device modifiers for GPUs
5
6---
7 src/{farmhash.cc => farmhash_gpu.h} | 95 +++++++++++++++++++++++------
8 1 file changed, 75 insertions(+), 20 deletions(-)
9 rename src/{farmhash.cc => farmhash_gpu.h} (99%)
10
11diff --git a/src/farmhash.cc b/src/farmhash_gpu.h
12similarity index 99%
13rename from src/farmhash.cc
14rename to src/farmhash_gpu.h
15index cfd4a47..50994b6 100644
16--- a/src/farmhash.cc
17+++ b/src/farmhash_gpu.h
18@@ -20,6 +20,17 @@
19 //
20 // FarmHash, by Geoff Pike
21
22+#ifndef FARM_HASH_GPU_H_
23+#define FARM_HASH_GPU_H_
24+
25+#include <cstdint>
26+#include <string.h>   // for memcpy and memset
27+
28+#define NAMESPACE_FOR_HASH_FUNCTIONS_GPU util_gpu
29+#define DEVICE_MODIFIER __device__ __host__
30+
31+// We use DEVICE_MODIFIER to remove those code unused by GPUs.
32+#ifndef DEVICE_MODIFIER
33 #include "farmhash.h"
34 // FARMHASH ASSUMPTIONS: Modify as needed, or use -DFARMHASH_ASSUME_SSE42 etc.
35 // Note that if you use -DFARMHASH_ASSUME_SSE42 you likely need -msse42
36@@ -187,7 +198,14 @@
37 #define uint64_in_expected_order(x) (x)
38 #endif
39
40-namespace NAMESPACE_FOR_HASH_FUNCTIONS {
41+#endif // DEVICE_MODIFIER
42+
43+#define uint32_in_expected_order(x) (x)
44+#define uint64_in_expected_order(x) (x)
45+
46+#define STATIC_INLINE DEVICE_MODIFIER inline
47+
48+namespace NAMESPACE_FOR_HASH_FUNCTIONS_GPU {
49
50 STATIC_INLINE uint64_t Fetch64(const char *p) {
51   uint64_t result;
52@@ -201,6 +219,7 @@ STATIC_INLINE uint32_t Fetch32(const char *p) {
53   return uint32_in_expected_order(result);
54 }
55
56+#ifndef DEVICE_MODIFIER
57 STATIC_INLINE uint32_t Bswap32(uint32_t val) { return bswap_32(val); }
58 STATIC_INLINE uint64_t Bswap64(uint64_t val) { return bswap_64(val); }
59
60@@ -210,12 +229,14 @@ STATIC_INLINE uint32_t BasicRotate32(uint32_t val, int shift) {
61   // Avoid shifting by 32: doing so yields an undefined result.
62   return shift == 0 ? val : ((val >> shift) | (val << (32 - shift)));
63 }
64+#endif // DEVICE_MODIFIER
65
66 STATIC_INLINE uint64_t BasicRotate64(uint64_t val, int shift) {
67   // Avoid shifting by 64: doing so yields an undefined result.
68   return shift == 0 ? val : ((val >> shift) | (val << (64 - shift)));
69 }
70
71+#ifndef DEVICE_MODIFIER
72 #if defined(_WIN32) && defined(FARMHASH_ROTR)
73
74 STATIC_INLINE uint32_t Rotate32(uint32_t val, int shift) {
75@@ -240,12 +261,18 @@ STATIC_INLINE uint64_t Rotate64(uint64_t val, int shift) {
76 }
77
78 #endif
79+#endif // DEVICE_MODIFIER
80
81-}  // namespace NAMESPACE_FOR_HASH_FUNCTIONS
82+STATIC_INLINE uint64_t Rotate64(uint64_t val, int shift) {
83+  return BasicRotate64(val, shift);
84+}
85+
86+}  // namespace NAMESPACE_FOR_HASH_FUNCTIONS_GPU
87
88 // FARMHASH PORTABILITY LAYER: debug mode or max speed?
89 // One may use -DFARMHASH_DEBUG=1 or -DFARMHASH_DEBUG=0 to force the issue.
90
91+#ifndef DEVICE_MODIFIER
92 #if !defined(FARMHASH_DEBUG) && (!defined(NDEBUG) || defined(_DEBUG))
93 #define FARMHASH_DEBUG 1
94 #endif
95@@ -345,14 +372,21 @@ STATIC_INLINE __m128i Fetch128(const char* s) {
96
97 #undef PERMUTE3
98 #define PERMUTE3(a, b, c) do { std::swap(a, b); std::swap(a, c); } while (0)
99+#endif // DEVICE_MODIFIER
100+
101+struct Pair {
102+  uint64_t first;
103+  uint64_t second;
104+};
105
106-namespace NAMESPACE_FOR_HASH_FUNCTIONS {
107+namespace NAMESPACE_FOR_HASH_FUNCTIONS_GPU {
108
109 // Some primes between 2^63 and 2^64 for various uses.
110 static const uint64_t k0 = 0xc3a5c85c97cb3127ULL;
111 static const uint64_t k1 = 0xb492b66fbe98f273ULL;
112 static const uint64_t k2 = 0x9ae16a3b2f90404fULL;
113
114+#ifndef DEVICE_MODIFIER
115 // Magic numbers for 32-bit hashing.  Copied from Murmur3.
116 static const uint32_t c1 = 0xcc9e2d51;
117 static const uint32_t c2 = 0x1b873593;
118@@ -399,28 +433,34 @@ template <> uint128_t DebugTweak(uint128_t x) {
119   }
120   return x;
121 }
122+#endif // DEVICE_MODIFIER
123+}  // namespace NAMESPACE_FOR_HASH_FUNCTIONS_GPU
124
125-}  // namespace NAMESPACE_FOR_HASH_FUNCTIONS
126-
127+#ifndef DEVICE_MODIFIER
128 using namespace std;
129-using namespace NAMESPACE_FOR_HASH_FUNCTIONS;
130-namespace farmhashna {
131+#endif // DEVICE_MODIFIER
132+using namespace NAMESPACE_FOR_HASH_FUNCTIONS_GPU;
133+namespace farmhashna_gpu {
134 #undef Fetch
135 #define Fetch Fetch64
136
137 #undef Rotate
138 #define Rotate Rotate64
139
140+#ifndef DEVICE_MODIFIER
141 #undef Bswap
142 #define Bswap Bswap64
143+#endif // DEVICE_MODIFIER
144
145 STATIC_INLINE uint64_t ShiftMix(uint64_t val) {
146   return val ^ (val >> 47);
147 }
148
149+#ifndef DEVICE_MODIFIER
150 STATIC_INLINE uint64_t HashLen16(uint64_t u, uint64_t v) {
151   return Hash128to64(Uint128(u, v));
152 }
153+#endif // DEVICE_MODIFIER
154
155 STATIC_INLINE uint64_t HashLen16(uint64_t u, uint64_t v, uint64_t mul) {
156   // Murmur-inspired hashing.
157@@ -471,7 +511,7 @@ STATIC_INLINE uint64_t HashLen17to32(const char *s, size_t len) {
158
159 // Return a 16-byte hash for 48 bytes.  Quick and dirty.
160 // Callers do best to use "random-looking" values for a and b.
161-STATIC_INLINE pair<uint64_t, uint64_t> WeakHashLen32WithSeeds(
162+STATIC_INLINE Pair WeakHashLen32WithSeeds(
163     uint64_t w, uint64_t x, uint64_t y, uint64_t z, uint64_t a, uint64_t b) {
164   a += w;
165   b = Rotate(b + a + z, 21);
166@@ -479,11 +519,11 @@ STATIC_INLINE pair<uint64_t, uint64_t> WeakHashLen32WithSeeds(
167   a += x;
168   a += y;
169   b += Rotate(a, 44);
170-  return make_pair(a + z, b + c);
171+  return Pair{a + z, b + c};
172 }
173
174 // Return a 16-byte hash for s[0] ... s[31], a, and b.  Quick and dirty.
175-STATIC_INLINE pair<uint64_t, uint64_t> WeakHashLen32WithSeeds(
176+STATIC_INLINE Pair WeakHashLen32WithSeeds(
177     const char* s, uint64_t a, uint64_t b) {
178   return WeakHashLen32WithSeeds(Fetch(s),
179                                 Fetch(s + 8),
180@@ -510,7 +550,7 @@ STATIC_INLINE uint64_t HashLen33to64(const char *s, size_t len) {
181                    e + Rotate(f + a, 18) + g, mul);
182 }
183
184-uint64_t Hash64(const char *s, size_t len) {
185+DEVICE_MODIFIER uint64_t Hash64(const char *s, size_t len) {
186   const uint64_t seed = 81;
187   if (len <= 32) {
188     if (len <= 16) {
189@@ -527,8 +567,8 @@ uint64_t Hash64(const char *s, size_t len) {
190   uint64_t x = seed;
191   uint64_t y = seed * k1 + 113;
192   uint64_t z = ShiftMix(y * k2 + 113) * k2;
193-  pair<uint64_t, uint64_t> v = make_pair(0, 0);
194-  pair<uint64_t, uint64_t> w = make_pair(0, 0);
195+  Pair v = {0, 0};
196+  Pair w = {0, 0};
197   x = x * k2 + Fetch(s);
198
199   // Set end so that after the loop we have 1 to 64 bytes left to process.
200@@ -543,7 +583,9 @@ uint64_t Hash64(const char *s, size_t len) {
201     z = Rotate(z + w.first, 33) * k1;
202     v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
203     w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch(s + 16));
204-    std::swap(z, x);
205+    auto tmp = z;
206+    z = x;
207+    x = tmp;
208     s += 64;
209   } while (s != end);
210   uint64_t mul = k1 + ((z & 0xff) << 1);
211@@ -559,12 +601,15 @@ uint64_t Hash64(const char *s, size_t len) {
212   z = Rotate(z + w.first, 33) * mul;
213   v = WeakHashLen32WithSeeds(s, v.second * mul, x + w.first);
214   w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch(s + 16));
215-  std::swap(z, x);
216+  auto tmp = z;
217+  z = x;
218+  x = tmp;
219   return HashLen16(HashLen16(v.first, w.first, mul) + ShiftMix(y) * k0 + z,
220                    HashLen16(v.second, w.second, mul) + x,
221                    mul);
222 }
223
224+#ifndef DEVICE_MODIFIER
225 uint64_t Hash64WithSeeds(const char *s, size_t len, uint64_t seed0, uint64_t seed1);
226
227 uint64_t Hash64WithSeed(const char *s, size_t len, uint64_t seed) {
228@@ -574,7 +619,9 @@ uint64_t Hash64WithSeed(const char *s, size_t len, uint64_t seed) {
229 uint64_t Hash64WithSeeds(const char *s, size_t len, uint64_t seed0, uint64_t seed1) {
230   return HashLen16(Hash64(s, len) - seed0, seed1);
231 }
232-}  // namespace farmhashna
233+#endif // DEVICE_MODIFIER
234+}  // namespace farmhashna_gpu
235+#ifndef DEVICE_MODIFIER
236 namespace farmhashuo {
237 #undef Fetch
238 #define Fetch Fetch64
239@@ -1864,8 +1911,10 @@ uint128_t Fingerprint128(const char* s, size_t len) {
240   return CityHash128(s, len);
241 }
242 }  // namespace farmhashcc
243-namespace NAMESPACE_FOR_HASH_FUNCTIONS {
244+#endif // DEVICE_MODIFIER
245+namespace NAMESPACE_FOR_HASH_FUNCTIONS_GPU {
246
247+#ifndef DEVICE_MODIFIER
248 // BASIC STRING HASHING
249
250 // Hash function for a byte array.  See also Hash(), below.
251@@ -1948,12 +1997,14 @@ uint128_t Hash128WithSeed(const char* s, size_t len, uint128_t seed) {
252 uint32_t Fingerprint32(const char* s, size_t len) {
253   return farmhashmk::Hash32(s, len);
254 }
255+#endif // DEVICE_MODIFIER
256
257 // Fingerprint function for a byte array.
258-uint64_t Fingerprint64(const char* s, size_t len) {
259-  return farmhashna::Hash64(s, len);
260+DEVICE_MODIFIER uint64_t Fingerprint64(const char* s, size_t len) {
261+  return farmhashna_gpu::Hash64(s, len);
262 }
263
264+#ifndef DEVICE_MODIFIER
265 // Fingerprint function for a byte array.
266 uint128_t Fingerprint128(const char* s, size_t len) {
267   return farmhashcc::Fingerprint128(s, len);
268@@ -1961,9 +2012,11 @@ uint128_t Fingerprint128(const char* s, size_t len) {
269
270 // Older and still available but perhaps not as fast as the above:
271 //   farmhashns::Hash32{,WithSeed}()
272+#endif // DEVICE_MODIFIER
273
274-}  // namespace NAMESPACE_FOR_HASH_FUNCTIONS
275+}  // namespace NAMESPACE_FOR_HASH_FUNCTIONS_GPU
276
277+#ifndef DEVICE_MODIFIER
278 #if FARMHASHSELFTEST
279
280 #ifndef FARMHASH_SELF_TEST_GUARD
281@@ -11829,3 +11882,5 @@ int main() {
282 }
283
284 #endif  // FARMHASHSELFTEST
285+#endif // DEVICE_MODIFIER
286+#endif // FARM_HASH_GPU_H_
287--
2882.17.1
289
290