1From eb130493c8042280a01e03c28bb89bd5ae0c5d18 Mon Sep 17 00:00:00 2001 2From: Kaixi Hou <kaixih@nvidia.com> 3Date: Tue, 23 Mar 2021 12:49:18 -0700 4Subject: [PATCH] Add device modifiers for GPUs 5 6--- 7 src/{farmhash.cc => farmhash_gpu.h} | 95 +++++++++++++++++++++++------ 8 1 file changed, 75 insertions(+), 20 deletions(-) 9 rename src/{farmhash.cc => farmhash_gpu.h} (99%) 10 11diff --git a/src/farmhash.cc b/src/farmhash_gpu.h 12similarity index 99% 13rename from src/farmhash.cc 14rename to src/farmhash_gpu.h 15index cfd4a47..50994b6 100644 16--- a/src/farmhash.cc 17+++ b/src/farmhash_gpu.h 18@@ -20,6 +20,17 @@ 19 // 20 // FarmHash, by Geoff Pike 21 22+#ifndef FARM_HASH_GPU_H_ 23+#define FARM_HASH_GPU_H_ 24+ 25+#include <cstdint> 26+#include <string.h> // for memcpy and memset 27+ 28+#define NAMESPACE_FOR_HASH_FUNCTIONS_GPU util_gpu 29+#define DEVICE_MODIFIER __device__ __host__ 30+ 31+// We use DEVICE_MODIFIER to remove those code unused by GPUs. 32+#ifndef DEVICE_MODIFIER 33 #include "farmhash.h" 34 // FARMHASH ASSUMPTIONS: Modify as needed, or use -DFARMHASH_ASSUME_SSE42 etc. 35 // Note that if you use -DFARMHASH_ASSUME_SSE42 you likely need -msse42 36@@ -187,7 +198,14 @@ 37 #define uint64_in_expected_order(x) (x) 38 #endif 39 40-namespace NAMESPACE_FOR_HASH_FUNCTIONS { 41+#endif // DEVICE_MODIFIER 42+ 43+#define uint32_in_expected_order(x) (x) 44+#define uint64_in_expected_order(x) (x) 45+ 46+#define STATIC_INLINE DEVICE_MODIFIER inline 47+ 48+namespace NAMESPACE_FOR_HASH_FUNCTIONS_GPU { 49 50 STATIC_INLINE uint64_t Fetch64(const char *p) { 51 uint64_t result; 52@@ -201,6 +219,7 @@ STATIC_INLINE uint32_t Fetch32(const char *p) { 53 return uint32_in_expected_order(result); 54 } 55 56+#ifndef DEVICE_MODIFIER 57 STATIC_INLINE uint32_t Bswap32(uint32_t val) { return bswap_32(val); } 58 STATIC_INLINE uint64_t Bswap64(uint64_t val) { return bswap_64(val); } 59 60@@ -210,12 +229,14 @@ STATIC_INLINE uint32_t BasicRotate32(uint32_t val, int shift) { 61 // Avoid shifting by 32: doing so yields an undefined result. 62 return shift == 0 ? val : ((val >> shift) | (val << (32 - shift))); 63 } 64+#endif // DEVICE_MODIFIER 65 66 STATIC_INLINE uint64_t BasicRotate64(uint64_t val, int shift) { 67 // Avoid shifting by 64: doing so yields an undefined result. 68 return shift == 0 ? val : ((val >> shift) | (val << (64 - shift))); 69 } 70 71+#ifndef DEVICE_MODIFIER 72 #if defined(_WIN32) && defined(FARMHASH_ROTR) 73 74 STATIC_INLINE uint32_t Rotate32(uint32_t val, int shift) { 75@@ -240,12 +261,18 @@ STATIC_INLINE uint64_t Rotate64(uint64_t val, int shift) { 76 } 77 78 #endif 79+#endif // DEVICE_MODIFIER 80 81-} // namespace NAMESPACE_FOR_HASH_FUNCTIONS 82+STATIC_INLINE uint64_t Rotate64(uint64_t val, int shift) { 83+ return BasicRotate64(val, shift); 84+} 85+ 86+} // namespace NAMESPACE_FOR_HASH_FUNCTIONS_GPU 87 88 // FARMHASH PORTABILITY LAYER: debug mode or max speed? 89 // One may use -DFARMHASH_DEBUG=1 or -DFARMHASH_DEBUG=0 to force the issue. 90 91+#ifndef DEVICE_MODIFIER 92 #if !defined(FARMHASH_DEBUG) && (!defined(NDEBUG) || defined(_DEBUG)) 93 #define FARMHASH_DEBUG 1 94 #endif 95@@ -345,14 +372,21 @@ STATIC_INLINE __m128i Fetch128(const char* s) { 96 97 #undef PERMUTE3 98 #define PERMUTE3(a, b, c) do { std::swap(a, b); std::swap(a, c); } while (0) 99+#endif // DEVICE_MODIFIER 100+ 101+struct Pair { 102+ uint64_t first; 103+ uint64_t second; 104+}; 105 106-namespace NAMESPACE_FOR_HASH_FUNCTIONS { 107+namespace NAMESPACE_FOR_HASH_FUNCTIONS_GPU { 108 109 // Some primes between 2^63 and 2^64 for various uses. 110 static const uint64_t k0 = 0xc3a5c85c97cb3127ULL; 111 static const uint64_t k1 = 0xb492b66fbe98f273ULL; 112 static const uint64_t k2 = 0x9ae16a3b2f90404fULL; 113 114+#ifndef DEVICE_MODIFIER 115 // Magic numbers for 32-bit hashing. Copied from Murmur3. 116 static const uint32_t c1 = 0xcc9e2d51; 117 static const uint32_t c2 = 0x1b873593; 118@@ -399,28 +433,34 @@ template <> uint128_t DebugTweak(uint128_t x) { 119 } 120 return x; 121 } 122+#endif // DEVICE_MODIFIER 123+} // namespace NAMESPACE_FOR_HASH_FUNCTIONS_GPU 124 125-} // namespace NAMESPACE_FOR_HASH_FUNCTIONS 126- 127+#ifndef DEVICE_MODIFIER 128 using namespace std; 129-using namespace NAMESPACE_FOR_HASH_FUNCTIONS; 130-namespace farmhashna { 131+#endif // DEVICE_MODIFIER 132+using namespace NAMESPACE_FOR_HASH_FUNCTIONS_GPU; 133+namespace farmhashna_gpu { 134 #undef Fetch 135 #define Fetch Fetch64 136 137 #undef Rotate 138 #define Rotate Rotate64 139 140+#ifndef DEVICE_MODIFIER 141 #undef Bswap 142 #define Bswap Bswap64 143+#endif // DEVICE_MODIFIER 144 145 STATIC_INLINE uint64_t ShiftMix(uint64_t val) { 146 return val ^ (val >> 47); 147 } 148 149+#ifndef DEVICE_MODIFIER 150 STATIC_INLINE uint64_t HashLen16(uint64_t u, uint64_t v) { 151 return Hash128to64(Uint128(u, v)); 152 } 153+#endif // DEVICE_MODIFIER 154 155 STATIC_INLINE uint64_t HashLen16(uint64_t u, uint64_t v, uint64_t mul) { 156 // Murmur-inspired hashing. 157@@ -471,7 +511,7 @@ STATIC_INLINE uint64_t HashLen17to32(const char *s, size_t len) { 158 159 // Return a 16-byte hash for 48 bytes. Quick and dirty. 160 // Callers do best to use "random-looking" values for a and b. 161-STATIC_INLINE pair<uint64_t, uint64_t> WeakHashLen32WithSeeds( 162+STATIC_INLINE Pair WeakHashLen32WithSeeds( 163 uint64_t w, uint64_t x, uint64_t y, uint64_t z, uint64_t a, uint64_t b) { 164 a += w; 165 b = Rotate(b + a + z, 21); 166@@ -479,11 +519,11 @@ STATIC_INLINE pair<uint64_t, uint64_t> WeakHashLen32WithSeeds( 167 a += x; 168 a += y; 169 b += Rotate(a, 44); 170- return make_pair(a + z, b + c); 171+ return Pair{a + z, b + c}; 172 } 173 174 // Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty. 175-STATIC_INLINE pair<uint64_t, uint64_t> WeakHashLen32WithSeeds( 176+STATIC_INLINE Pair WeakHashLen32WithSeeds( 177 const char* s, uint64_t a, uint64_t b) { 178 return WeakHashLen32WithSeeds(Fetch(s), 179 Fetch(s + 8), 180@@ -510,7 +550,7 @@ STATIC_INLINE uint64_t HashLen33to64(const char *s, size_t len) { 181 e + Rotate(f + a, 18) + g, mul); 182 } 183 184-uint64_t Hash64(const char *s, size_t len) { 185+DEVICE_MODIFIER uint64_t Hash64(const char *s, size_t len) { 186 const uint64_t seed = 81; 187 if (len <= 32) { 188 if (len <= 16) { 189@@ -527,8 +567,8 @@ uint64_t Hash64(const char *s, size_t len) { 190 uint64_t x = seed; 191 uint64_t y = seed * k1 + 113; 192 uint64_t z = ShiftMix(y * k2 + 113) * k2; 193- pair<uint64_t, uint64_t> v = make_pair(0, 0); 194- pair<uint64_t, uint64_t> w = make_pair(0, 0); 195+ Pair v = {0, 0}; 196+ Pair w = {0, 0}; 197 x = x * k2 + Fetch(s); 198 199 // Set end so that after the loop we have 1 to 64 bytes left to process. 200@@ -543,7 +583,9 @@ uint64_t Hash64(const char *s, size_t len) { 201 z = Rotate(z + w.first, 33) * k1; 202 v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); 203 w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch(s + 16)); 204- std::swap(z, x); 205+ auto tmp = z; 206+ z = x; 207+ x = tmp; 208 s += 64; 209 } while (s != end); 210 uint64_t mul = k1 + ((z & 0xff) << 1); 211@@ -559,12 +601,15 @@ uint64_t Hash64(const char *s, size_t len) { 212 z = Rotate(z + w.first, 33) * mul; 213 v = WeakHashLen32WithSeeds(s, v.second * mul, x + w.first); 214 w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch(s + 16)); 215- std::swap(z, x); 216+ auto tmp = z; 217+ z = x; 218+ x = tmp; 219 return HashLen16(HashLen16(v.first, w.first, mul) + ShiftMix(y) * k0 + z, 220 HashLen16(v.second, w.second, mul) + x, 221 mul); 222 } 223 224+#ifndef DEVICE_MODIFIER 225 uint64_t Hash64WithSeeds(const char *s, size_t len, uint64_t seed0, uint64_t seed1); 226 227 uint64_t Hash64WithSeed(const char *s, size_t len, uint64_t seed) { 228@@ -574,7 +619,9 @@ uint64_t Hash64WithSeed(const char *s, size_t len, uint64_t seed) { 229 uint64_t Hash64WithSeeds(const char *s, size_t len, uint64_t seed0, uint64_t seed1) { 230 return HashLen16(Hash64(s, len) - seed0, seed1); 231 } 232-} // namespace farmhashna 233+#endif // DEVICE_MODIFIER 234+} // namespace farmhashna_gpu 235+#ifndef DEVICE_MODIFIER 236 namespace farmhashuo { 237 #undef Fetch 238 #define Fetch Fetch64 239@@ -1864,8 +1911,10 @@ uint128_t Fingerprint128(const char* s, size_t len) { 240 return CityHash128(s, len); 241 } 242 } // namespace farmhashcc 243-namespace NAMESPACE_FOR_HASH_FUNCTIONS { 244+#endif // DEVICE_MODIFIER 245+namespace NAMESPACE_FOR_HASH_FUNCTIONS_GPU { 246 247+#ifndef DEVICE_MODIFIER 248 // BASIC STRING HASHING 249 250 // Hash function for a byte array. See also Hash(), below. 251@@ -1948,12 +1997,14 @@ uint128_t Hash128WithSeed(const char* s, size_t len, uint128_t seed) { 252 uint32_t Fingerprint32(const char* s, size_t len) { 253 return farmhashmk::Hash32(s, len); 254 } 255+#endif // DEVICE_MODIFIER 256 257 // Fingerprint function for a byte array. 258-uint64_t Fingerprint64(const char* s, size_t len) { 259- return farmhashna::Hash64(s, len); 260+DEVICE_MODIFIER uint64_t Fingerprint64(const char* s, size_t len) { 261+ return farmhashna_gpu::Hash64(s, len); 262 } 263 264+#ifndef DEVICE_MODIFIER 265 // Fingerprint function for a byte array. 266 uint128_t Fingerprint128(const char* s, size_t len) { 267 return farmhashcc::Fingerprint128(s, len); 268@@ -1961,9 +2012,11 @@ uint128_t Fingerprint128(const char* s, size_t len) { 269 270 // Older and still available but perhaps not as fast as the above: 271 // farmhashns::Hash32{,WithSeed}() 272+#endif // DEVICE_MODIFIER 273 274-} // namespace NAMESPACE_FOR_HASH_FUNCTIONS 275+} // namespace NAMESPACE_FOR_HASH_FUNCTIONS_GPU 276 277+#ifndef DEVICE_MODIFIER 278 #if FARMHASHSELFTEST 279 280 #ifndef FARMHASH_SELF_TEST_GUARD 281@@ -11829,3 +11882,5 @@ int main() { 282 } 283 284 #endif // FARMHASHSELFTEST 285+#endif // DEVICE_MODIFIER 286+#endif // FARM_HASH_GPU_H_ 287-- 2882.17.1 289 290