1 //===- StringHash.h -------------------------------------------------------===// 2 // 3 // The MCLinker Project 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 #ifndef MCLD_STRING_HASH_FUNCTION_H 10 #define MCLD_STRING_HASH_FUNCTION_H 11 #ifdef ENABLE_UNITTEST 12 #include <gtest.h> 13 #endif 14 #include <llvm/ADT/StringRef.h> 15 #include <llvm/Support/DataTypes.h> 16 #include <llvm/Support/ErrorHandling.h> 17 #include <cctype> 18 #include <functional> 19 20 namespace mcld { 21 namespace hash { 22 23 enum Type { 24 RS, 25 JS, 26 PJW, 27 ELF, 28 BKDR, 29 SDBM, 30 DJB, 31 DEK, 32 BP, 33 FNV, 34 AP, 35 ES 36 }; 37 38 /** \class template<uint32_t TYPE> StringHash 39 * \brief the template StringHash class, for specification 40 */ 41 template<uint32_t TYPE> 42 struct StringHash : public std::unary_function<const llvm::StringRef&, uint32_t> 43 { operatorStringHash44 uint32_t operator()(const llvm::StringRef& pKey) const 45 { 46 llvm::report_fatal_error("Undefined StringHash function.\n"); 47 } 48 }; 49 50 /** \class StringHash<RSHash> 51 * \brief RS StringHash funciton 52 */ 53 template<> 54 struct StringHash<RS> : public std::unary_function<const llvm::StringRef&, uint32_t> 55 { 56 uint32_t operator()(const llvm::StringRef& pKey) const 57 { 58 const unsigned int b = 378551; 59 uint32_t a = 63689; 60 uint32_t hash_val = 0; 61 62 for(unsigned int i = 0; i < pKey.size(); ++i) { 63 hash_val = hash_val * a + pKey[i]; 64 a = a * b; 65 } 66 return hash_val; 67 } 68 }; 69 70 /** \class StringHash<JSHash> 71 * \brief JS hash funciton 72 */ 73 template<> 74 struct StringHash<JS> : public std::unary_function<const llvm::StringRef&, uint32_t> 75 { 76 uint32_t operator()(const llvm::StringRef& pKey) const 77 { 78 uint32_t hash_val = 1315423911; 79 80 for(unsigned int i = 0; i < pKey.size(); ++i) { 81 hash_val ^= ((hash_val << 5) + pKey[i] + (hash_val >> 2)); 82 } 83 return hash_val; 84 } 85 }; 86 87 /** \class StringHash<PJW> 88 * \brief P.J. Weinberger hash function 89 */ 90 template<> 91 struct StringHash<PJW> : public std::unary_function<const llvm::StringRef&, uint32_t> 92 { 93 uint32_t operator()(const llvm::StringRef& pKey) const 94 { 95 const unsigned int BitsInUnsignedInt = (unsigned int)(sizeof(unsigned int) * 8); 96 const unsigned int ThreeQuarters = (unsigned int)((BitsInUnsignedInt * 3) / 4); 97 const unsigned int OneEighth = (unsigned int)(BitsInUnsignedInt / 8); 98 const unsigned int HighBits = (unsigned int)(0xFFFFFFFF) << (BitsInUnsignedInt - OneEighth); 99 uint32_t hash_val = 0; 100 uint32_t test = 0; 101 102 for(unsigned int i = 0; i < pKey.size(); ++i) { 103 hash_val = (hash_val << OneEighth) + pKey[i]; 104 105 if((test = hash_val & HighBits) != 0) { 106 hash_val = (( hash_val ^ (test >> ThreeQuarters)) & (~HighBits)); 107 } 108 } 109 return hash_val; 110 } 111 }; 112 113 /** \class StringHash<ELF> 114 * \brief ELF hash function. 115 */ 116 template<> 117 struct StringHash<ELF> : public std::unary_function<const llvm::StringRef&, uint32_t> 118 { 119 uint32_t operator()(const llvm::StringRef& pKey) const 120 { 121 uint32_t hash_val = 0; 122 uint32_t x = 0; 123 124 for (unsigned int i = 0; i < pKey.size(); ++i) { 125 hash_val = (hash_val << 4) + pKey[i]; 126 if((x = hash_val & 0xF0000000L) != 0) 127 hash_val ^= (x >> 24); 128 hash_val &= ~x; 129 } 130 return hash_val; 131 } 132 }; 133 134 /** \class StringHash<BKDR> 135 * \brief BKDR hash function 136 */ 137 template<> 138 struct StringHash<BKDR> : public std::unary_function<const llvm::StringRef&, uint32_t> 139 { 140 uint32_t operator()(const llvm::StringRef& pKey) const 141 { 142 const uint32_t seed = 131; 143 uint32_t hash_val = 0; 144 145 for(uint32_t i = 0; i < pKey.size(); ++i) 146 hash_val = (hash_val * seed) + pKey[i]; 147 return hash_val; 148 } 149 }; 150 151 152 /** \class StringHash<SDBM> 153 * \brief SDBM hash function 154 * 0.049s in 100000 test 155 */ 156 template<> 157 struct StringHash<SDBM> : public std::unary_function<const llvm::StringRef&, uint32_t> 158 { 159 uint32_t operator()(const llvm::StringRef& pKey) const 160 { 161 uint32_t hash_val = 0; 162 163 for(uint32_t i = 0; i < pKey.size(); ++i) 164 hash_val = pKey[i] + (hash_val << 6) + (hash_val << 16) - hash_val; 165 return hash_val; 166 } 167 }; 168 169 /** \class StringHash<DJB> 170 * \brief DJB hash function 171 * 0.057s in 100000 test 172 */ 173 template<> 174 struct StringHash<DJB> : public std::unary_function<const llvm::StringRef&, uint32_t> 175 { 176 uint32_t operator()(const llvm::StringRef& pKey) const 177 { 178 uint32_t hash_val = 5381; 179 180 for(uint32_t i = 0; i < pKey.size(); ++i) 181 hash_val = ((hash_val << 5) + hash_val) + pKey[i]; 182 183 return hash_val; 184 } 185 }; 186 187 /** \class StringHash<DEK> 188 * \brief DEK hash function 189 * 0.60s 190 */ 191 template<> 192 struct StringHash<DEK> : public std::unary_function<const llvm::StringRef&, uint32_t> 193 { 194 uint32_t operator()(const llvm::StringRef& pKey) const 195 { 196 uint32_t hash_val = pKey.size(); 197 198 for(uint32_t i = 0; i < pKey.size(); ++i) 199 hash_val = ((hash_val << 5) ^ (hash_val >> 27)) ^ pKey[i]; 200 201 return hash_val; 202 } 203 }; 204 205 /** \class StringHash<BP> 206 * \brief BP hash function 207 * 0.057s 208 */ 209 template<> 210 struct StringHash<BP> : public std::unary_function<const llvm::StringRef&, uint32_t> 211 { 212 uint32_t operator()(const llvm::StringRef& pKey) const 213 { 214 uint32_t hash_val = 0; 215 for(uint32_t i = 0; i < pKey.size(); ++i) 216 hash_val = hash_val << 7 ^ pKey[i]; 217 218 return hash_val; 219 } 220 }; 221 222 /** \class StringHash<FNV> 223 * \brief FNV hash function 224 * 0.058s 225 */ 226 template<> 227 struct StringHash<FNV> : public std::unary_function<const llvm::StringRef&, uint32_t> 228 { 229 uint32_t operator()(const llvm::StringRef& pKey) const 230 { 231 const uint32_t fnv_prime = 0x811C9DC5; 232 uint32_t hash_val = 0; 233 for(uint32_t i = 0; i < pKey.size(); ++i) { 234 hash_val *= fnv_prime; 235 hash_val ^= pKey[i]; 236 } 237 238 return hash_val; 239 } 240 }; 241 242 /** \class StringHash<AP> 243 * \brief AP hash function 244 * 0.060s 245 */ 246 template<> 247 struct StringHash<AP> : public std::unary_function<const llvm::StringRef&, uint32_t> 248 { 249 uint32_t operator()(const llvm::StringRef& pKey) const 250 { 251 unsigned int hash_val = 0xAAAAAAAA; 252 253 for(uint32_t i = 0; i < pKey.size(); ++i) { 254 hash_val ^= ((i & 1) == 0)? 255 ((hash_val << 7) ^ pKey[i] * (hash_val >> 3)): 256 (~((hash_val << 11) + (pKey[i] ^ (hash_val >> 5)))); 257 } 258 259 return hash_val; 260 } 261 }; 262 263 /** \class StringHash<ES> 264 * \brief This is a revision of Edward Sayers' string characteristic function. 265 * 266 * 31-28 27 26 25 - 0 267 * +----+---+---+------------+ 268 * | . | N | - | a/A ~ z/Z | 269 * +----+---+---+------------+ 270 * 271 * . (bit 31~28) - The number of '.' characters 272 * N (bit 27) - Are there any numbers in the string 273 * - (bit 26) - Does the string have '-' character 274 * bit 25~0 - Bit 25 is set only if the string contains a 'a' or 'A', and 275 * Bit 24 is set only if ... 'b' or 'B', ... 276 */ 277 template<> 278 struct StringHash<ES> : public std::unary_function<const llvm::StringRef&, uint32_t> 279 { 280 uint32_t operator()(const llvm::StringRef& pString) const 281 { 282 uint32_t result = 0x0; 283 unsigned int dot = 0; 284 std::string::size_type idx; 285 for (idx = 0; idx < pString.size(); ++idx) { 286 int cur_char = pString[idx]; 287 288 if ('.' == cur_char) { 289 ++dot; 290 continue; 291 } 292 293 if (isdigit(cur_char)) { 294 result |= (1 << 27); 295 continue; 296 } 297 298 if ('_' == cur_char) { 299 result |= (1 << 26); 300 continue; 301 } 302 303 if (isupper(cur_char)) { 304 result |= (1 << (cur_char - 'A')); 305 continue; 306 } 307 308 if (islower(cur_char)) { 309 result |= (1 << (cur_char - 'a')); 310 continue; 311 } 312 } 313 result |= (dot << 28); 314 return result; 315 } 316 317 318 /** \func may_include 319 * \brief is it possible that pRule is a sub-string of pInString 320 */ 321 static bool may_include(uint32_t pRule, uint32_t pInString) 322 { 323 uint32_t in_c = pInString << 4; 324 uint32_t r_c = pRule << 4; 325 326 uint32_t res = (in_c ^ r_c) & r_c; 327 if (0 != res) 328 return false; 329 330 uint32_t in_dot = pInString >> 28; 331 uint32_t r_dot = pRule >> 28; 332 if (r_dot > in_dot) 333 return false; 334 335 return true; 336 } 337 }; 338 339 /** \class template<uint32_t TYPE> StringCompare 340 * \brief the template StringCompare class, for specification 341 */ 342 template<typename STRING_TYPE> 343 struct StringCompare : public std::binary_function<const STRING_TYPE&, const STRING_TYPE&, bool> 344 { 345 bool operator()(const STRING_TYPE& X, const STRING_TYPE& Y) const 346 { return X == Y; } 347 }; 348 349 template<> 350 struct StringCompare<const char*> : public std::binary_function<const char*, const char*, bool> 351 { 352 bool operator()(const char* X, const char* Y) const 353 { return (0 == std::strcmp(X, Y)); } 354 }; 355 356 template<> 357 struct StringCompare<char*> : public std::binary_function<const char*, const char*, bool> 358 { 359 bool operator()(const char* X, const char* Y) const 360 { return (0 == std::strcmp(X, Y)); } 361 }; 362 363 } // namespace of hash 364 } // namespace of mcld 365 366 #endif 367 368