1 //===- StringHash.h -------------------------------------------------------===// 2 // 3 // The MCLinker Project 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 #ifndef MCLD_ADT_STRINGHASH_H 10 #define MCLD_ADT_STRINGHASH_H 11 #include <llvm/ADT/StringRef.h> 12 #include <llvm/Support/DataTypes.h> 13 #include <cctype> 14 #include <cassert> 15 #include <functional> 16 17 namespace mcld { 18 namespace hash { 19 20 enum Type { 21 RS, 22 JS, 23 PJW, 24 ELF, 25 BKDR, 26 SDBM, 27 DJB, 28 DEK, 29 BP, 30 FNV, 31 AP, 32 ES 33 }; 34 35 /** \class template<uint32_t TYPE> StringHash 36 * \brief the template StringHash class, for specification 37 */ 38 template<uint32_t TYPE> 39 struct StringHash : public std::unary_function<const llvm::StringRef&, uint32_t> 40 { operatorStringHash41 uint32_t operator()(const llvm::StringRef& pKey) const 42 { 43 assert(false && "Undefined StringHash function.\n"); 44 return 0; 45 } 46 }; 47 48 /** \class StringHash<RSHash> 49 * \brief RS StringHash funciton 50 */ 51 template<> 52 struct StringHash<RS> : public std::unary_function<const llvm::StringRef&, uint32_t> 53 { 54 uint32_t operator()(const llvm::StringRef& pKey) const 55 { 56 const unsigned int b = 378551; 57 uint32_t a = 63689; 58 uint32_t hash_val = 0; 59 60 for(unsigned int i = 0; i < pKey.size(); ++i) { 61 hash_val = hash_val * a + pKey[i]; 62 a = a * b; 63 } 64 return hash_val; 65 } 66 }; 67 68 /** \class StringHash<JSHash> 69 * \brief JS hash funciton 70 */ 71 template<> 72 struct StringHash<JS> : public std::unary_function<const llvm::StringRef&, uint32_t> 73 { 74 uint32_t operator()(const llvm::StringRef& pKey) const 75 { 76 uint32_t hash_val = 1315423911; 77 78 for(unsigned int i = 0; i < pKey.size(); ++i) { 79 hash_val ^= ((hash_val << 5) + pKey[i] + (hash_val >> 2)); 80 } 81 return hash_val; 82 } 83 }; 84 85 /** \class StringHash<PJW> 86 * \brief P.J. Weinberger hash function 87 */ 88 template<> 89 struct StringHash<PJW> : public std::unary_function<const llvm::StringRef&, uint32_t> 90 { 91 uint32_t operator()(const llvm::StringRef& pKey) const 92 { 93 const unsigned int BitsInUnsignedInt = (unsigned int)(sizeof(unsigned int) * 8); 94 const unsigned int ThreeQuarters = (unsigned int)((BitsInUnsignedInt * 3) / 4); 95 const unsigned int OneEighth = (unsigned int)(BitsInUnsignedInt / 8); 96 const unsigned int HighBits = (unsigned int)(0xFFFFFFFF) << (BitsInUnsignedInt - OneEighth); 97 uint32_t hash_val = 0; 98 uint32_t test = 0; 99 100 for(unsigned int i = 0; i < pKey.size(); ++i) { 101 hash_val = (hash_val << OneEighth) + pKey[i]; 102 103 if((test = hash_val & HighBits) != 0) { 104 hash_val = (( hash_val ^ (test >> ThreeQuarters)) & (~HighBits)); 105 } 106 } 107 return hash_val; 108 } 109 }; 110 111 /** \class StringHash<ELF> 112 * \brief ELF hash function. 113 */ 114 template<> 115 struct StringHash<ELF> : public std::unary_function<const llvm::StringRef&, uint32_t> 116 { 117 uint32_t operator()(const llvm::StringRef& pKey) const 118 { 119 uint32_t hash_val = 0; 120 uint32_t x = 0; 121 122 for (unsigned int i = 0; i < pKey.size(); ++i) { 123 hash_val = (hash_val << 4) + pKey[i]; 124 if((x = hash_val & 0xF0000000L) != 0) 125 hash_val ^= (x >> 24); 126 hash_val &= ~x; 127 } 128 return hash_val; 129 } 130 }; 131 132 /** \class StringHash<BKDR> 133 * \brief BKDR hash function 134 */ 135 template<> 136 struct StringHash<BKDR> : public std::unary_function<const llvm::StringRef&, uint32_t> 137 { 138 uint32_t operator()(const llvm::StringRef& pKey) const 139 { 140 const uint32_t seed = 131; 141 uint32_t hash_val = 0; 142 143 for(uint32_t i = 0; i < pKey.size(); ++i) 144 hash_val = (hash_val * seed) + pKey[i]; 145 return hash_val; 146 } 147 }; 148 149 150 /** \class StringHash<SDBM> 151 * \brief SDBM hash function 152 * 0.049s in 100000 test 153 */ 154 template<> 155 struct StringHash<SDBM> : public std::unary_function<const llvm::StringRef&, uint32_t> 156 { 157 uint32_t operator()(const llvm::StringRef& pKey) const 158 { 159 uint32_t hash_val = 0; 160 161 for(uint32_t i = 0; i < pKey.size(); ++i) 162 hash_val = pKey[i] + (hash_val << 6) + (hash_val << 16) - hash_val; 163 return hash_val; 164 } 165 }; 166 167 /** \class StringHash<DJB> 168 * \brief DJB hash function 169 * 0.057s in 100000 test 170 */ 171 template<> 172 struct StringHash<DJB> : public std::unary_function<const llvm::StringRef&, uint32_t> 173 { 174 uint32_t operator()(const llvm::StringRef& pKey) const 175 { 176 uint32_t hash_val = 5381; 177 178 for(uint32_t i = 0; i < pKey.size(); ++i) 179 hash_val = ((hash_val << 5) + hash_val) + pKey[i]; 180 181 return hash_val; 182 } 183 }; 184 185 /** \class StringHash<DEK> 186 * \brief DEK hash function 187 * 0.60s 188 */ 189 template<> 190 struct StringHash<DEK> : public std::unary_function<const llvm::StringRef&, uint32_t> 191 { 192 uint32_t operator()(const llvm::StringRef& pKey) const 193 { 194 uint32_t hash_val = pKey.size(); 195 196 for(uint32_t i = 0; i < pKey.size(); ++i) 197 hash_val = ((hash_val << 5) ^ (hash_val >> 27)) ^ pKey[i]; 198 199 return hash_val; 200 } 201 }; 202 203 /** \class StringHash<BP> 204 * \brief BP hash function 205 * 0.057s 206 */ 207 template<> 208 struct StringHash<BP> : public std::unary_function<const llvm::StringRef&, uint32_t> 209 { 210 uint32_t operator()(const llvm::StringRef& pKey) const 211 { 212 uint32_t hash_val = 0; 213 for(uint32_t i = 0; i < pKey.size(); ++i) 214 hash_val = hash_val << 7 ^ pKey[i]; 215 216 return hash_val; 217 } 218 }; 219 220 /** \class StringHash<FNV> 221 * \brief FNV hash function 222 * 0.058s 223 */ 224 template<> 225 struct StringHash<FNV> : public std::unary_function<const llvm::StringRef&, uint32_t> 226 { 227 uint32_t operator()(const llvm::StringRef& pKey) const 228 { 229 const uint32_t fnv_prime = 0x811C9DC5; 230 uint32_t hash_val = 0; 231 for(uint32_t i = 0; i < pKey.size(); ++i) { 232 hash_val *= fnv_prime; 233 hash_val ^= pKey[i]; 234 } 235 236 return hash_val; 237 } 238 }; 239 240 /** \class StringHash<AP> 241 * \brief AP hash function 242 * 0.060s 243 */ 244 template<> 245 struct StringHash<AP> : public std::unary_function<const llvm::StringRef&, uint32_t> 246 { 247 uint32_t operator()(const llvm::StringRef& pKey) const 248 { 249 unsigned int hash_val = 0xAAAAAAAA; 250 251 for(uint32_t i = 0; i < pKey.size(); ++i) { 252 hash_val ^= ((i & 1) == 0)? 253 ((hash_val << 7) ^ pKey[i] * (hash_val >> 3)): 254 (~((hash_val << 11) + (pKey[i] ^ (hash_val >> 5)))); 255 } 256 257 return hash_val; 258 } 259 }; 260 261 /** \class StringHash<ES> 262 * \brief This is a revision of Edward Sayers' string characteristic function. 263 * 264 * 31-28 27 26 25 - 0 265 * +----+---+---+------------+ 266 * | . | N | - | a/A ~ z/Z | 267 * +----+---+---+------------+ 268 * 269 * . (bit 31~28) - The number of '.' characters 270 * N (bit 27) - Are there any numbers in the string 271 * - (bit 26) - Does the string have '-' character 272 * bit 25~0 - Bit 25 is set only if the string contains a 'a' or 'A', and 273 * Bit 24 is set only if ... 'b' or 'B', ... 274 */ 275 template<> 276 struct StringHash<ES> : public std::unary_function<const llvm::StringRef&, uint32_t> 277 { 278 uint32_t operator()(const llvm::StringRef& pString) const 279 { 280 uint32_t result = 0x0; 281 unsigned int dot = 0; 282 std::string::size_type idx; 283 for (idx = 0; idx < pString.size(); ++idx) { 284 int cur_char = pString[idx]; 285 286 if ('.' == cur_char) { 287 ++dot; 288 continue; 289 } 290 291 if (isdigit(cur_char)) { 292 result |= (1 << 27); 293 continue; 294 } 295 296 if ('_' == cur_char) { 297 result |= (1 << 26); 298 continue; 299 } 300 301 if (isupper(cur_char)) { 302 result |= (1 << (cur_char - 'A')); 303 continue; 304 } 305 306 if (islower(cur_char)) { 307 result |= (1 << (cur_char - 'a')); 308 continue; 309 } 310 } 311 result |= (dot << 28); 312 return result; 313 } 314 315 316 /** \func may_include 317 * \brief is it possible that pRule is a sub-string of pInString 318 */ 319 static bool may_include(uint32_t pRule, uint32_t pInString) 320 { 321 uint32_t in_c = pInString << 4; 322 uint32_t r_c = pRule << 4; 323 324 uint32_t res = (in_c ^ r_c) & r_c; 325 if (0 != res) 326 return false; 327 328 uint32_t in_dot = pInString >> 28; 329 uint32_t r_dot = pRule >> 28; 330 if (r_dot > in_dot) 331 return false; 332 333 return true; 334 } 335 }; 336 337 /** \class template<uint32_t TYPE> StringCompare 338 * \brief the template StringCompare class, for specification 339 */ 340 template<typename STRING_TYPE> 341 struct StringCompare : public std::binary_function<const STRING_TYPE&, const STRING_TYPE&, bool> 342 { 343 bool operator()(const STRING_TYPE& X, const STRING_TYPE& Y) const 344 { return X == Y; } 345 }; 346 347 template<> 348 struct StringCompare<const char*> : public std::binary_function<const char*, const char*, bool> 349 { 350 bool operator()(const char* X, const char* Y) const 351 { return (0 == std::strcmp(X, Y)); } 352 }; 353 354 template<> 355 struct StringCompare<char*> : public std::binary_function<const char*, const char*, bool> 356 { 357 bool operator()(const char* X, const char* Y) const 358 { return (0 == std::strcmp(X, Y)); } 359 }; 360 361 } // namespace of hash 362 } // namespace of mcld 363 364 #endif 365 366