• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===- StringHash.h -------------------------------------------------------===//
2 //
3 //                     The MCLinker Project
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 #ifndef MCLD_STRING_HASH_FUNCTION_H
10 #define MCLD_STRING_HASH_FUNCTION_H
11 #ifdef ENABLE_UNITTEST
12 #include <gtest.h>
13 #endif
14 #include <llvm/ADT/StringRef.h>
15 #include <llvm/Support/DataTypes.h>
16 #include <llvm/Support/ErrorHandling.h>
17 #include <cctype>
18 #include <functional>
19 
20 namespace mcld {
21 namespace hash {
22 
23 enum Type {
24   RS,
25   JS,
26   PJW,
27   ELF,
28   BKDR,
29   SDBM,
30   DJB,
31   DEK,
32   BP,
33   FNV,
34   AP,
35   ES
36 };
37 
38 /** \class template<uint32_t TYPE> StringHash
39  *  \brief the template StringHash class, for specification
40  */
41 template<uint32_t TYPE>
42 struct StringHash : public std::unary_function<const llvm::StringRef&, uint32_t>
43 {
operatorStringHash44   uint32_t operator()(const llvm::StringRef& pKey) const
45   {
46     llvm::report_fatal_error("Undefined StringHash function.\n");
47   }
48 };
49 
50 /** \class StringHash<RSHash>
51  *  \brief RS StringHash funciton
52  */
53 template<>
54 struct StringHash<RS> : public std::unary_function<const llvm::StringRef&, uint32_t>
55 {
56   uint32_t operator()(const llvm::StringRef& pKey) const
57   {
58     const unsigned int b = 378551;
59     uint32_t a = 63689;
60     uint32_t hash_val = 0;
61 
62     for(unsigned int i = 0; i < pKey.size(); ++i) {
63       hash_val = hash_val * a + pKey[i];
64       a = a * b;
65     }
66     return hash_val;
67   }
68 };
69 
70 /** \class StringHash<JSHash>
71  *  \brief JS hash funciton
72  */
73 template<>
74 struct StringHash<JS> : public std::unary_function<const llvm::StringRef&, uint32_t>
75 {
76   uint32_t operator()(const llvm::StringRef& pKey) const
77   {
78     uint32_t hash_val = 1315423911;
79 
80     for(unsigned int i = 0; i < pKey.size(); ++i) {
81        hash_val ^= ((hash_val << 5) + pKey[i] + (hash_val >> 2));
82     }
83     return hash_val;
84   }
85 };
86 
87 /** \class StringHash<PJW>
88  *  \brief P.J. Weinberger hash function
89  */
90 template<>
91 struct StringHash<PJW> : public std::unary_function<const llvm::StringRef&, uint32_t>
92 {
93   uint32_t operator()(const llvm::StringRef& pKey) const
94   {
95     const unsigned int BitsInUnsignedInt = (unsigned int)(sizeof(unsigned int) * 8);
96     const unsigned int ThreeQuarters     = (unsigned int)((BitsInUnsignedInt  * 3) / 4);
97     const unsigned int OneEighth         = (unsigned int)(BitsInUnsignedInt / 8);
98     const unsigned int HighBits          = (unsigned int)(0xFFFFFFFF) << (BitsInUnsignedInt - OneEighth);
99     uint32_t hash_val = 0;
100     uint32_t test = 0;
101 
102     for(unsigned int i = 0; i < pKey.size(); ++i) {
103       hash_val = (hash_val << OneEighth) + pKey[i];
104 
105       if((test = hash_val & HighBits) != 0) {
106         hash_val = (( hash_val ^ (test >> ThreeQuarters)) & (~HighBits));
107       }
108     }
109     return hash_val;
110   }
111 };
112 
113 /** \class StringHash<ELF>
114  *  \brief ELF hash function.
115  */
116 template<>
117 struct StringHash<ELF> : public std::unary_function<const llvm::StringRef&, uint32_t>
118 {
119   uint32_t operator()(const llvm::StringRef& pKey) const
120   {
121     uint32_t hash_val = 0;
122     uint32_t x = 0;
123 
124     for (unsigned int i = 0; i < pKey.size(); ++i) {
125       hash_val = (hash_val << 4) + pKey[i];
126       if((x = hash_val & 0xF0000000L) != 0)
127         hash_val ^= (x >> 24);
128       hash_val &= ~x;
129     }
130     return hash_val;
131   }
132 };
133 
134 /** \class StringHash<BKDR>
135  *  \brief BKDR hash function
136  */
137 template<>
138 struct StringHash<BKDR> : public std::unary_function<const llvm::StringRef&, uint32_t>
139 {
140   uint32_t operator()(const llvm::StringRef& pKey) const
141   {
142     const uint32_t seed = 131;
143     uint32_t hash_val = 0;
144 
145     for(uint32_t i = 0; i < pKey.size(); ++i)
146       hash_val = (hash_val * seed) + pKey[i];
147     return hash_val;
148   }
149 };
150 
151 
152 /** \class StringHash<SDBM>
153  *  \brief SDBM hash function
154  *  0.049s in 100000 test
155  */
156 template<>
157 struct StringHash<SDBM> : public std::unary_function<const llvm::StringRef&, uint32_t>
158 {
159   uint32_t operator()(const llvm::StringRef& pKey) const
160   {
161     uint32_t hash_val = 0;
162 
163     for(uint32_t i = 0; i < pKey.size(); ++i)
164       hash_val = pKey[i] + (hash_val << 6) + (hash_val << 16) - hash_val;
165     return hash_val;
166   }
167 };
168 
169 /** \class StringHash<DJB>
170  *  \brief DJB hash function
171  *  0.057s in 100000 test
172  */
173 template<>
174 struct StringHash<DJB> : public std::unary_function<const llvm::StringRef&, uint32_t>
175 {
176   uint32_t operator()(const llvm::StringRef& pKey) const
177   {
178     uint32_t hash_val = 5381;
179 
180     for(uint32_t i = 0; i < pKey.size(); ++i)
181       hash_val = ((hash_val << 5) + hash_val) + pKey[i];
182 
183     return hash_val;
184   }
185 };
186 
187 /** \class StringHash<DEK>
188  *  \brief DEK hash function
189  *  0.60s
190  */
191 template<>
192 struct StringHash<DEK> : public std::unary_function<const llvm::StringRef&, uint32_t>
193 {
194   uint32_t operator()(const llvm::StringRef& pKey) const
195   {
196     uint32_t hash_val = pKey.size();
197 
198     for(uint32_t i = 0; i < pKey.size(); ++i)
199       hash_val = ((hash_val << 5) ^ (hash_val >> 27)) ^ pKey[i];
200 
201     return hash_val;
202   }
203 };
204 
205 /** \class StringHash<BP>
206  *  \brief BP hash function
207  *  0.057s
208  */
209 template<>
210 struct StringHash<BP> : public std::unary_function<const llvm::StringRef&, uint32_t>
211 {
212   uint32_t operator()(const llvm::StringRef& pKey) const
213   {
214     uint32_t hash_val = 0;
215     for(uint32_t i = 0; i < pKey.size(); ++i)
216       hash_val = hash_val << 7 ^ pKey[i];
217 
218     return hash_val;
219   }
220 };
221 
222 /** \class StringHash<FNV>
223  *  \brief FNV hash function
224  *  0.058s
225  */
226 template<>
227 struct StringHash<FNV> : public std::unary_function<const llvm::StringRef&, uint32_t>
228 {
229   uint32_t operator()(const llvm::StringRef& pKey) const
230   {
231     const uint32_t fnv_prime = 0x811C9DC5;
232     uint32_t hash_val = 0;
233     for(uint32_t i = 0; i < pKey.size(); ++i) {
234       hash_val *= fnv_prime;
235       hash_val ^= pKey[i];
236     }
237 
238     return hash_val;
239   }
240 };
241 
242 /** \class StringHash<AP>
243  *  \brief AP hash function
244  *  0.060s
245  */
246 template<>
247 struct StringHash<AP> : public std::unary_function<const llvm::StringRef&, uint32_t>
248 {
249   uint32_t operator()(const llvm::StringRef& pKey) const
250   {
251     unsigned int hash_val = 0xAAAAAAAA;
252 
253     for(uint32_t i = 0; i < pKey.size(); ++i) {
254       hash_val ^= ((i & 1) == 0)?
255                           ((hash_val <<  7) ^ pKey[i] * (hash_val >> 3)):
256                           (~((hash_val << 11) + (pKey[i] ^ (hash_val >> 5))));
257     }
258 
259     return hash_val;
260   }
261 };
262 
263 /** \class StringHash<ES>
264  *  \brief This is a revision of Edward Sayers' string characteristic function.
265  *
266  *  31-28  27  26  25   -   0
267  *  +----+---+---+------------+
268  *  | .  | N | - | a/A  ~ z/Z |
269  *  +----+---+---+------------+
270  *
271  *  . (bit 31~28) - The number of '.' characters
272  *  N (bit 27)    - Are there any numbers in the string
273  *  - (bit 26)    - Does the string have '-' character
274  *  bit 25~0      - Bit 25 is set only if the string contains a 'a' or 'A', and
275  *                  Bit 24 is set only if ...                   'b' or 'B', ...
276  */
277 template<>
278 struct StringHash<ES> : public std::unary_function<const llvm::StringRef&, uint32_t>
279 {
280   uint32_t operator()(const llvm::StringRef& pString) const
281   {
282     uint32_t result = 0x0;
283     unsigned int dot = 0;
284     std::string::size_type idx;
285     for (idx = 0; idx < pString.size(); ++idx) {
286       int cur_char = pString[idx];
287 
288       if ('.' == cur_char) {
289         ++dot;
290         continue;
291       }
292 
293       if (isdigit(cur_char)) {
294         result |= (1 << 27);
295         continue;
296       }
297 
298       if ('_' == cur_char) {
299         result |= (1 << 26);
300         continue;
301       }
302 
303       if (isupper(cur_char)) {
304         result |= (1 << (cur_char - 'A'));
305         continue;
306       }
307 
308       if (islower(cur_char)) {
309         result |= (1 << (cur_char - 'a'));
310         continue;
311       }
312     }
313     result |= (dot << 28);
314     return result;
315   }
316 
317 
318   /** \func may_include
319    *  \brief is it possible that pRule is a sub-string of pInString
320    */
321   static bool may_include(uint32_t pRule, uint32_t pInString)
322   {
323     uint32_t in_c = pInString << 4;
324     uint32_t r_c  = pRule << 4;
325 
326     uint32_t res = (in_c ^ r_c) & r_c;
327     if (0 != res)
328       return false;
329 
330     uint32_t in_dot = pInString >> 28;
331     uint32_t r_dot  = pRule >> 28;
332     if (r_dot > in_dot)
333       return false;
334 
335     return true;
336   }
337 };
338 
339 /** \class template<uint32_t TYPE> StringCompare
340  *  \brief the template StringCompare class, for specification
341  */
342 template<typename STRING_TYPE>
343 struct StringCompare : public std::binary_function<const STRING_TYPE&, const STRING_TYPE&, bool>
344 {
345   bool operator()(const STRING_TYPE& X, const STRING_TYPE& Y) const
346   { return X == Y; }
347 };
348 
349 template<>
350 struct StringCompare<const char*> : public std::binary_function<const char*, const char*, bool>
351 {
352   bool operator()(const char* X, const char* Y) const
353   { return (0 == std::strcmp(X, Y)); }
354 };
355 
356 template<>
357 struct StringCompare<char*> : public std::binary_function<const char*, const char*, bool>
358 {
359   bool operator()(const char* X, const char* Y) const
360   { return (0 == std::strcmp(X, Y)); }
361 };
362 
363 } // namespace of hash
364 } // namespace of mcld
365 
366 #endif
367 
368