• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2006, 2007, 2008 Apple Inc. All rights reserved
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Library General Public
6  * License as published by the Free Software Foundation; either
7  * version 2 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Library General Public License for more details.
13  *
14  * You should have received a copy of the GNU Library General Public License
15  * along with this library; see the file COPYING.LIB.  If not, write to
16  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
17  * Boston, MA 02110-1301, USA.
18  *
19  */
20 
21 #ifndef StringHash_h
22 #define StringHash_h
23 
24 #include "AtomicString.h"
25 #include "PlatformString.h"
26 #include <wtf/HashFunctions.h>
27 #include <wtf/HashTraits.h>
28 #include <wtf/unicode/Unicode.h>
29 
30 namespace WebCore {
31 
32     // FIXME: We should really figure out a way to put the computeHash function that's
33     // currently a member function of StringImpl into this file so we can be a little
34     // closer to having all the nearly-identical hash functions in one place.
35 
36     struct StringHash {
hashStringHash37         static unsigned hash(StringImpl* key) { return key->hash(); }
equalStringHash38         static bool equal(StringImpl* a, StringImpl* b)
39         {
40             if (a == b)
41                 return true;
42             if (!a || !b)
43                 return false;
44 
45             unsigned aLength = a->length();
46             unsigned bLength = b->length();
47             if (aLength != bLength)
48                 return false;
49 
50 #if PLATFORM(ARM) || PLATFORM(SH4)
51             return memcmp(a->characters(), b->characters(), sizeof(UChar) * aLength) == 0;
52 #else
53             const uint32_t* aChars = reinterpret_cast<const uint32_t*>(a->characters());
54             const uint32_t* bChars = reinterpret_cast<const uint32_t*>(b->characters());
55 
56             unsigned halfLength = aLength >> 1;
57             for (unsigned i = 0; i != halfLength; ++i)
58                 if (*aChars++ != *bChars++)
59                     return false;
60 
61             if (aLength & 1 && *reinterpret_cast<const uint16_t*>(aChars) != *reinterpret_cast<const uint16_t*>(bChars))
62                 return false;
63 
64             return true;
65 #endif
66         }
67 
hashStringHash68         static unsigned hash(const RefPtr<StringImpl>& key) { return key->hash(); }
equalStringHash69         static bool equal(const RefPtr<StringImpl>& a, const RefPtr<StringImpl>& b)
70         {
71             return equal(a.get(), b.get());
72         }
73 
hashStringHash74         static unsigned hash(const String& key) { return key.impl()->hash(); }
equalStringHash75         static bool equal(const String& a, const String& b)
76         {
77             return equal(a.impl(), b.impl());
78         }
79 
80         static const bool safeToCompareToEmptyOrDeleted = false;
81     };
82 
83     class CaseFoldingHash {
84     public:
85         // Paul Hsieh's SuperFastHash
86         // http://www.azillionmonkeys.com/qed/hash.html
hash(const UChar * data,unsigned length)87         static unsigned hash(const UChar* data, unsigned length)
88         {
89             unsigned l = length;
90             const UChar* s = data;
91             uint32_t hash = WTF::stringHashingStartValue;
92             uint32_t tmp;
93 
94             int rem = l & 1;
95             l >>= 1;
96 
97             // Main loop.
98             for (; l > 0; l--) {
99                 hash += WTF::Unicode::foldCase(s[0]);
100                 tmp = (WTF::Unicode::foldCase(s[1]) << 11) ^ hash;
101                 hash = (hash << 16) ^ tmp;
102                 s += 2;
103                 hash += hash >> 11;
104             }
105 
106             // Handle end case.
107             if (rem) {
108                 hash += WTF::Unicode::foldCase(s[0]);
109                 hash ^= hash << 11;
110                 hash += hash >> 17;
111             }
112 
113             // Force "avalanching" of final 127 bits.
114             hash ^= hash << 3;
115             hash += hash >> 5;
116             hash ^= hash << 2;
117             hash += hash >> 15;
118             hash ^= hash << 10;
119 
120             // This avoids ever returning a hash code of 0, since that is used to
121             // signal "hash not computed yet", using a value that is likely to be
122             // effectively the same as 0 when the low bits are masked.
123             hash |= !hash << 31;
124 
125             return hash;
126         }
127 
hash(StringImpl * str)128         static unsigned hash(StringImpl* str)
129         {
130             return hash(str->characters(), str->length());
131         }
132 
hash(const char * str,unsigned length)133         static unsigned hash(const char* str, unsigned length)
134         {
135             // This hash is designed to work on 16-bit chunks at a time. But since the normal case
136             // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
137             // were 16-bit chunks, which will give matching results.
138 
139             unsigned l = length;
140             const char* s = str;
141             uint32_t hash = WTF::stringHashingStartValue;
142             uint32_t tmp;
143 
144             int rem = l & 1;
145             l >>= 1;
146 
147             // Main loop
148             for (; l > 0; l--) {
149                 hash += WTF::Unicode::foldCase(s[0]);
150                 tmp = (WTF::Unicode::foldCase(s[1]) << 11) ^ hash;
151                 hash = (hash << 16) ^ tmp;
152                 s += 2;
153                 hash += hash >> 11;
154             }
155 
156             // Handle end case
157             if (rem) {
158                 hash += WTF::Unicode::foldCase(s[0]);
159                 hash ^= hash << 11;
160                 hash += hash >> 17;
161             }
162 
163             // Force "avalanching" of final 127 bits
164             hash ^= hash << 3;
165             hash += hash >> 5;
166             hash ^= hash << 2;
167             hash += hash >> 15;
168             hash ^= hash << 10;
169 
170             // this avoids ever returning a hash code of 0, since that is used to
171             // signal "hash not computed yet", using a value that is likely to be
172             // effectively the same as 0 when the low bits are masked
173             hash |= !hash << 31;
174 
175             return hash;
176         }
177 
equal(StringImpl * a,StringImpl * b)178         static bool equal(StringImpl* a, StringImpl* b)
179         {
180             if (a == b)
181                 return true;
182             if (!a || !b)
183                 return false;
184             unsigned length = a->length();
185             if (length != b->length())
186                 return false;
187             return WTF::Unicode::umemcasecmp(a->characters(), b->characters(), length) == 0;
188         }
189 
hash(const RefPtr<StringImpl> & key)190         static unsigned hash(const RefPtr<StringImpl>& key)
191         {
192             return hash(key.get());
193         }
194 
equal(const RefPtr<StringImpl> & a,const RefPtr<StringImpl> & b)195         static bool equal(const RefPtr<StringImpl>& a, const RefPtr<StringImpl>& b)
196         {
197             return equal(a.get(), b.get());
198         }
199 
hash(const String & key)200         static unsigned hash(const String& key)
201         {
202             return hash(key.impl());
203         }
hash(const AtomicString & key)204         static unsigned hash(const AtomicString& key)
205         {
206             return hash(key.impl());
207         }
equal(const String & a,const String & b)208         static bool equal(const String& a, const String& b)
209         {
210             return equal(a.impl(), b.impl());
211         }
equal(const AtomicString & a,const AtomicString & b)212         static bool equal(const AtomicString& a, const AtomicString& b)
213         {
214             return (a == b) || equal(a.impl(), b.impl());
215         }
216 
217         static const bool safeToCompareToEmptyOrDeleted = false;
218     };
219 
220     // This hash can be used in cases where the key is a hash of a string, but we don't
221     // want to store the string. It's not really specific to string hashing, but all our
222     // current uses of it are for strings.
223     struct AlreadyHashed : IntHash<unsigned> {
hashAlreadyHashed224         static unsigned hash(unsigned key) { return key; }
225 
226         // To use a hash value as a key for a hash table, we need to eliminate the
227         // "deleted" value, which is negative one. That could be done by changing
228         // the string hash function to never generate negative one, but this works
229         // and is still relatively efficient.
avoidDeletedValueAlreadyHashed230         static unsigned avoidDeletedValue(unsigned hash)
231         {
232             ASSERT(hash);
233             unsigned newHash = hash | (!(hash + 1) << 31);
234             ASSERT(newHash);
235             ASSERT(newHash != 0xFFFFFFFF);
236             return newHash;
237         }
238     };
239 
240 }
241 
242 namespace WTF {
243 
244     template<> struct HashTraits<WebCore::String> : GenericHashTraits<WebCore::String> {
245         static const bool emptyValueIsZero = true;
246         static void constructDeletedValue(WebCore::String& slot) { new (&slot) WebCore::String(HashTableDeletedValue); }
247         static bool isDeletedValue(const WebCore::String& slot) { return slot.isHashTableDeletedValue(); }
248     };
249 
250 }
251 
252 #endif
253