• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2006, 2007, 2008 Apple Inc. All rights reserved
3  * Copyright (C) Research In Motion Limited 2009. All rights reserved.
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Library General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Library General Public License for more details.
14  *
15  * You should have received a copy of the GNU Library General Public License
16  * along with this library; see the file COPYING.LIB.  If not, write to
17  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18  * Boston, MA 02110-1301, USA.
19  *
20  */
21 
22 #ifndef StringHash_h
23 #define StringHash_h
24 
25 #include "AtomicString.h"
26 #include "PlatformString.h"
27 #include <wtf/HashTraits.h>
28 #include <wtf/StringHashFunctions.h>
29 #include <wtf/unicode/Unicode.h>
30 
31 namespace WebCore {
32 
33     // The hash() functions on StringHash and CaseFoldingHash do not support
34     // null strings. get(), contains(), and add() on HashMap<String,..., StringHash>
35     // cause a null-pointer dereference when passed null strings.
36 
37     // FIXME: We should really figure out a way to put the computeHash function that's
38     // currently a member function of StringImpl into this file so we can be a little
39     // closer to having all the nearly-identical hash functions in one place.
40 
41     struct StringHash {
hashStringHash42         static unsigned hash(StringImpl* key) { return key->hash(); }
equalStringHash43         static bool equal(StringImpl* a, StringImpl* b)
44         {
45             if (a == b)
46                 return true;
47             if (!a || !b)
48                 return false;
49 
50             unsigned aLength = a->length();
51             unsigned bLength = b->length();
52             if (aLength != bLength)
53                 return false;
54 
55             // FIXME: perhaps we should have a more abstract macro that indicates when
56             // going 4 bytes at a time is unsafe
57 #if CPU(ARM) || CPU(SH4)
58             const UChar* aChars = a->characters();
59             const UChar* bChars = b->characters();
60             for (unsigned i = 0; i != aLength; ++i) {
61                 if (*aChars++ != *bChars++)
62                     return false;
63             }
64             return true;
65 #else
66             /* Do it 4-bytes-at-a-time on architectures where it's safe */
67             const uint32_t* aChars = reinterpret_cast<const uint32_t*>(a->characters());
68             const uint32_t* bChars = reinterpret_cast<const uint32_t*>(b->characters());
69 
70             unsigned halfLength = aLength >> 1;
71             for (unsigned i = 0; i != halfLength; ++i)
72                 if (*aChars++ != *bChars++)
73                     return false;
74 
75             if (aLength & 1 && *reinterpret_cast<const uint16_t*>(aChars) != *reinterpret_cast<const uint16_t*>(bChars))
76                 return false;
77 
78             return true;
79 #endif
80         }
81 
hashStringHash82         static unsigned hash(const RefPtr<StringImpl>& key) { return key->hash(); }
equalStringHash83         static bool equal(const RefPtr<StringImpl>& a, const RefPtr<StringImpl>& b)
84         {
85             return equal(a.get(), b.get());
86         }
87 
hashStringHash88         static unsigned hash(const String& key) { return key.impl()->hash(); }
equalStringHash89         static bool equal(const String& a, const String& b)
90         {
91             return equal(a.impl(), b.impl());
92         }
93 
94         static const bool safeToCompareToEmptyOrDeleted = false;
95     };
96 
97     class CaseFoldingHash {
98     public:
99         // Paul Hsieh's SuperFastHash
100         // http://www.azillionmonkeys.com/qed/hash.html
hash(const UChar * data,unsigned length)101         static unsigned hash(const UChar* data, unsigned length)
102         {
103             unsigned l = length;
104             const UChar* s = data;
105             uint32_t hash = WTF::stringHashingStartValue;
106             uint32_t tmp;
107 
108             int rem = l & 1;
109             l >>= 1;
110 
111             // Main loop.
112             for (; l > 0; l--) {
113                 hash += WTF::Unicode::foldCase(s[0]);
114                 tmp = (WTF::Unicode::foldCase(s[1]) << 11) ^ hash;
115                 hash = (hash << 16) ^ tmp;
116                 s += 2;
117                 hash += hash >> 11;
118             }
119 
120             // Handle end case.
121             if (rem) {
122                 hash += WTF::Unicode::foldCase(s[0]);
123                 hash ^= hash << 11;
124                 hash += hash >> 17;
125             }
126 
127             // Force "avalanching" of final 127 bits.
128             hash ^= hash << 3;
129             hash += hash >> 5;
130             hash ^= hash << 2;
131             hash += hash >> 15;
132             hash ^= hash << 10;
133 
134             // This avoids ever returning a hash code of 0, since that is used to
135             // signal "hash not computed yet", using a value that is likely to be
136             // effectively the same as 0 when the low bits are masked.
137             hash |= !hash << 31;
138 
139             return hash;
140         }
141 
hash(StringImpl * str)142         static unsigned hash(StringImpl* str)
143         {
144             return hash(str->characters(), str->length());
145         }
146 
hash(const char * str,unsigned length)147         static unsigned hash(const char* str, unsigned length)
148         {
149             // This hash is designed to work on 16-bit chunks at a time. But since the normal case
150             // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
151             // were 16-bit chunks, which will give matching results.
152 
153             unsigned l = length;
154             const char* s = str;
155             uint32_t hash = WTF::stringHashingStartValue;
156             uint32_t tmp;
157 
158             int rem = l & 1;
159             l >>= 1;
160 
161             // Main loop
162             for (; l > 0; l--) {
163                 hash += WTF::Unicode::foldCase(s[0]);
164                 tmp = (WTF::Unicode::foldCase(s[1]) << 11) ^ hash;
165                 hash = (hash << 16) ^ tmp;
166                 s += 2;
167                 hash += hash >> 11;
168             }
169 
170             // Handle end case
171             if (rem) {
172                 hash += WTF::Unicode::foldCase(s[0]);
173                 hash ^= hash << 11;
174                 hash += hash >> 17;
175             }
176 
177             // Force "avalanching" of final 127 bits
178             hash ^= hash << 3;
179             hash += hash >> 5;
180             hash ^= hash << 2;
181             hash += hash >> 15;
182             hash ^= hash << 10;
183 
184             // this avoids ever returning a hash code of 0, since that is used to
185             // signal "hash not computed yet", using a value that is likely to be
186             // effectively the same as 0 when the low bits are masked
187             hash |= !hash << 31;
188 
189             return hash;
190         }
191 
equal(StringImpl * a,StringImpl * b)192         static bool equal(StringImpl* a, StringImpl* b)
193         {
194             if (a == b)
195                 return true;
196             if (!a || !b)
197                 return false;
198             unsigned length = a->length();
199             if (length != b->length())
200                 return false;
201             return WTF::Unicode::umemcasecmp(a->characters(), b->characters(), length) == 0;
202         }
203 
hash(const RefPtr<StringImpl> & key)204         static unsigned hash(const RefPtr<StringImpl>& key)
205         {
206             return hash(key.get());
207         }
208 
equal(const RefPtr<StringImpl> & a,const RefPtr<StringImpl> & b)209         static bool equal(const RefPtr<StringImpl>& a, const RefPtr<StringImpl>& b)
210         {
211             return equal(a.get(), b.get());
212         }
213 
hash(const String & key)214         static unsigned hash(const String& key)
215         {
216             return hash(key.impl());
217         }
hash(const AtomicString & key)218         static unsigned hash(const AtomicString& key)
219         {
220             return hash(key.impl());
221         }
equal(const String & a,const String & b)222         static bool equal(const String& a, const String& b)
223         {
224             return equal(a.impl(), b.impl());
225         }
equal(const AtomicString & a,const AtomicString & b)226         static bool equal(const AtomicString& a, const AtomicString& b)
227         {
228             return (a == b) || equal(a.impl(), b.impl());
229         }
230 
231         static const bool safeToCompareToEmptyOrDeleted = false;
232     };
233 
234     // This hash can be used in cases where the key is a hash of a string, but we don't
235     // want to store the string. It's not really specific to string hashing, but all our
236     // current uses of it are for strings.
237     struct AlreadyHashed : IntHash<unsigned> {
hashAlreadyHashed238         static unsigned hash(unsigned key) { return key; }
239 
240         // To use a hash value as a key for a hash table, we need to eliminate the
241         // "deleted" value, which is negative one. That could be done by changing
242         // the string hash function to never generate negative one, but this works
243         // and is still relatively efficient.
avoidDeletedValueAlreadyHashed244         static unsigned avoidDeletedValue(unsigned hash)
245         {
246             ASSERT(hash);
247             unsigned newHash = hash | (!(hash + 1) << 31);
248             ASSERT(newHash);
249             ASSERT(newHash != 0xFFFFFFFF);
250             return newHash;
251         }
252     };
253 
254 }
255 
256 namespace WTF {
257 
258     template<> struct HashTraits<WebCore::String> : GenericHashTraits<WebCore::String> {
259         static const bool emptyValueIsZero = true;
260         static void constructDeletedValue(WebCore::String& slot) { new (&slot) WebCore::String(HashTableDeletedValue); }
261         static bool isDeletedValue(const WebCore::String& slot) { return slot.isHashTableDeletedValue(); }
262     };
263 
264 }
265 
266 #endif
267