1 /* 2 * Copyright (C) 2006, 2007, 2008 Apple Inc. All rights reserved 3 * Copyright (C) Research In Motion Limited 2009. All rights reserved. 4 * 5 * This library is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either 8 * version 2 of the License, or (at your option) any later version. 9 * 10 * This library is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public License 16 * along with this library; see the file COPYING.LIB. If not, write to 17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 18 * Boston, MA 02110-1301, USA. 19 * 20 */ 21 22 #ifndef StringHash_h 23 #define StringHash_h 24 25 #include "AtomicString.h" 26 #include "PlatformString.h" 27 #include <wtf/HashTraits.h> 28 #include <wtf/StringHashFunctions.h> 29 #include <wtf/unicode/Unicode.h> 30 31 namespace WebCore { 32 33 // The hash() functions on StringHash and CaseFoldingHash do not support 34 // null strings. get(), contains(), and add() on HashMap<String,..., StringHash> 35 // cause a null-pointer dereference when passed null strings. 36 37 // FIXME: We should really figure out a way to put the computeHash function that's 38 // currently a member function of StringImpl into this file so we can be a little 39 // closer to having all the nearly-identical hash functions in one place. 40 41 struct StringHash { hashStringHash42 static unsigned hash(StringImpl* key) { return key->hash(); } equalStringHash43 static bool equal(StringImpl* a, StringImpl* b) 44 { 45 if (a == b) 46 return true; 47 if (!a || !b) 48 return false; 49 50 unsigned aLength = a->length(); 51 unsigned bLength = b->length(); 52 if (aLength != bLength) 53 return false; 54 55 // FIXME: perhaps we should have a more abstract macro that indicates when 56 // going 4 bytes at a time is unsafe 57 #if CPU(ARM) || CPU(SH4) 58 const UChar* aChars = a->characters(); 59 const UChar* bChars = b->characters(); 60 for (unsigned i = 0; i != aLength; ++i) { 61 if (*aChars++ != *bChars++) 62 return false; 63 } 64 return true; 65 #else 66 /* Do it 4-bytes-at-a-time on architectures where it's safe */ 67 const uint32_t* aChars = reinterpret_cast<const uint32_t*>(a->characters()); 68 const uint32_t* bChars = reinterpret_cast<const uint32_t*>(b->characters()); 69 70 unsigned halfLength = aLength >> 1; 71 for (unsigned i = 0; i != halfLength; ++i) 72 if (*aChars++ != *bChars++) 73 return false; 74 75 if (aLength & 1 && *reinterpret_cast<const uint16_t*>(aChars) != *reinterpret_cast<const uint16_t*>(bChars)) 76 return false; 77 78 return true; 79 #endif 80 } 81 hashStringHash82 static unsigned hash(const RefPtr<StringImpl>& key) { return key->hash(); } equalStringHash83 static bool equal(const RefPtr<StringImpl>& a, const RefPtr<StringImpl>& b) 84 { 85 return equal(a.get(), b.get()); 86 } 87 hashStringHash88 static unsigned hash(const String& key) { return key.impl()->hash(); } equalStringHash89 static bool equal(const String& a, const String& b) 90 { 91 return equal(a.impl(), b.impl()); 92 } 93 94 static const bool safeToCompareToEmptyOrDeleted = false; 95 }; 96 97 class CaseFoldingHash { 98 public: 99 // Paul Hsieh's SuperFastHash 100 // http://www.azillionmonkeys.com/qed/hash.html hash(const UChar * data,unsigned length)101 static unsigned hash(const UChar* data, unsigned length) 102 { 103 unsigned l = length; 104 const UChar* s = data; 105 uint32_t hash = WTF::stringHashingStartValue; 106 uint32_t tmp; 107 108 int rem = l & 1; 109 l >>= 1; 110 111 // Main loop. 112 for (; l > 0; l--) { 113 hash += WTF::Unicode::foldCase(s[0]); 114 tmp = (WTF::Unicode::foldCase(s[1]) << 11) ^ hash; 115 hash = (hash << 16) ^ tmp; 116 s += 2; 117 hash += hash >> 11; 118 } 119 120 // Handle end case. 121 if (rem) { 122 hash += WTF::Unicode::foldCase(s[0]); 123 hash ^= hash << 11; 124 hash += hash >> 17; 125 } 126 127 // Force "avalanching" of final 127 bits. 128 hash ^= hash << 3; 129 hash += hash >> 5; 130 hash ^= hash << 2; 131 hash += hash >> 15; 132 hash ^= hash << 10; 133 134 // This avoids ever returning a hash code of 0, since that is used to 135 // signal "hash not computed yet", using a value that is likely to be 136 // effectively the same as 0 when the low bits are masked. 137 hash |= !hash << 31; 138 139 return hash; 140 } 141 hash(StringImpl * str)142 static unsigned hash(StringImpl* str) 143 { 144 return hash(str->characters(), str->length()); 145 } 146 hash(const char * str,unsigned length)147 static unsigned hash(const char* str, unsigned length) 148 { 149 // This hash is designed to work on 16-bit chunks at a time. But since the normal case 150 // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they 151 // were 16-bit chunks, which will give matching results. 152 153 unsigned l = length; 154 const char* s = str; 155 uint32_t hash = WTF::stringHashingStartValue; 156 uint32_t tmp; 157 158 int rem = l & 1; 159 l >>= 1; 160 161 // Main loop 162 for (; l > 0; l--) { 163 hash += WTF::Unicode::foldCase(s[0]); 164 tmp = (WTF::Unicode::foldCase(s[1]) << 11) ^ hash; 165 hash = (hash << 16) ^ tmp; 166 s += 2; 167 hash += hash >> 11; 168 } 169 170 // Handle end case 171 if (rem) { 172 hash += WTF::Unicode::foldCase(s[0]); 173 hash ^= hash << 11; 174 hash += hash >> 17; 175 } 176 177 // Force "avalanching" of final 127 bits 178 hash ^= hash << 3; 179 hash += hash >> 5; 180 hash ^= hash << 2; 181 hash += hash >> 15; 182 hash ^= hash << 10; 183 184 // this avoids ever returning a hash code of 0, since that is used to 185 // signal "hash not computed yet", using a value that is likely to be 186 // effectively the same as 0 when the low bits are masked 187 hash |= !hash << 31; 188 189 return hash; 190 } 191 equal(StringImpl * a,StringImpl * b)192 static bool equal(StringImpl* a, StringImpl* b) 193 { 194 if (a == b) 195 return true; 196 if (!a || !b) 197 return false; 198 unsigned length = a->length(); 199 if (length != b->length()) 200 return false; 201 return WTF::Unicode::umemcasecmp(a->characters(), b->characters(), length) == 0; 202 } 203 hash(const RefPtr<StringImpl> & key)204 static unsigned hash(const RefPtr<StringImpl>& key) 205 { 206 return hash(key.get()); 207 } 208 equal(const RefPtr<StringImpl> & a,const RefPtr<StringImpl> & b)209 static bool equal(const RefPtr<StringImpl>& a, const RefPtr<StringImpl>& b) 210 { 211 return equal(a.get(), b.get()); 212 } 213 hash(const String & key)214 static unsigned hash(const String& key) 215 { 216 return hash(key.impl()); 217 } hash(const AtomicString & key)218 static unsigned hash(const AtomicString& key) 219 { 220 return hash(key.impl()); 221 } equal(const String & a,const String & b)222 static bool equal(const String& a, const String& b) 223 { 224 return equal(a.impl(), b.impl()); 225 } equal(const AtomicString & a,const AtomicString & b)226 static bool equal(const AtomicString& a, const AtomicString& b) 227 { 228 return (a == b) || equal(a.impl(), b.impl()); 229 } 230 231 static const bool safeToCompareToEmptyOrDeleted = false; 232 }; 233 234 // This hash can be used in cases where the key is a hash of a string, but we don't 235 // want to store the string. It's not really specific to string hashing, but all our 236 // current uses of it are for strings. 237 struct AlreadyHashed : IntHash<unsigned> { hashAlreadyHashed238 static unsigned hash(unsigned key) { return key; } 239 240 // To use a hash value as a key for a hash table, we need to eliminate the 241 // "deleted" value, which is negative one. That could be done by changing 242 // the string hash function to never generate negative one, but this works 243 // and is still relatively efficient. avoidDeletedValueAlreadyHashed244 static unsigned avoidDeletedValue(unsigned hash) 245 { 246 ASSERT(hash); 247 unsigned newHash = hash | (!(hash + 1) << 31); 248 ASSERT(newHash); 249 ASSERT(newHash != 0xFFFFFFFF); 250 return newHash; 251 } 252 }; 253 254 } 255 256 namespace WTF { 257 258 template<> struct HashTraits<WebCore::String> : GenericHashTraits<WebCore::String> { 259 static const bool emptyValueIsZero = true; 260 static void constructDeletedValue(WebCore::String& slot) { new (&slot) WebCore::String(HashTableDeletedValue); } 261 static bool isDeletedValue(const WebCore::String& slot) { return slot.isHashTableDeletedValue(); } 262 }; 263 264 } 265 266 #endif 267