1 /* 2 * Copyright (C) 2006, 2007, 2008 Apple Inc. All rights reserved 3 * 4 * This library is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU Library General Public 6 * License as published by the Free Software Foundation; either 7 * version 2 of the License, or (at your option) any later version. 8 * 9 * This library is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 * Library General Public License for more details. 13 * 14 * You should have received a copy of the GNU Library General Public License 15 * along with this library; see the file COPYING.LIB. If not, write to 16 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 17 * Boston, MA 02110-1301, USA. 18 * 19 */ 20 21 #ifndef StringHash_h 22 #define StringHash_h 23 24 #include "AtomicString.h" 25 #include "PlatformString.h" 26 #include <wtf/HashFunctions.h> 27 #include <wtf/HashTraits.h> 28 #include <wtf/unicode/Unicode.h> 29 30 namespace WebCore { 31 32 // FIXME: We should really figure out a way to put the computeHash function that's 33 // currently a member function of StringImpl into this file so we can be a little 34 // closer to having all the nearly-identical hash functions in one place. 35 36 struct StringHash { hashStringHash37 static unsigned hash(StringImpl* key) { return key->hash(); } equalStringHash38 static bool equal(StringImpl* a, StringImpl* b) 39 { 40 if (a == b) 41 return true; 42 if (!a || !b) 43 return false; 44 45 unsigned aLength = a->length(); 46 unsigned bLength = b->length(); 47 if (aLength != bLength) 48 return false; 49 50 #if PLATFORM(ARM) || PLATFORM(SH4) 51 return memcmp(a->characters(), b->characters(), sizeof(UChar) * aLength) == 0; 52 #else 53 const uint32_t* aChars = reinterpret_cast<const uint32_t*>(a->characters()); 54 const uint32_t* bChars = reinterpret_cast<const uint32_t*>(b->characters()); 55 56 unsigned halfLength = aLength >> 1; 57 for (unsigned i = 0; i != halfLength; ++i) 58 if (*aChars++ != *bChars++) 59 return false; 60 61 if (aLength & 1 && *reinterpret_cast<const uint16_t*>(aChars) != *reinterpret_cast<const uint16_t*>(bChars)) 62 return false; 63 64 return true; 65 #endif 66 } 67 hashStringHash68 static unsigned hash(const RefPtr<StringImpl>& key) { return key->hash(); } equalStringHash69 static bool equal(const RefPtr<StringImpl>& a, const RefPtr<StringImpl>& b) 70 { 71 return equal(a.get(), b.get()); 72 } 73 hashStringHash74 static unsigned hash(const String& key) { return key.impl()->hash(); } equalStringHash75 static bool equal(const String& a, const String& b) 76 { 77 return equal(a.impl(), b.impl()); 78 } 79 80 static const bool safeToCompareToEmptyOrDeleted = false; 81 }; 82 83 class CaseFoldingHash { 84 public: 85 // Paul Hsieh's SuperFastHash 86 // http://www.azillionmonkeys.com/qed/hash.html hash(const UChar * data,unsigned length)87 static unsigned hash(const UChar* data, unsigned length) 88 { 89 unsigned l = length; 90 const UChar* s = data; 91 uint32_t hash = WTF::stringHashingStartValue; 92 uint32_t tmp; 93 94 int rem = l & 1; 95 l >>= 1; 96 97 // Main loop. 98 for (; l > 0; l--) { 99 hash += WTF::Unicode::foldCase(s[0]); 100 tmp = (WTF::Unicode::foldCase(s[1]) << 11) ^ hash; 101 hash = (hash << 16) ^ tmp; 102 s += 2; 103 hash += hash >> 11; 104 } 105 106 // Handle end case. 107 if (rem) { 108 hash += WTF::Unicode::foldCase(s[0]); 109 hash ^= hash << 11; 110 hash += hash >> 17; 111 } 112 113 // Force "avalanching" of final 127 bits. 114 hash ^= hash << 3; 115 hash += hash >> 5; 116 hash ^= hash << 2; 117 hash += hash >> 15; 118 hash ^= hash << 10; 119 120 // This avoids ever returning a hash code of 0, since that is used to 121 // signal "hash not computed yet", using a value that is likely to be 122 // effectively the same as 0 when the low bits are masked. 123 hash |= !hash << 31; 124 125 return hash; 126 } 127 hash(StringImpl * str)128 static unsigned hash(StringImpl* str) 129 { 130 return hash(str->characters(), str->length()); 131 } 132 hash(const char * str,unsigned length)133 static unsigned hash(const char* str, unsigned length) 134 { 135 // This hash is designed to work on 16-bit chunks at a time. But since the normal case 136 // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they 137 // were 16-bit chunks, which will give matching results. 138 139 unsigned l = length; 140 const char* s = str; 141 uint32_t hash = WTF::stringHashingStartValue; 142 uint32_t tmp; 143 144 int rem = l & 1; 145 l >>= 1; 146 147 // Main loop 148 for (; l > 0; l--) { 149 hash += WTF::Unicode::foldCase(s[0]); 150 tmp = (WTF::Unicode::foldCase(s[1]) << 11) ^ hash; 151 hash = (hash << 16) ^ tmp; 152 s += 2; 153 hash += hash >> 11; 154 } 155 156 // Handle end case 157 if (rem) { 158 hash += WTF::Unicode::foldCase(s[0]); 159 hash ^= hash << 11; 160 hash += hash >> 17; 161 } 162 163 // Force "avalanching" of final 127 bits 164 hash ^= hash << 3; 165 hash += hash >> 5; 166 hash ^= hash << 2; 167 hash += hash >> 15; 168 hash ^= hash << 10; 169 170 // this avoids ever returning a hash code of 0, since that is used to 171 // signal "hash not computed yet", using a value that is likely to be 172 // effectively the same as 0 when the low bits are masked 173 hash |= !hash << 31; 174 175 return hash; 176 } 177 equal(StringImpl * a,StringImpl * b)178 static bool equal(StringImpl* a, StringImpl* b) 179 { 180 if (a == b) 181 return true; 182 if (!a || !b) 183 return false; 184 unsigned length = a->length(); 185 if (length != b->length()) 186 return false; 187 return WTF::Unicode::umemcasecmp(a->characters(), b->characters(), length) == 0; 188 } 189 hash(const RefPtr<StringImpl> & key)190 static unsigned hash(const RefPtr<StringImpl>& key) 191 { 192 return hash(key.get()); 193 } 194 equal(const RefPtr<StringImpl> & a,const RefPtr<StringImpl> & b)195 static bool equal(const RefPtr<StringImpl>& a, const RefPtr<StringImpl>& b) 196 { 197 return equal(a.get(), b.get()); 198 } 199 hash(const String & key)200 static unsigned hash(const String& key) 201 { 202 return hash(key.impl()); 203 } hash(const AtomicString & key)204 static unsigned hash(const AtomicString& key) 205 { 206 return hash(key.impl()); 207 } equal(const String & a,const String & b)208 static bool equal(const String& a, const String& b) 209 { 210 return equal(a.impl(), b.impl()); 211 } equal(const AtomicString & a,const AtomicString & b)212 static bool equal(const AtomicString& a, const AtomicString& b) 213 { 214 return (a == b) || equal(a.impl(), b.impl()); 215 } 216 217 static const bool safeToCompareToEmptyOrDeleted = false; 218 }; 219 220 // This hash can be used in cases where the key is a hash of a string, but we don't 221 // want to store the string. It's not really specific to string hashing, but all our 222 // current uses of it are for strings. 223 struct AlreadyHashed : IntHash<unsigned> { hashAlreadyHashed224 static unsigned hash(unsigned key) { return key; } 225 226 // To use a hash value as a key for a hash table, we need to eliminate the 227 // "deleted" value, which is negative one. That could be done by changing 228 // the string hash function to never generate negative one, but this works 229 // and is still relatively efficient. avoidDeletedValueAlreadyHashed230 static unsigned avoidDeletedValue(unsigned hash) 231 { 232 ASSERT(hash); 233 unsigned newHash = hash | (!(hash + 1) << 31); 234 ASSERT(newHash); 235 ASSERT(newHash != 0xFFFFFFFF); 236 return newHash; 237 } 238 }; 239 240 } 241 242 namespace WTF { 243 244 template<> struct HashTraits<WebCore::String> : GenericHashTraits<WebCore::String> { 245 static const bool emptyValueIsZero = true; 246 static void constructDeletedValue(WebCore::String& slot) { new (&slot) WebCore::String(HashTableDeletedValue); } 247 static bool isDeletedValue(const WebCore::String& slot) { return slot.isHashTableDeletedValue(); } 248 }; 249 250 } 251 252 #endif 253