1 /* 2 * Copyright (C) 2005, 2006, 2008, 2010, 2013 Apple Inc. All rights reserved. 3 * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com> 4 * 5 * This library is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either 8 * version 2 of the License, or (at your option) any later version. 9 * 10 * This library is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public License 16 * along with this library; see the file COPYING.LIB. If not, write to 17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 18 * Boston, MA 02110-1301, USA. 19 * 20 */ 21 22 #ifndef WTF_StringHasher_h 23 #define WTF_StringHasher_h 24 25 #include "wtf/unicode/Unicode.h" 26 27 namespace WTF { 28 29 // Paul Hsieh's SuperFastHash 30 // http://www.azillionmonkeys.com/qed/hash.html 31 32 // LChar data is interpreted as Latin-1-encoded (zero extended to 16 bits). 33 34 // NOTE: The hash computation here must stay in sync with the create_hash_table script in 35 // JavaScriptCore and the CodeGeneratorJS.pm script in WebCore. 36 37 // Golden ratio. Arbitrary start value to avoid mapping all zeros to a hash value of zero. 38 static const unsigned stringHashingStartValue = 0x9E3779B9U; 39 40 class StringHasher { 41 public: 42 static const unsigned flagCount = 8; // Save 8 bits for StringImpl to use as flags. 43 StringHasher()44 StringHasher() 45 : m_hash(stringHashingStartValue) 46 , m_hasPendingCharacter(false) 47 , m_pendingCharacter(0) 48 { 49 } 50 51 // The hasher hashes two characters at a time, and thus an "aligned" hasher is one 52 // where an even number of characters have been added. Callers that always add 53 // characters two at a time can use the "assuming aligned" functions. addCharactersAssumingAligned(UChar a,UChar b)54 void addCharactersAssumingAligned(UChar a, UChar b) 55 { 56 ASSERT(!m_hasPendingCharacter); 57 m_hash += a; 58 m_hash = (m_hash << 16) ^ ((b << 11) ^ m_hash); 59 m_hash += m_hash >> 11; 60 } 61 addCharacter(UChar character)62 void addCharacter(UChar character) 63 { 64 if (m_hasPendingCharacter) { 65 m_hasPendingCharacter = false; 66 addCharactersAssumingAligned(m_pendingCharacter, character); 67 return; 68 } 69 70 m_pendingCharacter = character; 71 m_hasPendingCharacter = true; 72 } 73 addCharacters(UChar a,UChar b)74 void addCharacters(UChar a, UChar b) 75 { 76 if (m_hasPendingCharacter) { 77 #if ENABLE(ASSERT) 78 m_hasPendingCharacter = false; 79 #endif 80 addCharactersAssumingAligned(m_pendingCharacter, a); 81 m_pendingCharacter = b; 82 #if ENABLE(ASSERT) 83 m_hasPendingCharacter = true; 84 #endif 85 return; 86 } 87 88 addCharactersAssumingAligned(a, b); 89 } 90 addCharactersAssumingAligned(const T * data,unsigned length)91 template<typename T, UChar Converter(T)> void addCharactersAssumingAligned(const T* data, unsigned length) 92 { 93 ASSERT(!m_hasPendingCharacter); 94 95 bool remainder = length & 1; 96 length >>= 1; 97 98 while (length--) { 99 addCharactersAssumingAligned(Converter(data[0]), Converter(data[1])); 100 data += 2; 101 } 102 103 if (remainder) 104 addCharacter(Converter(*data)); 105 } 106 addCharactersAssumingAligned(const T * data,unsigned length)107 template<typename T> void addCharactersAssumingAligned(const T* data, unsigned length) 108 { 109 addCharactersAssumingAligned<T, defaultConverter>(data, length); 110 } 111 addCharacters(const T * data,unsigned length)112 template<typename T, UChar Converter(T)> void addCharacters(const T* data, unsigned length) 113 { 114 if (m_hasPendingCharacter && length) { 115 m_hasPendingCharacter = false; 116 addCharactersAssumingAligned(m_pendingCharacter, Converter(*data++)); 117 --length; 118 } 119 addCharactersAssumingAligned<T, Converter>(data, length); 120 } 121 addCharacters(const T * data,unsigned length)122 template<typename T> void addCharacters(const T* data, unsigned length) 123 { 124 addCharacters<T, defaultConverter>(data, length); 125 } 126 hashWithTop8BitsMasked()127 unsigned hashWithTop8BitsMasked() const 128 { 129 unsigned result = avalancheBits(); 130 131 // Reserving space from the high bits for flags preserves most of the hash's 132 // value, since hash lookup typically masks out the high bits anyway. 133 result &= (1U << (sizeof(result) * 8 - flagCount)) - 1; 134 135 // This avoids ever returning a hash code of 0, since that is used to 136 // signal "hash not computed yet". Setting the high bit maintains 137 // reasonable fidelity to a hash code of 0 because it is likely to yield 138 // exactly 0 when hash lookup masks out the high bits. 139 if (!result) 140 result = 0x80000000 >> flagCount; 141 142 return result; 143 } 144 hash()145 unsigned hash() const 146 { 147 unsigned result = avalancheBits(); 148 149 // This avoids ever returning a hash code of 0, since that is used to 150 // signal "hash not computed yet". Setting the high bit maintains 151 // reasonable fidelity to a hash code of 0 because it is likely to yield 152 // exactly 0 when hash lookup masks out the high bits. 153 if (!result) 154 result = 0x80000000; 155 156 return result; 157 } 158 computeHashAndMaskTop8Bits(const T * data,unsigned length)159 template<typename T, UChar Converter(T)> static unsigned computeHashAndMaskTop8Bits(const T* data, unsigned length) 160 { 161 StringHasher hasher; 162 hasher.addCharactersAssumingAligned<T, Converter>(data, length); 163 return hasher.hashWithTop8BitsMasked(); 164 } 165 computeHashAndMaskTop8Bits(const T * data,unsigned length)166 template<typename T> static unsigned computeHashAndMaskTop8Bits(const T* data, unsigned length) 167 { 168 return computeHashAndMaskTop8Bits<T, defaultConverter>(data, length); 169 } 170 computeHash(const T * data,unsigned length)171 template<typename T, UChar Converter(T)> static unsigned computeHash(const T* data, unsigned length) 172 { 173 StringHasher hasher; 174 hasher.addCharactersAssumingAligned<T, Converter>(data, length); 175 return hasher.hash(); 176 } 177 computeHash(const T * data,unsigned length)178 template<typename T> static unsigned computeHash(const T* data, unsigned length) 179 { 180 return computeHash<T, defaultConverter>(data, length); 181 } 182 hashMemory(const void * data,unsigned length)183 static unsigned hashMemory(const void* data, unsigned length) 184 { 185 // FIXME: Why does this function use the version of the hash that drops the top 8 bits? 186 // We want that for all string hashing so we can use those bits in StringImpl and hash 187 // strings consistently, but I don't see why we'd want that for general memory hashing. 188 ASSERT(!(length % 2)); 189 return computeHashAndMaskTop8Bits<UChar>(static_cast<const UChar*>(data), length / sizeof(UChar)); 190 } 191 hashMemory(const void * data)192 template<size_t length> static unsigned hashMemory(const void* data) 193 { 194 COMPILE_ASSERT(!(length % 2), length_must_be_a_multiple_of_two); 195 return hashMemory(data, length); 196 } 197 198 private: defaultConverter(UChar character)199 static UChar defaultConverter(UChar character) 200 { 201 return character; 202 } 203 defaultConverter(LChar character)204 static UChar defaultConverter(LChar character) 205 { 206 return character; 207 } 208 avalancheBits()209 unsigned avalancheBits() const 210 { 211 unsigned result = m_hash; 212 213 // Handle end case. 214 if (m_hasPendingCharacter) { 215 result += m_pendingCharacter; 216 result ^= result << 11; 217 result += result >> 17; 218 } 219 220 // Force "avalanching" of final 31 bits. 221 result ^= result << 3; 222 result += result >> 5; 223 result ^= result << 2; 224 result += result >> 15; 225 result ^= result << 10; 226 227 return result; 228 } 229 230 unsigned m_hash; 231 bool m_hasPendingCharacter; 232 UChar m_pendingCharacter; 233 }; 234 235 } // namespace WTF 236 237 using WTF::StringHasher; 238 239 #endif // WTF_StringHasher_h 240