1 /*
2 * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
3 * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public License
16 * along with this library; see the file COPYING.LIB. If not, write to
17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 * Boston, MA 02110-1301, USA.
19 *
20 */
21
22 #include "config.h"
23
24 #include "AtomicString.h"
25
26 #include "StringHash.h"
27 #include <wtf/HashSet.h>
28 #include <wtf/Threading.h>
29 #include <wtf/WTFThreadData.h>
30 #include <wtf/unicode/UTF8.h>
31
32 namespace WTF {
33
34 using namespace Unicode;
35
// Build-time check: the build fails if AtomicString and String ever differ in size.
COMPILE_ASSERT(sizeof(AtomicString) == sizeof(String), atomic_string_and_string_must_be_same_size);
37
38 class AtomicStringTable {
39 public:
create()40 static AtomicStringTable* create()
41 {
42 AtomicStringTable* table = new AtomicStringTable;
43
44 WTFThreadData& data = wtfThreadData();
45 data.m_atomicStringTable = table;
46 data.m_atomicStringTableDestructor = AtomicStringTable::destroy;
47
48 return table;
49 }
50
table()51 HashSet<StringImpl*>& table()
52 {
53 return m_table;
54 }
55
56 private:
destroy(AtomicStringTable * table)57 static void destroy(AtomicStringTable* table)
58 {
59 HashSet<StringImpl*>::iterator end = table->m_table.end();
60 for (HashSet<StringImpl*>::iterator iter = table->m_table.begin(); iter != end; ++iter)
61 (*iter)->setIsAtomic(false);
62 delete table;
63 }
64
65 HashSet<StringImpl*> m_table;
66 };
67
stringTable()68 static inline HashSet<StringImpl*>& stringTable()
69 {
70 // Once possible we should make this non-lazy (constructed in WTFThreadData's constructor).
71 AtomicStringTable* table = wtfThreadData().atomicStringTable();
72 if (UNLIKELY(!table))
73 table = AtomicStringTable::create();
74 return table->table();
75 }
76
77 template<typename T, typename HashTranslator>
addToStringTable(const T & value)78 static inline PassRefPtr<StringImpl> addToStringTable(const T& value)
79 {
80 pair<HashSet<StringImpl*>::iterator, bool> addResult = stringTable().add<T, HashTranslator>(value);
81
82 // If the string is newly-translated, then we need to adopt it.
83 // The boolean in the pair tells us if that is so.
84 return addResult.second ? adoptRef(*addResult.first) : *addResult.first;
85 }
86
87 struct CStringTranslator {
hashWTF::CStringTranslator88 static unsigned hash(const char* c)
89 {
90 return StringHasher::computeHash(c);
91 }
92
equalWTF::CStringTranslator93 static bool equal(StringImpl* r, const char* s)
94 {
95 int length = r->length();
96 const UChar* d = r->characters();
97 for (int i = 0; i != length; ++i) {
98 unsigned char c = s[i];
99 if (d[i] != c)
100 return false;
101 }
102 return !s[length];
103 }
104
translateWTF::CStringTranslator105 static void translate(StringImpl*& location, const char* const& c, unsigned hash)
106 {
107 location = StringImpl::create(c).leakRef();
108 location->setHash(hash);
109 location->setIsAtomic(true);
110 }
111 };
112
operator ==(const AtomicString & a,const char * b)113 bool operator==(const AtomicString& a, const char* b)
114 {
115 StringImpl* impl = a.impl();
116 if ((!impl || !impl->characters()) && !b)
117 return true;
118 if ((!impl || !impl->characters()) || !b)
119 return false;
120 return CStringTranslator::equal(impl, b);
121 }
122
add(const char * c)123 PassRefPtr<StringImpl> AtomicString::add(const char* c)
124 {
125 if (!c)
126 return 0;
127 if (!*c)
128 return StringImpl::empty();
129
130 return addToStringTable<const char*, CStringTranslator>(c);
131 }
132
// Lookup key used with UCharBufferTranslator: a run of UChars described by a
// pointer and an explicit count.
struct UCharBuffer {
    const UChar* s; // First character of the run.
    unsigned length; // Number of UChars starting at s.
};
137
equal(StringImpl * string,const UChar * characters,unsigned length)138 static inline bool equal(StringImpl* string, const UChar* characters, unsigned length)
139 {
140 if (string->length() != length)
141 return false;
142
143 // FIXME: perhaps we should have a more abstract macro that indicates when
144 // going 4 bytes at a time is unsafe
145 #if CPU(ARM) || CPU(SH4) || CPU(MIPS) || CPU(SPARC)
146 const UChar* stringCharacters = string->characters();
147 for (unsigned i = 0; i != length; ++i) {
148 if (*stringCharacters++ != *characters++)
149 return false;
150 }
151 return true;
152 #else
153 /* Do it 4-bytes-at-a-time on architectures where it's safe */
154
155 const uint32_t* stringCharacters = reinterpret_cast<const uint32_t*>(string->characters());
156 const uint32_t* bufferCharacters = reinterpret_cast<const uint32_t*>(characters);
157
158 unsigned halfLength = length >> 1;
159 for (unsigned i = 0; i != halfLength; ++i) {
160 if (*stringCharacters++ != *bufferCharacters++)
161 return false;
162 }
163
164 if (length & 1 && *reinterpret_cast<const uint16_t*>(stringCharacters) != *reinterpret_cast<const uint16_t*>(bufferCharacters))
165 return false;
166
167 return true;
168 #endif
169 }
170
operator ==(const AtomicString & string,const Vector<UChar> & vector)171 bool operator==(const AtomicString& string, const Vector<UChar>& vector)
172 {
173 return string.impl() && equal(string.impl(), vector.data(), vector.size());
174 }
175
176 struct UCharBufferTranslator {
hashWTF::UCharBufferTranslator177 static unsigned hash(const UCharBuffer& buf)
178 {
179 return StringHasher::computeHash(buf.s, buf.length);
180 }
181
equalWTF::UCharBufferTranslator182 static bool equal(StringImpl* const& str, const UCharBuffer& buf)
183 {
184 return WTF::equal(str, buf.s, buf.length);
185 }
186
translateWTF::UCharBufferTranslator187 static void translate(StringImpl*& location, const UCharBuffer& buf, unsigned hash)
188 {
189 location = StringImpl::create(buf.s, buf.length).leakRef();
190 location->setHash(hash);
191 location->setIsAtomic(true);
192 }
193 };
194
// Lookup key used with HashAndCharactersTranslator: a run of UChars together
// with a hash the caller has already computed for it.
struct HashAndCharacters {
    unsigned hash; // Precomputed hash of the characters; the translator returns it as-is.
    const UChar* characters; // First character of the run.
    unsigned length; // Number of UChars starting at characters.
};
200
201 struct HashAndCharactersTranslator {
hashWTF::HashAndCharactersTranslator202 static unsigned hash(const HashAndCharacters& buffer)
203 {
204 ASSERT(buffer.hash == StringHasher::computeHash(buffer.characters, buffer.length));
205 return buffer.hash;
206 }
207
equalWTF::HashAndCharactersTranslator208 static bool equal(StringImpl* const& string, const HashAndCharacters& buffer)
209 {
210 return WTF::equal(string, buffer.characters, buffer.length);
211 }
212
translateWTF::HashAndCharactersTranslator213 static void translate(StringImpl*& location, const HashAndCharacters& buffer, unsigned hash)
214 {
215 location = StringImpl::create(buffer.characters, buffer.length).leakRef();
216 location->setHash(hash);
217 location->setIsAtomic(true);
218 }
219 };
220
// Lookup key used with HashAndUTF8CharactersTranslator: a UTF-8 byte sequence
// plus the hash and lengths computed for it in advance.
struct HashAndUTF8Characters {
    unsigned hash; // Precomputed hash; the translator returns it as-is.
    const char* characters; // Start of the UTF-8 bytes.
    unsigned length; // Number of UTF-8 bytes starting at characters.
    unsigned utf16Length; // Number of UTF-16 code units the bytes decode to.
};
227
228 struct HashAndUTF8CharactersTranslator {
hashWTF::HashAndUTF8CharactersTranslator229 static unsigned hash(const HashAndUTF8Characters& buffer)
230 {
231 return buffer.hash;
232 }
233
equalWTF::HashAndUTF8CharactersTranslator234 static bool equal(StringImpl* const& string, const HashAndUTF8Characters& buffer)
235 {
236 if (buffer.utf16Length != string->length())
237 return false;
238
239 const UChar* stringCharacters = string->characters();
240
241 // If buffer contains only ASCII characters UTF-8 and UTF16 length are the same.
242 if (buffer.utf16Length != buffer.length)
243 return equalUTF16WithUTF8(stringCharacters, stringCharacters + string->length(), buffer.characters, buffer.characters + buffer.length);
244
245 for (unsigned i = 0; i < buffer.length; ++i) {
246 ASSERT(isASCII(buffer.characters[i]));
247 if (stringCharacters[i] != buffer.characters[i])
248 return false;
249 }
250
251 return true;
252 }
253
translateWTF::HashAndUTF8CharactersTranslator254 static void translate(StringImpl*& location, const HashAndUTF8Characters& buffer, unsigned hash)
255 {
256 UChar* target;
257 location = StringImpl::createUninitialized(buffer.utf16Length, target).releaseRef();
258
259 const char* source = buffer.characters;
260 if (convertUTF8ToUTF16(&source, source + buffer.length, &target, target + buffer.utf16Length) != conversionOK)
261 ASSERT_NOT_REACHED();
262
263 location->setHash(hash);
264 location->setIsAtomic(true);
265 }
266 };
267
add(const UChar * s,unsigned length)268 PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length)
269 {
270 if (!s)
271 return 0;
272
273 if (!length)
274 return StringImpl::empty();
275
276 UCharBuffer buffer = { s, length };
277 return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer);
278 }
279
add(const UChar * s,unsigned length,unsigned existingHash)280 PassRefPtr<StringImpl> AtomicString::add(const UChar* s, unsigned length, unsigned existingHash)
281 {
282 ASSERT(s);
283 ASSERT(existingHash);
284
285 if (!length)
286 return StringImpl::empty();
287
288 HashAndCharacters buffer = { existingHash, s, length };
289 return addToStringTable<HashAndCharacters, HashAndCharactersTranslator>(buffer);
290 }
291
add(const UChar * s)292 PassRefPtr<StringImpl> AtomicString::add(const UChar* s)
293 {
294 if (!s)
295 return 0;
296
297 int length = 0;
298 while (s[length] != UChar(0))
299 length++;
300
301 if (!length)
302 return StringImpl::empty();
303
304 UCharBuffer buffer = { s, length };
305 return addToStringTable<UCharBuffer, UCharBufferTranslator>(buffer);
306 }
307
addSlowCase(StringImpl * r)308 PassRefPtr<StringImpl> AtomicString::addSlowCase(StringImpl* r)
309 {
310 if (!r || r->isAtomic())
311 return r;
312
313 if (!r->length())
314 return StringImpl::empty();
315
316 StringImpl* result = *stringTable().add(r).first;
317 if (result == r)
318 r->setIsAtomic(true);
319 return result;
320 }
321
find(const UChar * s,unsigned length,unsigned existingHash)322 AtomicStringImpl* AtomicString::find(const UChar* s, unsigned length, unsigned existingHash)
323 {
324 ASSERT(s);
325 ASSERT(existingHash);
326
327 if (!length)
328 return static_cast<AtomicStringImpl*>(StringImpl::empty());
329
330 HashAndCharacters buffer = { existingHash, s, length };
331 HashSet<StringImpl*>::iterator iterator = stringTable().find<HashAndCharacters, HashAndCharactersTranslator>(buffer);
332 if (iterator == stringTable().end())
333 return 0;
334 return static_cast<AtomicStringImpl*>(*iterator);
335 }
336
remove(StringImpl * r)337 void AtomicString::remove(StringImpl* r)
338 {
339 stringTable().remove(r);
340 }
341
lower() const342 AtomicString AtomicString::lower() const
343 {
344 // Note: This is a hot function in the Dromaeo benchmark.
345 StringImpl* impl = this->impl();
346 if (UNLIKELY(!impl))
347 return *this;
348 RefPtr<StringImpl> newImpl = impl->lower();
349 if (LIKELY(newImpl == impl))
350 return *this;
351 return AtomicString(newImpl);
352 }
353
fromUTF8Internal(const char * charactersStart,const char * charactersEnd)354 AtomicString AtomicString::fromUTF8Internal(const char* charactersStart, const char* charactersEnd)
355 {
356 HashAndUTF8Characters buffer;
357 buffer.characters = charactersStart;
358 buffer.hash = calculateStringHashAndLengthFromUTF8(charactersStart, charactersEnd, buffer.length, buffer.utf16Length);
359
360 if (!buffer.hash)
361 return nullAtom;
362
363 AtomicString atomicString;
364 atomicString.m_string = addToStringTable<HashAndUTF8Characters, HashAndUTF8CharactersTranslator>(buffer);
365 return atomicString;
366 }
367
368 } // namespace WTF
369