1 /*
2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3 * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010 Apple Inc. All rights reserved.
4 * Copyright (C) 2009 Google Inc. All rights reserved.
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 */
22
23 #ifndef StringImpl_h
24 #define StringImpl_h
25
26 #include <limits.h>
27 #include <wtf/ASCIICType.h>
28 #include <wtf/CrossThreadRefCounted.h>
29 #include <wtf/Forward.h>
30 #include <wtf/OwnFastMallocPtr.h>
31 #include <wtf/StdLibExtras.h>
32 #include <wtf/StringHasher.h>
33 #include <wtf/Vector.h>
34 #include <wtf/text/StringImplBase.h>
35 #include <wtf/unicode/Unicode.h>
36
37 #if USE(CF)
38 typedef const struct __CFString * CFStringRef;
39 #endif
40
41 #ifdef __OBJC__
42 @class NSString;
43 #endif
44
45 // FIXME: This is a temporary layering violation while we move string code to WTF.
46 // Landing the file moves in one patch, will follow on with patches to change the namespaces.
47 namespace JSC {
48 struct IdentifierCStringTranslator;
49 struct IdentifierUCharBufferTranslator;
50 }
51
52 namespace WTF {
53
// Translator helpers that StringImpl befriends below; they are only declared here.
struct CStringTranslator;
struct HashAndCharactersTranslator;
struct HashAndUTF8CharactersTranslator;
struct UCharBufferTranslator;

// Selects between case-sensitive and case-insensitive text operations.
enum TextCaseSensitivity { TextCaseSensitive, TextCaseInsensitive };

// A const UChar buffer held by OwnFastMallocPtr.
typedef OwnFastMallocPtr<const UChar> SharableUChar;
// A SharableUChar wrapped in CrossThreadRefCounted; StringImpl stores a pointer to one
// of these when built through its shared-buffer constructor.
typedef CrossThreadRefCounted<SharableUChar> SharedUChar;
// Predicate over a single character, accepted by StringImpl::find() and removeCharacters().
typedef bool (*CharacterMatchFunctionPtr)(UChar);
64
65 class StringImpl : public StringImplBase {
66 friend struct JSC::IdentifierCStringTranslator;
67 friend struct JSC::IdentifierUCharBufferTranslator;
68 friend struct WTF::CStringTranslator;
69 friend struct WTF::HashAndCharactersTranslator;
70 friend struct WTF::HashAndUTF8CharactersTranslator;
71 friend struct WTF::UCharBufferTranslator;
72 friend class AtomicStringImpl;
73 private:
74 // Used to construct static strings, which have an special refCount that can never hit zero.
75 // This means that the static string will never be destroyed, which is important because
76 // static strings will be shared across threads & ref-counted in a non-threadsafe manner.
StringImpl(const UChar * characters,unsigned length,StaticStringConstructType)77 StringImpl(const UChar* characters, unsigned length, StaticStringConstructType)
78 : StringImplBase(length, ConstructStaticString)
79 , m_data(characters)
80 , m_buffer(0)
81 , m_hash(0)
82 {
83 // Ensure that the hash is computed so that AtomicStringHash can call existingHash()
84 // with impunity. The empty string is special because it is never entered into
85 // AtomicString's HashKey, but still needs to compare correctly.
86 hash();
87 }
88
89 // Create a normal string with internal storage (BufferInternal)
StringImpl(unsigned length)90 StringImpl(unsigned length)
91 : StringImplBase(length, BufferInternal)
92 , m_data(reinterpret_cast<const UChar*>(this + 1))
93 , m_buffer(0)
94 , m_hash(0)
95 {
96 ASSERT(m_data);
97 ASSERT(m_length);
98 }
99
100 // Create a StringImpl adopting ownership of the provided buffer (BufferOwned)
StringImpl(const UChar * characters,unsigned length)101 StringImpl(const UChar* characters, unsigned length)
102 : StringImplBase(length, BufferOwned)
103 , m_data(characters)
104 , m_buffer(0)
105 , m_hash(0)
106 {
107 ASSERT(m_data);
108 ASSERT(m_length);
109 }
110
111 // Used to create new strings that are a substring of an existing StringImpl (BufferSubstring)
StringImpl(const UChar * characters,unsigned length,PassRefPtr<StringImpl> base)112 StringImpl(const UChar* characters, unsigned length, PassRefPtr<StringImpl> base)
113 : StringImplBase(length, BufferSubstring)
114 , m_data(characters)
115 , m_substringBuffer(base.leakRef())
116 , m_hash(0)
117 {
118 ASSERT(m_data);
119 ASSERT(m_length);
120 ASSERT(m_substringBuffer->bufferOwnership() != BufferSubstring);
121 }
122
123 // Used to construct new strings sharing an existing SharedUChar (BufferShared)
StringImpl(const UChar * characters,unsigned length,PassRefPtr<SharedUChar> sharedBuffer)124 StringImpl(const UChar* characters, unsigned length, PassRefPtr<SharedUChar> sharedBuffer)
125 : StringImplBase(length, BufferShared)
126 , m_data(characters)
127 , m_sharedBuffer(sharedBuffer.leakRef())
128 , m_hash(0)
129 {
130 ASSERT(m_data);
131 ASSERT(m_length);
132 }
133
134 // For use only by AtomicString's XXXTranslator helpers.
setHash(unsigned hash)135 void setHash(unsigned hash)
136 {
137 ASSERT(!isStatic());
138 ASSERT(!m_hash);
139 ASSERT(hash == StringHasher::computeHash(m_data, m_length));
140 m_hash = hash;
141 }
142
143 public:
144 ~StringImpl();
145
146 static PassRefPtr<StringImpl> create(const UChar*, unsigned length);
147 static PassRefPtr<StringImpl> create(const char*, unsigned length);
148 static PassRefPtr<StringImpl> create(const char*);
149 static PassRefPtr<StringImpl> create(const UChar*, unsigned length, PassRefPtr<SharedUChar> sharedBuffer);
create(PassRefPtr<StringImpl> rep,unsigned offset,unsigned length)150 static ALWAYS_INLINE PassRefPtr<StringImpl> create(PassRefPtr<StringImpl> rep, unsigned offset, unsigned length)
151 {
152 ASSERT(rep);
153 ASSERT(length <= rep->length());
154
155 if (!length)
156 return empty();
157
158 StringImpl* ownerRep = (rep->bufferOwnership() == BufferSubstring) ? rep->m_substringBuffer : rep.get();
159 return adoptRef(new StringImpl(rep->m_data + offset, length, ownerRep));
160 }
161
162 static PassRefPtr<StringImpl> createUninitialized(unsigned length, UChar*& data);
tryCreateUninitialized(unsigned length,UChar * & output)163 static ALWAYS_INLINE PassRefPtr<StringImpl> tryCreateUninitialized(unsigned length, UChar*& output)
164 {
165 if (!length) {
166 output = 0;
167 return empty();
168 }
169
170 if (length > ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(UChar))) {
171 output = 0;
172 return 0;
173 }
174 StringImpl* resultImpl;
175 if (!tryFastMalloc(sizeof(UChar) * length + sizeof(StringImpl)).getValue(resultImpl)) {
176 output = 0;
177 return 0;
178 }
179 output = reinterpret_cast<UChar*>(resultImpl + 1);
180 return adoptRef(new(resultImpl) StringImpl(length));
181 }
182
dataOffset()183 static unsigned dataOffset() { return OBJECT_OFFSETOF(StringImpl, m_data); }
184 static PassRefPtr<StringImpl> createWithTerminatingNullCharacter(const StringImpl&);
185 static PassRefPtr<StringImpl> createStrippingNullCharacters(const UChar*, unsigned length);
186
187 template<size_t inlineCapacity>
adopt(Vector<UChar,inlineCapacity> & vector)188 static PassRefPtr<StringImpl> adopt(Vector<UChar, inlineCapacity>& vector)
189 {
190 if (size_t size = vector.size()) {
191 ASSERT(vector.data());
192 if (size > std::numeric_limits<unsigned>::max())
193 CRASH();
194 return adoptRef(new StringImpl(vector.releaseBuffer(), size));
195 }
196 return empty();
197 }
198 static PassRefPtr<StringImpl> adopt(StringBuffer&);
199
200 SharedUChar* sharedBuffer();
characters()201 const UChar* characters() const { return m_data; }
202
cost()203 size_t cost()
204 {
205 // For substrings, return the cost of the base string.
206 if (bufferOwnership() == BufferSubstring)
207 return m_substringBuffer->cost();
208
209 if (m_refCountAndFlags & s_refCountFlagShouldReportedCost) {
210 m_refCountAndFlags &= ~s_refCountFlagShouldReportedCost;
211 return m_length;
212 }
213 return 0;
214 }
215
isIdentifier()216 bool isIdentifier() const { return m_refCountAndFlags & s_refCountFlagIsIdentifier; }
setIsIdentifier(bool isIdentifier)217 void setIsIdentifier(bool isIdentifier)
218 {
219 ASSERT(!isStatic());
220 if (isIdentifier)
221 m_refCountAndFlags |= s_refCountFlagIsIdentifier;
222 else
223 m_refCountAndFlags &= ~s_refCountFlagIsIdentifier;
224 }
225
hasTerminatingNullCharacter()226 bool hasTerminatingNullCharacter() const { return m_refCountAndFlags & s_refCountFlagHasTerminatingNullCharacter; }
227
isAtomic()228 bool isAtomic() const { return m_refCountAndFlags & s_refCountFlagIsAtomic; }
setIsAtomic(bool isIdentifier)229 void setIsAtomic(bool isIdentifier)
230 {
231 ASSERT(!isStatic());
232 if (isIdentifier)
233 m_refCountAndFlags |= s_refCountFlagIsAtomic;
234 else
235 m_refCountAndFlags &= ~s_refCountFlagIsAtomic;
236 }
237
hash()238 unsigned hash() const { if (!m_hash) m_hash = StringHasher::computeHash(m_data, m_length); return m_hash; }
existingHash()239 unsigned existingHash() const { ASSERT(m_hash); return m_hash; }
240
deref()241 ALWAYS_INLINE void deref() { m_refCountAndFlags -= s_refCountIncrement; if (!(m_refCountAndFlags & (s_refCountMask | s_refCountFlagStatic))) delete this; }
hasOneRef()242 ALWAYS_INLINE bool hasOneRef() const { return (m_refCountAndFlags & (s_refCountMask | s_refCountFlagStatic)) == s_refCountIncrement; }
243
244 static StringImpl* empty();
245
copyChars(UChar * destination,const UChar * source,unsigned numCharacters)246 static void copyChars(UChar* destination, const UChar* source, unsigned numCharacters)
247 {
248 if (numCharacters <= s_copyCharsInlineCutOff) {
249 for (unsigned i = 0; i < numCharacters; ++i)
250 destination[i] = source[i];
251 } else
252 memcpy(destination, source, numCharacters * sizeof(UChar));
253 }
254
255 // Returns a StringImpl suitable for use on another thread.
256 PassRefPtr<StringImpl> crossThreadString();
257 // Makes a deep copy. Helpful only if you need to use a String on another thread
258 // (use crossThreadString if the method call doesn't need to be threadsafe).
259 // Since StringImpl objects are immutable, there's no other reason to make a copy.
260 PassRefPtr<StringImpl> threadsafeCopy() const;
261
262 PassRefPtr<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX);
263
264 UChar operator[](unsigned i) { ASSERT(i < m_length); return m_data[i]; }
265 UChar32 characterStartingAt(unsigned);
266
267 bool containsOnlyWhitespace();
268
269 int toIntStrict(bool* ok = 0, int base = 10);
270 unsigned toUIntStrict(bool* ok = 0, int base = 10);
271 int64_t toInt64Strict(bool* ok = 0, int base = 10);
272 uint64_t toUInt64Strict(bool* ok = 0, int base = 10);
273 intptr_t toIntPtrStrict(bool* ok = 0, int base = 10);
274
275 int toInt(bool* ok = 0); // ignores trailing garbage
276 unsigned toUInt(bool* ok = 0); // ignores trailing garbage
277 int64_t toInt64(bool* ok = 0); // ignores trailing garbage
278 uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage
279 intptr_t toIntPtr(bool* ok = 0); // ignores trailing garbage
280
281 double toDouble(bool* ok = 0, bool* didReadNumber = 0);
282 float toFloat(bool* ok = 0, bool* didReadNumber = 0);
283
284 PassRefPtr<StringImpl> lower();
285 PassRefPtr<StringImpl> upper();
286
287 enum LastCharacterBehavior { ObscureLastCharacter, DisplayLastCharacter };
288
289 PassRefPtr<StringImpl> secure(UChar, LastCharacterBehavior = ObscureLastCharacter);
290 PassRefPtr<StringImpl> foldCase();
291
292 PassRefPtr<StringImpl> stripWhiteSpace();
293 PassRefPtr<StringImpl> simplifyWhiteSpace();
294
295 PassRefPtr<StringImpl> removeCharacters(CharacterMatchFunctionPtr);
296
297 size_t find(UChar, unsigned index = 0);
298 size_t find(CharacterMatchFunctionPtr, unsigned index = 0);
299 size_t find(const char*, unsigned index = 0);
300 size_t find(StringImpl*, unsigned index = 0);
301 size_t findIgnoringCase(const char*, unsigned index = 0);
302 size_t findIgnoringCase(StringImpl*, unsigned index = 0);
303
304 size_t reverseFind(UChar, unsigned index = UINT_MAX);
305 size_t reverseFind(StringImpl*, unsigned index = UINT_MAX);
306 size_t reverseFindIgnoringCase(StringImpl*, unsigned index = UINT_MAX);
307
308 bool startsWith(StringImpl* str, bool caseSensitive = true) { return (caseSensitive ? reverseFind(str, 0) : reverseFindIgnoringCase(str, 0)) == 0; }
309 bool endsWith(StringImpl*, bool caseSensitive = true);
310
311 PassRefPtr<StringImpl> replace(UChar, UChar);
312 PassRefPtr<StringImpl> replace(UChar, StringImpl*);
313 PassRefPtr<StringImpl> replace(StringImpl*, StringImpl*);
314 PassRefPtr<StringImpl> replace(unsigned index, unsigned len, StringImpl*);
315
316 WTF::Unicode::Direction defaultWritingDirection(bool* hasStrongDirectionality = 0);
317
318 #if USE(CF)
319 CFStringRef createCFString();
320 #endif
321 #ifdef __OBJC__
322 operator NSString*();
323 #endif
324
325 private:
326 // This number must be at least 2 to avoid sharing empty, null as well as 1 character strings from SmallStrings.
327 static const unsigned s_copyCharsInlineCutOff = 20;
328
329 static PassRefPtr<StringImpl> createStrippingNullCharactersSlowCase(const UChar*, unsigned length);
330
bufferOwnership()331 BufferOwnership bufferOwnership() const { return static_cast<BufferOwnership>(m_refCountAndFlags & s_refCountMaskBufferOwnership); }
isStatic()332 bool isStatic() const { return m_refCountAndFlags & s_refCountFlagStatic; }
333 const UChar* m_data;
334 union {
335 void* m_buffer;
336 StringImpl* m_substringBuffer;
337 SharedUChar* m_sharedBuffer;
338 };
339 mutable unsigned m_hash;
340 };
341
342 bool equal(const StringImpl*, const StringImpl*);
343 bool equal(const StringImpl*, const char*);
equal(const char * a,StringImpl * b)344 inline bool equal(const char* a, StringImpl* b) { return equal(b, a); }
345
346 bool equalIgnoringCase(StringImpl*, StringImpl*);
347 bool equalIgnoringCase(StringImpl*, const char*);
equalIgnoringCase(const char * a,StringImpl * b)348 inline bool equalIgnoringCase(const char* a, StringImpl* b) { return equalIgnoringCase(b, a); }
349 bool equalIgnoringCase(const UChar* a, const char* b, unsigned length);
equalIgnoringCase(const char * a,const UChar * b,unsigned length)350 inline bool equalIgnoringCase(const char* a, const UChar* b, unsigned length) { return equalIgnoringCase(b, a, length); }
351
352 bool equalIgnoringNullity(StringImpl*, StringImpl*);
353
354 template<size_t inlineCapacity>
equalIgnoringNullity(const Vector<UChar,inlineCapacity> & a,StringImpl * b)355 bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, StringImpl* b)
356 {
357 if (!b)
358 return !a.size();
359 if (a.size() != b->length())
360 return false;
361 return !memcmp(a.data(), b->characters(), b->length());
362 }
363
364 int codePointCompare(const StringImpl*, const StringImpl*);
365
isSpaceOrNewline(UChar c)366 static inline bool isSpaceOrNewline(UChar c)
367 {
368 // Use isASCIISpace() for basic Latin-1.
369 // This will include newlines, which aren't included in Unicode DirWS.
370 return c <= 0x7F ? WTF::isASCIISpace(c) : WTF::Unicode::direction(c) == WTF::Unicode::WhiteSpaceNeutral;
371 }
372
373 // This is a hot function because it's used when parsing HTML.
createStrippingNullCharacters(const UChar * characters,unsigned length)374 inline PassRefPtr<StringImpl> StringImpl::createStrippingNullCharacters(const UChar* characters, unsigned length)
375 {
376 ASSERT(characters);
377 ASSERT(length);
378
379 // Optimize for the case where there are no Null characters by quickly
380 // searching for nulls, and then using StringImpl::create, which will
381 // memcpy the whole buffer. This is faster than assigning character by
382 // character during the loop.
383
384 // Fast case.
385 int foundNull = 0;
386 for (unsigned i = 0; !foundNull && i < length; i++) {
387 int c = characters[i]; // more efficient than using UChar here (at least on Intel Mac OS)
388 foundNull |= !c;
389 }
390 if (!foundNull)
391 return StringImpl::create(characters, length);
392
393 return StringImpl::createStrippingNullCharactersSlowCase(characters, length);
394 }
395
// StringHash is only forward-declared here; its definition is not part of this header.
struct StringHash;

// StringHash is the default hash for StringImpl* and RefPtr<StringImpl>
// The primary DefaultHash template is declared but not defined here; the two explicit
// specializations below name StringHash as their Hash typedef.
template<typename T> struct DefaultHash;
template<> struct DefaultHash<StringImpl*> {
    typedef StringHash Hash;
};
template<> struct DefaultHash<RefPtr<StringImpl> > {
    typedef StringHash Hash;
};
406
407 }
408
409 using WTF::StringImpl;
410 using WTF::equal;
411 using WTF::TextCaseSensitivity;
412 using WTF::TextCaseSensitive;
413 using WTF::TextCaseInsensitive;
414
415 #endif
416