• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * (C) 1999 Lars Knoll (knoll@kde.org)
3  * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010 Apple Inc. All rights reserved.
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Library General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Library General Public License for more details.
14  *
15  * You should have received a copy of the GNU Library General Public License
16  * along with this library; see the file COPYING.LIB.  If not, write to
17  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18  * Boston, MA 02110-1301, USA.
19  *
20  */
21 
22 #ifndef WTFString_h
23 #define WTFString_h
24 
25 // This file would be called String.h, but that conflicts with <string.h>
26 // on systems without case-sensitive file systems.
27 
28 #include "StringImpl.h"
29 
30 #ifdef __OBJC__
31 #include <objc/objc.h>
32 #endif
33 
34 #if USE(CF)
35 typedef const struct __CFString * CFStringRef;
36 #endif
37 
38 #if PLATFORM(QT)
39 QT_BEGIN_NAMESPACE
40 class QString;
41 QT_END_NAMESPACE
42 #include <QDataStream>
43 #endif
44 
45 #if PLATFORM(WX)
46 class wxString;
47 #endif
48 
49 #if PLATFORM(HAIKU)
50 class BString;
51 #endif
52 
53 #if PLATFORM(BREWMP)
54 // AECHAR is defined in AEEStdDef.h, but don't include it here to avoid conflicts.
55 #ifndef _AECHAR_DEFINED
56 typedef uint16             AECHAR;
57 #define _AECHAR_DEFINED
58 #endif
59 #endif
60 
61 namespace WTF {
62 
63 class CString;
64 struct StringHash;
65 
66 // Declarations of string operations
67 
68 bool charactersAreAllASCII(const UChar*, size_t);
69 bool charactersAreAllLatin1(const UChar*, size_t);
70 int charactersToIntStrict(const UChar*, size_t, bool* ok = 0, int base = 10);
71 unsigned charactersToUIntStrict(const UChar*, size_t, bool* ok = 0, int base = 10);
72 int64_t charactersToInt64Strict(const UChar*, size_t, bool* ok = 0, int base = 10);
73 uint64_t charactersToUInt64Strict(const UChar*, size_t, bool* ok = 0, int base = 10);
74 intptr_t charactersToIntPtrStrict(const UChar*, size_t, bool* ok = 0, int base = 10);
75 
76 int charactersToInt(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
77 unsigned charactersToUInt(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
78 int64_t charactersToInt64(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
79 uint64_t charactersToUInt64(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
80 intptr_t charactersToIntPtr(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
81 
82 double charactersToDouble(const UChar*, size_t, bool* ok = 0, bool* didReadNumber = 0);
83 float charactersToFloat(const UChar*, size_t, bool* ok = 0, bool* didReadNumber = 0);
84 
85 template<bool isSpecialCharacter(UChar)> bool isAllSpecialCharacters(const UChar*, size_t);
86 
87 class String {
88 public:
89     // Construct a null string, distinguishable from an empty string.
String()90     String() { }
91 
92     // Construct a string with UTF-16 data.
93     String(const UChar* characters, unsigned length);
94 
95     // Construct a string by copying the contents of a vector.  To avoid
96     // copying, consider using String::adopt instead.
97     template<size_t inlineCapacity>
98     explicit String(const Vector<UChar, inlineCapacity>&);
99 
100     // Construct a string with UTF-16 data, from a null-terminated source.
101     String(const UChar*);
102 
103     // Construct a string with latin1 data.
104     String(const char* characters, unsigned length);
105 
106     // Construct a string with latin1 data, from a null-terminated source.
107     String(const char* characters);
108 
109     // Construct a string referencing an existing StringImpl.
String(StringImpl * impl)110     String(StringImpl* impl) : m_impl(impl) { }
String(PassRefPtr<StringImpl> impl)111     String(PassRefPtr<StringImpl> impl) : m_impl(impl) { }
String(RefPtr<StringImpl> impl)112     String(RefPtr<StringImpl> impl) : m_impl(impl) { }
113 
114     // Inline the destructor.
~String()115     ALWAYS_INLINE ~String() { }
116 
swap(String & o)117     void swap(String& o) { m_impl.swap(o.m_impl); }
118 
adopt(StringBuffer & buffer)119     static String adopt(StringBuffer& buffer) { return StringImpl::adopt(buffer); }
120     template<size_t inlineCapacity>
adopt(Vector<UChar,inlineCapacity> & vector)121     static String adopt(Vector<UChar, inlineCapacity>& vector) { return StringImpl::adopt(vector); }
122 
isNull()123     bool isNull() const { return !m_impl; }
isEmpty()124     bool isEmpty() const { return !m_impl || !m_impl->length(); }
125 
impl()126     StringImpl* impl() const { return m_impl.get(); }
127 
length()128     unsigned length() const
129     {
130         if (!m_impl)
131             return 0;
132         return m_impl->length();
133     }
134 
characters()135     const UChar* characters() const
136     {
137         if (!m_impl)
138             return 0;
139         return m_impl->characters();
140     }
141 
142     CString ascii() const;
143     CString latin1() const;
144     CString utf8(bool strict = false) const;
145 
146     UChar operator[](unsigned index) const
147     {
148         if (!m_impl || index >= m_impl->length())
149             return 0;
150         return m_impl->characters()[index];
151     }
152 
153     static String number(short);
154     static String number(unsigned short);
155     static String number(int);
156     static String number(unsigned);
157     static String number(long);
158     static String number(unsigned long);
159     static String number(long long);
160     static String number(unsigned long long);
161     static String number(double);
162 
163     // Find a single character or string, also with match function & latin1 forms.
164     size_t find(UChar c, unsigned start = 0) const
165         { return m_impl ? m_impl->find(c, start) : notFound; }
166     size_t find(const String& str, unsigned start = 0) const
167         { return m_impl ? m_impl->find(str.impl(), start) : notFound; }
168     size_t find(CharacterMatchFunctionPtr matchFunction, unsigned start = 0) const
169         { return m_impl ? m_impl->find(matchFunction, start) : notFound; }
170     size_t find(const char* str, unsigned start = 0) const
171         { return m_impl ? m_impl->find(str, start) : notFound; }
172 
173     // Find the last instance of a single character or string.
174     size_t reverseFind(UChar c, unsigned start = UINT_MAX) const
175         { return m_impl ? m_impl->reverseFind(c, start) : notFound; }
176     size_t reverseFind(const String& str, unsigned start = UINT_MAX) const
177         { return m_impl ? m_impl->reverseFind(str.impl(), start) : notFound; }
178 
179     // Case insensitive string matching.
180     size_t findIgnoringCase(const char* str, unsigned start = 0) const
181         { return m_impl ? m_impl->findIgnoringCase(str, start) : notFound; }
182     size_t findIgnoringCase(const String& str, unsigned start = 0) const
183         { return m_impl ? m_impl->findIgnoringCase(str.impl(), start) : notFound; }
184     size_t reverseFindIgnoringCase(const String& str, unsigned start = UINT_MAX) const
185         { return m_impl ? m_impl->reverseFindIgnoringCase(str.impl(), start) : notFound; }
186 
187     // Wrappers for find & reverseFind adding dynamic sensitivity check.
find(const char * str,unsigned start,bool caseSensitive)188     size_t find(const char* str, unsigned start, bool caseSensitive) const
189         { return caseSensitive ? find(str, start) : findIgnoringCase(str, start); }
find(const String & str,unsigned start,bool caseSensitive)190     size_t find(const String& str, unsigned start, bool caseSensitive) const
191         { return caseSensitive ? find(str, start) : findIgnoringCase(str, start); }
reverseFind(const String & str,unsigned start,bool caseSensitive)192     size_t reverseFind(const String& str, unsigned start, bool caseSensitive) const
193         { return caseSensitive ? reverseFind(str, start) : reverseFindIgnoringCase(str, start); }
194 
195     const UChar* charactersWithNullTermination();
196 
197     UChar32 characterStartingAt(unsigned) const; // Ditto.
198 
contains(UChar c)199     bool contains(UChar c) const { return find(c) != notFound; }
200     bool contains(const char* str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != notFound; }
201     bool contains(const String& str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != notFound; }
202 
203     bool startsWith(const String& s, bool caseSensitive = true) const
204         { return m_impl ? m_impl->startsWith(s.impl(), caseSensitive) : s.isEmpty(); }
205     bool endsWith(const String& s, bool caseSensitive = true) const
206         { return m_impl ? m_impl->endsWith(s.impl(), caseSensitive) : s.isEmpty(); }
207 
208     void append(const String&);
209     void append(char);
210     void append(UChar);
211     void append(const UChar*, unsigned length);
212     void insert(const String&, unsigned pos);
213     void insert(const UChar*, unsigned length, unsigned pos);
214 
replace(UChar a,UChar b)215     String& replace(UChar a, UChar b) { if (m_impl) m_impl = m_impl->replace(a, b); return *this; }
replace(UChar a,const String & b)216     String& replace(UChar a, const String& b) { if (m_impl) m_impl = m_impl->replace(a, b.impl()); return *this; }
replace(const String & a,const String & b)217     String& replace(const String& a, const String& b) { if (m_impl) m_impl = m_impl->replace(a.impl(), b.impl()); return *this; }
replace(unsigned index,unsigned len,const String & b)218     String& replace(unsigned index, unsigned len, const String& b) { if (m_impl) m_impl = m_impl->replace(index, len, b.impl()); return *this; }
219 
makeLower()220     void makeLower() { if (m_impl) m_impl = m_impl->lower(); }
makeUpper()221     void makeUpper() { if (m_impl) m_impl = m_impl->upper(); }
makeSecure(UChar aChar)222     void makeSecure(UChar aChar) { if (m_impl) m_impl = m_impl->secure(aChar); }
223 
224     void truncate(unsigned len);
225     void remove(unsigned pos, int len = 1);
226 
227     String substring(unsigned pos, unsigned len = UINT_MAX) const;
228     String substringSharingImpl(unsigned pos, unsigned len = UINT_MAX) const;
left(unsigned len)229     String left(unsigned len) const { return substring(0, len); }
right(unsigned len)230     String right(unsigned len) const { return substring(length() - len, len); }
231 
232     // Returns a lowercase/uppercase version of the string
233     String lower() const;
234     String upper() const;
235 
236     String stripWhiteSpace() const;
237     String simplifyWhiteSpace() const;
238 
239     String removeCharacters(CharacterMatchFunctionPtr) const;
240     template<bool isSpecialCharacter(UChar)> bool isAllSpecialCharacters() const;
241 
242     // Return the string with case folded for case insensitive comparison.
243     String foldCase() const;
244 
245 #if !PLATFORM(QT)
246     static String format(const char *, ...) WTF_ATTRIBUTE_PRINTF(1, 2);
247 #else
248     static String format(const char *, ...);
249 #endif
250 
251     // Returns an uninitialized string. The characters needs to be written
252     // into the buffer returned in data before the returned string is used.
253     // Failure to do this will have unpredictable results.
createUninitialized(unsigned length,UChar * & data)254     static String createUninitialized(unsigned length, UChar*& data) { return StringImpl::createUninitialized(length, data); }
255 
256     void split(const String& separator, Vector<String>& result) const;
257     void split(const String& separator, bool allowEmptyEntries, Vector<String>& result) const;
258     void split(UChar separator, Vector<String>& result) const;
259     void split(UChar separator, bool allowEmptyEntries, Vector<String>& result) const;
260 
261     int toIntStrict(bool* ok = 0, int base = 10) const;
262     unsigned toUIntStrict(bool* ok = 0, int base = 10) const;
263     int64_t toInt64Strict(bool* ok = 0, int base = 10) const;
264     uint64_t toUInt64Strict(bool* ok = 0, int base = 10) const;
265     intptr_t toIntPtrStrict(bool* ok = 0, int base = 10) const;
266 
267     int toInt(bool* ok = 0) const;
268     unsigned toUInt(bool* ok = 0) const;
269     int64_t toInt64(bool* ok = 0) const;
270     uint64_t toUInt64(bool* ok = 0) const;
271     intptr_t toIntPtr(bool* ok = 0) const;
272     double toDouble(bool* ok = 0, bool* didReadNumber = 0) const;
273     float toFloat(bool* ok = 0, bool* didReadNumber = 0) const;
274 
275     bool percentage(int& percentage) const;
276 
277     // Returns a StringImpl suitable for use on another thread.
278     String crossThreadString() const;
279     // Makes a deep copy. Helpful only if you need to use a String on another thread
280     // (use crossThreadString if the method call doesn't need to be threadsafe).
281     // Since the underlying StringImpl objects are immutable, there's no other reason
282     // to ever prefer copy() over plain old assignment.
283     String threadsafeCopy() const;
284 
285     // Prevent Strings from being implicitly convertable to bool as it will be ambiguous on any platform that
286     // allows implicit conversion to another pointer type (e.g., Mac allows implicit conversion to NSString*).
287     typedef struct ImplicitConversionFromWTFStringToBoolDisallowedA* (String::*UnspecifiedBoolTypeA);
288     typedef struct ImplicitConversionFromWTFStringToBoolDisallowedB* (String::*UnspecifiedBoolTypeB);
289     operator UnspecifiedBoolTypeA() const;
290     operator UnspecifiedBoolTypeB() const;
291 
292 #if USE(CF)
293     String(CFStringRef);
294     CFStringRef createCFString() const;
295 #endif
296 
297 #ifdef __OBJC__
298     String(NSString*);
299 
300     // This conversion maps NULL to "", which loses the meaning of NULL, but we
301     // need this mapping because AppKit crashes when passed nil NSStrings.
302     operator NSString*() const { if (!m_impl) return @""; return *m_impl; }
303 #endif
304 
305 #if PLATFORM(QT)
306     String(const QString&);
307     String(const QStringRef&);
308     operator QString() const;
309 #endif
310 
311 #if PLATFORM(WX)
312     String(const wxString&);
313     operator wxString() const;
314 #endif
315 
316 #if PLATFORM(HAIKU)
317     String(const BString&);
318     operator BString() const;
319 #endif
320 
321 #if PLATFORM(BREWMP)
322     String(const AECHAR*);
323 #endif
324 
325     // String::fromUTF8 will return a null string if
326     // the input data contains invalid UTF-8 sequences.
327     static String fromUTF8(const char*, size_t);
328     static String fromUTF8(const char*);
329 
330     // Tries to convert the passed in string to UTF-8, but will fall back to Latin-1 if the string is not valid UTF-8.
331     static String fromUTF8WithLatin1Fallback(const char*, size_t);
332 
333     // Determines the writing direction using the Unicode Bidi Algorithm rules P2 and P3.
334     WTF::Unicode::Direction defaultWritingDirection(bool* hasStrongDirectionality = 0) const
335     {
336         if (m_impl)
337             return m_impl->defaultWritingDirection(hasStrongDirectionality);
338         if (hasStrongDirectionality)
339             *hasStrongDirectionality = false;
340         return WTF::Unicode::LeftToRight;
341     }
342 
containsOnlyASCII()343     bool containsOnlyASCII() const { return charactersAreAllASCII(characters(), length()); }
containsOnlyLatin1()344     bool containsOnlyLatin1() const { return charactersAreAllLatin1(characters(), length()); }
345 
346     // Hash table deleted values, which are only constructed and never copied or destroyed.
String(WTF::HashTableDeletedValueType)347     String(WTF::HashTableDeletedValueType) : m_impl(WTF::HashTableDeletedValue) { }
isHashTableDeletedValue()348     bool isHashTableDeletedValue() const { return m_impl.isHashTableDeletedValue(); }
349 
350 private:
351     RefPtr<StringImpl> m_impl;
352 };
353 
354 #if PLATFORM(QT)
355 QDataStream& operator<<(QDataStream& stream, const String& str);
356 QDataStream& operator>>(QDataStream& stream, String& str);
357 #endif
358 
359 String operator+(const String&, const String&);
360 String operator+(const String&, const char*);
361 String operator+(const char*, const String&);
362 
363 inline String& operator+=(String& a, const String& b) { a.append(b); return a; }
364 
365 inline bool operator==(const String& a, const String& b) { return equal(a.impl(), b.impl()); }
366 inline bool operator==(const String& a, const char* b) { return equal(a.impl(), b); }
367 inline bool operator==(const char* a, const String& b) { return equal(a, b.impl()); }
368 
369 inline bool operator!=(const String& a, const String& b) { return !equal(a.impl(), b.impl()); }
370 inline bool operator!=(const String& a, const char* b) { return !equal(a.impl(), b); }
371 inline bool operator!=(const char* a, const String& b) { return !equal(a, b.impl()); }
372 
equalIgnoringCase(const String & a,const String & b)373 inline bool equalIgnoringCase(const String& a, const String& b) { return equalIgnoringCase(a.impl(), b.impl()); }
equalIgnoringCase(const String & a,const char * b)374 inline bool equalIgnoringCase(const String& a, const char* b) { return equalIgnoringCase(a.impl(), b); }
equalIgnoringCase(const char * a,const String & b)375 inline bool equalIgnoringCase(const char* a, const String& b) { return equalIgnoringCase(a, b.impl()); }
376 
equalPossiblyIgnoringCase(const String & a,const String & b,bool ignoreCase)377 inline bool equalPossiblyIgnoringCase(const String& a, const String& b, bool ignoreCase)
378 {
379     return ignoreCase ? equalIgnoringCase(a, b) : (a == b);
380 }
381 
equalIgnoringNullity(const String & a,const String & b)382 inline bool equalIgnoringNullity(const String& a, const String& b) { return equalIgnoringNullity(a.impl(), b.impl()); }
383 
384 template<size_t inlineCapacity>
equalIgnoringNullity(const Vector<UChar,inlineCapacity> & a,const String & b)385 inline bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, const String& b) { return equalIgnoringNullity(a, b.impl()); }
386 
387 inline bool operator!(const String& str) { return str.isNull(); }
388 
swap(String & a,String & b)389 inline void swap(String& a, String& b) { a.swap(b); }
390 
391 // Definitions of string operations
392 
393 template<size_t inlineCapacity>
String(const Vector<UChar,inlineCapacity> & vector)394 String::String(const Vector<UChar, inlineCapacity>& vector)
395     : m_impl(vector.size() ? StringImpl::create(vector.data(), vector.size()) : 0)
396 {
397 }
398 
399 #ifdef __OBJC__
400 // This is for situations in WebKit where the long standing behavior has been
401 // "nil if empty", so we try to maintain longstanding behavior for the sake of
402 // entrenched clients
nsStringNilIfEmpty(const String & str)403 inline NSString* nsStringNilIfEmpty(const String& str) {  return str.isEmpty() ? nil : (NSString*)str; }
404 #endif
405 
charactersAreAllASCII(const UChar * characters,size_t length)406 inline bool charactersAreAllASCII(const UChar* characters, size_t length)
407 {
408     UChar ored = 0;
409     for (size_t i = 0; i < length; ++i)
410         ored |= characters[i];
411     return !(ored & 0xFF80);
412 }
413 
charactersAreAllLatin1(const UChar * characters,size_t length)414 inline bool charactersAreAllLatin1(const UChar* characters, size_t length)
415 {
416     UChar ored = 0;
417     for (size_t i = 0; i < length; ++i)
418         ored |= characters[i];
419     return !(ored & 0xFF00);
420 }
421 
422 int codePointCompare(const String&, const String&);
423 
424 inline size_t find(const UChar* characters, unsigned length, UChar matchCharacter, unsigned index = 0)
425 {
426     while (index < length) {
427         if (characters[index] == matchCharacter)
428             return index;
429         ++index;
430     }
431     return notFound;
432 }
433 
434 inline size_t find(const UChar* characters, unsigned length, CharacterMatchFunctionPtr matchFunction, unsigned index = 0)
435 {
436     while (index < length) {
437         if (matchFunction(characters[index]))
438             return index;
439         ++index;
440     }
441     return notFound;
442 }
443 
444 inline size_t reverseFind(const UChar* characters, unsigned length, UChar matchCharacter, unsigned index = UINT_MAX)
445 {
446     if (!length)
447         return notFound;
448     if (index >= length)
449         index = length - 1;
450     while (characters[index] != matchCharacter) {
451         if (!index--)
452             return notFound;
453     }
454     return index;
455 }
456 
append(Vector<UChar> & vector,const String & string)457 inline void append(Vector<UChar>& vector, const String& string)
458 {
459     vector.append(string.characters(), string.length());
460 }
461 
appendNumber(Vector<UChar> & vector,unsigned char number)462 inline void appendNumber(Vector<UChar>& vector, unsigned char number)
463 {
464     int numberLength = number > 99 ? 3 : (number > 9 ? 2 : 1);
465     size_t vectorSize = vector.size();
466     vector.grow(vectorSize + numberLength);
467 
468     switch (numberLength) {
469     case 3:
470         vector[vectorSize + 2] = number % 10 + '0';
471         number /= 10;
472 
473     case 2:
474         vector[vectorSize + 1] = number % 10 + '0';
475         number /= 10;
476 
477     case 1:
478         vector[vectorSize] = number % 10 + '0';
479     }
480 }
481 
isAllSpecialCharacters(const UChar * characters,size_t length)482 template<bool isSpecialCharacter(UChar)> inline bool isAllSpecialCharacters(const UChar* characters, size_t length)
483 {
484     for (size_t i = 0; i < length; ++i) {
485         if (!isSpecialCharacter(characters[i]))
486             return false;
487     }
488     return true;
489 }
490 
isAllSpecialCharacters()491 template<bool isSpecialCharacter(UChar)> inline bool String::isAllSpecialCharacters() const
492 {
493     return WTF::isAllSpecialCharacters<isSpecialCharacter>(characters(), length());
494 }
495 
496 // StringHash is the default hash for String
497 template<typename T> struct DefaultHash;
498 template<> struct DefaultHash<String> {
499     typedef StringHash Hash;
500 };
501 
502 template <> struct VectorTraits<String> : SimpleClassVectorTraits { };
503 
504 }
505 
506 using WTF::CString;
507 using WTF::String;
508 using WTF::append;
509 using WTF::appendNumber;
510 using WTF::charactersAreAllASCII;
511 using WTF::charactersAreAllLatin1;
512 using WTF::charactersToIntStrict;
513 using WTF::charactersToUIntStrict;
514 using WTF::charactersToInt64Strict;
515 using WTF::charactersToUInt64Strict;
516 using WTF::charactersToIntPtrStrict;
517 using WTF::charactersToInt;
518 using WTF::charactersToUInt;
519 using WTF::charactersToInt64;
520 using WTF::charactersToUInt64;
521 using WTF::charactersToIntPtr;
522 using WTF::charactersToDouble;
523 using WTF::charactersToFloat;
524 using WTF::equal;
525 using WTF::equalIgnoringCase;
526 using WTF::find;
527 using WTF::isAllSpecialCharacters;
528 using WTF::isSpaceOrNewline;
529 using WTF::reverseFind;
530 
531 #endif
532