• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3  *  Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4  *  Copyright (C) 2009 Google Inc. All rights reserved.
5  *
6  *  This library is free software; you can redistribute it and/or
7  *  modify it under the terms of the GNU Library General Public
8  *  License as published by the Free Software Foundation; either
9  *  version 2 of the License, or (at your option) any later version.
10  *
11  *  This library is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  *  Library General Public License for more details.
15  *
16  *  You should have received a copy of the GNU Library General Public License
17  *  along with this library; see the file COPYING.LIB.  If not, write to
18  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19  *  Boston, MA 02110-1301, USA.
20  *
21  */
22 
23 #ifndef UString_h
24 #define UString_h
25 
26 #include "Collector.h"
27 #include <stdint.h>
28 #include <string.h>
29 #include <wtf/Assertions.h>
30 #include <wtf/CrossThreadRefCounted.h>
31 #include <wtf/OwnFastMallocPtr.h>
32 #include <wtf/PassRefPtr.h>
33 #include <wtf/PtrAndFlags.h>
34 #include <wtf/RefPtr.h>
35 #include <wtf/Vector.h>
36 #include <wtf/unicode/Unicode.h>
37 
38 namespace JSC {
39 
40     using WTF::PlacementNewAdoptType;
41     using WTF::PlacementNewAdopt;
42 
43     class IdentifierTable;
44 
// Byte string that stores a char buffer together with its length.
// A default-constructed CString has zero length and a null data pointer.
class CString {
public:
    CString() : m_length(0), m_data(0) { }

    CString(const char*);
    CString(const char*, size_t);
    CString(const CString&);

    ~CString();

    // Builds a CString around an existing buffer.
    static CString adopt(char*, size_t); // buffer should be allocated with new[].

    CString& append(const CString&);
    CString& operator=(const char* c);
    CString& operator=(const CString&);

    // Appending with += simply forwards to append().
    CString& operator+=(const CString& c)
    {
        return append(c);
    }

    // Stored length, as recorded in m_length.
    size_t size() const
    {
        return m_length;
    }

    // Raw buffer pointer; null for a default-constructed CString.
    const char* c_str() const
    {
        return m_data;
    }

private:
    size_t m_length;
    char* m_data;
};
73 
74     typedef Vector<char, 32> CStringBuffer;
75 
76     class UString {
77         friend class JIT;
78 
79     public:
80         typedef CrossThreadRefCounted<OwnFastMallocPtr<UChar> > SharedUChar;
81         struct BaseString;
82         struct Rep : Noncopyable {
83             friend class JIT;
84 
createRep85             static PassRefPtr<Rep> create(UChar* buffer, int length)
86             {
87                 return adoptRef(new BaseString(buffer, length));
88             }
89 
createEmptyBufferRep90             static PassRefPtr<Rep> createEmptyBuffer(size_t size)
91             {
92                 // Guard against integer overflow
93                 if (size < (std::numeric_limits<size_t>::max() / sizeof(UChar))) {
94                     if (void * buf = tryFastMalloc(size * sizeof(UChar)))
95                         return adoptRef(new BaseString(static_cast<UChar*>(buf), 0, size));
96                 }
97                 return adoptRef(new BaseString(0, 0, 0));
98             }
99 
100             static PassRefPtr<Rep> createCopying(const UChar*, int);
101             static PassRefPtr<Rep> create(PassRefPtr<Rep> base, int offset, int length);
102 
103             // Constructs a string from a UTF-8 string, using strict conversion (see comments in UTF8.h).
104             // Returns UString::Rep::null for null input or conversion failure.
105             static PassRefPtr<Rep> createFromUTF8(const char*);
106 
107             // Uses SharedUChar to have joint ownership over the UChar*.
108             static PassRefPtr<Rep> create(UChar*, int, PassRefPtr<SharedUChar>);
109 
110             SharedUChar* sharedBuffer();
111             void destroy();
112 
baseIsSelfRep113             bool baseIsSelf() const { return m_identifierTableAndFlags.isFlagSet(BaseStringFlag); }
114             UChar* data() const;
sizeRep115             int size() const { return len; }
116 
hashRep117             unsigned hash() const { if (_hash == 0) _hash = computeHash(data(), len); return _hash; }
computedHashRep118             unsigned computedHash() const { ASSERT(_hash); return _hash; } // fast path for Identifiers
119 
120             static unsigned computeHash(const UChar*, int length);
121             static unsigned computeHash(const char*, int length);
computeHashRep122             static unsigned computeHash(const char* s) { return computeHash(s, strlen(s)); }
123 
identifierTableRep124             IdentifierTable* identifierTable() const { return m_identifierTableAndFlags.get(); }
setIdentifierTableRep125             void setIdentifierTable(IdentifierTable* table) { ASSERT(!isStatic()); m_identifierTableAndFlags.set(table); }
126 
isStaticRep127             bool isStatic() const { return m_identifierTableAndFlags.isFlagSet(StaticFlag); }
128             void setStatic(bool);
129             void setBaseString(PassRefPtr<BaseString>);
130             BaseString* baseString();
131             const BaseString* baseString() const;
132 
refRep133             Rep* ref() { ++rc; return this; }
derefRep134             ALWAYS_INLINE void deref() { if (--rc == 0) destroy(); }
135 
136             void checkConsistency() const;
137             enum UStringFlags {
138                 StaticFlag,
139                 BaseStringFlag
140             };
141 
142             // unshared data
143             int offset;
144             int len;
145             int rc; // For null and empty static strings, this field does not reflect a correct count, because ref/deref are not thread-safe. A special case in destroy() guarantees that these do not get deleted.
146             mutable unsigned _hash;
147             PtrAndFlags<IdentifierTable, UStringFlags> m_identifierTableAndFlags;
148 
nullRep149             static BaseString& null() { return *nullBaseString; }
emptyRep150             static BaseString& empty() { return *emptyBaseString; }
151 
152             bool reserveCapacity(int capacity);
153 
154         protected:
155             // Constructor for use by BaseString subclass; they use the union with m_baseString for another purpose.
RepRep156             Rep(int length)
157                 : offset(0)
158                 , len(length)
159                 , rc(1)
160                 , _hash(0)
161                 , m_baseString(0)
162             {
163             }
164 
RepRep165             Rep(PassRefPtr<BaseString> base, int offsetInBase, int length)
166                 : offset(offsetInBase)
167                 , len(length)
168                 , rc(1)
169                 , _hash(0)
170                 , m_baseString(base.releaseRef())
171             {
172                 checkConsistency();
173             }
174 
175             union {
176                 // If !baseIsSelf()
177                 BaseString* m_baseString;
178                 // If baseIsSelf()
179                 SharedUChar* m_sharedBuffer;
180             };
181 
182         private:
183             // For SmallStringStorage which allocates an array and does initialization manually.
RepRep184             Rep() { }
185 
186             friend class SmallStringsStorage;
187             friend void initializeUString();
188             JS_EXPORTDATA static BaseString* nullBaseString;
189             JS_EXPORTDATA static BaseString* emptyBaseString;
190         };
191 
192 
193         struct BaseString : public Rep {
isSharedBaseString194             bool isShared() { return rc != 1 || isBufferReadOnly(); }
195             void setSharedBuffer(PassRefPtr<SharedUChar>);
196 
isBufferReadOnlyBaseString197             bool isBufferReadOnly()
198             {
199                 if (!m_sharedBuffer)
200                     return false;
201                 return slowIsBufferReadOnly();
202             }
203 
204             // potentially shared data.
205             UChar* buf;
206             int preCapacity;
207             int usedPreCapacity;
208             int capacity;
209             int usedCapacity;
210 
211             size_t reportedCost;
212 
213         private:
214             BaseString(UChar* buffer, int length, int additionalCapacity = 0)
RepBaseString215                 : Rep(length)
216                 , buf(buffer)
217                 , preCapacity(0)
218                 , usedPreCapacity(0)
219                 , capacity(length + additionalCapacity)
220                 , usedCapacity(length)
221                 , reportedCost(0)
222             {
223                 m_identifierTableAndFlags.setFlag(BaseStringFlag);
224                 checkConsistency();
225             }
226 
227             SharedUChar* sharedBuffer();
228             bool slowIsBufferReadOnly();
229 
230             friend struct Rep;
231             friend class SmallStringsStorage;
232             friend void initializeUString();
233         };
234 
235     public:
236         UString();
237         UString(const char*);
238         UString(const UChar*, int length);
239         UString(UChar*, int length, bool copy);
240 
UString(const UString & s)241         UString(const UString& s)
242             : m_rep(s.m_rep)
243         {
244         }
245 
246         UString(const Vector<UChar>& buffer);
247 
~UString()248         ~UString()
249         {
250         }
251 
252         // Special constructor for cases where we overwrite an object in place.
UString(PlacementNewAdoptType)253         UString(PlacementNewAdoptType)
254             : m_rep(PlacementNewAdopt)
255         {
256         }
257 
258         static UString from(int);
259         static UString from(unsigned int);
260         static UString from(long);
261         static UString from(double);
262 
263         struct Range {
264         public:
RangeRange265             Range(int pos, int len)
266                 : position(pos)
267                 , length(len)
268             {
269             }
270 
RangeRange271             Range()
272             {
273             }
274 
275             int position;
276             int length;
277         };
278 
279         UString spliceSubstringsWithSeparators(const Range* substringRanges, int rangeCount, const UString* separators, int separatorCount) const;
280 
281         UString replaceRange(int rangeStart, int RangeEnd, const UString& replacement) const;
282 
283         UString& append(const UString&);
284         UString& append(const char*);
285         UString& append(UChar);
append(char c)286         UString& append(char c) { return append(static_cast<UChar>(static_cast<unsigned char>(c))); }
287         UString& append(const UChar*, int size);
288         UString& appendNumeric(int);
289         UString& appendNumeric(double);
290 
291         bool getCString(CStringBuffer&) const;
292 
293         // NOTE: This method should only be used for *debugging* purposes as it
294         // is neither Unicode safe nor free from side effects nor thread-safe.
295         char* ascii() const;
296 
297         /**
298          * Convert the string to UTF-8, assuming it is UTF-16 encoded.
299          * In non-strict mode, this function is tolerant of badly formed UTF-16, it
300          * can create UTF-8 strings that are invalid because they have characters in
301          * the range U+D800-U+DDFF, U+FFFE, or U+FFFF, but the UTF-8 string is
302          * guaranteed to be otherwise valid.
303          * In strict mode, error is returned as null CString.
304          */
305         CString UTF8String(bool strict = false) const;
306 
307         UString& operator=(const char*c);
308 
309         UString& operator+=(const UString& s) { return append(s); }
310         UString& operator+=(const char* s) { return append(s); }
311 
data()312         const UChar* data() const { return m_rep->data(); }
313 
isNull()314         bool isNull() const { return (m_rep == &Rep::null()); }
isEmpty()315         bool isEmpty() const { return (!m_rep->len); }
316 
317         bool is8Bit() const;
318 
size()319         int size() const { return m_rep->size(); }
320 
321         UChar operator[](int pos) const;
322 
323         double toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const;
324         double toDouble(bool tolerateTrailingJunk) const;
325         double toDouble() const;
326 
327         uint32_t toUInt32(bool* ok = 0) const;
328         uint32_t toUInt32(bool* ok, bool tolerateEmptyString) const;
329         uint32_t toStrictUInt32(bool* ok = 0) const;
330 
331         unsigned toArrayIndex(bool* ok = 0) const;
332 
333         int find(const UString& f, int pos = 0) const;
334         int find(UChar, int pos = 0) const;
335         int rfind(const UString& f, int pos) const;
336         int rfind(UChar, int pos) const;
337 
338         UString substr(int pos = 0, int len = -1) const;
339 
null()340         static const UString& null() { return *nullUString; }
341 
rep()342         Rep* rep() const { return m_rep.get(); }
343         static Rep* nullRep();
344 
UString(PassRefPtr<Rep> r)345         UString(PassRefPtr<Rep> r)
346             : m_rep(r)
347         {
348             ASSERT(m_rep);
349         }
350 
351         size_t cost() const;
352 
353         // Attempt to grow this string such that it can grow to a total length of 'capacity'
354         // without reallocation.  This may fail a number of reasons - if the BasicString is
355         // shared and another string is using part of the capacity beyond our end point, if
356         // the realloc fails, or if this string is empty and has no storage.
357         //
358         // This method returns a boolean indicating success.
reserveCapacity(int capacity)359         bool reserveCapacity(int capacity)
360         {
361             return m_rep->reserveCapacity(capacity);
362         }
363 
364     private:
365         void expandCapacity(int requiredLength);
366         void expandPreCapacity(int requiredPreCap);
367         void makeNull();
368 
369         RefPtr<Rep> m_rep;
370         static UString* nullUString;
371 
372         friend void initializeUString();
373         friend bool operator==(const UString&, const UString&);
374         friend PassRefPtr<Rep> concatenate(Rep*, Rep*); // returns 0 if out of memory
375     };
376     PassRefPtr<UString::Rep> concatenate(UString::Rep*, UString::Rep*);
377     PassRefPtr<UString::Rep> concatenate(UString::Rep*, int);
378     PassRefPtr<UString::Rep> concatenate(UString::Rep*, double);
379 
380     inline bool operator==(const UString& s1, const UString& s2)
381     {
382         int size = s1.size();
383         switch (size) {
384         case 0:
385             return !s2.size();
386         case 1:
387             return s2.size() == 1 && s1.data()[0] == s2.data()[0];
388         case 2: {
389             if (s2.size() != 2)
390                 return false;
391             const UChar* d1 = s1.data();
392             const UChar* d2 = s2.data();
393             return (d1[0] == d2[0]) & (d1[1] == d2[1]);
394         }
395         default:
396             return s2.size() == size && memcmp(s1.data(), s2.data(), size * sizeof(UChar)) == 0;
397         }
398     }
399 
400 
401     inline bool operator!=(const UString& s1, const UString& s2)
402     {
403         return !JSC::operator==(s1, s2);
404     }
405 
406     bool operator<(const UString& s1, const UString& s2);
407     bool operator>(const UString& s1, const UString& s2);
408 
409     bool operator==(const UString& s1, const char* s2);
410 
411     inline bool operator!=(const UString& s1, const char* s2)
412     {
413         return !JSC::operator==(s1, s2);
414     }
415 
416     inline bool operator==(const char *s1, const UString& s2)
417     {
418         return operator==(s2, s1);
419     }
420 
421     inline bool operator!=(const char *s1, const UString& s2)
422     {
423         return !JSC::operator==(s1, s2);
424     }
425 
426     bool operator==(const CString&, const CString&);
427 
428     inline UString operator+(const UString& s1, const UString& s2)
429     {
430         RefPtr<UString::Rep> result = concatenate(s1.rep(), s2.rep());
431         return UString(result ? result.release() : UString::nullRep());
432     }
433 
434     int compare(const UString&, const UString&);
435 
436     bool equal(const UString::Rep*, const UString::Rep*);
437 
create(PassRefPtr<UString::Rep> rep,int offset,int length)438     inline PassRefPtr<UString::Rep> UString::Rep::create(PassRefPtr<UString::Rep> rep, int offset, int length)
439     {
440         ASSERT(rep);
441         rep->checkConsistency();
442 
443         int repOffset = rep->offset;
444 
445         PassRefPtr<BaseString> base = rep->baseString();
446 
447         ASSERT(-(offset + repOffset) <= base->usedPreCapacity);
448         ASSERT(offset + repOffset + length <= base->usedCapacity);
449 
450         // Steal the single reference this Rep was created with.
451         return adoptRef(new Rep(base, repOffset + offset, length));
452     }
453 
data()454     inline UChar* UString::Rep::data() const
455     {
456         const BaseString* base = baseString();
457         return base->buf + base->preCapacity + offset;
458     }
459 
setStatic(bool v)460     inline void UString::Rep::setStatic(bool v)
461     {
462         ASSERT(!identifierTable());
463         if (v)
464             m_identifierTableAndFlags.setFlag(StaticFlag);
465         else
466             m_identifierTableAndFlags.clearFlag(StaticFlag);
467     }
468 
setBaseString(PassRefPtr<BaseString> base)469     inline void UString::Rep::setBaseString(PassRefPtr<BaseString> base)
470     {
471         ASSERT(base != this);
472         ASSERT(!baseIsSelf());
473         m_baseString = base.releaseRef();
474     }
475 
baseString()476     inline UString::BaseString* UString::Rep::baseString()
477     {
478         return !baseIsSelf() ? m_baseString : reinterpret_cast<BaseString*>(this) ;
479     }
480 
baseString()481     inline const UString::BaseString* UString::Rep::baseString() const
482     {
483         return const_cast<Rep*>(this)->baseString();
484     }
485 
#ifdef NDEBUG
// With NDEBUG defined, consistency checking is an inline no-op.
inline void UString::Rep::checkConsistency() const { }
#endif
491 
UString()492     inline UString::UString()
493         : m_rep(&Rep::null())
494     {
495     }
496 
497     // Rule from ECMA 15.2 about what an array index is.
498     // Must exactly match string form of an unsigned integer, and be less than 2^32 - 1.
toArrayIndex(bool * ok)499     inline unsigned UString::toArrayIndex(bool* ok) const
500     {
501         unsigned i = toStrictUInt32(ok);
502         if (ok && i >= 0xFFFFFFFFU)
503             *ok = false;
504         return i;
505     }
506 
507     // We'd rather not do shared substring append for small strings, since
508     // this runs too much risk of a tiny initial string holding down a
509     // huge buffer.
510     // FIXME: this should be size_t but that would cause warnings until we
511     // fix UString sizes to be size_t instead of int
512     static const int minShareSize = Heap::minExtraCostSize / sizeof(UChar);
513 
cost()514     inline size_t UString::cost() const
515     {
516         BaseString* base = m_rep->baseString();
517         size_t capacity = (base->capacity + base->preCapacity) * sizeof(UChar);
518         size_t reportedCost = base->reportedCost;
519         ASSERT(capacity >= reportedCost);
520 
521         size_t capacityDelta = capacity - reportedCost;
522 
523         if (capacityDelta < static_cast<size_t>(minShareSize))
524             return 0;
525 
526         base->reportedCost = capacity;
527 
528         return capacityDelta;
529     }
530 
531     struct IdentifierRepHash : PtrHash<RefPtr<JSC::UString::Rep> > {
hashIdentifierRepHash532         static unsigned hash(const RefPtr<JSC::UString::Rep>& key) { return key->computedHash(); }
hashIdentifierRepHash533         static unsigned hash(JSC::UString::Rep* key) { return key->computedHash(); }
534     };
535 
536     void initializeUString();
537 } // namespace JSC
538 
539 namespace WTF {
540 
541     template<typename T> struct DefaultHash;
542     template<typename T> struct StrHash;
543 
544     template<> struct StrHash<JSC::UString::Rep*> {
545         static unsigned hash(const JSC::UString::Rep* key) { return key->hash(); }
546         static bool equal(const JSC::UString::Rep* a, const JSC::UString::Rep* b) { return JSC::equal(a, b); }
547         static const bool safeToCompareToEmptyOrDeleted = false;
548     };
549 
550     template<> struct StrHash<RefPtr<JSC::UString::Rep> > : public StrHash<JSC::UString::Rep*> {
551         using StrHash<JSC::UString::Rep*>::hash;
552         static unsigned hash(const RefPtr<JSC::UString::Rep>& key) { return key->hash(); }
553         using StrHash<JSC::UString::Rep*>::equal;
554         static bool equal(const RefPtr<JSC::UString::Rep>& a, const RefPtr<JSC::UString::Rep>& b) { return JSC::equal(a.get(), b.get()); }
555         static bool equal(const JSC::UString::Rep* a, const RefPtr<JSC::UString::Rep>& b) { return JSC::equal(a, b.get()); }
556         static bool equal(const RefPtr<JSC::UString::Rep>& a, const JSC::UString::Rep* b) { return JSC::equal(a.get(), b); }
557 
558         static const bool safeToCompareToEmptyOrDeleted = false;
559     };
560 
561     template<> struct DefaultHash<JSC::UString::Rep*> {
562         typedef StrHash<JSC::UString::Rep*> Hash;
563     };
564 
565     template<> struct DefaultHash<RefPtr<JSC::UString::Rep> > {
566         typedef StrHash<RefPtr<JSC::UString::Rep> > Hash;
567 
568     };
569 
570 } // namespace WTF
571 
572 #endif
573