1 /* 2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org) 3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. 4 * Copyright (C) 2009 Google Inc. All rights reserved. 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Library General Public 8 * License as published by the Free Software Foundation; either 9 * version 2 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Library General Public License for more details. 15 * 16 * You should have received a copy of the GNU Library General Public License 17 * along with this library; see the file COPYING.LIB. If not, write to 18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 19 * Boston, MA 02110-1301, USA. 20 * 21 */ 22 23 #ifndef UString_h 24 #define UString_h 25 26 #include "Collector.h" 27 #include <stdint.h> 28 #include <string.h> 29 #include <wtf/Assertions.h> 30 #include <wtf/CrossThreadRefCounted.h> 31 #include <wtf/OwnFastMallocPtr.h> 32 #include <wtf/PassRefPtr.h> 33 #include <wtf/PtrAndFlags.h> 34 #include <wtf/RefPtr.h> 35 #include <wtf/Vector.h> 36 #include <wtf/unicode/Unicode.h> 37 38 namespace JSC { 39 40 using WTF::PlacementNewAdoptType; 41 using WTF::PlacementNewAdopt; 42 43 class IdentifierTable; 44 45 class CString { 46 public: CString()47 CString() 48 : m_length(0) 49 , m_data(0) 50 { 51 } 52 53 CString(const char*); 54 CString(const char*, size_t); 55 CString(const CString&); 56 57 ~CString(); 58 59 static CString adopt(char*, size_t); // buffer should be allocated with new[]. 60 61 CString& append(const CString&); 62 CString& operator=(const char* c); 63 CString& operator=(const CString&); 64 CString& operator+=(const CString& c) { return append(c); } 65 size()66 size_t size() const { return m_length; } c_str()67 const char* c_str() const { return m_data; } 68 69 private: 70 size_t m_length; 71 char* m_data; 72 }; 73 74 typedef Vector<char, 32> CStringBuffer; 75 76 class UString { 77 friend class JIT; 78 79 public: 80 typedef CrossThreadRefCounted<OwnFastMallocPtr<UChar> > SharedUChar; 81 struct BaseString; 82 struct Rep : Noncopyable { 83 friend class JIT; 84 createRep85 static PassRefPtr<Rep> create(UChar* buffer, int length) 86 { 87 return adoptRef(new BaseString(buffer, length)); 88 } 89 createEmptyBufferRep90 static PassRefPtr<Rep> createEmptyBuffer(size_t size) 91 { 92 // Guard against integer overflow 93 if (size < (std::numeric_limits<size_t>::max() / sizeof(UChar))) { 94 if (void * buf = tryFastMalloc(size * sizeof(UChar))) 95 return adoptRef(new BaseString(static_cast<UChar*>(buf), 0, size)); 96 } 97 return adoptRef(new BaseString(0, 0, 0)); 98 } 99 100 static PassRefPtr<Rep> createCopying(const UChar*, int); 101 static PassRefPtr<Rep> create(PassRefPtr<Rep> base, int offset, int length); 102 103 // Constructs a string from a UTF-8 string, using strict conversion (see comments in UTF8.h). 104 // Returns UString::Rep::null for null input or conversion failure. 105 static PassRefPtr<Rep> createFromUTF8(const char*); 106 107 // Uses SharedUChar to have joint ownership over the UChar*. 108 static PassRefPtr<Rep> create(UChar*, int, PassRefPtr<SharedUChar>); 109 110 SharedUChar* sharedBuffer(); 111 void destroy(); 112 baseIsSelfRep113 bool baseIsSelf() const { return m_identifierTableAndFlags.isFlagSet(BaseStringFlag); } 114 UChar* data() const; sizeRep115 int size() const { return len; } 116 hashRep117 unsigned hash() const { if (_hash == 0) _hash = computeHash(data(), len); return _hash; } computedHashRep118 unsigned computedHash() const { ASSERT(_hash); return _hash; } // fast path for Identifiers 119 120 static unsigned computeHash(const UChar*, int length); 121 static unsigned computeHash(const char*, int length); computeHashRep122 static unsigned computeHash(const char* s) { return computeHash(s, strlen(s)); } 123 identifierTableRep124 IdentifierTable* identifierTable() const { return m_identifierTableAndFlags.get(); } setIdentifierTableRep125 void setIdentifierTable(IdentifierTable* table) { ASSERT(!isStatic()); m_identifierTableAndFlags.set(table); } 126 isStaticRep127 bool isStatic() const { return m_identifierTableAndFlags.isFlagSet(StaticFlag); } 128 void setStatic(bool); 129 void setBaseString(PassRefPtr<BaseString>); 130 BaseString* baseString(); 131 const BaseString* baseString() const; 132 refRep133 Rep* ref() { ++rc; return this; } derefRep134 ALWAYS_INLINE void deref() { if (--rc == 0) destroy(); } 135 136 void checkConsistency() const; 137 enum UStringFlags { 138 StaticFlag, 139 BaseStringFlag 140 }; 141 142 // unshared data 143 int offset; 144 int len; 145 int rc; // For null and empty static strings, this field does not reflect a correct count, because ref/deref are not thread-safe. A special case in destroy() guarantees that these do not get deleted. 146 mutable unsigned _hash; 147 PtrAndFlags<IdentifierTable, UStringFlags> m_identifierTableAndFlags; 148 nullRep149 static BaseString& null() { return *nullBaseString; } emptyRep150 static BaseString& empty() { return *emptyBaseString; } 151 152 bool reserveCapacity(int capacity); 153 154 protected: 155 // Constructor for use by BaseString subclass; they use the union with m_baseString for another purpose. RepRep156 Rep(int length) 157 : offset(0) 158 , len(length) 159 , rc(1) 160 , _hash(0) 161 , m_baseString(0) 162 { 163 } 164 RepRep165 Rep(PassRefPtr<BaseString> base, int offsetInBase, int length) 166 : offset(offsetInBase) 167 , len(length) 168 , rc(1) 169 , _hash(0) 170 , m_baseString(base.releaseRef()) 171 { 172 checkConsistency(); 173 } 174 175 union { 176 // If !baseIsSelf() 177 BaseString* m_baseString; 178 // If baseIsSelf() 179 SharedUChar* m_sharedBuffer; 180 }; 181 182 private: 183 // For SmallStringStorage which allocates an array and does initialization manually. RepRep184 Rep() { } 185 186 friend class SmallStringsStorage; 187 friend void initializeUString(); 188 JS_EXPORTDATA static BaseString* nullBaseString; 189 JS_EXPORTDATA static BaseString* emptyBaseString; 190 }; 191 192 193 struct BaseString : public Rep { isSharedBaseString194 bool isShared() { return rc != 1 || isBufferReadOnly(); } 195 void setSharedBuffer(PassRefPtr<SharedUChar>); 196 isBufferReadOnlyBaseString197 bool isBufferReadOnly() 198 { 199 if (!m_sharedBuffer) 200 return false; 201 return slowIsBufferReadOnly(); 202 } 203 204 // potentially shared data. 205 UChar* buf; 206 int preCapacity; 207 int usedPreCapacity; 208 int capacity; 209 int usedCapacity; 210 211 size_t reportedCost; 212 213 private: 214 BaseString(UChar* buffer, int length, int additionalCapacity = 0) RepBaseString215 : Rep(length) 216 , buf(buffer) 217 , preCapacity(0) 218 , usedPreCapacity(0) 219 , capacity(length + additionalCapacity) 220 , usedCapacity(length) 221 , reportedCost(0) 222 { 223 m_identifierTableAndFlags.setFlag(BaseStringFlag); 224 checkConsistency(); 225 } 226 227 SharedUChar* sharedBuffer(); 228 bool slowIsBufferReadOnly(); 229 230 friend struct Rep; 231 friend class SmallStringsStorage; 232 friend void initializeUString(); 233 }; 234 235 public: 236 UString(); 237 UString(const char*); 238 UString(const UChar*, int length); 239 UString(UChar*, int length, bool copy); 240 UString(const UString & s)241 UString(const UString& s) 242 : m_rep(s.m_rep) 243 { 244 } 245 246 UString(const Vector<UChar>& buffer); 247 ~UString()248 ~UString() 249 { 250 } 251 252 // Special constructor for cases where we overwrite an object in place. UString(PlacementNewAdoptType)253 UString(PlacementNewAdoptType) 254 : m_rep(PlacementNewAdopt) 255 { 256 } 257 258 static UString from(int); 259 static UString from(unsigned int); 260 static UString from(long); 261 static UString from(double); 262 263 struct Range { 264 public: RangeRange265 Range(int pos, int len) 266 : position(pos) 267 , length(len) 268 { 269 } 270 RangeRange271 Range() 272 { 273 } 274 275 int position; 276 int length; 277 }; 278 279 UString spliceSubstringsWithSeparators(const Range* substringRanges, int rangeCount, const UString* separators, int separatorCount) const; 280 281 UString replaceRange(int rangeStart, int RangeEnd, const UString& replacement) const; 282 283 UString& append(const UString&); 284 UString& append(const char*); 285 UString& append(UChar); append(char c)286 UString& append(char c) { return append(static_cast<UChar>(static_cast<unsigned char>(c))); } 287 UString& append(const UChar*, int size); 288 UString& appendNumeric(int); 289 UString& appendNumeric(double); 290 291 bool getCString(CStringBuffer&) const; 292 293 // NOTE: This method should only be used for *debugging* purposes as it 294 // is neither Unicode safe nor free from side effects nor thread-safe. 295 char* ascii() const; 296 297 /** 298 * Convert the string to UTF-8, assuming it is UTF-16 encoded. 299 * In non-strict mode, this function is tolerant of badly formed UTF-16, it 300 * can create UTF-8 strings that are invalid because they have characters in 301 * the range U+D800-U+DDFF, U+FFFE, or U+FFFF, but the UTF-8 string is 302 * guaranteed to be otherwise valid. 303 * In strict mode, error is returned as null CString. 304 */ 305 CString UTF8String(bool strict = false) const; 306 307 UString& operator=(const char*c); 308 309 UString& operator+=(const UString& s) { return append(s); } 310 UString& operator+=(const char* s) { return append(s); } 311 data()312 const UChar* data() const { return m_rep->data(); } 313 isNull()314 bool isNull() const { return (m_rep == &Rep::null()); } isEmpty()315 bool isEmpty() const { return (!m_rep->len); } 316 317 bool is8Bit() const; 318 size()319 int size() const { return m_rep->size(); } 320 321 UChar operator[](int pos) const; 322 323 double toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const; 324 double toDouble(bool tolerateTrailingJunk) const; 325 double toDouble() const; 326 327 uint32_t toUInt32(bool* ok = 0) const; 328 uint32_t toUInt32(bool* ok, bool tolerateEmptyString) const; 329 uint32_t toStrictUInt32(bool* ok = 0) const; 330 331 unsigned toArrayIndex(bool* ok = 0) const; 332 333 int find(const UString& f, int pos = 0) const; 334 int find(UChar, int pos = 0) const; 335 int rfind(const UString& f, int pos) const; 336 int rfind(UChar, int pos) const; 337 338 UString substr(int pos = 0, int len = -1) const; 339 null()340 static const UString& null() { return *nullUString; } 341 rep()342 Rep* rep() const { return m_rep.get(); } 343 static Rep* nullRep(); 344 UString(PassRefPtr<Rep> r)345 UString(PassRefPtr<Rep> r) 346 : m_rep(r) 347 { 348 ASSERT(m_rep); 349 } 350 351 size_t cost() const; 352 353 // Attempt to grow this string such that it can grow to a total length of 'capacity' 354 // without reallocation. This may fail a number of reasons - if the BasicString is 355 // shared and another string is using part of the capacity beyond our end point, if 356 // the realloc fails, or if this string is empty and has no storage. 357 // 358 // This method returns a boolean indicating success. reserveCapacity(int capacity)359 bool reserveCapacity(int capacity) 360 { 361 return m_rep->reserveCapacity(capacity); 362 } 363 364 private: 365 void expandCapacity(int requiredLength); 366 void expandPreCapacity(int requiredPreCap); 367 void makeNull(); 368 369 RefPtr<Rep> m_rep; 370 static UString* nullUString; 371 372 friend void initializeUString(); 373 friend bool operator==(const UString&, const UString&); 374 friend PassRefPtr<Rep> concatenate(Rep*, Rep*); // returns 0 if out of memory 375 }; 376 PassRefPtr<UString::Rep> concatenate(UString::Rep*, UString::Rep*); 377 PassRefPtr<UString::Rep> concatenate(UString::Rep*, int); 378 PassRefPtr<UString::Rep> concatenate(UString::Rep*, double); 379 380 inline bool operator==(const UString& s1, const UString& s2) 381 { 382 int size = s1.size(); 383 switch (size) { 384 case 0: 385 return !s2.size(); 386 case 1: 387 return s2.size() == 1 && s1.data()[0] == s2.data()[0]; 388 case 2: { 389 if (s2.size() != 2) 390 return false; 391 const UChar* d1 = s1.data(); 392 const UChar* d2 = s2.data(); 393 return (d1[0] == d2[0]) & (d1[1] == d2[1]); 394 } 395 default: 396 return s2.size() == size && memcmp(s1.data(), s2.data(), size * sizeof(UChar)) == 0; 397 } 398 } 399 400 401 inline bool operator!=(const UString& s1, const UString& s2) 402 { 403 return !JSC::operator==(s1, s2); 404 } 405 406 bool operator<(const UString& s1, const UString& s2); 407 bool operator>(const UString& s1, const UString& s2); 408 409 bool operator==(const UString& s1, const char* s2); 410 411 inline bool operator!=(const UString& s1, const char* s2) 412 { 413 return !JSC::operator==(s1, s2); 414 } 415 416 inline bool operator==(const char *s1, const UString& s2) 417 { 418 return operator==(s2, s1); 419 } 420 421 inline bool operator!=(const char *s1, const UString& s2) 422 { 423 return !JSC::operator==(s1, s2); 424 } 425 426 bool operator==(const CString&, const CString&); 427 428 inline UString operator+(const UString& s1, const UString& s2) 429 { 430 RefPtr<UString::Rep> result = concatenate(s1.rep(), s2.rep()); 431 return UString(result ? result.release() : UString::nullRep()); 432 } 433 434 int compare(const UString&, const UString&); 435 436 bool equal(const UString::Rep*, const UString::Rep*); 437 create(PassRefPtr<UString::Rep> rep,int offset,int length)438 inline PassRefPtr<UString::Rep> UString::Rep::create(PassRefPtr<UString::Rep> rep, int offset, int length) 439 { 440 ASSERT(rep); 441 rep->checkConsistency(); 442 443 int repOffset = rep->offset; 444 445 PassRefPtr<BaseString> base = rep->baseString(); 446 447 ASSERT(-(offset + repOffset) <= base->usedPreCapacity); 448 ASSERT(offset + repOffset + length <= base->usedCapacity); 449 450 // Steal the single reference this Rep was created with. 451 return adoptRef(new Rep(base, repOffset + offset, length)); 452 } 453 data()454 inline UChar* UString::Rep::data() const 455 { 456 const BaseString* base = baseString(); 457 return base->buf + base->preCapacity + offset; 458 } 459 setStatic(bool v)460 inline void UString::Rep::setStatic(bool v) 461 { 462 ASSERT(!identifierTable()); 463 if (v) 464 m_identifierTableAndFlags.setFlag(StaticFlag); 465 else 466 m_identifierTableAndFlags.clearFlag(StaticFlag); 467 } 468 setBaseString(PassRefPtr<BaseString> base)469 inline void UString::Rep::setBaseString(PassRefPtr<BaseString> base) 470 { 471 ASSERT(base != this); 472 ASSERT(!baseIsSelf()); 473 m_baseString = base.releaseRef(); 474 } 475 baseString()476 inline UString::BaseString* UString::Rep::baseString() 477 { 478 return !baseIsSelf() ? m_baseString : reinterpret_cast<BaseString*>(this) ; 479 } 480 baseString()481 inline const UString::BaseString* UString::Rep::baseString() const 482 { 483 return const_cast<Rep*>(this)->baseString(); 484 } 485 486 #ifdef NDEBUG checkConsistency()487 inline void UString::Rep::checkConsistency() const 488 { 489 } 490 #endif 491 UString()492 inline UString::UString() 493 : m_rep(&Rep::null()) 494 { 495 } 496 497 // Rule from ECMA 15.2 about what an array index is. 498 // Must exactly match string form of an unsigned integer, and be less than 2^32 - 1. toArrayIndex(bool * ok)499 inline unsigned UString::toArrayIndex(bool* ok) const 500 { 501 unsigned i = toStrictUInt32(ok); 502 if (ok && i >= 0xFFFFFFFFU) 503 *ok = false; 504 return i; 505 } 506 507 // We'd rather not do shared substring append for small strings, since 508 // this runs too much risk of a tiny initial string holding down a 509 // huge buffer. 510 // FIXME: this should be size_t but that would cause warnings until we 511 // fix UString sizes to be size_t instead of int 512 static const int minShareSize = Heap::minExtraCostSize / sizeof(UChar); 513 cost()514 inline size_t UString::cost() const 515 { 516 BaseString* base = m_rep->baseString(); 517 size_t capacity = (base->capacity + base->preCapacity) * sizeof(UChar); 518 size_t reportedCost = base->reportedCost; 519 ASSERT(capacity >= reportedCost); 520 521 size_t capacityDelta = capacity - reportedCost; 522 523 if (capacityDelta < static_cast<size_t>(minShareSize)) 524 return 0; 525 526 base->reportedCost = capacity; 527 528 return capacityDelta; 529 } 530 531 struct IdentifierRepHash : PtrHash<RefPtr<JSC::UString::Rep> > { hashIdentifierRepHash532 static unsigned hash(const RefPtr<JSC::UString::Rep>& key) { return key->computedHash(); } hashIdentifierRepHash533 static unsigned hash(JSC::UString::Rep* key) { return key->computedHash(); } 534 }; 535 536 void initializeUString(); 537 } // namespace JSC 538 539 namespace WTF { 540 541 template<typename T> struct DefaultHash; 542 template<typename T> struct StrHash; 543 544 template<> struct StrHash<JSC::UString::Rep*> { 545 static unsigned hash(const JSC::UString::Rep* key) { return key->hash(); } 546 static bool equal(const JSC::UString::Rep* a, const JSC::UString::Rep* b) { return JSC::equal(a, b); } 547 static const bool safeToCompareToEmptyOrDeleted = false; 548 }; 549 550 template<> struct StrHash<RefPtr<JSC::UString::Rep> > : public StrHash<JSC::UString::Rep*> { 551 using StrHash<JSC::UString::Rep*>::hash; 552 static unsigned hash(const RefPtr<JSC::UString::Rep>& key) { return key->hash(); } 553 using StrHash<JSC::UString::Rep*>::equal; 554 static bool equal(const RefPtr<JSC::UString::Rep>& a, const RefPtr<JSC::UString::Rep>& b) { return JSC::equal(a.get(), b.get()); } 555 static bool equal(const JSC::UString::Rep* a, const RefPtr<JSC::UString::Rep>& b) { return JSC::equal(a, b.get()); } 556 static bool equal(const RefPtr<JSC::UString::Rep>& a, const JSC::UString::Rep* b) { return JSC::equal(a.get(), b); } 557 558 static const bool safeToCompareToEmptyOrDeleted = false; 559 }; 560 561 template<> struct DefaultHash<JSC::UString::Rep*> { 562 typedef StrHash<JSC::UString::Rep*> Hash; 563 }; 564 565 template<> struct DefaultHash<RefPtr<JSC::UString::Rep> > { 566 typedef StrHash<RefPtr<JSC::UString::Rep> > Hash; 567 568 }; 569 570 } // namespace WTF 571 572 #endif 573