1 /*
2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4 * Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5 * Copyright (C) 2009 Google Inc. All rights reserved.
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Library General Public
9 * License as published by the Free Software Foundation; either
10 * version 2 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Library General Public License for more details.
16 *
17 * You should have received a copy of the GNU Library General Public License
18 * along with this library; see the file COPYING.LIB. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
21 *
22 */
23
24 #include "config.h"
25 #include "UString.h"
26
27 #include "JSGlobalObjectFunctions.h"
28 #include "Collector.h"
29 #include "dtoa.h"
30 #include "Identifier.h"
31 #include "Operations.h"
32 #include <ctype.h>
33 #include <float.h>
34 #include <limits.h>
35 #include <math.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <wtf/ASCIICType.h>
39 #include <wtf/Assertions.h>
40 #include <wtf/MathExtras.h>
41 #include <wtf/Vector.h>
42 #include <wtf/unicode/UTF8.h>
43
44 #if HAVE(STRING_H)
45 #include <string.h>
46 #endif
47 #if HAVE(STRINGS_H)
48 #include <strings.h>
49 #endif
50
51 using namespace WTF;
52 using namespace WTF::Unicode;
53 using namespace std;
54
55 // This can be tuned differently per platform by putting platform #ifs right here.
56 // If you don't define this macro at all, then copyChars will just call directly
57 // to memcpy.
58 #define USTRING_COPY_CHARS_INLINE_CUTOFF 20
59
60 namespace JSC {
61
62 extern const double NaN;
63 extern const double Inf;
64
65 // This number must be at least 2 to avoid sharing empty, null as well as 1 character strings from SmallStrings.
66 static const int minLengthToShare = 10;
67
// Sentinel size returned by expandedSize() when a capacity computation overflows;
// it is the largest value a size_t can hold.
static inline size_t overflowIndicator()
{
    return std::numeric_limits<size_t>::max();
}
maxUChars()69 static inline size_t maxUChars() { return std::numeric_limits<size_t>::max() / sizeof(UChar); }
70
allocChars(size_t length)71 static inline UChar* allocChars(size_t length)
72 {
73 ASSERT(length);
74 if (length > maxUChars())
75 return 0;
76 return static_cast<UChar*>(tryFastMalloc(sizeof(UChar) * length));
77 }
78
reallocChars(UChar * buffer,size_t length)79 static inline UChar* reallocChars(UChar* buffer, size_t length)
80 {
81 ASSERT(length);
82 if (length > maxUChars())
83 return 0;
84 return static_cast<UChar*>(tryFastRealloc(buffer, sizeof(UChar) * length));
85 }
86
copyChars(UChar * destination,const UChar * source,unsigned numCharacters)87 static inline void copyChars(UChar* destination, const UChar* source, unsigned numCharacters)
88 {
89 #ifdef USTRING_COPY_CHARS_INLINE_CUTOFF
90 if (numCharacters <= USTRING_COPY_CHARS_INLINE_CUTOFF) {
91 for (unsigned i = 0; i < numCharacters; ++i)
92 destination[i] = source[i];
93 return;
94 }
95 #endif
96 memcpy(destination, source, numCharacters * sizeof(UChar));
97 }
98
99 COMPILE_ASSERT(sizeof(UChar) == 2, uchar_is_2_bytes);
100
CString(const char * c)101 CString::CString(const char* c)
102 : m_length(strlen(c))
103 , m_data(new char[m_length + 1])
104 {
105 memcpy(m_data, c, m_length + 1);
106 }
107
CString(const char * c,size_t length)108 CString::CString(const char* c, size_t length)
109 : m_length(length)
110 , m_data(new char[length + 1])
111 {
112 memcpy(m_data, c, m_length);
113 m_data[m_length] = 0;
114 }
115
// Copy constructor: duplicates |b|'s buffer (including its terminator), or leaves
// the data pointer null when |b| has no buffer.
CString::CString(const CString& b)
    : m_length(b.m_length)
    , m_data(0)
{
    if (!b.m_data)
        return;

    const size_t bytesIncludingTerminator = m_length + 1;
    m_data = new char[bytesIncludingTerminator];
    memcpy(m_data, b.m_data, bytesIncludingTerminator);
}
125
~CString()126 CString::~CString()
127 {
128 delete [] m_data;
129 }
130
adopt(char * c,size_t length)131 CString CString::adopt(char* c, size_t length)
132 {
133 CString s;
134 s.m_data = c;
135 s.m_length = length;
136 return s;
137 }
138
// Appends the contents of |t| to this string by building a fresh NUL-terminated
// buffer and swapping it in. Returns *this.
CString& CString::append(const CString& t)
{
    const size_t ownLength = m_length;
    const size_t extraLength = t.m_length;
    const size_t combinedLength = ownLength + extraLength;

    char* combined = new char[combinedLength + 1];
    // Empty pieces are skipped so a null data pointer is never handed to memcpy.
    if (ownLength)
        memcpy(combined, m_data, ownLength);
    if (extraLength)
        memcpy(combined + ownLength, t.m_data, extraLength);
    combined[combinedLength] = 0;

    // The old buffer is only released after both pieces were read.
    delete [] m_data;
    m_data = combined;
    m_length = combinedLength;

    return *this;
}
155
operator =(const char * c)156 CString& CString::operator=(const char* c)
157 {
158 if (m_data)
159 delete [] m_data;
160 m_length = strlen(c);
161 m_data = new char[m_length + 1];
162 memcpy(m_data, c, m_length + 1);
163
164 return *this;
165 }
166
// Copy assignment: replaces this string's buffer with a duplicate of |str|'s
// (or with a null buffer when |str| has none). Self-assignment is a no-op.
CString& CString::operator=(const CString& str)
{
    if (this != &str) {
        if (m_data)
            delete [] m_data;

        m_length = str.m_length;
        if (!str.m_data)
            m_data = 0;
        else {
            const size_t bytesIncludingTerminator = m_length + 1;
            m_data = new char[bytesIncludingTerminator];
            memcpy(m_data, str.m_data, bytesIncludingTerminator);
        }
    }

    return *this;
}
183
operator ==(const CString & c1,const CString & c2)184 bool operator==(const CString& c1, const CString& c2)
185 {
186 size_t len = c1.size();
187 return len == c2.size() && (len == 0 || memcmp(c1.c_str(), c2.c_str(), len) == 0);
188 }
189
190 // These static strings are immutable, except for rc, whose initial value is chosen to
191 // reduce the possibility of it becoming zero due to ref/deref not being thread-safe.
192 static UChar sharedEmptyChar;
193 UString::BaseString* UString::Rep::nullBaseString;
194 UString::BaseString* UString::Rep::emptyBaseString;
195 UString* UString::nullUString;
196
initializeStaticBaseString(UString::BaseString & base)197 static void initializeStaticBaseString(UString::BaseString& base)
198 {
199 base.rc = INT_MAX / 2;
200 base.m_identifierTableAndFlags.setFlag(UString::Rep::StaticFlag);
201 base.checkConsistency();
202 }
203
initializeUString()204 void initializeUString()
205 {
206 UString::Rep::nullBaseString = new UString::BaseString(0, 0);
207 initializeStaticBaseString(*UString::Rep::nullBaseString);
208
209 UString::Rep::emptyBaseString = new UString::BaseString(&sharedEmptyChar, 0);
210 initializeStaticBaseString(*UString::Rep::emptyBaseString);
211
212 UString::nullUString = new UString;
213 }
214
215 static char* statBuffer = 0; // Only used for debugging via UString::ascii().
216
createCopying(const UChar * d,int l)217 PassRefPtr<UString::Rep> UString::Rep::createCopying(const UChar* d, int l)
218 {
219 UChar* copyD = static_cast<UChar*>(fastMalloc(l * sizeof(UChar)));
220 copyChars(copyD, d, l);
221 return create(copyD, l);
222 }
223
createFromUTF8(const char * string)224 PassRefPtr<UString::Rep> UString::Rep::createFromUTF8(const char* string)
225 {
226 if (!string)
227 return &UString::Rep::null();
228
229 size_t length = strlen(string);
230 Vector<UChar, 1024> buffer(length);
231 UChar* p = buffer.data();
232 if (conversionOK != convertUTF8ToUTF16(&string, string + length, &p, p + length))
233 return &UString::Rep::null();
234
235 return UString::Rep::createCopying(buffer.data(), p - buffer.data());
236 }
237
create(UChar * string,int length,PassRefPtr<UString::SharedUChar> sharedBuffer)238 PassRefPtr<UString::Rep> UString::Rep::create(UChar* string, int length, PassRefPtr<UString::SharedUChar> sharedBuffer)
239 {
240 PassRefPtr<UString::Rep> rep = create(string, length);
241 rep->baseString()->setSharedBuffer(sharedBuffer);
242 rep->checkConsistency();
243 return rep;
244 }
245
sharedBuffer()246 UString::SharedUChar* UString::Rep::sharedBuffer()
247 {
248 UString::BaseString* base = baseString();
249 if (len < minLengthToShare)
250 return 0;
251
252 return base->sharedBuffer();
253 }
254
destroy()255 void UString::Rep::destroy()
256 {
257 checkConsistency();
258
259 // Static null and empty strings can never be destroyed, but we cannot rely on
260 // reference counting, because ref/deref are not thread-safe.
261 if (!isStatic()) {
262 if (identifierTable())
263 Identifier::remove(this);
264
265 UString::BaseString* base = baseString();
266 if (base == this) {
267 if (m_sharedBuffer)
268 m_sharedBuffer->deref();
269 else
270 fastFree(base->buf);
271 } else
272 base->deref();
273
274 delete this;
275 }
276 }
277
278 // Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's
279 // or anything like that.
280 const unsigned PHI = 0x9e3779b9U;
281
282 // Paul Hsieh's SuperFastHash
283 // http://www.azillionmonkeys.com/qed/hash.html
computeHash(const UChar * s,int len)284 unsigned UString::Rep::computeHash(const UChar* s, int len)
285 {
286 unsigned l = len;
287 uint32_t hash = PHI;
288 uint32_t tmp;
289
290 int rem = l & 1;
291 l >>= 1;
292
293 // Main loop
294 for (; l > 0; l--) {
295 hash += s[0];
296 tmp = (s[1] << 11) ^ hash;
297 hash = (hash << 16) ^ tmp;
298 s += 2;
299 hash += hash >> 11;
300 }
301
302 // Handle end case
303 if (rem) {
304 hash += s[0];
305 hash ^= hash << 11;
306 hash += hash >> 17;
307 }
308
309 // Force "avalanching" of final 127 bits
310 hash ^= hash << 3;
311 hash += hash >> 5;
312 hash ^= hash << 2;
313 hash += hash >> 15;
314 hash ^= hash << 10;
315
316 // this avoids ever returning a hash code of 0, since that is used to
317 // signal "hash not computed yet", using a value that is likely to be
318 // effectively the same as 0 when the low bits are masked
319 if (hash == 0)
320 hash = 0x80000000;
321
322 return hash;
323 }
324
325 // Paul Hsieh's SuperFastHash
326 // http://www.azillionmonkeys.com/qed/hash.html
computeHash(const char * s,int l)327 unsigned UString::Rep::computeHash(const char* s, int l)
328 {
329 // This hash is designed to work on 16-bit chunks at a time. But since the normal case
330 // (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
331 // were 16-bit chunks, which should give matching results
332
333 uint32_t hash = PHI;
334 uint32_t tmp;
335
336 size_t rem = l & 1;
337 l >>= 1;
338
339 // Main loop
340 for (; l > 0; l--) {
341 hash += static_cast<unsigned char>(s[0]);
342 tmp = (static_cast<unsigned char>(s[1]) << 11) ^ hash;
343 hash = (hash << 16) ^ tmp;
344 s += 2;
345 hash += hash >> 11;
346 }
347
348 // Handle end case
349 if (rem) {
350 hash += static_cast<unsigned char>(s[0]);
351 hash ^= hash << 11;
352 hash += hash >> 17;
353 }
354
355 // Force "avalanching" of final 127 bits
356 hash ^= hash << 3;
357 hash += hash >> 5;
358 hash ^= hash << 2;
359 hash += hash >> 15;
360 hash ^= hash << 10;
361
362 // this avoids ever returning a hash code of 0, since that is used to
363 // signal "hash not computed yet", using a value that is likely to be
364 // effectively the same as 0 when the low bits are masked
365 if (hash == 0)
366 hash = 0x80000000;
367
368 return hash;
369 }
370
371 #ifndef NDEBUG
checkConsistency() const372 void UString::Rep::checkConsistency() const
373 {
374 const UString::BaseString* base = baseString();
375
376 // There is no recursion for base strings.
377 ASSERT(base == base->baseString());
378
379 if (isStatic()) {
380 // There are only two static strings: null and empty.
381 ASSERT(!len);
382
383 // Static strings cannot get in identifier tables, because they are globally shared.
384 ASSERT(!identifierTable());
385 }
386
387 // The string fits in buffer.
388 ASSERT(base->usedPreCapacity <= base->preCapacity);
389 ASSERT(base->usedCapacity <= base->capacity);
390 ASSERT(-offset <= base->usedPreCapacity);
391 ASSERT(offset + len <= base->usedCapacity);
392 }
393 #endif
394
sharedBuffer()395 UString::SharedUChar* UString::BaseString::sharedBuffer()
396 {
397 if (!m_sharedBuffer)
398 setSharedBuffer(SharedUChar::create(new OwnFastMallocPtr<UChar>(buf)));
399 return m_sharedBuffer;
400 }
401
setSharedBuffer(PassRefPtr<UString::SharedUChar> sharedBuffer)402 void UString::BaseString::setSharedBuffer(PassRefPtr<UString::SharedUChar> sharedBuffer)
403 {
404 // The manual steps below are because m_sharedBuffer can't be a RefPtr. m_sharedBuffer
405 // is in a union with another variable to avoid making BaseString any larger.
406 if (m_sharedBuffer)
407 m_sharedBuffer->deref();
408 m_sharedBuffer = sharedBuffer.releaseRef();
409 }
410
slowIsBufferReadOnly()411 bool UString::BaseString::slowIsBufferReadOnly()
412 {
413 // The buffer may not be modified as soon as the underlying data has been shared with another class.
414 if (m_sharedBuffer->isShared())
415 return true;
416
417 // At this point, we know it that the underlying buffer isn't shared outside of this base class,
418 // so get rid of m_sharedBuffer.
419 OwnPtr<OwnFastMallocPtr<UChar> > mallocPtr(m_sharedBuffer->release());
420 UChar* unsharedBuf = const_cast<UChar*>(mallocPtr->release());
421 setSharedBuffer(0);
422 preCapacity += (buf - unsharedBuf);
423 buf = unsharedBuf;
424 return false;
425 }
426
427 // Put these early so they can be inlined.
expandedSize(size_t capacitySize,size_t precapacitySize)428 static inline size_t expandedSize(size_t capacitySize, size_t precapacitySize)
429 {
430 // Combine capacitySize & precapacitySize to produce a single size to allocate,
431 // check that doing so does not result in overflow.
432 size_t size = capacitySize + precapacitySize;
433 if (size < capacitySize)
434 return overflowIndicator();
435
436 // Small Strings (up to 4 pages):
437 // Expand the allocation size to 112.5% of the amount requested. This is largely sicking
438 // to our previous policy, however 112.5% is cheaper to calculate.
439 if (size < 0x4000) {
440 size_t expandedSize = ((size + (size >> 3)) | 15) + 1;
441 // Given the limited range within which we calculate the expansion in this
442 // fashion the above calculation should never overflow.
443 ASSERT(expandedSize >= size);
444 ASSERT(expandedSize < maxUChars());
445 return expandedSize;
446 }
447
448 // Medium Strings (up to 128 pages):
449 // For pages covering multiple pages over-allocation is less of a concern - any unused
450 // space will not be paged in if it is not used, so this is purely a VM overhead. For
451 // these strings allocate 2x the requested size.
452 if (size < 0x80000) {
453 size_t expandedSize = ((size + size) | 0xfff) + 1;
454 // Given the limited range within which we calculate the expansion in this
455 // fashion the above calculation should never overflow.
456 ASSERT(expandedSize >= size);
457 ASSERT(expandedSize < maxUChars());
458 return expandedSize;
459 }
460
461 // Large Strings (to infinity and beyond!):
462 // Revert to our 112.5% policy - probably best to limit the amount of unused VM we allow
463 // any individual string be responsible for.
464 size_t expandedSize = ((size + (size >> 3)) | 0xfff) + 1;
465
466 // Check for overflow - any result that is at least as large as requested (but
467 // still below the limit) is okay.
468 if ((expandedSize >= size) && (expandedSize < maxUChars()))
469 return expandedSize;
470 return overflowIndicator();
471 }
472
expandCapacity(UString::Rep * rep,int requiredLength)473 static inline bool expandCapacity(UString::Rep* rep, int requiredLength)
474 {
475 rep->checkConsistency();
476 ASSERT(!rep->baseString()->isBufferReadOnly());
477
478 UString::BaseString* base = rep->baseString();
479
480 if (requiredLength > base->capacity) {
481 size_t newCapacity = expandedSize(requiredLength, base->preCapacity);
482 UChar* oldBuf = base->buf;
483 base->buf = reallocChars(base->buf, newCapacity);
484 if (!base->buf) {
485 base->buf = oldBuf;
486 return false;
487 }
488 base->capacity = newCapacity - base->preCapacity;
489 }
490 if (requiredLength > base->usedCapacity)
491 base->usedCapacity = requiredLength;
492
493 rep->checkConsistency();
494 return true;
495 }
496
reserveCapacity(int capacity)497 bool UString::Rep::reserveCapacity(int capacity)
498 {
499 // If this is an empty string there is no point 'growing' it - just allocate a new one.
500 // If the BaseString is shared with another string that is using more capacity than this
501 // string is, then growing the buffer won't help.
502 // If the BaseString's buffer is readonly, then it isn't allowed to grow.
503 UString::BaseString* base = baseString();
504 if (!base->buf || !base->capacity || (offset + len) != base->usedCapacity || base->isBufferReadOnly())
505 return false;
506
507 // If there is already sufficient capacity, no need to grow!
508 if (capacity <= base->capacity)
509 return true;
510
511 checkConsistency();
512
513 size_t newCapacity = expandedSize(capacity, base->preCapacity);
514 UChar* oldBuf = base->buf;
515 base->buf = reallocChars(base->buf, newCapacity);
516 if (!base->buf) {
517 base->buf = oldBuf;
518 return false;
519 }
520 base->capacity = newCapacity - base->preCapacity;
521
522 checkConsistency();
523 return true;
524 }
525
expandCapacity(int requiredLength)526 void UString::expandCapacity(int requiredLength)
527 {
528 if (!JSC::expandCapacity(m_rep.get(), requiredLength))
529 makeNull();
530 }
531
expandPreCapacity(int requiredPreCap)532 void UString::expandPreCapacity(int requiredPreCap)
533 {
534 m_rep->checkConsistency();
535 ASSERT(!m_rep->baseString()->isBufferReadOnly());
536
537 BaseString* base = m_rep->baseString();
538
539 if (requiredPreCap > base->preCapacity) {
540 size_t newCapacity = expandedSize(requiredPreCap, base->capacity);
541 int delta = newCapacity - base->capacity - base->preCapacity;
542
543 UChar* newBuf = allocChars(newCapacity);
544 if (!newBuf) {
545 makeNull();
546 return;
547 }
548 copyChars(newBuf + delta, base->buf, base->capacity + base->preCapacity);
549 fastFree(base->buf);
550 base->buf = newBuf;
551
552 base->preCapacity = newCapacity - base->capacity;
553 }
554 if (requiredPreCap > base->usedPreCapacity)
555 base->usedPreCapacity = requiredPreCap;
556
557 m_rep->checkConsistency();
558 }
559
createRep(const char * c)560 static PassRefPtr<UString::Rep> createRep(const char* c)
561 {
562 if (!c)
563 return &UString::Rep::null();
564
565 if (!c[0])
566 return &UString::Rep::empty();
567
568 size_t length = strlen(c);
569 UChar* d = allocChars(length);
570 if (!d)
571 return &UString::Rep::null();
572 else {
573 for (size_t i = 0; i < length; i++)
574 d[i] = static_cast<unsigned char>(c[i]); // use unsigned char to zero-extend instead of sign-extend
575 return UString::Rep::create(d, static_cast<int>(length));
576 }
577
578 }
579
UString(const char * c)580 UString::UString(const char* c)
581 : m_rep(createRep(c))
582 {
583 }
584
UString(const UChar * c,int length)585 UString::UString(const UChar* c, int length)
586 {
587 if (length == 0)
588 m_rep = &Rep::empty();
589 else
590 m_rep = Rep::createCopying(c, length);
591 }
592
UString(UChar * c,int length,bool copy)593 UString::UString(UChar* c, int length, bool copy)
594 {
595 if (length == 0)
596 m_rep = &Rep::empty();
597 else if (copy)
598 m_rep = Rep::createCopying(c, length);
599 else
600 m_rep = Rep::create(c, length);
601 }
602
UString(const Vector<UChar> & buffer)603 UString::UString(const Vector<UChar>& buffer)
604 {
605 if (!buffer.size())
606 m_rep = &Rep::empty();
607 else
608 m_rep = Rep::createCopying(buffer.data(), buffer.size());
609 }
610
newCapacityWithOverflowCheck(const int currentCapacity,const int extendLength,const bool plusOne=false)611 static ALWAYS_INLINE int newCapacityWithOverflowCheck(const int currentCapacity, const int extendLength, const bool plusOne = false)
612 {
613 ASSERT_WITH_MESSAGE(extendLength >= 0, "extendedLength = %d", extendLength);
614
615 const int plusLength = plusOne ? 1 : 0;
616 if (currentCapacity > std::numeric_limits<int>::max() - extendLength - plusLength)
617 CRASH();
618
619 return currentCapacity + extendLength + plusLength;
620 }
621
concatenate(PassRefPtr<UString::Rep> r,const UChar * tData,int tSize)622 static ALWAYS_INLINE PassRefPtr<UString::Rep> concatenate(PassRefPtr<UString::Rep> r, const UChar* tData, int tSize)
623 {
624 RefPtr<UString::Rep> rep = r;
625
626 rep->checkConsistency();
627
628 int thisSize = rep->size();
629 int thisOffset = rep->offset;
630 int length = thisSize + tSize;
631 UString::BaseString* base = rep->baseString();
632
633 // possible cases:
634 if (tSize == 0) {
635 // t is empty
636 } else if (thisSize == 0) {
637 // this is empty
638 rep = UString::Rep::createCopying(tData, tSize);
639 } else if (rep == base && !base->isShared()) {
640 // this is direct and has refcount of 1 (so we can just alter it directly)
641 if (!expandCapacity(rep.get(), newCapacityWithOverflowCheck(thisOffset, length)))
642 rep = &UString::Rep::null();
643 if (rep->data()) {
644 copyChars(rep->data() + thisSize, tData, tSize);
645 rep->len = length;
646 rep->_hash = 0;
647 }
648 } else if (thisOffset + thisSize == base->usedCapacity && thisSize >= minShareSize && !base->isBufferReadOnly()) {
649 // this reaches the end of the buffer - extend it if it's long enough to append to
650 if (!expandCapacity(rep.get(), newCapacityWithOverflowCheck(thisOffset, length)))
651 rep = &UString::Rep::null();
652 if (rep->data()) {
653 copyChars(rep->data() + thisSize, tData, tSize);
654 rep = UString::Rep::create(rep, 0, length);
655 }
656 } else {
657 // This is shared in some way that prevents us from modifying base, so we must make a whole new string.
658 size_t newCapacity = expandedSize(length, 0);
659 UChar* d = allocChars(newCapacity);
660 if (!d)
661 rep = &UString::Rep::null();
662 else {
663 copyChars(d, rep->data(), thisSize);
664 copyChars(d + thisSize, tData, tSize);
665 rep = UString::Rep::create(d, length);
666 rep->baseString()->capacity = newCapacity;
667 }
668 }
669
670 rep->checkConsistency();
671
672 return rep.release();
673 }
674
concatenate(PassRefPtr<UString::Rep> r,const char * t)675 static ALWAYS_INLINE PassRefPtr<UString::Rep> concatenate(PassRefPtr<UString::Rep> r, const char* t)
676 {
677 RefPtr<UString::Rep> rep = r;
678
679 rep->checkConsistency();
680
681 int thisSize = rep->size();
682 int thisOffset = rep->offset;
683 int tSize = static_cast<int>(strlen(t));
684 int length = thisSize + tSize;
685 UString::BaseString* base = rep->baseString();
686
687 // possible cases:
688 if (thisSize == 0) {
689 // this is empty
690 rep = createRep(t);
691 } else if (tSize == 0) {
692 // t is empty, we'll just return *this below.
693 } else if (rep == base && !base->isShared()) {
694 // this is direct and has refcount of 1 (so we can just alter it directly)
695 expandCapacity(rep.get(), newCapacityWithOverflowCheck(thisOffset, length));
696 UChar* d = rep->data();
697 if (d) {
698 for (int i = 0; i < tSize; ++i)
699 d[thisSize + i] = static_cast<unsigned char>(t[i]); // use unsigned char to zero-extend instead of sign-extend
700 rep->len = length;
701 rep->_hash = 0;
702 }
703 } else if (thisOffset + thisSize == base->usedCapacity && thisSize >= minShareSize && !base->isBufferReadOnly()) {
704 // this string reaches the end of the buffer - extend it
705 expandCapacity(rep.get(), newCapacityWithOverflowCheck(thisOffset, length));
706 UChar* d = rep->data();
707 if (d) {
708 for (int i = 0; i < tSize; ++i)
709 d[thisSize + i] = static_cast<unsigned char>(t[i]); // use unsigned char to zero-extend instead of sign-extend
710 rep = UString::Rep::create(rep, 0, length);
711 }
712 } else {
713 // This is shared in some way that prevents us from modifying base, so we must make a whole new string.
714 size_t newCapacity = expandedSize(length, 0);
715 UChar* d = allocChars(newCapacity);
716 if (!d)
717 rep = &UString::Rep::null();
718 else {
719 copyChars(d, rep->data(), thisSize);
720 for (int i = 0; i < tSize; ++i)
721 d[thisSize + i] = static_cast<unsigned char>(t[i]); // use unsigned char to zero-extend instead of sign-extend
722 rep = UString::Rep::create(d, length);
723 rep->baseString()->capacity = newCapacity;
724 }
725 }
726
727 rep->checkConsistency();
728
729 return rep.release();
730 }
731
concatenate(UString::Rep * a,UString::Rep * b)732 PassRefPtr<UString::Rep> concatenate(UString::Rep* a, UString::Rep* b)
733 {
734 a->checkConsistency();
735 b->checkConsistency();
736
737 int aSize = a->size();
738 int bSize = b->size();
739 int aOffset = a->offset;
740
741 // possible cases:
742
743 UString::BaseString* aBase = a->baseString();
744 if (bSize == 1 && aOffset + aSize == aBase->usedCapacity && aOffset + aSize < aBase->capacity && !aBase->isBufferReadOnly()) {
745 // b is a single character (common fast case)
746 ++aBase->usedCapacity;
747 a->data()[aSize] = b->data()[0];
748 return UString::Rep::create(a, 0, aSize + 1);
749 }
750
751 // a is empty
752 if (aSize == 0)
753 return b;
754 // b is empty
755 if (bSize == 0)
756 return a;
757
758 int bOffset = b->offset;
759 int length = aSize + bSize;
760
761 UString::BaseString* bBase = b->baseString();
762 if (aOffset + aSize == aBase->usedCapacity && aSize >= minShareSize && 4 * aSize >= bSize
763 && (-bOffset != bBase->usedPreCapacity || aSize >= bSize) && !aBase->isBufferReadOnly()) {
764 // - a reaches the end of its buffer so it qualifies for shared append
765 // - also, it's at least a quarter the length of b - appending to a much shorter
766 // string does more harm than good
767 // - however, if b qualifies for prepend and is longer than a, we'd rather prepend
768
769 UString x(a);
770 x.expandCapacity(newCapacityWithOverflowCheck(aOffset, length));
771 if (!a->data() || !x.data())
772 return 0;
773 copyChars(a->data() + aSize, b->data(), bSize);
774 PassRefPtr<UString::Rep> result = UString::Rep::create(a, 0, length);
775
776 a->checkConsistency();
777 b->checkConsistency();
778 result->checkConsistency();
779
780 return result;
781 }
782
783 if (-bOffset == bBase->usedPreCapacity && bSize >= minShareSize && 4 * bSize >= aSize && !bBase->isBufferReadOnly()) {
784 // - b reaches the beginning of its buffer so it qualifies for shared prepend
785 // - also, it's at least a quarter the length of a - prepending to a much shorter
786 // string does more harm than good
787 UString y(b);
788 y.expandPreCapacity(-bOffset + aSize);
789 if (!b->data() || !y.data())
790 return 0;
791 copyChars(b->data() - aSize, a->data(), aSize);
792 PassRefPtr<UString::Rep> result = UString::Rep::create(b, -aSize, length);
793
794 a->checkConsistency();
795 b->checkConsistency();
796 result->checkConsistency();
797
798 return result;
799 }
800
801 // a does not qualify for append, and b does not qualify for prepend, gotta make a whole new string
802 size_t newCapacity = expandedSize(length, 0);
803 UChar* d = allocChars(newCapacity);
804 if (!d)
805 return 0;
806 copyChars(d, a->data(), aSize);
807 copyChars(d + aSize, b->data(), bSize);
808 PassRefPtr<UString::Rep> result = UString::Rep::create(d, length);
809 result->baseString()->capacity = newCapacity;
810
811 a->checkConsistency();
812 b->checkConsistency();
813 result->checkConsistency();
814
815 return result;
816 }
817
concatenate(UString::Rep * rep,int i)818 PassRefPtr<UString::Rep> concatenate(UString::Rep* rep, int i)
819 {
820 UChar buf[1 + sizeof(i) * 3];
821 UChar* end = buf + sizeof(buf) / sizeof(UChar);
822 UChar* p = end;
823
824 if (i == 0)
825 *--p = '0';
826 else if (i == INT_MIN) {
827 char minBuf[1 + sizeof(i) * 3];
828 sprintf(minBuf, "%d", INT_MIN);
829 return concatenate(rep, minBuf);
830 } else {
831 bool negative = false;
832 if (i < 0) {
833 negative = true;
834 i = -i;
835 }
836 while (i) {
837 *--p = static_cast<unsigned short>((i % 10) + '0');
838 i /= 10;
839 }
840 if (negative)
841 *--p = '-';
842 }
843
844 return concatenate(rep, p, static_cast<int>(end - p));
845
846 }
847
concatenate(UString::Rep * rep,double d)848 PassRefPtr<UString::Rep> concatenate(UString::Rep* rep, double d)
849 {
850 // avoid ever printing -NaN, in JS conceptually there is only one NaN value
851 if (isnan(d))
852 return concatenate(rep, "NaN");
853
854 if (d == 0.0) // stringify -0 as 0
855 d = 0.0;
856
857 char buf[80];
858 int decimalPoint;
859 int sign;
860
861 char result[80];
862 WTF::dtoa(result, d, 0, &decimalPoint, &sign, NULL);
863 int length = static_cast<int>(strlen(result));
864
865 int i = 0;
866 if (sign)
867 buf[i++] = '-';
868
869 if (decimalPoint <= 0 && decimalPoint > -6) {
870 buf[i++] = '0';
871 buf[i++] = '.';
872 for (int j = decimalPoint; j < 0; j++)
873 buf[i++] = '0';
874 strcpy(buf + i, result);
875 } else if (decimalPoint <= 21 && decimalPoint > 0) {
876 if (length <= decimalPoint) {
877 strcpy(buf + i, result);
878 i += length;
879 for (int j = 0; j < decimalPoint - length; j++)
880 buf[i++] = '0';
881 buf[i] = '\0';
882 } else {
883 strncpy(buf + i, result, decimalPoint);
884 i += decimalPoint;
885 buf[i++] = '.';
886 strcpy(buf + i, result + decimalPoint);
887 }
888 } else if (result[0] < '0' || result[0] > '9')
889 strcpy(buf + i, result);
890 else {
891 buf[i++] = result[0];
892 if (length > 1) {
893 buf[i++] = '.';
894 strcpy(buf + i, result + 1);
895 i += length - 1;
896 }
897
898 buf[i++] = 'e';
899 buf[i++] = (decimalPoint >= 0) ? '+' : '-';
900 // decimalPoint can't be more than 3 digits decimal given the
901 // nature of float representation
902 int exponential = decimalPoint - 1;
903 if (exponential < 0)
904 exponential = -exponential;
905 if (exponential >= 100)
906 buf[i++] = static_cast<char>('0' + exponential / 100);
907 if (exponential >= 10)
908 buf[i++] = static_cast<char>('0' + (exponential % 100) / 10);
909 buf[i++] = static_cast<char>('0' + exponential % 10);
910 buf[i++] = '\0';
911 }
912
913 return concatenate(rep, buf);
914 }
915
from(int i)916 UString UString::from(int i)
917 {
918 UChar buf[1 + sizeof(i) * 3];
919 UChar* end = buf + sizeof(buf) / sizeof(UChar);
920 UChar* p = end;
921
922 if (i == 0)
923 *--p = '0';
924 else if (i == INT_MIN) {
925 char minBuf[1 + sizeof(i) * 3];
926 sprintf(minBuf, "%d", INT_MIN);
927 return UString(minBuf);
928 } else {
929 bool negative = false;
930 if (i < 0) {
931 negative = true;
932 i = -i;
933 }
934 while (i) {
935 *--p = static_cast<unsigned short>((i % 10) + '0');
936 i /= 10;
937 }
938 if (negative)
939 *--p = '-';
940 }
941
942 return UString(p, static_cast<int>(end - p));
943 }
944
from(unsigned int u)945 UString UString::from(unsigned int u)
946 {
947 UChar buf[sizeof(u) * 3];
948 UChar* end = buf + sizeof(buf) / sizeof(UChar);
949 UChar* p = end;
950
951 if (u == 0)
952 *--p = '0';
953 else {
954 while (u) {
955 *--p = static_cast<unsigned short>((u % 10) + '0');
956 u /= 10;
957 }
958 }
959
960 return UString(p, static_cast<int>(end - p));
961 }
962
from(long l)963 UString UString::from(long l)
964 {
965 UChar buf[1 + sizeof(l) * 3];
966 UChar* end = buf + sizeof(buf) / sizeof(UChar);
967 UChar* p = end;
968
969 if (l == 0)
970 *--p = '0';
971 else if (l == LONG_MIN) {
972 char minBuf[1 + sizeof(l) * 3];
973 sprintf(minBuf, "%ld", LONG_MIN);
974 return UString(minBuf);
975 } else {
976 bool negative = false;
977 if (l < 0) {
978 negative = true;
979 l = -l;
980 }
981 while (l) {
982 *--p = static_cast<unsigned short>((l % 10) + '0');
983 l /= 10;
984 }
985 if (negative)
986 *--p = '-';
987 }
988
989 return UString(p, static_cast<int>(end - p));
990 }
991
from(double d)992 UString UString::from(double d)
993 {
994 // avoid ever printing -NaN, in JS conceptually there is only one NaN value
995 if (isnan(d))
996 return "NaN";
997
998 char buf[80];
999 int decimalPoint;
1000 int sign;
1001
1002 char result[80];
1003 WTF::dtoa(result, d, 0, &decimalPoint, &sign, NULL);
1004 int length = static_cast<int>(strlen(result));
1005
1006 int i = 0;
1007 if (sign)
1008 buf[i++] = '-';
1009
1010 if (decimalPoint <= 0 && decimalPoint > -6) {
1011 buf[i++] = '0';
1012 buf[i++] = '.';
1013 for (int j = decimalPoint; j < 0; j++)
1014 buf[i++] = '0';
1015 strcpy(buf + i, result);
1016 } else if (decimalPoint <= 21 && decimalPoint > 0) {
1017 if (length <= decimalPoint) {
1018 strcpy(buf + i, result);
1019 i += length;
1020 for (int j = 0; j < decimalPoint - length; j++)
1021 buf[i++] = '0';
1022 buf[i] = '\0';
1023 } else {
1024 strncpy(buf + i, result, decimalPoint);
1025 i += decimalPoint;
1026 buf[i++] = '.';
1027 strcpy(buf + i, result + decimalPoint);
1028 }
1029 } else if (result[0] < '0' || result[0] > '9')
1030 strcpy(buf + i, result);
1031 else {
1032 buf[i++] = result[0];
1033 if (length > 1) {
1034 buf[i++] = '.';
1035 strcpy(buf + i, result + 1);
1036 i += length - 1;
1037 }
1038
1039 buf[i++] = 'e';
1040 buf[i++] = (decimalPoint >= 0) ? '+' : '-';
1041 // decimalPoint can't be more than 3 digits decimal given the
1042 // nature of float representation
1043 int exponential = decimalPoint - 1;
1044 if (exponential < 0)
1045 exponential = -exponential;
1046 if (exponential >= 100)
1047 buf[i++] = static_cast<char>('0' + exponential / 100);
1048 if (exponential >= 10)
1049 buf[i++] = static_cast<char>('0' + (exponential % 100) / 10);
1050 buf[i++] = static_cast<char>('0' + exponential % 10);
1051 buf[i++] = '\0';
1052 }
1053
1054 return UString(buf);
1055 }
1056
// Builds a string by interleaving substrings of this string with separators:
// range 0, separator 0, range 1, separator 1, ... until both lists run out.
// Returns the empty string when the total length is zero and the null string
// when the buffer cannot be allocated.
UString UString::spliceSubstringsWithSeparators(const Range* substringRanges, int rangeCount, const UString* separators, int separatorCount) const
{
    m_rep->checkConsistency();

    // A single range without separators needs no copying: return either this
    // string itself or a rep that refers to a slice of it.
    if (rangeCount == 1 && separatorCount == 0) {
        const int ownLength = size();
        const int start = substringRanges[0].position;
        const int count = substringRanges[0].length;
        if (start <= 0 && count >= ownLength)
            return *this;
        return UString::Rep::create(m_rep, max(0, start), min(ownLength, count));
    }

    // Measure the result first so the buffer is allocated exactly once.
    int totalLength = 0;
    for (int rangeIndex = 0; rangeIndex < rangeCount; ++rangeIndex)
        totalLength += substringRanges[rangeIndex].length;
    for (int separatorIndex = 0; separatorIndex < separatorCount; ++separatorIndex)
        totalLength += separators[separatorIndex].size();

    if (!totalLength)
        return "";

    UChar* const buffer = allocChars(totalLength);
    if (!buffer)
        return null();

    // Alternate between ranges and separators, advancing a write pointer.
    UChar* out = buffer;
    const int rounds = max(rangeCount, separatorCount);
    for (int round = 0; round < rounds; ++round) {
        if (round < rangeCount) {
            const int pieceLength = substringRanges[round].length;
            copyChars(out, data() + substringRanges[round].position, pieceLength);
            out += pieceLength;
        }
        if (round < separatorCount) {
            const int separatorLength = separators[round].size();
            copyChars(out, separators[round].data(), separatorLength);
            out += separatorLength;
        }
    }

    return UString::Rep::create(buffer, totalLength);
}
1098
// Returns a copy of this string in which the rangeLength characters starting
// at rangeStart are replaced by `replacement`. Returns the empty string when
// the result has no characters and the null string when allocation fails.
UString UString::replaceRange(int rangeStart, int rangeLength, const UString& replacement) const
{
    m_rep->checkConsistency();

    const int insertedLength = replacement.size();
    const int resultLength = size() - rangeLength + insertedLength;
    if (!resultLength)
        return "";

    UChar* const result = allocChars(resultLength);
    if (!result)
        return null();

    // Part before the range.
    copyChars(result, data(), rangeStart);

    // The replacement text.
    UChar* const insertionPoint = result + rangeStart;
    copyChars(insertionPoint, replacement.data(), insertedLength);

    // Part after the range.
    const int tailStart = rangeStart + rangeLength;
    copyChars(insertionPoint + insertedLength, data() + tailStart, size() - tailStart);

    return UString::Rep::create(result, resultLength);
}
1119
1120
// Appends the contents of t to this string and returns *this.
// Depending on how the current rep relates to its base string, the buffer is
// either extended in place or a completely new buffer is allocated. If an
// allocation fails the string becomes null.
UString& UString::append(const UString &t)
{
    m_rep->checkConsistency();
    t.rep()->checkConsistency();

    const int ownLength = size();
    const int ownOffset = m_rep->offset;
    const int addedLength = t.size();
    const int combinedLength = ownLength + addedLength;
    BaseString* base = m_rep->baseString();

    if (!ownLength) {
        // Nothing of our own to keep, so just become t.
        *this = t;
    } else if (addedLength) {
        // (An empty t leaves this string untouched, hence the guard above.)
        if (m_rep == base && !base->isShared()) {
            // The rep is its own base and is not shared: grow and modify it directly.
            expandCapacity(newCapacityWithOverflowCheck(ownOffset, combinedLength));
            if (data()) {
                copyChars(m_rep->data() + ownLength, t.data(), addedLength);
                m_rep->len = combinedLength;
                m_rep->_hash = 0;
            }
        } else if (ownOffset + ownLength == base->usedCapacity && ownLength >= minShareSize && !base->isBufferReadOnly()) {
            // This string ends exactly where the used part of the buffer ends and is
            // long enough to be worth sharing: extend the buffer and create a new rep over it.
            expandCapacity(newCapacityWithOverflowCheck(ownOffset, combinedLength));
            if (data()) {
                copyChars(m_rep->data() + ownLength, t.data(), addedLength);
                m_rep = Rep::create(m_rep, 0, combinedLength);
            }
        } else {
            // The base cannot be modified because of how it is shared; build a whole new string.
            size_t freshCapacity = expandedSize(combinedLength, 0);
            UChar* fresh = allocChars(freshCapacity);
            if (fresh) {
                copyChars(fresh, data(), ownLength);
                copyChars(fresh + ownLength, t.data(), addedLength);
                m_rep = Rep::create(fresh, combinedLength);
                m_rep->baseString()->capacity = freshCapacity;
            } else
                makeNull();
        }
    }

    m_rep->checkConsistency();
    t.rep()->checkConsistency();

    return *this;
}
1172
// Appends tSize characters starting at tData and returns *this.
// The work is delegated to concatenate(): the current rep is released to it,
// and the rep it returns becomes this string's rep.
UString& UString::append(const UChar* tData, int tSize)
{
    m_rep = concatenate(m_rep.release(), tData, tSize);
    return *this;
}
1178
// Appends the integer i and returns *this.
// The current rep and i are passed to concatenate(); the rep it returns
// replaces this string's rep.
UString& UString::appendNumeric(int i)
{
    m_rep = concatenate(rep(), i);
    return *this;
}
1184
// Appends the double d and returns *this.
// The current rep and d are passed to concatenate(); the rep it returns
// replaces this string's rep.
UString& UString::appendNumeric(double d)
{
    m_rep = concatenate(rep(), d);
    return *this;
}
1190
// Appends the C string t and returns *this.
// The work is delegated to concatenate(): the current rep is released to it,
// and the rep it returns becomes this string's rep.
UString& UString::append(const char* t)
{
    m_rep = concatenate(m_rep.release(), t);
    return *this;
}
1196
append(UChar c)1197 UString& UString::append(UChar c)
1198 {
1199 m_rep->checkConsistency();
1200
1201 int thisOffset = m_rep->offset;
1202 int length = size();
1203 BaseString* base = m_rep->baseString();
1204
1205 // possible cases:
1206 if (length == 0) {
1207 // this is empty - must make a new m_rep because we don't want to pollute the shared empty one
1208 size_t newCapacity = expandedSize(1, 0);
1209 UChar* d = allocChars(newCapacity);
1210 if (!d)
1211 makeNull();
1212 else {
1213 d[0] = c;
1214 m_rep = Rep::create(d, 1);
1215 m_rep->baseString()->capacity = newCapacity;
1216 }
1217 } else if (m_rep == base && !base->isShared()) {
1218 // this is direct and has refcount of 1 (so we can just alter it directly)
1219 expandCapacity(newCapacityWithOverflowCheck(thisOffset, length, true));
1220 UChar* d = m_rep->data();
1221 if (d) {
1222 d[length] = c;
1223 m_rep->len = length + 1;
1224 m_rep->_hash = 0;
1225 }
1226 } else if (thisOffset + length == base->usedCapacity && length >= minShareSize && !base->isBufferReadOnly()) {
1227 // this reaches the end of the string - extend it and share
1228 expandCapacity(newCapacityWithOverflowCheck(thisOffset, length, true));
1229 UChar* d = m_rep->data();
1230 if (d) {
1231 d[length] = c;
1232 m_rep = Rep::create(m_rep, 0, length + 1);
1233 }
1234 } else {
1235 // This is shared in some way that prevents us from modifying base, so we must make a whole new string.
1236 size_t newCapacity = expandedSize(length + 1, 0);
1237 UChar* d = allocChars(newCapacity);
1238 if (!d)
1239 makeNull();
1240 else {
1241 copyChars(d, data(), length);
1242 d[length] = c;
1243 m_rep = Rep::create(d, length + 1);
1244 m_rep->baseString()->capacity = newCapacity;
1245 }
1246 }
1247
1248 m_rep->checkConsistency();
1249
1250 return *this;
1251 }
1252
getCString(CStringBuffer & buffer) const1253 bool UString::getCString(CStringBuffer& buffer) const
1254 {
1255 int length = size();
1256 int neededSize = length + 1;
1257 buffer.resize(neededSize);
1258 char* buf = buffer.data();
1259
1260 UChar ored = 0;
1261 const UChar* p = data();
1262 char* q = buf;
1263 const UChar* limit = p + length;
1264 while (p != limit) {
1265 UChar c = p[0];
1266 ored |= c;
1267 *q = static_cast<char>(c);
1268 ++p;
1269 ++q;
1270 }
1271 *q = '\0';
1272
1273 return !(ored & 0xFF00);
1274 }
1275
ascii() const1276 char* UString::ascii() const
1277 {
1278 int length = size();
1279 int neededSize = length + 1;
1280 delete[] statBuffer;
1281 statBuffer = new char[neededSize];
1282
1283 const UChar* p = data();
1284 char* q = statBuffer;
1285 const UChar* limit = p + length;
1286 while (p != limit) {
1287 *q = static_cast<char>(p[0]);
1288 ++p;
1289 ++q;
1290 }
1291 *q = '\0';
1292
1293 return statBuffer;
1294 }
1295
operator =(const char * c)1296 UString& UString::operator=(const char* c)
1297 {
1298 if (!c) {
1299 m_rep = &Rep::null();
1300 return *this;
1301 }
1302
1303 if (!c[0]) {
1304 m_rep = &Rep::empty();
1305 return *this;
1306 }
1307
1308 int l = static_cast<int>(strlen(c));
1309 UChar* d;
1310 BaseString* base = m_rep->baseString();
1311 if (!base->isShared() && l <= base->capacity && m_rep == base && m_rep->offset == 0 && base->preCapacity == 0) {
1312 d = base->buf;
1313 m_rep->_hash = 0;
1314 m_rep->len = l;
1315 } else {
1316 d = allocChars(l);
1317 if (!d) {
1318 makeNull();
1319 return *this;
1320 }
1321 m_rep = Rep::create(d, l);
1322 }
1323 for (int i = 0; i < l; i++)
1324 d[i] = static_cast<unsigned char>(c[i]); // use unsigned char to zero-extend instead of sign-extend
1325
1326 return *this;
1327 }
1328
is8Bit() const1329 bool UString::is8Bit() const
1330 {
1331 const UChar* u = data();
1332 const UChar* limit = u + size();
1333 while (u < limit) {
1334 if (u[0] > 0xFF)
1335 return false;
1336 ++u;
1337 }
1338
1339 return true;
1340 }
1341
operator [](int pos) const1342 UChar UString::operator[](int pos) const
1343 {
1344 if (pos >= size())
1345 return '\0';
1346 return data()[pos];
1347 }
1348
toDouble(bool tolerateTrailingJunk,bool tolerateEmptyString) const1349 double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const
1350 {
1351 if (size() == 1) {
1352 UChar c = data()[0];
1353 if (isASCIIDigit(c))
1354 return c - '0';
1355 if (isASCIISpace(c) && tolerateEmptyString)
1356 return 0;
1357 return NaN;
1358 }
1359
1360 // FIXME: If tolerateTrailingJunk is true, then we want to tolerate non-8-bit junk
1361 // after the number, so this is too strict a check.
1362 CStringBuffer s;
1363 if (!getCString(s))
1364 return NaN;
1365 const char* c = s.data();
1366
1367 // skip leading white space
1368 while (isASCIISpace(*c))
1369 c++;
1370
1371 // empty string ?
1372 if (*c == '\0')
1373 return tolerateEmptyString ? 0.0 : NaN;
1374
1375 double d;
1376
1377 // hex number ?
1378 if (*c == '0' && (*(c + 1) == 'x' || *(c + 1) == 'X')) {
1379 const char* firstDigitPosition = c + 2;
1380 c++;
1381 d = 0.0;
1382 while (*(++c)) {
1383 if (*c >= '0' && *c <= '9')
1384 d = d * 16.0 + *c - '0';
1385 else if ((*c >= 'A' && *c <= 'F') || (*c >= 'a' && *c <= 'f'))
1386 d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0;
1387 else
1388 break;
1389 }
1390
1391 if (d >= mantissaOverflowLowerBound)
1392 d = parseIntOverflow(firstDigitPosition, c - firstDigitPosition, 16);
1393 } else {
1394 // regular number ?
1395 char* end;
1396 d = WTF::strtod(c, &end);
1397 if ((d != 0.0 || end != c) && d != Inf && d != -Inf) {
1398 c = end;
1399 } else {
1400 double sign = 1.0;
1401
1402 if (*c == '+')
1403 c++;
1404 else if (*c == '-') {
1405 sign = -1.0;
1406 c++;
1407 }
1408
1409 // We used strtod() to do the conversion. However, strtod() handles
1410 // infinite values slightly differently than JavaScript in that it
1411 // converts the string "inf" with any capitalization to infinity,
1412 // whereas the ECMA spec requires that it be converted to NaN.
1413
1414 if (c[0] == 'I' && c[1] == 'n' && c[2] == 'f' && c[3] == 'i' && c[4] == 'n' && c[5] == 'i' && c[6] == 't' && c[7] == 'y') {
1415 d = sign * Inf;
1416 c += 8;
1417 } else if ((d == Inf || d == -Inf) && *c != 'I' && *c != 'i')
1418 c = end;
1419 else
1420 return NaN;
1421 }
1422 }
1423
1424 // allow trailing white space
1425 while (isASCIISpace(*c))
1426 c++;
1427 // don't allow anything after - unless tolerant=true
1428 if (!tolerateTrailingJunk && *c != '\0')
1429 d = NaN;
1430
1431 return d;
1432 }
1433
// Convenience overload: equivalent to toDouble(tolerateTrailingJunk, true),
// i.e. the empty string is tolerated.
double UString::toDouble(bool tolerateTrailingJunk) const
{
    return toDouble(tolerateTrailingJunk, true);
}
1438
// Convenience overload: equivalent to toDouble(false, true), i.e. trailing
// junk is not tolerated but the empty string is.
double UString::toDouble() const
{
    return toDouble(false, true);
}
1443
toUInt32(bool * ok) const1444 uint32_t UString::toUInt32(bool* ok) const
1445 {
1446 double d = toDouble();
1447 bool b = true;
1448
1449 if (d != static_cast<uint32_t>(d)) {
1450 b = false;
1451 d = 0;
1452 }
1453
1454 if (ok)
1455 *ok = b;
1456
1457 return static_cast<uint32_t>(d);
1458 }
1459
toUInt32(bool * ok,bool tolerateEmptyString) const1460 uint32_t UString::toUInt32(bool* ok, bool tolerateEmptyString) const
1461 {
1462 double d = toDouble(false, tolerateEmptyString);
1463 bool b = true;
1464
1465 if (d != static_cast<uint32_t>(d)) {
1466 b = false;
1467 d = 0;
1468 }
1469
1470 if (ok)
1471 *ok = b;
1472
1473 return static_cast<uint32_t>(d);
1474 }
1475
toStrictUInt32(bool * ok) const1476 uint32_t UString::toStrictUInt32(bool* ok) const
1477 {
1478 if (ok)
1479 *ok = false;
1480
1481 // Empty string is not OK.
1482 int len = m_rep->len;
1483 if (len == 0)
1484 return 0;
1485 const UChar* p = m_rep->data();
1486 unsigned short c = p[0];
1487
1488 // If the first digit is 0, only 0 itself is OK.
1489 if (c == '0') {
1490 if (len == 1 && ok)
1491 *ok = true;
1492 return 0;
1493 }
1494
1495 // Convert to UInt32, checking for overflow.
1496 uint32_t i = 0;
1497 while (1) {
1498 // Process character, turning it into a digit.
1499 if (c < '0' || c > '9')
1500 return 0;
1501 const unsigned d = c - '0';
1502
1503 // Multiply by 10, checking for overflow out of 32 bits.
1504 if (i > 0xFFFFFFFFU / 10)
1505 return 0;
1506 i *= 10;
1507
1508 // Add in the digit, checking for overflow out of 32 bits.
1509 const unsigned max = 0xFFFFFFFFU - d;
1510 if (i > max)
1511 return 0;
1512 i += d;
1513
1514 // Handle end of string.
1515 if (--len == 0) {
1516 if (ok)
1517 *ok = true;
1518 return i;
1519 }
1520
1521 // Get next character.
1522 c = *(++p);
1523 }
1524 }
1525
find(const UString & f,int pos) const1526 int UString::find(const UString& f, int pos) const
1527 {
1528 int fsz = f.size();
1529
1530 if (pos < 0)
1531 pos = 0;
1532
1533 if (fsz == 1) {
1534 UChar ch = f[0];
1535 const UChar* end = data() + size();
1536 for (const UChar* c = data() + pos; c < end; c++) {
1537 if (*c == ch)
1538 return static_cast<int>(c - data());
1539 }
1540 return -1;
1541 }
1542
1543 int sz = size();
1544 if (sz < fsz)
1545 return -1;
1546 if (fsz == 0)
1547 return pos;
1548 const UChar* end = data() + sz - fsz;
1549 int fsizeminusone = (fsz - 1) * sizeof(UChar);
1550 const UChar* fdata = f.data();
1551 unsigned short fchar = fdata[0];
1552 ++fdata;
1553 for (const UChar* c = data() + pos; c <= end; c++) {
1554 if (c[0] == fchar && !memcmp(c + 1, fdata, fsizeminusone))
1555 return static_cast<int>(c - data());
1556 }
1557
1558 return -1;
1559 }
1560
find(UChar ch,int pos) const1561 int UString::find(UChar ch, int pos) const
1562 {
1563 if (pos < 0)
1564 pos = 0;
1565 const UChar* end = data() + size();
1566 for (const UChar* c = data() + pos; c < end; c++) {
1567 if (*c == ch)
1568 return static_cast<int>(c - data());
1569 }
1570
1571 return -1;
1572 }
1573
rfind(const UString & f,int pos) const1574 int UString::rfind(const UString& f, int pos) const
1575 {
1576 int sz = size();
1577 int fsz = f.size();
1578 if (sz < fsz)
1579 return -1;
1580 if (pos < 0)
1581 pos = 0;
1582 if (pos > sz - fsz)
1583 pos = sz - fsz;
1584 if (fsz == 0)
1585 return pos;
1586 int fsizeminusone = (fsz - 1) * sizeof(UChar);
1587 const UChar* fdata = f.data();
1588 for (const UChar* c = data() + pos; c >= data(); c--) {
1589 if (*c == *fdata && !memcmp(c + 1, fdata + 1, fsizeminusone))
1590 return static_cast<int>(c - data());
1591 }
1592
1593 return -1;
1594 }
1595
rfind(UChar ch,int pos) const1596 int UString::rfind(UChar ch, int pos) const
1597 {
1598 if (isEmpty())
1599 return -1;
1600 if (pos + 1 >= size())
1601 pos = size() - 1;
1602 for (const UChar* c = data() + pos; c >= data(); c--) {
1603 if (*c == ch)
1604 return static_cast<int>(c - data());
1605 }
1606
1607 return -1;
1608 }
1609
substr(int pos,int len) const1610 UString UString::substr(int pos, int len) const
1611 {
1612 int s = size();
1613
1614 if (pos < 0)
1615 pos = 0;
1616 else if (pos >= s)
1617 pos = s;
1618 if (len < 0)
1619 len = s;
1620 if (pos + len >= s)
1621 len = s - pos;
1622
1623 if (pos == 0 && len == s)
1624 return *this;
1625
1626 return UString(Rep::create(m_rep, pos, len));
1627 }
1628
operator ==(const UString & s1,const char * s2)1629 bool operator==(const UString& s1, const char *s2)
1630 {
1631 if (s2 == 0)
1632 return s1.isEmpty();
1633
1634 const UChar* u = s1.data();
1635 const UChar* uend = u + s1.size();
1636 while (u != uend && *s2) {
1637 if (u[0] != (unsigned char)*s2)
1638 return false;
1639 s2++;
1640 u++;
1641 }
1642
1643 return u == uend && *s2 == 0;
1644 }
1645
operator <(const UString & s1,const UString & s2)1646 bool operator<(const UString& s1, const UString& s2)
1647 {
1648 const int l1 = s1.size();
1649 const int l2 = s2.size();
1650 const int lmin = l1 < l2 ? l1 : l2;
1651 const UChar* c1 = s1.data();
1652 const UChar* c2 = s2.data();
1653 int l = 0;
1654 while (l < lmin && *c1 == *c2) {
1655 c1++;
1656 c2++;
1657 l++;
1658 }
1659 if (l < lmin)
1660 return (c1[0] < c2[0]);
1661
1662 return (l1 < l2);
1663 }
1664
operator >(const UString & s1,const UString & s2)1665 bool operator>(const UString& s1, const UString& s2)
1666 {
1667 const int l1 = s1.size();
1668 const int l2 = s2.size();
1669 const int lmin = l1 < l2 ? l1 : l2;
1670 const UChar* c1 = s1.data();
1671 const UChar* c2 = s2.data();
1672 int l = 0;
1673 while (l < lmin && *c1 == *c2) {
1674 c1++;
1675 c2++;
1676 l++;
1677 }
1678 if (l < lmin)
1679 return (c1[0] > c2[0]);
1680
1681 return (l1 > l2);
1682 }
1683
compare(const UString & s1,const UString & s2)1684 int compare(const UString& s1, const UString& s2)
1685 {
1686 const int l1 = s1.size();
1687 const int l2 = s2.size();
1688 const int lmin = l1 < l2 ? l1 : l2;
1689 const UChar* c1 = s1.data();
1690 const UChar* c2 = s2.data();
1691 int l = 0;
1692 while (l < lmin && *c1 == *c2) {
1693 c1++;
1694 c2++;
1695 l++;
1696 }
1697
1698 if (l < lmin)
1699 return (c1[0] > c2[0]) ? 1 : -1;
1700
1701 if (l1 == l2)
1702 return 0;
1703
1704 return (l1 > l2) ? 1 : -1;
1705 }
1706
equal(const UString::Rep * r,const UString::Rep * b)1707 bool equal(const UString::Rep* r, const UString::Rep* b)
1708 {
1709 int length = r->len;
1710 if (length != b->len)
1711 return false;
1712 const UChar* d = r->data();
1713 const UChar* s = b->data();
1714 for (int i = 0; i != length; ++i) {
1715 if (d[i] != s[i])
1716 return false;
1717 }
1718 return true;
1719 }
1720
UTF8String(bool strict) const1721 CString UString::UTF8String(bool strict) const
1722 {
1723 // Allocate a buffer big enough to hold all the characters.
1724 const int length = size();
1725 Vector<char, 1024> buffer(length * 3);
1726
1727 // Convert to runs of 8-bit characters.
1728 char* p = buffer.data();
1729 const UChar* d = reinterpret_cast<const UChar*>(&data()[0]);
1730 ConversionResult result = convertUTF16ToUTF8(&d, d + length, &p, p + buffer.size(), strict);
1731 if (result != conversionOK)
1732 return CString();
1733
1734 return CString(buffer.data(), p - buffer.data());
1735 }
1736
// Resets this string's rep to the one returned by Rep::null().
// For use in error handling code paths -- having this not be inlined helps avoid PIC branches to fetch the global on Mac OS X.
NEVER_INLINE void UString::makeNull()
{
    m_rep = &Rep::null();
}
1742
// Returns a pointer to the rep returned by Rep::null().
// For use in error handling code paths -- having this not be inlined helps avoid PIC branches to fetch the global on Mac OS X.
NEVER_INLINE UString::Rep* UString::nullRep()
{
    return &Rep::null();
}
1748
1749 } // namespace JSC
1750