1 /*
2 * (C) 1999 Lars Knoll (knoll@kde.org)
3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010 Apple Inc. All rights reserved.
4 * Copyright (C) 2007-2009 Torch Mobile, Inc.
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 */
21
22 #include "config.h"
23 #include "WTFString.h"
24
25 #include <stdarg.h>
26 #include <wtf/ASCIICType.h>
27 #include <wtf/text/CString.h>
28 #include <wtf/StringExtras.h>
29 #include <wtf/Vector.h>
30 #include <wtf/dtoa.h>
31 #include <wtf/unicode/UTF8.h>
32 #include <wtf/unicode/Unicode.h>
33
34 using namespace std;
35
36 namespace WTF {
37
38 using namespace Unicode;
39 using namespace std;
40
41 // Construct a string with UTF-16 data.
String(const UChar * characters,unsigned length)42 String::String(const UChar* characters, unsigned length)
43 : m_impl(characters ? StringImpl::create(characters, length) : 0)
44 {
45 }
46
47 // Construct a string with UTF-16 data, from a null-terminated source.
String(const UChar * str)48 String::String(const UChar* str)
49 {
50 if (!str)
51 return;
52
53 size_t len = 0;
54 while (str[len] != UChar(0))
55 len++;
56
57 if (len > numeric_limits<unsigned>::max())
58 CRASH();
59
60 m_impl = StringImpl::create(str, len);
61 }
62
63 // Construct a string with latin1 data.
String(const char * characters,unsigned length)64 String::String(const char* characters, unsigned length)
65 : m_impl(characters ? StringImpl::create(characters, length) : 0)
66 {
67 }
68
69 // Construct a string with latin1 data, from a null-terminated source.
String(const char * characters)70 String::String(const char* characters)
71 : m_impl(characters ? StringImpl::create(characters) : 0)
72 {
73 }
74
append(const String & str)75 void String::append(const String& str)
76 {
77 if (str.isEmpty())
78 return;
79
80 // FIXME: This is extremely inefficient. So much so that we might want to take this
81 // out of String's API. We can make it better by optimizing the case where exactly
82 // one String is pointing at this StringImpl, but even then it's going to require a
83 // call to fastMalloc every single time.
84 if (str.m_impl) {
85 if (m_impl) {
86 UChar* data;
87 if (str.length() > numeric_limits<unsigned>::max() - m_impl->length())
88 CRASH();
89 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->length() + str.length(), data);
90 memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar));
91 memcpy(data + m_impl->length(), str.characters(), str.length() * sizeof(UChar));
92 m_impl = newImpl.release();
93 } else
94 m_impl = str.m_impl;
95 }
96 }
97
append(char c)98 void String::append(char c)
99 {
100 // FIXME: This is extremely inefficient. So much so that we might want to take this
101 // out of String's API. We can make it better by optimizing the case where exactly
102 // one String is pointing at this StringImpl, but even then it's going to require a
103 // call to fastMalloc every single time.
104 if (m_impl) {
105 UChar* data;
106 if (m_impl->length() >= numeric_limits<unsigned>::max())
107 CRASH();
108 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->length() + 1, data);
109 memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar));
110 data[m_impl->length()] = c;
111 m_impl = newImpl.release();
112 } else
113 m_impl = StringImpl::create(&c, 1);
114 }
115
append(UChar c)116 void String::append(UChar c)
117 {
118 // FIXME: This is extremely inefficient. So much so that we might want to take this
119 // out of String's API. We can make it better by optimizing the case where exactly
120 // one String is pointing at this StringImpl, but even then it's going to require a
121 // call to fastMalloc every single time.
122 if (m_impl) {
123 UChar* data;
124 if (m_impl->length() >= numeric_limits<unsigned>::max())
125 CRASH();
126 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(m_impl->length() + 1, data);
127 memcpy(data, m_impl->characters(), m_impl->length() * sizeof(UChar));
128 data[m_impl->length()] = c;
129 m_impl = newImpl.release();
130 } else
131 m_impl = StringImpl::create(&c, 1);
132 }
133
// Concatenates two strings. If either operand is empty the other operand is
// returned unchanged (so an empty |a| yields |b| itself).
String operator+(const String& a, const String& b)
{
    if (a.isEmpty())
        return b;

    if (!b.isEmpty()) {
        String joined = a;
        joined += b;
        return joined;
    }

    return a;
}
144
// Concatenates a String with a C string by first wrapping |cs| in a String.
String operator+(const String& s, const char* cs)
{
    const String suffix(cs);
    return s + suffix;
}
149
// Concatenates a C string with a String by first wrapping |cs| in a String.
String operator+(const char* cs, const String& s)
{
    const String prefix(cs);
    return prefix + s;
}
154
codePointCompare(const String & a,const String & b)155 int codePointCompare(const String& a, const String& b)
156 {
157 return codePointCompare(a.impl(), b.impl());
158 }
159
insert(const String & str,unsigned pos)160 void String::insert(const String& str, unsigned pos)
161 {
162 if (str.isEmpty()) {
163 if (str.isNull())
164 return;
165 if (isNull())
166 m_impl = str.impl();
167 return;
168 }
169 insert(str.characters(), str.length(), pos);
170 }
171
append(const UChar * charactersToAppend,unsigned lengthToAppend)172 void String::append(const UChar* charactersToAppend, unsigned lengthToAppend)
173 {
174 if (!m_impl) {
175 if (!charactersToAppend)
176 return;
177 m_impl = StringImpl::create(charactersToAppend, lengthToAppend);
178 return;
179 }
180
181 if (!lengthToAppend)
182 return;
183
184 ASSERT(charactersToAppend);
185 UChar* data;
186 if (lengthToAppend > numeric_limits<unsigned>::max() - length())
187 CRASH();
188 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() + lengthToAppend, data);
189 memcpy(data, characters(), length() * sizeof(UChar));
190 memcpy(data + length(), charactersToAppend, lengthToAppend * sizeof(UChar));
191 m_impl = newImpl.release();
192 }
193
insert(const UChar * charactersToInsert,unsigned lengthToInsert,unsigned position)194 void String::insert(const UChar* charactersToInsert, unsigned lengthToInsert, unsigned position)
195 {
196 if (position >= length()) {
197 append(charactersToInsert, lengthToInsert);
198 return;
199 }
200
201 ASSERT(m_impl);
202
203 if (!lengthToInsert)
204 return;
205
206 ASSERT(charactersToInsert);
207 UChar* data;
208 if (lengthToInsert > numeric_limits<unsigned>::max() - length())
209 CRASH();
210 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() + lengthToInsert, data);
211 memcpy(data, characters(), position * sizeof(UChar));
212 memcpy(data + position, charactersToInsert, lengthToInsert * sizeof(UChar));
213 memcpy(data + position + lengthToInsert, characters() + position, (length() - position) * sizeof(UChar));
214 m_impl = newImpl.release();
215 }
216
characterStartingAt(unsigned i) const217 UChar32 String::characterStartingAt(unsigned i) const
218 {
219 if (!m_impl || i >= m_impl->length())
220 return 0;
221 return m_impl->characterStartingAt(i);
222 }
223
truncate(unsigned position)224 void String::truncate(unsigned position)
225 {
226 if (position >= length())
227 return;
228 UChar* data;
229 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(position, data);
230 memcpy(data, characters(), position * sizeof(UChar));
231 m_impl = newImpl.release();
232 }
233
remove(unsigned position,int lengthToRemove)234 void String::remove(unsigned position, int lengthToRemove)
235 {
236 if (lengthToRemove <= 0)
237 return;
238 if (position >= length())
239 return;
240 if (static_cast<unsigned>(lengthToRemove) > length() - position)
241 lengthToRemove = length() - position;
242 UChar* data;
243 RefPtr<StringImpl> newImpl = StringImpl::createUninitialized(length() - lengthToRemove, data);
244 memcpy(data, characters(), position * sizeof(UChar));
245 memcpy(data + position, characters() + position + lengthToRemove,
246 (length() - lengthToRemove - position) * sizeof(UChar));
247 m_impl = newImpl.release();
248 }
249
substring(unsigned pos,unsigned len) const250 String String::substring(unsigned pos, unsigned len) const
251 {
252 if (!m_impl)
253 return String();
254 return m_impl->substring(pos, len);
255 }
256
substringSharingImpl(unsigned offset,unsigned length) const257 String String::substringSharingImpl(unsigned offset, unsigned length) const
258 {
259 // FIXME: We used to check against a limit of Heap::minExtraCost / sizeof(UChar).
260
261 unsigned stringLength = this->length();
262 offset = min(offset, stringLength);
263 length = min(length, stringLength - offset);
264
265 if (!offset && length == stringLength)
266 return *this;
267 return String(StringImpl::create(m_impl, offset, length));
268 }
269
lower() const270 String String::lower() const
271 {
272 if (!m_impl)
273 return String();
274 return m_impl->lower();
275 }
276
upper() const277 String String::upper() const
278 {
279 if (!m_impl)
280 return String();
281 return m_impl->upper();
282 }
283
stripWhiteSpace() const284 String String::stripWhiteSpace() const
285 {
286 if (!m_impl)
287 return String();
288 return m_impl->stripWhiteSpace();
289 }
290
simplifyWhiteSpace() const291 String String::simplifyWhiteSpace() const
292 {
293 if (!m_impl)
294 return String();
295 return m_impl->simplifyWhiteSpace();
296 }
297
removeCharacters(CharacterMatchFunctionPtr findMatch) const298 String String::removeCharacters(CharacterMatchFunctionPtr findMatch) const
299 {
300 if (!m_impl)
301 return String();
302 return m_impl->removeCharacters(findMatch);
303 }
304
foldCase() const305 String String::foldCase() const
306 {
307 if (!m_impl)
308 return String();
309 return m_impl->foldCase();
310 }
311
percentage(int & result) const312 bool String::percentage(int& result) const
313 {
314 if (!m_impl || !m_impl->length())
315 return false;
316
317 if ((*m_impl)[m_impl->length() - 1] != '%')
318 return false;
319
320 result = charactersToIntStrict(m_impl->characters(), m_impl->length() - 1);
321 return true;
322 }
323
charactersWithNullTermination()324 const UChar* String::charactersWithNullTermination()
325 {
326 if (!m_impl)
327 return 0;
328 if (m_impl->hasTerminatingNullCharacter())
329 return m_impl->characters();
330 m_impl = StringImpl::createWithTerminatingNullCharacter(*m_impl);
331 return m_impl->characters();
332 }
333
format(const char * format,...)334 String String::format(const char *format, ...)
335 {
336 #if PLATFORM(QT)
337 // Use QString::vsprintf to avoid the locale dependent formatting of vsnprintf.
338 // https://bugs.webkit.org/show_bug.cgi?id=18994
339 va_list args;
340 va_start(args, format);
341
342 QString buffer;
343 buffer.vsprintf(format, args);
344
345 va_end(args);
346
347 QByteArray ba = buffer.toUtf8();
348 return StringImpl::create(ba.constData(), ba.length());
349
350 #elif OS(WINCE)
351 va_list args;
352 va_start(args, format);
353
354 Vector<char, 256> buffer;
355
356 int bufferSize = 256;
357 buffer.resize(bufferSize);
358 for (;;) {
359 int written = vsnprintf(buffer.data(), bufferSize, format, args);
360 va_end(args);
361
362 if (written == 0)
363 return String("");
364 if (written > 0)
365 return StringImpl::create(buffer.data(), written);
366
367 bufferSize <<= 1;
368 buffer.resize(bufferSize);
369 va_start(args, format);
370 }
371
372 #else
373 va_list args;
374 va_start(args, format);
375
376 Vector<char, 256> buffer;
377
378 // Do the format once to get the length.
379 #if COMPILER(MSVC)
380 int result = _vscprintf(format, args);
381 #else
382 char ch;
383 int result = vsnprintf(&ch, 1, format, args);
384 // We need to call va_end() and then va_start() again here, as the
385 // contents of args is undefined after the call to vsnprintf
386 // according to http://man.cx/snprintf(3)
387 //
388 // Not calling va_end/va_start here happens to work on lots of
389 // systems, but fails e.g. on 64bit Linux.
390 va_end(args);
391 va_start(args, format);
392 #endif
393
394 if (result == 0)
395 return String("");
396 if (result < 0)
397 return String();
398 unsigned len = result;
399 buffer.grow(len + 1);
400
401 // Now do the formatting again, guaranteed to fit.
402 vsnprintf(buffer.data(), buffer.size(), format, args);
403
404 va_end(args);
405
406 return StringImpl::create(buffer.data(), len);
407 #endif
408 }
409
number(short n)410 String String::number(short n)
411 {
412 return String::format("%hd", n);
413 }
414
number(unsigned short n)415 String String::number(unsigned short n)
416 {
417 return String::format("%hu", n);
418 }
419
number(int n)420 String String::number(int n)
421 {
422 return String::format("%d", n);
423 }
424
number(unsigned n)425 String String::number(unsigned n)
426 {
427 return String::format("%u", n);
428 }
429
number(long n)430 String String::number(long n)
431 {
432 return String::format("%ld", n);
433 }
434
number(unsigned long n)435 String String::number(unsigned long n)
436 {
437 return String::format("%lu", n);
438 }
439
number(long long n)440 String String::number(long long n)
441 {
442 #if OS(WINDOWS) && !PLATFORM(QT)
443 return String::format("%I64i", n);
444 #else
445 return String::format("%lli", n);
446 #endif
447 }
448
number(unsigned long long n)449 String String::number(unsigned long long n)
450 {
451 #if OS(WINDOWS) && !PLATFORM(QT)
452 return String::format("%I64u", n);
453 #else
454 return String::format("%llu", n);
455 #endif
456 }
457
number(double n)458 String String::number(double n)
459 {
460 return String::format("%.6lg", n);
461 }
462
toIntStrict(bool * ok,int base) const463 int String::toIntStrict(bool* ok, int base) const
464 {
465 if (!m_impl) {
466 if (ok)
467 *ok = false;
468 return 0;
469 }
470 return m_impl->toIntStrict(ok, base);
471 }
472
toUIntStrict(bool * ok,int base) const473 unsigned String::toUIntStrict(bool* ok, int base) const
474 {
475 if (!m_impl) {
476 if (ok)
477 *ok = false;
478 return 0;
479 }
480 return m_impl->toUIntStrict(ok, base);
481 }
482
toInt64Strict(bool * ok,int base) const483 int64_t String::toInt64Strict(bool* ok, int base) const
484 {
485 if (!m_impl) {
486 if (ok)
487 *ok = false;
488 return 0;
489 }
490 return m_impl->toInt64Strict(ok, base);
491 }
492
toUInt64Strict(bool * ok,int base) const493 uint64_t String::toUInt64Strict(bool* ok, int base) const
494 {
495 if (!m_impl) {
496 if (ok)
497 *ok = false;
498 return 0;
499 }
500 return m_impl->toUInt64Strict(ok, base);
501 }
502
toIntPtrStrict(bool * ok,int base) const503 intptr_t String::toIntPtrStrict(bool* ok, int base) const
504 {
505 if (!m_impl) {
506 if (ok)
507 *ok = false;
508 return 0;
509 }
510 return m_impl->toIntPtrStrict(ok, base);
511 }
512
513
toInt(bool * ok) const514 int String::toInt(bool* ok) const
515 {
516 if (!m_impl) {
517 if (ok)
518 *ok = false;
519 return 0;
520 }
521 return m_impl->toInt(ok);
522 }
523
toUInt(bool * ok) const524 unsigned String::toUInt(bool* ok) const
525 {
526 if (!m_impl) {
527 if (ok)
528 *ok = false;
529 return 0;
530 }
531 return m_impl->toUInt(ok);
532 }
533
toInt64(bool * ok) const534 int64_t String::toInt64(bool* ok) const
535 {
536 if (!m_impl) {
537 if (ok)
538 *ok = false;
539 return 0;
540 }
541 return m_impl->toInt64(ok);
542 }
543
toUInt64(bool * ok) const544 uint64_t String::toUInt64(bool* ok) const
545 {
546 if (!m_impl) {
547 if (ok)
548 *ok = false;
549 return 0;
550 }
551 return m_impl->toUInt64(ok);
552 }
553
toIntPtr(bool * ok) const554 intptr_t String::toIntPtr(bool* ok) const
555 {
556 if (!m_impl) {
557 if (ok)
558 *ok = false;
559 return 0;
560 }
561 return m_impl->toIntPtr(ok);
562 }
563
toDouble(bool * ok,bool * didReadNumber) const564 double String::toDouble(bool* ok, bool* didReadNumber) const
565 {
566 if (!m_impl) {
567 if (ok)
568 *ok = false;
569 if (didReadNumber)
570 *didReadNumber = false;
571 return 0.0;
572 }
573 return m_impl->toDouble(ok, didReadNumber);
574 }
575
toFloat(bool * ok,bool * didReadNumber) const576 float String::toFloat(bool* ok, bool* didReadNumber) const
577 {
578 if (!m_impl) {
579 if (ok)
580 *ok = false;
581 if (didReadNumber)
582 *didReadNumber = false;
583 return 0.0f;
584 }
585 return m_impl->toFloat(ok, didReadNumber);
586 }
587
threadsafeCopy() const588 String String::threadsafeCopy() const
589 {
590 if (!m_impl)
591 return String();
592 return m_impl->threadsafeCopy();
593 }
594
crossThreadString() const595 String String::crossThreadString() const
596 {
597 if (!m_impl)
598 return String();
599 return m_impl->crossThreadString();
600 }
601
split(const String & separator,bool allowEmptyEntries,Vector<String> & result) const602 void String::split(const String& separator, bool allowEmptyEntries, Vector<String>& result) const
603 {
604 result.clear();
605
606 unsigned startPos = 0;
607 size_t endPos;
608 while ((endPos = find(separator, startPos)) != notFound) {
609 if (allowEmptyEntries || startPos != endPos)
610 result.append(substring(startPos, endPos - startPos));
611 startPos = endPos + separator.length();
612 }
613 if (allowEmptyEntries || startPos != length())
614 result.append(substring(startPos));
615 }
616
split(const String & separator,Vector<String> & result) const617 void String::split(const String& separator, Vector<String>& result) const
618 {
619 split(separator, false, result);
620 }
621
split(UChar separator,bool allowEmptyEntries,Vector<String> & result) const622 void String::split(UChar separator, bool allowEmptyEntries, Vector<String>& result) const
623 {
624 result.clear();
625
626 unsigned startPos = 0;
627 size_t endPos;
628 while ((endPos = find(separator, startPos)) != notFound) {
629 if (allowEmptyEntries || startPos != endPos)
630 result.append(substring(startPos, endPos - startPos));
631 startPos = endPos + 1;
632 }
633 if (allowEmptyEntries || startPos != length())
634 result.append(substring(startPos));
635 }
636
split(UChar separator,Vector<String> & result) const637 void String::split(UChar separator, Vector<String>& result) const
638 {
639 split(String(&separator, 1), false, result);
640 }
641
ascii() const642 CString String::ascii() const
643 {
644 // Printable ASCII characters 32..127 and the null character are
645 // preserved, characters outside of this range are converted to '?'.
646
647 unsigned length = this->length();
648 const UChar* characters = this->characters();
649
650 char* characterBuffer;
651 CString result = CString::newUninitialized(length, characterBuffer);
652
653 for (unsigned i = 0; i < length; ++i) {
654 UChar ch = characters[i];
655 characterBuffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch;
656 }
657
658 return result;
659 }
660
latin1() const661 CString String::latin1() const
662 {
663 // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are
664 // preserved, characters outside of this range are converted to '?'.
665
666 unsigned length = this->length();
667 const UChar* characters = this->characters();
668
669 char* characterBuffer;
670 CString result = CString::newUninitialized(length, characterBuffer);
671
672 for (unsigned i = 0; i < length; ++i) {
673 UChar ch = characters[i];
674 characterBuffer[i] = ch > 0xff ? '?' : ch;
675 }
676
677 return result;
678 }
679
680 // Helper to write a three-byte UTF-8 code point to the buffer, caller must check room is available.
putUTF8Triple(char * & buffer,UChar ch)681 static inline void putUTF8Triple(char*& buffer, UChar ch)
682 {
683 ASSERT(ch >= 0x0800);
684 *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0);
685 *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80);
686 *buffer++ = static_cast<char>((ch & 0x3F) | 0x80);
687 }
688
utf8(bool strict) const689 CString String::utf8(bool strict) const
690 {
691 unsigned length = this->length();
692 const UChar* characters = this->characters();
693
694 // Allocate a buffer big enough to hold all the characters
695 // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).
696 // Optimization ideas, if we find this function is hot:
697 // * We could speculatively create a CStringBuffer to contain 'length'
698 // characters, and resize if necessary (i.e. if the buffer contains
699 // non-ascii characters). (Alternatively, scan the buffer first for
700 // ascii characters, so we know this will be sufficient).
701 // * We could allocate a CStringBuffer with an appropriate size to
702 // have a good chance of being able to write the string into the
703 // buffer without reallocing (say, 1.5 x length).
704 if (length > numeric_limits<unsigned>::max() / 3)
705 return CString();
706 Vector<char, 1024> bufferVector(length * 3);
707
708 char* buffer = bufferVector.data();
709 ConversionResult result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size(), strict);
710 ASSERT(result != targetExhausted); // (length * 3) should be sufficient for any conversion
711
712 // Only produced from strict conversion.
713 if (result == sourceIllegal)
714 return CString();
715
716 // Check for an unconverted high surrogate.
717 if (result == sourceExhausted) {
718 if (strict)
719 return CString();
720 // This should be one unpaired high surrogate. Treat it the same
721 // was as an unpaired high surrogate would have been handled in
722 // the middle of a string with non-strict conversion - which is
723 // to say, simply encode it to UTF-8.
724 ASSERT((characters + 1) == (this->characters() + length));
725 ASSERT((*characters >= 0xD800) && (*characters <= 0xDBFF));
726 // There should be room left, since one UChar hasn't been converted.
727 ASSERT((buffer + 3) <= (buffer + bufferVector.size()));
728 putUTF8Triple(buffer, *characters);
729 }
730
731 return CString(bufferVector.data(), buffer - bufferVector.data());
732 }
733
fromUTF8(const char * stringStart,size_t length)734 String String::fromUTF8(const char* stringStart, size_t length)
735 {
736 if (length > numeric_limits<unsigned>::max())
737 CRASH();
738
739 if (!stringStart)
740 return String();
741
742 // We'll use a StringImpl as a buffer; if the source string only contains ascii this should be
743 // the right length, if there are any multi-byte sequences this buffer will be too large.
744 UChar* buffer;
745 String stringBuffer(StringImpl::createUninitialized(length, buffer));
746 UChar* bufferEnd = buffer + length;
747
748 // Try converting into the buffer.
749 const char* stringCurrent = stringStart;
750 if (convertUTF8ToUTF16(&stringCurrent, stringStart + length, &buffer, bufferEnd) != conversionOK)
751 return String();
752
753 // stringBuffer is full (the input must have been all ascii) so just return it!
754 if (buffer == bufferEnd)
755 return stringBuffer;
756
757 // stringBuffer served its purpose as a buffer, copy the contents out into a new string.
758 unsigned utf16Length = buffer - stringBuffer.characters();
759 ASSERT(utf16Length < length);
760 return String(stringBuffer.characters(), utf16Length);
761 }
762
fromUTF8(const char * string)763 String String::fromUTF8(const char* string)
764 {
765 if (!string)
766 return String();
767 return fromUTF8(string, strlen(string));
768 }
769
fromUTF8WithLatin1Fallback(const char * string,size_t size)770 String String::fromUTF8WithLatin1Fallback(const char* string, size_t size)
771 {
772 String utf8 = fromUTF8(string, size);
773 if (!utf8)
774 return String(string, size);
775 return utf8;
776 }
777
778 // String Operations
779
isCharacterAllowedInBase(UChar c,int base)780 static bool isCharacterAllowedInBase(UChar c, int base)
781 {
782 if (c > 0x7F)
783 return false;
784 if (isASCIIDigit(c))
785 return c - '0' < base;
786 if (isASCIIAlpha(c)) {
787 if (base > 36)
788 base = 36;
789 return (c >= 'a' && c < 'a' + base - 10)
790 || (c >= 'A' && c < 'A' + base - 10);
791 }
792 return false;
793 }
794
795 template <typename IntegralType>
toIntegralType(const UChar * data,size_t length,bool * ok,int base)796 static inline IntegralType toIntegralType(const UChar* data, size_t length, bool* ok, int base)
797 {
798 static const IntegralType integralMax = numeric_limits<IntegralType>::max();
799 static const bool isSigned = numeric_limits<IntegralType>::is_signed;
800 const IntegralType maxMultiplier = integralMax / base;
801
802 IntegralType value = 0;
803 bool isOk = false;
804 bool isNegative = false;
805
806 if (!data)
807 goto bye;
808
809 // skip leading whitespace
810 while (length && isSpaceOrNewline(*data)) {
811 length--;
812 data++;
813 }
814
815 if (isSigned && length && *data == '-') {
816 length--;
817 data++;
818 isNegative = true;
819 } else if (length && *data == '+') {
820 length--;
821 data++;
822 }
823
824 if (!length || !isCharacterAllowedInBase(*data, base))
825 goto bye;
826
827 while (length && isCharacterAllowedInBase(*data, base)) {
828 length--;
829 IntegralType digitValue;
830 UChar c = *data;
831 if (isASCIIDigit(c))
832 digitValue = c - '0';
833 else if (c >= 'a')
834 digitValue = c - 'a' + 10;
835 else
836 digitValue = c - 'A' + 10;
837
838 if (value > maxMultiplier || (value == maxMultiplier && digitValue > (integralMax % base) + isNegative))
839 goto bye;
840
841 value = base * value + digitValue;
842 data++;
843 }
844
845 #if COMPILER(MSVC)
846 #pragma warning(push, 0)
847 #pragma warning(disable:4146)
848 #endif
849
850 if (isNegative)
851 value = -value;
852
853 #if COMPILER(MSVC)
854 #pragma warning(pop)
855 #endif
856
857 // skip trailing space
858 while (length && isSpaceOrNewline(*data)) {
859 length--;
860 data++;
861 }
862
863 if (!length)
864 isOk = true;
865 bye:
866 if (ok)
867 *ok = isOk;
868 return isOk ? value : 0;
869 }
870
lengthOfCharactersAsInteger(const UChar * data,size_t length)871 static unsigned lengthOfCharactersAsInteger(const UChar* data, size_t length)
872 {
873 size_t i = 0;
874
875 // Allow leading spaces.
876 for (; i != length; ++i) {
877 if (!isSpaceOrNewline(data[i]))
878 break;
879 }
880
881 // Allow sign.
882 if (i != length && (data[i] == '+' || data[i] == '-'))
883 ++i;
884
885 // Allow digits.
886 for (; i != length; ++i) {
887 if (!isASCIIDigit(data[i]))
888 break;
889 }
890
891 return i;
892 }
893
charactersToIntStrict(const UChar * data,size_t length,bool * ok,int base)894 int charactersToIntStrict(const UChar* data, size_t length, bool* ok, int base)
895 {
896 return toIntegralType<int>(data, length, ok, base);
897 }
898
charactersToUIntStrict(const UChar * data,size_t length,bool * ok,int base)899 unsigned charactersToUIntStrict(const UChar* data, size_t length, bool* ok, int base)
900 {
901 return toIntegralType<unsigned>(data, length, ok, base);
902 }
903
charactersToInt64Strict(const UChar * data,size_t length,bool * ok,int base)904 int64_t charactersToInt64Strict(const UChar* data, size_t length, bool* ok, int base)
905 {
906 return toIntegralType<int64_t>(data, length, ok, base);
907 }
908
charactersToUInt64Strict(const UChar * data,size_t length,bool * ok,int base)909 uint64_t charactersToUInt64Strict(const UChar* data, size_t length, bool* ok, int base)
910 {
911 return toIntegralType<uint64_t>(data, length, ok, base);
912 }
913
charactersToIntPtrStrict(const UChar * data,size_t length,bool * ok,int base)914 intptr_t charactersToIntPtrStrict(const UChar* data, size_t length, bool* ok, int base)
915 {
916 return toIntegralType<intptr_t>(data, length, ok, base);
917 }
918
charactersToInt(const UChar * data,size_t length,bool * ok)919 int charactersToInt(const UChar* data, size_t length, bool* ok)
920 {
921 return toIntegralType<int>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
922 }
923
charactersToUInt(const UChar * data,size_t length,bool * ok)924 unsigned charactersToUInt(const UChar* data, size_t length, bool* ok)
925 {
926 return toIntegralType<unsigned>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
927 }
928
charactersToInt64(const UChar * data,size_t length,bool * ok)929 int64_t charactersToInt64(const UChar* data, size_t length, bool* ok)
930 {
931 return toIntegralType<int64_t>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
932 }
933
charactersToUInt64(const UChar * data,size_t length,bool * ok)934 uint64_t charactersToUInt64(const UChar* data, size_t length, bool* ok)
935 {
936 return toIntegralType<uint64_t>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
937 }
938
charactersToIntPtr(const UChar * data,size_t length,bool * ok)939 intptr_t charactersToIntPtr(const UChar* data, size_t length, bool* ok)
940 {
941 return toIntegralType<intptr_t>(data, lengthOfCharactersAsInteger(data, length), ok, 10);
942 }
943
charactersToDouble(const UChar * data,size_t length,bool * ok,bool * didReadNumber)944 double charactersToDouble(const UChar* data, size_t length, bool* ok, bool* didReadNumber)
945 {
946 if (!length) {
947 if (ok)
948 *ok = false;
949 if (didReadNumber)
950 *didReadNumber = false;
951 return 0.0;
952 }
953
954 Vector<char, 256> bytes(length + 1);
955 for (unsigned i = 0; i < length; ++i)
956 bytes[i] = data[i] < 0x7F ? data[i] : '?';
957 bytes[length] = '\0';
958 char* start = bytes.data();
959 char* end;
960 double val = WTF::strtod(start, &end);
961 if (ok)
962 *ok = (end == 0 || *end == '\0');
963 if (didReadNumber)
964 *didReadNumber = end - start;
965 return val;
966 }
967
charactersToFloat(const UChar * data,size_t length,bool * ok,bool * didReadNumber)968 float charactersToFloat(const UChar* data, size_t length, bool* ok, bool* didReadNumber)
969 {
970 // FIXME: This will return ok even when the string fits into a double but not a float.
971 return static_cast<float>(charactersToDouble(data, length, ok, didReadNumber));
972 }
973
974 } // namespace WTF
975
976 #ifndef NDEBUG
977 // For use in the debugger
978 String* string(const char*);
979 Vector<char> asciiDebug(StringImpl* impl);
980 Vector<char> asciiDebug(String& string);
981
string(const char * s)982 String* string(const char* s)
983 {
984 // leaks memory!
985 return new String(s);
986 }
987
asciiDebug(StringImpl * impl)988 Vector<char> asciiDebug(StringImpl* impl)
989 {
990 if (!impl)
991 return asciiDebug(String("[null]").impl());
992
993 Vector<char> buffer;
994 unsigned length = impl->length();
995 const UChar* characters = impl->characters();
996
997 buffer.resize(length + 1);
998 for (unsigned i = 0; i < length; ++i) {
999 UChar ch = characters[i];
1000 buffer[i] = ch && (ch < 0x20 || ch > 0x7f) ? '?' : ch;
1001 }
1002 buffer[length] = '\0';
1003
1004 return buffer;
1005 }
1006
asciiDebug(String & string)1007 Vector<char> asciiDebug(String& string)
1008 {
1009 return asciiDebug(string.impl());
1010 }
1011
1012 #endif
1013