1 /*
2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3 * (C) 1999 Antti Koivisto (koivisto@kde.org)
4 * (C) 2001 Dirk Mueller ( mueller@kde.org )
5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
6 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Library General Public License for more details.
17 *
18 * You should have received a copy of the GNU Library General Public License
19 * along with this library; see the file COPYING.LIB. If not, write to
20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
22 *
23 */
24
25 #include "config.h"
26 #include "StringImpl.h"
27
28 #include "AtomicString.h"
29 #include "CString.h"
30 #include "CharacterNames.h"
31 #include "FloatConversion.h"
32 #include "StringBuffer.h"
33 #include "StringHash.h"
34 #include "TextBreakIterator.h"
35 #include "TextEncoding.h"
36 #include "ThreadGlobalData.h"
37 #include <runtime/UString.h>
38 #include <wtf/dtoa.h>
39 #include <wtf/Assertions.h>
40 #include <wtf/Threading.h>
41 #include <wtf/unicode/Unicode.h>
42
43 using namespace WTF;
44 using namespace Unicode;
45 using namespace std;
46
47 namespace WebCore {
48
49 static const unsigned minLengthToShare = 20;
50
newUCharVector(unsigned n)51 static inline UChar* newUCharVector(unsigned n)
52 {
53 return static_cast<UChar*>(fastMalloc(sizeof(UChar) * n));
54 }
55
deleteUCharVector(const UChar * p)56 static inline void deleteUCharVector(const UChar* p)
57 {
58 fastFree(const_cast<UChar*>(p));
59 }
60
61 // Some of the factory methods create buffers using fastMalloc.
62 // We must ensure that all allocations of StringImpl are allocated using
63 // fastMalloc so that we don't have mis-matched frees. We accomplish
64 // this by overriding the new and delete operators.
operator new(size_t size,void * address)65 void* StringImpl::operator new(size_t size, void* address)
66 {
67 if (address)
68 return address; // Allocating using an internal buffer
69 return fastMalloc(size);
70 }
71
operator new(size_t size)72 void* StringImpl::operator new(size_t size)
73 {
74 return fastMalloc(size);
75 }
76
operator delete(void * address)77 void StringImpl::operator delete(void* address)
78 {
79 fastFree(address);
80 }
81
82 // This constructor is used only to create the empty string.
StringImpl()83 StringImpl::StringImpl()
84 : m_data(0)
85 , m_length(0)
86 , m_hash(0)
87 {
88 // Ensure that the hash is computed so that AtomicStringHash can call existingHash()
89 // with impunity. The empty string is special because it is never entered into
90 // AtomicString's HashKey, but still needs to compare correctly.
91 hash();
92 }
93
StringImpl(const UChar * characters,unsigned length)94 inline StringImpl::StringImpl(const UChar* characters, unsigned length)
95 : m_data(characters)
96 , m_length(length)
97 , m_hash(0)
98 {
99 ASSERT(characters);
100 ASSERT(length);
101 ASSERT(!bufferIsInternal());
102 }
103
StringImpl(unsigned length)104 inline StringImpl::StringImpl(unsigned length)
105 : m_data(reinterpret_cast<const UChar*>(this + 1))
106 , m_length(length)
107 , m_hash(0)
108 {
109 ASSERT(length);
110 ASSERT(bufferIsInternal());
111 }
112
~StringImpl()113 StringImpl::~StringImpl()
114 {
115 if (inTable())
116 AtomicString::remove(this);
117 if (!bufferIsInternal()) {
118 SharedUChar* sharedBuffer = m_sharedBufferAndFlags.get();
119 if (sharedBuffer)
120 sharedBuffer->deref();
121 else
122 deleteUCharVector(m_data);
123 }
124 }
125
empty()126 StringImpl* StringImpl::empty()
127 {
128 return threadGlobalData().emptyString();
129 }
130
containsOnlyWhitespace()131 bool StringImpl::containsOnlyWhitespace()
132 {
133 // FIXME: The definition of whitespace here includes a number of characters
134 // that are not whitespace from the point of view of RenderText; I wonder if
135 // that's a problem in practice.
136 for (unsigned i = 0; i < m_length; i++)
137 if (!isASCIISpace(m_data[i]))
138 return false;
139 return true;
140 }
141
substring(unsigned start,unsigned length)142 PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length)
143 {
144 if (start >= m_length)
145 return empty();
146 unsigned maxLength = m_length - start;
147 if (length >= maxLength) {
148 if (!start)
149 return this;
150 length = maxLength;
151 }
152 return create(m_data + start, length);
153 }
154
characterStartingAt(unsigned i)155 UChar32 StringImpl::characterStartingAt(unsigned i)
156 {
157 if (U16_IS_SINGLE(m_data[i]))
158 return m_data[i];
159 if (i + 1 < m_length && U16_IS_LEAD(m_data[i]) && U16_IS_TRAIL(m_data[i + 1]))
160 return U16_GET_SUPPLEMENTARY(m_data[i], m_data[i + 1]);
161 return 0;
162 }
163
lower()164 PassRefPtr<StringImpl> StringImpl::lower()
165 {
166 // Note: This is a hot function in the Dromaeo benchmark, specifically the
167 // no-op code path up through the first 'return' statement.
168
169 // First scan the string for uppercase and non-ASCII characters:
170 UChar ored = 0;
171 bool noUpper = true;
172 const UChar *end = m_data + m_length;
173 for (const UChar* chp = m_data; chp != end; chp++) {
174 if (UNLIKELY(isASCIIUpper(*chp)))
175 noUpper = false;
176 ored |= *chp;
177 }
178
179 // Nothing to do if the string is all ASCII with no uppercase.
180 if (noUpper && !(ored & ~0x7F))
181 return this;
182
183 int32_t length = m_length;
184 UChar* data;
185 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
186
187 if (!(ored & ~0x7F)) {
188 // Do a faster loop for the case where all the characters are ASCII.
189 for (int i = 0; i < length; i++) {
190 UChar c = m_data[i];
191 data[i] = toASCIILower(c);
192 }
193 return newImpl;
194 }
195
196 // Do a slower implementation for cases that include non-ASCII characters.
197 bool error;
198 int32_t realLength = Unicode::toLower(data, length, m_data, m_length, &error);
199 if (!error && realLength == length)
200 return newImpl;
201 newImpl = createUninitialized(realLength, data);
202 Unicode::toLower(data, realLength, m_data, m_length, &error);
203 if (error)
204 return this;
205 return newImpl;
206 }
207
upper()208 PassRefPtr<StringImpl> StringImpl::upper()
209 {
210 // This function could be optimized for no-op cases the way lower() is,
211 // but in empirical testing, few actual calls to upper() are no-ops, so
212 // it wouldn't be worth the extra time for pre-scanning.
213 UChar* data;
214 PassRefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
215 int32_t length = m_length;
216
217 // Do a faster loop for the case where all the characters are ASCII.
218 UChar ored = 0;
219 for (int i = 0; i < length; i++) {
220 UChar c = m_data[i];
221 ored |= c;
222 data[i] = toASCIIUpper(c);
223 }
224 if (!(ored & ~0x7F))
225 return newImpl;
226
227 // Do a slower implementation for cases that include non-ASCII characters.
228 bool error;
229 int32_t realLength = Unicode::toUpper(data, length, m_data, m_length, &error);
230 if (!error && realLength == length)
231 return newImpl;
232 newImpl = createUninitialized(realLength, data);
233 Unicode::toUpper(data, realLength, m_data, m_length, &error);
234 if (error)
235 return this;
236 return newImpl;
237 }
238
secure(UChar aChar)239 PassRefPtr<StringImpl> StringImpl::secure(UChar aChar)
240 {
241 UChar* data;
242 PassRefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
243 int32_t length = m_length;
244 for (int i = 0; i < length; ++i)
245 data[i] = aChar;
246 return newImpl;
247 }
248
foldCase()249 PassRefPtr<StringImpl> StringImpl::foldCase()
250 {
251 UChar* data;
252 PassRefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
253 int32_t length = m_length;
254
255 // Do a faster loop for the case where all the characters are ASCII.
256 UChar ored = 0;
257 for (int i = 0; i < length; i++) {
258 UChar c = m_data[i];
259 ored |= c;
260 data[i] = toASCIILower(c);
261 }
262 if (!(ored & ~0x7F))
263 return newImpl;
264
265 // Do a slower implementation for cases that include non-ASCII characters.
266 bool error;
267 int32_t realLength = Unicode::foldCase(data, length, m_data, m_length, &error);
268 if (!error && realLength == length)
269 return newImpl;
270 newImpl = createUninitialized(realLength, data);
271 Unicode::foldCase(data, realLength, m_data, m_length, &error);
272 if (error)
273 return this;
274 return newImpl;
275 }
276
stripWhiteSpace()277 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace()
278 {
279 if (!m_length)
280 return empty();
281
282 unsigned start = 0;
283 unsigned end = m_length - 1;
284
285 // skip white space from start
286 while (start <= end && isSpaceOrNewline(m_data[start]))
287 start++;
288
289 // only white space
290 if (start > end)
291 return empty();
292
293 // skip white space from end
294 while (end && isSpaceOrNewline(m_data[end]))
295 end--;
296
297 if (!start && end == m_length - 1)
298 return this;
299 return create(m_data + start, end + 1 - start);
300 }
301
removeCharacters(CharacterMatchFunctionPtr findMatch)302 PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr findMatch)
303 {
304 const UChar* from = m_data;
305 const UChar* fromend = from + m_length;
306
307 // Assume the common case will not remove any characters
308 while (from != fromend && !findMatch(*from))
309 from++;
310 if (from == fromend)
311 return this;
312
313 StringBuffer data(m_length);
314 UChar* to = data.characters();
315 unsigned outc = from - m_data;
316
317 if (outc)
318 memcpy(to, m_data, outc * sizeof(UChar));
319
320 while (true) {
321 while (from != fromend && findMatch(*from))
322 from++;
323 while (from != fromend && !findMatch(*from))
324 to[outc++] = *from++;
325 if (from == fromend)
326 break;
327 }
328
329 data.shrink(outc);
330
331 return adopt(data);
332 }
333
simplifyWhiteSpace()334 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace()
335 {
336 StringBuffer data(m_length);
337
338 const UChar* from = m_data;
339 const UChar* fromend = from + m_length;
340 int outc = 0;
341 bool changedToSpace = false;
342
343 UChar* to = data.characters();
344
345 while (true) {
346 while (from != fromend && isSpaceOrNewline(*from)) {
347 if (*from != ' ')
348 changedToSpace = true;
349 from++;
350 }
351 while (from != fromend && !isSpaceOrNewline(*from))
352 to[outc++] = *from++;
353 if (from != fromend)
354 to[outc++] = ' ';
355 else
356 break;
357 }
358
359 if (outc > 0 && to[outc - 1] == ' ')
360 outc--;
361
362 if (static_cast<unsigned>(outc) == m_length && !changedToSpace)
363 return this;
364
365 data.shrink(outc);
366
367 return adopt(data);
368 }
369
capitalize(UChar previous)370 PassRefPtr<StringImpl> StringImpl::capitalize(UChar previous)
371 {
372 StringBuffer stringWithPrevious(m_length + 1);
373 stringWithPrevious[0] = previous == noBreakSpace ? ' ' : previous;
374 for (unsigned i = 1; i < m_length + 1; i++) {
375 // Replace   with a real space since ICU no longer treats   as a word separator.
376 if (m_data[i - 1] == noBreakSpace)
377 stringWithPrevious[i] = ' ';
378 else
379 stringWithPrevious[i] = m_data[i - 1];
380 }
381
382 TextBreakIterator* boundary = wordBreakIterator(stringWithPrevious.characters(), m_length + 1);
383 if (!boundary)
384 return this;
385
386 StringBuffer data(m_length);
387
388 int32_t endOfWord;
389 int32_t startOfWord = textBreakFirst(boundary);
390 for (endOfWord = textBreakNext(boundary); endOfWord != TextBreakDone; startOfWord = endOfWord, endOfWord = textBreakNext(boundary)) {
391 if (startOfWord != 0) // Ignore first char of previous string
392 data[startOfWord - 1] = m_data[startOfWord - 1] == noBreakSpace ? noBreakSpace : toTitleCase(stringWithPrevious[startOfWord]);
393 for (int i = startOfWord + 1; i < endOfWord; i++)
394 data[i - 1] = m_data[i - 1];
395 }
396
397 return adopt(data);
398 }
399
toIntStrict(bool * ok,int base)400 int StringImpl::toIntStrict(bool* ok, int base)
401 {
402 return charactersToIntStrict(m_data, m_length, ok, base);
403 }
404
toUIntStrict(bool * ok,int base)405 unsigned StringImpl::toUIntStrict(bool* ok, int base)
406 {
407 return charactersToUIntStrict(m_data, m_length, ok, base);
408 }
409
toInt64Strict(bool * ok,int base)410 int64_t StringImpl::toInt64Strict(bool* ok, int base)
411 {
412 return charactersToInt64Strict(m_data, m_length, ok, base);
413 }
414
toUInt64Strict(bool * ok,int base)415 uint64_t StringImpl::toUInt64Strict(bool* ok, int base)
416 {
417 return charactersToUInt64Strict(m_data, m_length, ok, base);
418 }
419
toIntPtrStrict(bool * ok,int base)420 intptr_t StringImpl::toIntPtrStrict(bool* ok, int base)
421 {
422 return charactersToIntPtrStrict(m_data, m_length, ok, base);
423 }
424
toInt(bool * ok)425 int StringImpl::toInt(bool* ok)
426 {
427 return charactersToInt(m_data, m_length, ok);
428 }
429
toUInt(bool * ok)430 unsigned StringImpl::toUInt(bool* ok)
431 {
432 return charactersToUInt(m_data, m_length, ok);
433 }
434
toInt64(bool * ok)435 int64_t StringImpl::toInt64(bool* ok)
436 {
437 return charactersToInt64(m_data, m_length, ok);
438 }
439
toUInt64(bool * ok)440 uint64_t StringImpl::toUInt64(bool* ok)
441 {
442 return charactersToUInt64(m_data, m_length, ok);
443 }
444
toIntPtr(bool * ok)445 intptr_t StringImpl::toIntPtr(bool* ok)
446 {
447 return charactersToIntPtr(m_data, m_length, ok);
448 }
449
toDouble(bool * ok)450 double StringImpl::toDouble(bool* ok)
451 {
452 return charactersToDouble(m_data, m_length, ok);
453 }
454
toFloat(bool * ok)455 float StringImpl::toFloat(bool* ok)
456 {
457 return charactersToFloat(m_data, m_length, ok);
458 }
459
equal(const UChar * a,const char * b,int length)460 static bool equal(const UChar* a, const char* b, int length)
461 {
462 ASSERT(length >= 0);
463 while (length--) {
464 unsigned char bc = *b++;
465 if (*a++ != bc)
466 return false;
467 }
468 return true;
469 }
470
equalIgnoringCase(const UChar * a,const char * b,unsigned length)471 bool equalIgnoringCase(const UChar* a, const char* b, unsigned length)
472 {
473 while (length--) {
474 unsigned char bc = *b++;
475 if (foldCase(*a++) != foldCase(bc))
476 return false;
477 }
478 return true;
479 }
480
equalIgnoringCase(const UChar * a,const UChar * b,int length)481 static inline bool equalIgnoringCase(const UChar* a, const UChar* b, int length)
482 {
483 ASSERT(length >= 0);
484 return umemcasecmp(a, b, length) == 0;
485 }
486
find(const char * chs,int index,bool caseSensitive)487 int StringImpl::find(const char* chs, int index, bool caseSensitive)
488 {
489 if (!chs || index < 0)
490 return -1;
491
492 int chsLength = strlen(chs);
493 int n = m_length - index;
494 if (n < 0)
495 return -1;
496 n -= chsLength - 1;
497 if (n <= 0)
498 return -1;
499
500 const char* chsPlusOne = chs + 1;
501 int chsLengthMinusOne = chsLength - 1;
502
503 const UChar* ptr = m_data + index - 1;
504 if (caseSensitive) {
505 UChar c = *chs;
506 do {
507 if (*++ptr == c && equal(ptr + 1, chsPlusOne, chsLengthMinusOne))
508 return m_length - chsLength - n + 1;
509 } while (--n);
510 } else {
511 UChar lc = Unicode::foldCase(*chs);
512 do {
513 if (Unicode::foldCase(*++ptr) == lc && equalIgnoringCase(ptr + 1, chsPlusOne, chsLengthMinusOne))
514 return m_length - chsLength - n + 1;
515 } while (--n);
516 }
517
518 return -1;
519 }
520
find(UChar c,int start)521 int StringImpl::find(UChar c, int start)
522 {
523 return WebCore::find(m_data, m_length, c, start);
524 }
525
find(CharacterMatchFunctionPtr matchFunction,int start)526 int StringImpl::find(CharacterMatchFunctionPtr matchFunction, int start)
527 {
528 return WebCore::find(m_data, m_length, matchFunction, start);
529 }
530
find(StringImpl * str,int index,bool caseSensitive)531 int StringImpl::find(StringImpl* str, int index, bool caseSensitive)
532 {
533 /*
534 We use a simple trick for efficiency's sake. Instead of
535 comparing strings, we compare the sum of str with that of
536 a part of this string. Only if that matches, we call memcmp
537 or ucstrnicmp.
538 */
539 ASSERT(str);
540 if (index < 0)
541 index += m_length;
542 int lstr = str->m_length;
543 int lthis = m_length - index;
544 if ((unsigned)lthis > m_length)
545 return -1;
546 int delta = lthis - lstr;
547 if (delta < 0)
548 return -1;
549
550 const UChar* uthis = m_data + index;
551 const UChar* ustr = str->m_data;
552 unsigned hthis = 0;
553 unsigned hstr = 0;
554 if (caseSensitive) {
555 for (int i = 0; i < lstr; i++) {
556 hthis += uthis[i];
557 hstr += ustr[i];
558 }
559 int i = 0;
560 while (1) {
561 if (hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(UChar)) == 0)
562 return index + i;
563 if (i == delta)
564 return -1;
565 hthis += uthis[i + lstr];
566 hthis -= uthis[i];
567 i++;
568 }
569 } else {
570 for (int i = 0; i < lstr; i++ ) {
571 hthis += toASCIILower(uthis[i]);
572 hstr += toASCIILower(ustr[i]);
573 }
574 int i = 0;
575 while (1) {
576 if (hthis == hstr && equalIgnoringCase(uthis + i, ustr, lstr))
577 return index + i;
578 if (i == delta)
579 return -1;
580 hthis += toASCIILower(uthis[i + lstr]);
581 hthis -= toASCIILower(uthis[i]);
582 i++;
583 }
584 }
585 }
586
reverseFind(UChar c,int index)587 int StringImpl::reverseFind(UChar c, int index)
588 {
589 return WebCore::reverseFind(m_data, m_length, c, index);
590 }
591
reverseFind(StringImpl * str,int index,bool caseSensitive)592 int StringImpl::reverseFind(StringImpl* str, int index, bool caseSensitive)
593 {
594 /*
595 See StringImpl::find() for explanations.
596 */
597 ASSERT(str);
598 int lthis = m_length;
599 if (index < 0)
600 index += lthis;
601
602 int lstr = str->m_length;
603 int delta = lthis - lstr;
604 if ( index < 0 || index > lthis || delta < 0 )
605 return -1;
606 if ( index > delta )
607 index = delta;
608
609 const UChar *uthis = m_data;
610 const UChar *ustr = str->m_data;
611 unsigned hthis = 0;
612 unsigned hstr = 0;
613 int i;
614 if (caseSensitive) {
615 for ( i = 0; i < lstr; i++ ) {
616 hthis += uthis[index + i];
617 hstr += ustr[i];
618 }
619 i = index;
620 while (1) {
621 if (hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(UChar)) == 0)
622 return i;
623 if (i == 0)
624 return -1;
625 i--;
626 hthis -= uthis[i + lstr];
627 hthis += uthis[i];
628 }
629 } else {
630 for (i = 0; i < lstr; i++) {
631 hthis += toASCIILower(uthis[index + i]);
632 hstr += toASCIILower(ustr[i]);
633 }
634 i = index;
635 while (1) {
636 if (hthis == hstr && equalIgnoringCase(uthis + i, ustr, lstr) )
637 return i;
638 if (i == 0)
639 return -1;
640 i--;
641 hthis -= toASCIILower(uthis[i + lstr]);
642 hthis += toASCIILower(uthis[i]);
643 }
644 }
645
646 // Should never get here.
647 return -1;
648 }
649
endsWith(StringImpl * m_data,bool caseSensitive)650 bool StringImpl::endsWith(StringImpl* m_data, bool caseSensitive)
651 {
652 ASSERT(m_data);
653 int start = m_length - m_data->m_length;
654 if (start >= 0)
655 return (find(m_data, start, caseSensitive) == start);
656 return false;
657 }
658
replace(UChar oldC,UChar newC)659 PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC)
660 {
661 if (oldC == newC)
662 return this;
663 unsigned i;
664 for (i = 0; i != m_length; ++i)
665 if (m_data[i] == oldC)
666 break;
667 if (i == m_length)
668 return this;
669
670 UChar* data;
671 PassRefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
672
673 for (i = 0; i != m_length; ++i) {
674 UChar ch = m_data[i];
675 if (ch == oldC)
676 ch = newC;
677 data[i] = ch;
678 }
679 return newImpl;
680 }
681
replace(unsigned position,unsigned lengthToReplace,StringImpl * str)682 PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToReplace, StringImpl* str)
683 {
684 position = min(position, length());
685 lengthToReplace = min(lengthToReplace, length() - position);
686 unsigned lengthToInsert = str ? str->length() : 0;
687 if (!lengthToReplace && !lengthToInsert)
688 return this;
689 UChar* data;
690
691 if ((length() - lengthToReplace) >= (numeric_limits<unsigned>::max() - lengthToInsert))
692 CRASH();
693
694 PassRefPtr<StringImpl> newImpl =
695 createUninitialized(length() - lengthToReplace + lengthToInsert, data);
696 memcpy(data, characters(), position * sizeof(UChar));
697 if (str)
698 memcpy(data + position, str->characters(), lengthToInsert * sizeof(UChar));
699 memcpy(data + position + lengthToInsert, characters() + position + lengthToReplace,
700 (length() - position - lengthToReplace) * sizeof(UChar));
701 return newImpl;
702 }
703
replace(UChar pattern,StringImpl * replacement)704 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacement)
705 {
706 if (!replacement)
707 return this;
708
709 int repStrLength = replacement->length();
710 int srcSegmentStart = 0;
711 int matchCount = 0;
712
713 // Count the matches
714 while ((srcSegmentStart = find(pattern, srcSegmentStart)) >= 0) {
715 ++matchCount;
716 ++srcSegmentStart;
717 }
718
719 // If we have 0 matches, we don't have to do any more work
720 if (!matchCount)
721 return this;
722
723 if (repStrLength && matchCount > numeric_limits<unsigned>::max() / repStrLength)
724 CRASH();
725
726 unsigned replaceSize = matchCount * repStrLength;
727 unsigned newSize = m_length - matchCount;
728 if (newSize >= (numeric_limits<unsigned>::max() - replaceSize))
729 CRASH();
730
731 newSize += replaceSize;
732
733 UChar* data;
734 PassRefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
735
736 // Construct the new data
737 int srcSegmentEnd;
738 int srcSegmentLength;
739 srcSegmentStart = 0;
740 int dstOffset = 0;
741
742 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) >= 0) {
743 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
744 memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
745 dstOffset += srcSegmentLength;
746 memcpy(data + dstOffset, replacement->m_data, repStrLength * sizeof(UChar));
747 dstOffset += repStrLength;
748 srcSegmentStart = srcSegmentEnd + 1;
749 }
750
751 srcSegmentLength = m_length - srcSegmentStart;
752 memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
753
754 ASSERT(dstOffset + srcSegmentLength == static_cast<int>(newImpl->length()));
755
756 return newImpl;
757 }
758
replace(StringImpl * pattern,StringImpl * replacement)759 PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* replacement)
760 {
761 if (!pattern || !replacement)
762 return this;
763
764 int patternLength = pattern->length();
765 if (!patternLength)
766 return this;
767
768 int repStrLength = replacement->length();
769 int srcSegmentStart = 0;
770 int matchCount = 0;
771
772 // Count the matches
773 while ((srcSegmentStart = find(pattern, srcSegmentStart)) >= 0) {
774 ++matchCount;
775 srcSegmentStart += patternLength;
776 }
777
778 // If we have 0 matches, we don't have to do any more work
779 if (!matchCount)
780 return this;
781
782 UChar* data;
783
784 unsigned newSize = m_length - matchCount * patternLength;
785 if (repStrLength && matchCount > numeric_limits<unsigned>::max() / repStrLength)
786 CRASH();
787
788 if (newSize > (numeric_limits<unsigned>::max() - matchCount * repStrLength))
789 CRASH();
790
791 newSize += matchCount * repStrLength;
792
793 PassRefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
794
795 // Construct the new data
796 int srcSegmentEnd;
797 int srcSegmentLength;
798 srcSegmentStart = 0;
799 int dstOffset = 0;
800
801 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) >= 0) {
802 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
803 memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
804 dstOffset += srcSegmentLength;
805 memcpy(data + dstOffset, replacement->m_data, repStrLength * sizeof(UChar));
806 dstOffset += repStrLength;
807 srcSegmentStart = srcSegmentEnd + patternLength;
808 }
809
810 srcSegmentLength = m_length - srcSegmentStart;
811 memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
812
813 ASSERT(dstOffset + srcSegmentLength == static_cast<int>(newImpl->length()));
814
815 return newImpl;
816 }
817
equal(StringImpl * a,StringImpl * b)818 bool equal(StringImpl* a, StringImpl* b)
819 {
820 return StringHash::equal(a, b);
821 }
822
equal(StringImpl * a,const char * b)823 bool equal(StringImpl* a, const char* b)
824 {
825 if (!a)
826 return !b;
827 if (!b)
828 return !a;
829
830 unsigned length = a->length();
831 const UChar* as = a->characters();
832 for (unsigned i = 0; i != length; ++i) {
833 unsigned char bc = b[i];
834 if (!bc)
835 return false;
836 if (as[i] != bc)
837 return false;
838 }
839
840 return !b[length];
841 }
842
equalIgnoringCase(StringImpl * a,StringImpl * b)843 bool equalIgnoringCase(StringImpl* a, StringImpl* b)
844 {
845 return CaseFoldingHash::equal(a, b);
846 }
847
equalIgnoringCase(StringImpl * a,const char * b)848 bool equalIgnoringCase(StringImpl* a, const char* b)
849 {
850 if (!a)
851 return !b;
852 if (!b)
853 return !a;
854
855 unsigned length = a->length();
856 const UChar* as = a->characters();
857
858 // Do a faster loop for the case where all the characters are ASCII.
859 UChar ored = 0;
860 bool equal = true;
861 for (unsigned i = 0; i != length; ++i) {
862 char bc = b[i];
863 if (!bc)
864 return false;
865 UChar ac = as[i];
866 ored |= ac;
867 equal = equal && (toASCIILower(ac) == toASCIILower(bc));
868 }
869
870 // Do a slower implementation for cases that include non-ASCII characters.
871 if (ored & ~0x7F) {
872 equal = true;
873 for (unsigned i = 0; i != length; ++i) {
874 unsigned char bc = b[i];
875 equal = equal && (foldCase(as[i]) == foldCase(bc));
876 }
877 }
878
879 return equal && !b[length];
880 }
881
equalIgnoringNullity(StringImpl * a,StringImpl * b)882 bool equalIgnoringNullity(StringImpl* a, StringImpl* b)
883 {
884 if (StringHash::equal(a, b))
885 return true;
886 if (!a && b && !b->length())
887 return true;
888 if (!b && a && !a->length())
889 return true;
890
891 return false;
892 }
893
ascii()894 Vector<char> StringImpl::ascii()
895 {
896 Vector<char> buffer(m_length + 1);
897 for (unsigned i = 0; i != m_length; ++i) {
898 UChar c = m_data[i];
899 if ((c >= 0x20 && c < 0x7F) || c == 0x00)
900 buffer[i] = c;
901 else
902 buffer[i] = '?';
903 }
904 buffer[m_length] = '\0';
905 return buffer;
906 }
907
defaultWritingDirection()908 WTF::Unicode::Direction StringImpl::defaultWritingDirection()
909 {
910 for (unsigned i = 0; i < m_length; ++i) {
911 WTF::Unicode::Direction charDirection = WTF::Unicode::direction(m_data[i]);
912 if (charDirection == WTF::Unicode::LeftToRight)
913 return WTF::Unicode::LeftToRight;
914 if (charDirection == WTF::Unicode::RightToLeft || charDirection == WTF::Unicode::RightToLeftArabic)
915 return WTF::Unicode::RightToLeft;
916 }
917 return WTF::Unicode::LeftToRight;
918 }
919
920 // This is a hot function because it's used when parsing HTML.
createStrippingNullCharactersSlowCase(const UChar * characters,unsigned length)921 PassRefPtr<StringImpl> StringImpl::createStrippingNullCharactersSlowCase(const UChar* characters, unsigned length)
922 {
923 StringBuffer strippedCopy(length);
924 unsigned strippedLength = 0;
925 for (unsigned i = 0; i < length; i++) {
926 if (int c = characters[i])
927 strippedCopy[strippedLength++] = c;
928 }
929 ASSERT(strippedLength < length); // Only take the slow case when stripping.
930 strippedCopy.shrink(strippedLength);
931 return adopt(strippedCopy);
932 }
933
adopt(StringBuffer & buffer)934 PassRefPtr<StringImpl> StringImpl::adopt(StringBuffer& buffer)
935 {
936 unsigned length = buffer.length();
937 if (length == 0)
938 return empty();
939 return adoptRef(new StringImpl(buffer.release(), length));
940 }
941
adopt(Vector<UChar> & vector)942 PassRefPtr<StringImpl> StringImpl::adopt(Vector<UChar>& vector)
943 {
944 size_t size = vector.size();
945 if (size == 0)
946 return empty();
947 return adoptRef(new StringImpl(vector.releaseBuffer(), size));
948 }
949
createUninitialized(unsigned length,UChar * & data)950 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data)
951 {
952 if (!length) {
953 data = 0;
954 return empty();
955 }
956
957 // Allocate a single buffer large enough to contain the StringImpl
958 // struct as well as the data which it contains. This removes one
959 // heap allocation from this call.
960 size_t size = sizeof(StringImpl) + length * sizeof(UChar);
961 StringImpl* string = static_cast<StringImpl*>(fastMalloc(size));
962 data = reinterpret_cast<UChar*>(string + 1);
963 string = new (string) StringImpl(length);
964 return adoptRef(string);
965 }
966
create(const UChar * characters,unsigned length)967 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length)
968 {
969 if (!characters || !length)
970 return empty();
971
972 UChar* data;
973 PassRefPtr<StringImpl> string = createUninitialized(length, data);
974 memcpy(data, characters, length * sizeof(UChar));
975 return string;
976 }
977
create(const char * characters,unsigned length)978 PassRefPtr<StringImpl> StringImpl::create(const char* characters, unsigned length)
979 {
980 if (!characters || !length)
981 return empty();
982
983 UChar* data;
984 PassRefPtr<StringImpl> string = createUninitialized(length, data);
985 for (unsigned i = 0; i != length; ++i) {
986 unsigned char c = characters[i];
987 data[i] = c;
988 }
989 return string;
990 }
991
create(const char * string)992 PassRefPtr<StringImpl> StringImpl::create(const char* string)
993 {
994 if (!string)
995 return empty();
996 return create(string, strlen(string));
997 }
998
999 #if USE(JSC)
create(const JSC::UString & str)1000 PassRefPtr<StringImpl> StringImpl::create(const JSC::UString& str)
1001 {
1002 SharedUChar* sharedBuffer = const_cast<JSC::UString*>(&str)->rep()->sharedBuffer();
1003 if (sharedBuffer) {
1004 PassRefPtr<StringImpl> impl = adoptRef(new StringImpl(str.data(), str.size()));
1005 sharedBuffer->ref();
1006 impl->m_sharedBufferAndFlags.set(sharedBuffer);
1007 return impl;
1008 }
1009 return StringImpl::create(str.data(), str.size());
1010 }
1011
ustring()1012 JSC::UString StringImpl::ustring()
1013 {
1014 SharedUChar* sharedBuffer = this->sharedBuffer();
1015 if (sharedBuffer)
1016 return JSC::UString::Rep::create(sharedBuffer, const_cast<UChar*>(m_data), m_length);
1017
1018 return JSC::UString(m_data, m_length);
1019 }
1020 #endif
1021
createWithTerminatingNullCharacter(const StringImpl & string)1022 PassRefPtr<StringImpl> StringImpl::createWithTerminatingNullCharacter(const StringImpl& string)
1023 {
1024 // Use createUninitialized instead of 'new StringImpl' so that the string and its buffer
1025 // get allocated in a single malloc block.
1026 UChar* data;
1027 int length = string.m_length;
1028 RefPtr<StringImpl> terminatedString = createUninitialized(length + 1, data);
1029 memcpy(data, string.m_data, length * sizeof(UChar));
1030 data[length] = 0;
1031 terminatedString->m_length--;
1032 terminatedString->m_hash = string.m_hash;
1033 terminatedString->m_sharedBufferAndFlags.setFlag(HasTerminatingNullCharacter);
1034 return terminatedString.release();
1035 }
1036
threadsafeCopy() const1037 PassRefPtr<StringImpl> StringImpl::threadsafeCopy() const
1038 {
1039 // Special-case empty strings to make sure that per-thread empty string instance isn't returned.
1040 if (m_length == 0)
1041 return adoptRef(new StringImpl);
1042 return create(m_data, m_length);
1043 }
1044
crossThreadString()1045 PassRefPtr<StringImpl> StringImpl::crossThreadString()
1046 {
1047 SharedUChar* shared = sharedBuffer();
1048 if (shared) {
1049 RefPtr<StringImpl> impl = adoptRef(new StringImpl(m_data, m_length));
1050 impl->m_sharedBufferAndFlags.set(shared->crossThreadCopy().releaseRef());
1051 return impl.release();
1052 }
1053
1054 // If no shared buffer is available, create a copy.
1055 return threadsafeCopy();
1056 }
1057
sharedBuffer()1058 StringImpl::SharedUChar* StringImpl::sharedBuffer()
1059 {
1060 if (m_length < minLengthToShare || bufferIsInternal())
1061 return 0;
1062
1063 if (!m_sharedBufferAndFlags.get())
1064 m_sharedBufferAndFlags.set(SharedUChar::create(new OwnFastMallocPtr<UChar>(const_cast<UChar*>(m_data))).releaseRef());
1065 return m_sharedBufferAndFlags.get();
1066 }
1067
1068
1069 } // namespace WebCore
1070