1 /*
2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3 * (C) 1999 Antti Koivisto (koivisto@kde.org)
4 * (C) 2001 Dirk Mueller ( mueller@kde.org )
5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
6 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Library General Public License for more details.
17 *
18 * You should have received a copy of the GNU Library General Public License
19 * along with this library; see the file COPYING.LIB. If not, write to
20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
22 *
23 */
24
25 #include "config.h"
26 #include "StringImpl.h"
27
28 #include "AtomicString.h"
29 #include "CString.h"
30 #include "CharacterNames.h"
31 #include "FloatConversion.h"
32 #include "StringBuffer.h"
33 #include "StringHash.h"
34 #include "TextBreakIterator.h"
35 #include "TextEncoding.h"
36 #include "ThreadGlobalData.h"
37 #include <wtf/dtoa.h>
38 #include <wtf/Assertions.h>
39 #include <wtf/Threading.h>
40 #include <wtf/unicode/Unicode.h>
41
42 using namespace WTF;
43 using namespace Unicode;
44
45 namespace WebCore {
46
47 static const unsigned minLengthToShare = 20;
48
newUCharVector(unsigned n)49 static inline UChar* newUCharVector(unsigned n)
50 {
51 return static_cast<UChar*>(fastMalloc(sizeof(UChar) * n));
52 }
53
deleteUCharVector(const UChar * p)54 static inline void deleteUCharVector(const UChar* p)
55 {
56 fastFree(const_cast<UChar*>(p));
57 }
58
// Some of the factory methods create buffers using fastMalloc.
// We must ensure that all allocations of StringImpl are allocated using
// fastMalloc so that we don't have mismatched frees. We accomplish
// this by overriding the new and delete operators.
operator new(size_t size,void * address)63 void* StringImpl::operator new(size_t size, void* address)
64 {
65 if (address)
66 return address; // Allocating using an internal buffer
67 return fastMalloc(size);
68 }
69
operator new(size_t size)70 void* StringImpl::operator new(size_t size)
71 {
72 return fastMalloc(size);
73 }
74
operator delete(void * address)75 void StringImpl::operator delete(void* address)
76 {
77 fastFree(address);
78 }
79
80 // This constructor is used only to create the empty string.
StringImpl()81 StringImpl::StringImpl()
82 : m_length(0)
83 , m_data(0)
84 , m_hash(0)
85 , m_bufferIsInternal(false)
86 {
87 // Ensure that the hash is computed so that AtomicStringHash can call existingHash()
88 // with impunity. The empty string is special because it is never entered into
89 // AtomicString's HashKey, but still needs to compare correctly.
90 hash();
91 }
92
93 // This is one of the most common constructors, but it's also used for the copy()
94 // operation. Because of that, it's the one constructor that doesn't assert the
95 // length is non-zero, since we support copying the empty string.
StringImpl(const UChar * characters,unsigned length)96 inline StringImpl::StringImpl(const UChar* characters, unsigned length)
97 : m_length(length)
98 , m_hash(0)
99 , m_bufferIsInternal(false)
100 {
101 UChar* data = newUCharVector(length);
102 memcpy(data, characters, length * sizeof(UChar));
103 m_data = data;
104 }
105
// Builds a copy of `str` whose buffer holds one extra UChar set to 0 after
// the last character. m_length still counts only the real characters, and the
// HasTerminatingNullCharacter flag records the presence of the terminator.
// The source string's cached hash is carried over.
inline StringImpl::StringImpl(const StringImpl& str, WithTerminatingNullCharacter)
    : m_length(str.m_length)
    , m_hash(str.m_hash)
    , m_bufferIsInternal(false)
{
    m_sharedBufferAndFlags.setFlag(HasTerminatingNullCharacter);

    const unsigned sourceLength = str.m_length;
    UChar* terminatedCopy = newUCharVector(sourceLength + 1);
    memcpy(terminatedCopy, str.m_data, sourceLength * sizeof(UChar));
    terminatedCopy[sourceLength] = 0;
    m_data = terminatedCopy;
}
117
StringImpl(const char * characters,unsigned length)118 inline StringImpl::StringImpl(const char* characters, unsigned length)
119 : m_length(length)
120 , m_hash(0)
121 , m_bufferIsInternal(false)
122 {
123 ASSERT(characters);
124 ASSERT(length);
125
126 UChar* data = newUCharVector(length);
127 for (unsigned i = 0; i != length; ++i) {
128 unsigned char c = characters[i];
129 data[i] = c;
130 }
131 m_data = data;
132 }
133
StringImpl(UChar * characters,unsigned length,AdoptBuffer)134 inline StringImpl::StringImpl(UChar* characters, unsigned length, AdoptBuffer)
135 : m_length(length)
136 , m_data(characters)
137 , m_hash(0)
138 , m_bufferIsInternal(false)
139 {
140 ASSERT(characters);
141 ASSERT(length);
142 }
143
144 // This constructor is only for use by AtomicString.
StringImpl(const UChar * characters,unsigned length,unsigned hash)145 StringImpl::StringImpl(const UChar* characters, unsigned length, unsigned hash)
146 : m_length(length)
147 , m_hash(hash)
148 , m_bufferIsInternal(false)
149 {
150 ASSERT(hash);
151 ASSERT(characters);
152 ASSERT(length);
153
154 setInTable();
155 UChar* data = newUCharVector(length);
156 memcpy(data, characters, length * sizeof(UChar));
157 m_data = data;
158 }
159
160 // This constructor is only for use by AtomicString.
StringImpl(const char * characters,unsigned length,unsigned hash)161 StringImpl::StringImpl(const char* characters, unsigned length, unsigned hash)
162 : m_length(length)
163 , m_hash(hash)
164 , m_bufferIsInternal(false)
165 {
166 ASSERT(hash);
167 ASSERT(characters);
168 ASSERT(length);
169
170 setInTable();
171 UChar* data = newUCharVector(length);
172 for (unsigned i = 0; i != length; ++i) {
173 unsigned char c = characters[i];
174 data[i] = c;
175 }
176 m_data = data;
177 }
178
~StringImpl()179 StringImpl::~StringImpl()
180 {
181 if (inTable())
182 AtomicString::remove(this);
183 if (!m_bufferIsInternal) {
184 SharedUChar* sharedBuffer = m_sharedBufferAndFlags.get();
185 if (sharedBuffer)
186 sharedBuffer->deref();
187 else
188 deleteUCharVector(m_data);
189 }
190 }
191
empty()192 StringImpl* StringImpl::empty()
193 {
194 return threadGlobalData().emptyString();
195 }
196
containsOnlyWhitespace()197 bool StringImpl::containsOnlyWhitespace()
198 {
199 // FIXME: The definition of whitespace here includes a number of characters
200 // that are not whitespace from the point of view of RenderText; I wonder if
201 // that's a problem in practice.
202 for (unsigned i = 0; i < m_length; i++)
203 if (!isASCIISpace(m_data[i]))
204 return false;
205 return true;
206 }
207
substring(unsigned start,unsigned length)208 PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length)
209 {
210 if (start >= m_length)
211 return empty();
212 unsigned maxLength = m_length - start;
213 if (length >= maxLength) {
214 if (!start)
215 return this;
216 length = maxLength;
217 }
218 return create(m_data + start, length);
219 }
220
substringCopy(unsigned start,unsigned length)221 PassRefPtr<StringImpl> StringImpl::substringCopy(unsigned start, unsigned length)
222 {
223 start = min(start, m_length);
224 length = min(length, m_length - start);
225 if (!length)
226 return adoptRef(new StringImpl);
227 return create(m_data + start, length);
228 }
229
characterStartingAt(unsigned i)230 UChar32 StringImpl::characterStartingAt(unsigned i)
231 {
232 if (U16_IS_SINGLE(m_data[i]))
233 return m_data[i];
234 if (i + 1 < m_length && U16_IS_LEAD(m_data[i]) && U16_IS_TRAIL(m_data[i + 1]))
235 return U16_GET_SUPPLEMENTARY(m_data[i], m_data[i + 1]);
236 return 0;
237 }
238
isLower()239 bool StringImpl::isLower()
240 {
241 // Do a faster loop for the case where all the characters are ASCII.
242 bool allLower = true;
243 UChar ored = 0;
244 for (unsigned i = 0; i < m_length; i++) {
245 UChar c = m_data[i];
246 allLower = allLower && isASCIILower(c);
247 ored |= c;
248 }
249 if (!(ored & ~0x7F))
250 return allLower;
251
252 // Do a slower check for cases that include non-ASCII characters.
253 allLower = true;
254 unsigned i = 0;
255 while (i < m_length) {
256 UChar32 character;
257 U16_NEXT(m_data, i, m_length, character)
258 allLower = allLower && Unicode::isLower(character);
259 }
260 return allLower;
261 }
262
lower()263 PassRefPtr<StringImpl> StringImpl::lower()
264 {
265 UChar* data;
266 PassRefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
267 int32_t length = m_length;
268
269 // Do a faster loop for the case where all the characters are ASCII.
270 UChar ored = 0;
271 for (int i = 0; i < length; i++) {
272 UChar c = m_data[i];
273 ored |= c;
274 data[i] = toASCIILower(c);
275 }
276 if (!(ored & ~0x7F))
277 return newImpl;
278
279 // Do a slower implementation for cases that include non-ASCII characters.
280 bool error;
281 int32_t realLength = Unicode::toLower(data, length, m_data, m_length, &error);
282 if (!error && realLength == length)
283 return newImpl;
284 newImpl = createUninitialized(realLength, data);
285 Unicode::toLower(data, realLength, m_data, m_length, &error);
286 if (error)
287 return this;
288 return newImpl;
289 }
290
upper()291 PassRefPtr<StringImpl> StringImpl::upper()
292 {
293 UChar* data;
294 PassRefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
295 int32_t length = m_length;
296
297 // Do a faster loop for the case where all the characters are ASCII.
298 UChar ored = 0;
299 for (int i = 0; i < length; i++) {
300 UChar c = m_data[i];
301 ored |= c;
302 data[i] = toASCIIUpper(c);
303 }
304 if (!(ored & ~0x7F))
305 return newImpl;
306
307 // Do a slower implementation for cases that include non-ASCII characters.
308 bool error;
309 int32_t realLength = Unicode::toUpper(data, length, m_data, m_length, &error);
310 if (!error && realLength == length)
311 return newImpl;
312 newImpl = createUninitialized(realLength, data);
313 Unicode::toUpper(data, realLength, m_data, m_length, &error);
314 if (error)
315 return this;
316 return newImpl;
317 }
318
secure(UChar aChar)319 PassRefPtr<StringImpl> StringImpl::secure(UChar aChar)
320 {
321 UChar* data;
322 PassRefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
323 int32_t length = m_length;
324 for (int i = 0; i < length; ++i)
325 data[i] = aChar;
326 return newImpl;
327 }
328
foldCase()329 PassRefPtr<StringImpl> StringImpl::foldCase()
330 {
331 UChar* data;
332 PassRefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
333 int32_t length = m_length;
334
335 // Do a faster loop for the case where all the characters are ASCII.
336 UChar ored = 0;
337 for (int i = 0; i < length; i++) {
338 UChar c = m_data[i];
339 ored |= c;
340 data[i] = toASCIILower(c);
341 }
342 if (!(ored & ~0x7F))
343 return newImpl;
344
345 // Do a slower implementation for cases that include non-ASCII characters.
346 bool error;
347 int32_t realLength = Unicode::foldCase(data, length, m_data, m_length, &error);
348 if (!error && realLength == length)
349 return newImpl;
350 newImpl = createUninitialized(realLength, data);
351 Unicode::foldCase(data, realLength, m_data, m_length, &error);
352 if (error)
353 return this;
354 return newImpl;
355 }
356
stripWhiteSpace()357 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace()
358 {
359 if (!m_length)
360 return empty();
361
362 unsigned start = 0;
363 unsigned end = m_length - 1;
364
365 // skip white space from start
366 while (start <= end && isSpaceOrNewline(m_data[start]))
367 start++;
368
369 // only white space
370 if (start > end)
371 return empty();
372
373 // skip white space from end
374 while (end && isSpaceOrNewline(m_data[end]))
375 end--;
376
377 return create(m_data + start, end + 1 - start);
378 }
379
removeCharacters(CharacterMatchFunctionPtr findMatch)380 PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr findMatch)
381 {
382 const UChar* from = m_data;
383 const UChar* fromend = from + m_length;
384
385 // Assume the common case will not remove any characters
386 while (from != fromend && !findMatch(*from))
387 from++;
388 if (from == fromend)
389 return this;
390
391 StringBuffer data(m_length);
392 UChar* to = data.characters();
393 unsigned outc = from - m_data;
394
395 if (outc)
396 memcpy(to, m_data, outc * sizeof(UChar));
397
398 while (true) {
399 while (from != fromend && findMatch(*from))
400 from++;
401 while (from != fromend && !findMatch(*from))
402 to[outc++] = *from++;
403 if (from == fromend)
404 break;
405 }
406
407 data.shrink(outc);
408
409 return adopt(data);
410 }
411
simplifyWhiteSpace()412 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace()
413 {
414 StringBuffer data(m_length);
415
416 const UChar* from = m_data;
417 const UChar* fromend = from + m_length;
418 int outc = 0;
419
420 UChar* to = data.characters();
421
422 while (true) {
423 while (from != fromend && isSpaceOrNewline(*from))
424 from++;
425 while (from != fromend && !isSpaceOrNewline(*from))
426 to[outc++] = *from++;
427 if (from != fromend)
428 to[outc++] = ' ';
429 else
430 break;
431 }
432
433 if (outc > 0 && to[outc - 1] == ' ')
434 outc--;
435
436 data.shrink(outc);
437
438 return adopt(data);
439 }
440
capitalize(UChar previous)441 PassRefPtr<StringImpl> StringImpl::capitalize(UChar previous)
442 {
443 StringBuffer stringWithPrevious(m_length + 1);
444 stringWithPrevious[0] = previous == noBreakSpace ? ' ' : previous;
445 for (unsigned i = 1; i < m_length + 1; i++) {
446 // Replace   with a real space since ICU no longer treats   as a word separator.
447 if (m_data[i - 1] == noBreakSpace)
448 stringWithPrevious[i] = ' ';
449 else
450 stringWithPrevious[i] = m_data[i - 1];
451 }
452
453 TextBreakIterator* boundary = wordBreakIterator(stringWithPrevious.characters(), m_length + 1);
454 if (!boundary)
455 return this;
456
457 StringBuffer data(m_length);
458
459 int32_t endOfWord;
460 int32_t startOfWord = textBreakFirst(boundary);
461 for (endOfWord = textBreakNext(boundary); endOfWord != TextBreakDone; startOfWord = endOfWord, endOfWord = textBreakNext(boundary)) {
462 if (startOfWord != 0) // Ignore first char of previous string
463 data[startOfWord - 1] = m_data[startOfWord - 1] == noBreakSpace ? noBreakSpace : toTitleCase(stringWithPrevious[startOfWord]);
464 for (int i = startOfWord + 1; i < endOfWord; i++)
465 data[i - 1] = m_data[i - 1];
466 }
467
468 return adopt(data);
469 }
470
toIntStrict(bool * ok,int base)471 int StringImpl::toIntStrict(bool* ok, int base)
472 {
473 return charactersToIntStrict(m_data, m_length, ok, base);
474 }
475
toUIntStrict(bool * ok,int base)476 unsigned StringImpl::toUIntStrict(bool* ok, int base)
477 {
478 return charactersToUIntStrict(m_data, m_length, ok, base);
479 }
480
toInt64Strict(bool * ok,int base)481 int64_t StringImpl::toInt64Strict(bool* ok, int base)
482 {
483 return charactersToInt64Strict(m_data, m_length, ok, base);
484 }
485
toUInt64Strict(bool * ok,int base)486 uint64_t StringImpl::toUInt64Strict(bool* ok, int base)
487 {
488 return charactersToUInt64Strict(m_data, m_length, ok, base);
489 }
490
toIntPtrStrict(bool * ok,int base)491 intptr_t StringImpl::toIntPtrStrict(bool* ok, int base)
492 {
493 return charactersToIntPtrStrict(m_data, m_length, ok, base);
494 }
495
toInt(bool * ok)496 int StringImpl::toInt(bool* ok)
497 {
498 return charactersToInt(m_data, m_length, ok);
499 }
500
toUInt(bool * ok)501 unsigned StringImpl::toUInt(bool* ok)
502 {
503 return charactersToUInt(m_data, m_length, ok);
504 }
505
toInt64(bool * ok)506 int64_t StringImpl::toInt64(bool* ok)
507 {
508 return charactersToInt64(m_data, m_length, ok);
509 }
510
toUInt64(bool * ok)511 uint64_t StringImpl::toUInt64(bool* ok)
512 {
513 return charactersToUInt64(m_data, m_length, ok);
514 }
515
toIntPtr(bool * ok)516 intptr_t StringImpl::toIntPtr(bool* ok)
517 {
518 return charactersToIntPtr(m_data, m_length, ok);
519 }
520
toDouble(bool * ok)521 double StringImpl::toDouble(bool* ok)
522 {
523 return charactersToDouble(m_data, m_length, ok);
524 }
525
toFloat(bool * ok)526 float StringImpl::toFloat(bool* ok)
527 {
528 return charactersToFloat(m_data, m_length, ok);
529 }
530
equal(const UChar * a,const char * b,int length)531 static bool equal(const UChar* a, const char* b, int length)
532 {
533 ASSERT(length >= 0);
534 while (length--) {
535 unsigned char bc = *b++;
536 if (*a++ != bc)
537 return false;
538 }
539 return true;
540 }
541
equalIgnoringCase(const UChar * a,const char * b,unsigned length)542 bool equalIgnoringCase(const UChar* a, const char* b, unsigned length)
543 {
544 while (length--) {
545 unsigned char bc = *b++;
546 if (foldCase(*a++) != foldCase(bc))
547 return false;
548 }
549 return true;
550 }
551
equalIgnoringCase(const UChar * a,const UChar * b,int length)552 static inline bool equalIgnoringCase(const UChar* a, const UChar* b, int length)
553 {
554 ASSERT(length >= 0);
555 return umemcasecmp(a, b, length) == 0;
556 }
557
find(const char * chs,int index,bool caseSensitive)558 int StringImpl::find(const char* chs, int index, bool caseSensitive)
559 {
560 if (!chs || index < 0)
561 return -1;
562
563 int chsLength = strlen(chs);
564 int n = m_length - index;
565 if (n < 0)
566 return -1;
567 n -= chsLength - 1;
568 if (n <= 0)
569 return -1;
570
571 const char* chsPlusOne = chs + 1;
572 int chsLengthMinusOne = chsLength - 1;
573
574 const UChar* ptr = m_data + index - 1;
575 if (caseSensitive) {
576 UChar c = *chs;
577 do {
578 if (*++ptr == c && equal(ptr + 1, chsPlusOne, chsLengthMinusOne))
579 return m_length - chsLength - n + 1;
580 } while (--n);
581 } else {
582 UChar lc = Unicode::foldCase(*chs);
583 do {
584 if (Unicode::foldCase(*++ptr) == lc && equalIgnoringCase(ptr + 1, chsPlusOne, chsLengthMinusOne))
585 return m_length - chsLength - n + 1;
586 } while (--n);
587 }
588
589 return -1;
590 }
591
find(UChar c,int start)592 int StringImpl::find(UChar c, int start)
593 {
594 return WebCore::find(m_data, m_length, c, start);
595 }
596
find(CharacterMatchFunctionPtr matchFunction,int start)597 int StringImpl::find(CharacterMatchFunctionPtr matchFunction, int start)
598 {
599 return WebCore::find(m_data, m_length, matchFunction, start);
600 }
601
find(StringImpl * str,int index,bool caseSensitive)602 int StringImpl::find(StringImpl* str, int index, bool caseSensitive)
603 {
604 /*
605 We use a simple trick for efficiency's sake. Instead of
606 comparing strings, we compare the sum of str with that of
607 a part of this string. Only if that matches, we call memcmp
608 or ucstrnicmp.
609 */
610 ASSERT(str);
611 if (index < 0)
612 index += m_length;
613 int lstr = str->m_length;
614 int lthis = m_length - index;
615 if ((unsigned)lthis > m_length)
616 return -1;
617 int delta = lthis - lstr;
618 if (delta < 0)
619 return -1;
620
621 const UChar* uthis = m_data + index;
622 const UChar* ustr = str->m_data;
623 unsigned hthis = 0;
624 unsigned hstr = 0;
625 if (caseSensitive) {
626 for (int i = 0; i < lstr; i++) {
627 hthis += uthis[i];
628 hstr += ustr[i];
629 }
630 int i = 0;
631 while (1) {
632 if (hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(UChar)) == 0)
633 return index + i;
634 if (i == delta)
635 return -1;
636 hthis += uthis[i + lstr];
637 hthis -= uthis[i];
638 i++;
639 }
640 } else {
641 for (int i = 0; i < lstr; i++ ) {
642 hthis += toASCIILower(uthis[i]);
643 hstr += toASCIILower(ustr[i]);
644 }
645 int i = 0;
646 while (1) {
647 if (hthis == hstr && equalIgnoringCase(uthis + i, ustr, lstr))
648 return index + i;
649 if (i == delta)
650 return -1;
651 hthis += toASCIILower(uthis[i + lstr]);
652 hthis -= toASCIILower(uthis[i]);
653 i++;
654 }
655 }
656 }
657
reverseFind(UChar c,int index)658 int StringImpl::reverseFind(UChar c, int index)
659 {
660 return WebCore::reverseFind(m_data, m_length, c, index);
661 }
662
reverseFind(StringImpl * str,int index,bool caseSensitive)663 int StringImpl::reverseFind(StringImpl* str, int index, bool caseSensitive)
664 {
665 /*
666 See StringImpl::find() for explanations.
667 */
668 ASSERT(str);
669 int lthis = m_length;
670 if (index < 0)
671 index += lthis;
672
673 int lstr = str->m_length;
674 int delta = lthis - lstr;
675 if ( index < 0 || index > lthis || delta < 0 )
676 return -1;
677 if ( index > delta )
678 index = delta;
679
680 const UChar *uthis = m_data;
681 const UChar *ustr = str->m_data;
682 unsigned hthis = 0;
683 unsigned hstr = 0;
684 int i;
685 if (caseSensitive) {
686 for ( i = 0; i < lstr; i++ ) {
687 hthis += uthis[index + i];
688 hstr += ustr[i];
689 }
690 i = index;
691 while (1) {
692 if (hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(UChar)) == 0)
693 return i;
694 if (i == 0)
695 return -1;
696 i--;
697 hthis -= uthis[i + lstr];
698 hthis += uthis[i];
699 }
700 } else {
701 for (i = 0; i < lstr; i++) {
702 hthis += toASCIILower(uthis[index + i]);
703 hstr += toASCIILower(ustr[i]);
704 }
705 i = index;
706 while (1) {
707 if (hthis == hstr && equalIgnoringCase(uthis + i, ustr, lstr) )
708 return i;
709 if (i == 0)
710 return -1;
711 i--;
712 hthis -= toASCIILower(uthis[i + lstr]);
713 hthis += toASCIILower(uthis[i]);
714 }
715 }
716
717 // Should never get here.
718 return -1;
719 }
720
endsWith(StringImpl * m_data,bool caseSensitive)721 bool StringImpl::endsWith(StringImpl* m_data, bool caseSensitive)
722 {
723 ASSERT(m_data);
724 int start = m_length - m_data->m_length;
725 if (start >= 0)
726 return (find(m_data, start, caseSensitive) == start);
727 return false;
728 }
729
replace(UChar oldC,UChar newC)730 PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC)
731 {
732 if (oldC == newC)
733 return this;
734 unsigned i;
735 for (i = 0; i != m_length; ++i)
736 if (m_data[i] == oldC)
737 break;
738 if (i == m_length)
739 return this;
740
741 UChar* data;
742 PassRefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
743
744 for (i = 0; i != m_length; ++i) {
745 UChar ch = m_data[i];
746 if (ch == oldC)
747 ch = newC;
748 data[i] = ch;
749 }
750 return newImpl;
751 }
752
replace(unsigned position,unsigned lengthToReplace,StringImpl * str)753 PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToReplace, StringImpl* str)
754 {
755 position = min(position, length());
756 lengthToReplace = min(lengthToReplace, length() - position);
757 unsigned lengthToInsert = str ? str->length() : 0;
758 if (!lengthToReplace && !lengthToInsert)
759 return this;
760 UChar* data;
761 PassRefPtr<StringImpl> newImpl =
762 createUninitialized(length() - lengthToReplace + lengthToInsert, data);
763 memcpy(data, characters(), position * sizeof(UChar));
764 if (str)
765 memcpy(data + position, str->characters(), lengthToInsert * sizeof(UChar));
766 memcpy(data + position + lengthToInsert, characters() + position + lengthToReplace,
767 (length() - position - lengthToReplace) * sizeof(UChar));
768 return newImpl;
769 }
770
replace(UChar pattern,StringImpl * replacement)771 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacement)
772 {
773 if (!replacement)
774 return this;
775
776 int repStrLength = replacement->length();
777 int srcSegmentStart = 0;
778 int matchCount = 0;
779
780 // Count the matches
781 while ((srcSegmentStart = find(pattern, srcSegmentStart)) >= 0) {
782 ++matchCount;
783 ++srcSegmentStart;
784 }
785
786 // If we have 0 matches, we don't have to do any more work
787 if (!matchCount)
788 return this;
789
790 UChar* data;
791 PassRefPtr<StringImpl> newImpl =
792 createUninitialized(m_length - matchCount + (matchCount * repStrLength), data);
793
794 // Construct the new data
795 int srcSegmentEnd;
796 int srcSegmentLength;
797 srcSegmentStart = 0;
798 int dstOffset = 0;
799
800 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) >= 0) {
801 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
802 memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
803 dstOffset += srcSegmentLength;
804 memcpy(data + dstOffset, replacement->m_data, repStrLength * sizeof(UChar));
805 dstOffset += repStrLength;
806 srcSegmentStart = srcSegmentEnd + 1;
807 }
808
809 srcSegmentLength = m_length - srcSegmentStart;
810 memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
811
812 ASSERT(dstOffset + srcSegmentLength == static_cast<int>(newImpl->length()));
813
814 return newImpl;
815 }
816
replace(StringImpl * pattern,StringImpl * replacement)817 PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* replacement)
818 {
819 if (!pattern || !replacement)
820 return this;
821
822 int patternLength = pattern->length();
823 if (!patternLength)
824 return this;
825
826 int repStrLength = replacement->length();
827 int srcSegmentStart = 0;
828 int matchCount = 0;
829
830 // Count the matches
831 while ((srcSegmentStart = find(pattern, srcSegmentStart)) >= 0) {
832 ++matchCount;
833 srcSegmentStart += patternLength;
834 }
835
836 // If we have 0 matches, we don't have to do any more work
837 if (!matchCount)
838 return this;
839
840 UChar* data;
841 PassRefPtr<StringImpl> newImpl =
842 createUninitialized(m_length + matchCount * (repStrLength - patternLength), data);
843
844 // Construct the new data
845 int srcSegmentEnd;
846 int srcSegmentLength;
847 srcSegmentStart = 0;
848 int dstOffset = 0;
849
850 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) >= 0) {
851 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
852 memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
853 dstOffset += srcSegmentLength;
854 memcpy(data + dstOffset, replacement->m_data, repStrLength * sizeof(UChar));
855 dstOffset += repStrLength;
856 srcSegmentStart = srcSegmentEnd + patternLength;
857 }
858
859 srcSegmentLength = m_length - srcSegmentStart;
860 memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
861
862 ASSERT(dstOffset + srcSegmentLength == static_cast<int>(newImpl->length()));
863
864 return newImpl;
865 }
866
equal(StringImpl * a,StringImpl * b)867 bool equal(StringImpl* a, StringImpl* b)
868 {
869 return StringHash::equal(a, b);
870 }
871
equal(StringImpl * a,const char * b)872 bool equal(StringImpl* a, const char* b)
873 {
874 if (!a)
875 return !b;
876 if (!b)
877 return !a;
878
879 unsigned length = a->length();
880 const UChar* as = a->characters();
881 for (unsigned i = 0; i != length; ++i) {
882 unsigned char bc = b[i];
883 if (!bc)
884 return false;
885 if (as[i] != bc)
886 return false;
887 }
888
889 return !b[length];
890 }
891
equalIgnoringCase(StringImpl * a,StringImpl * b)892 bool equalIgnoringCase(StringImpl* a, StringImpl* b)
893 {
894 return CaseFoldingHash::equal(a, b);
895 }
896
equalIgnoringCase(StringImpl * a,const char * b)897 bool equalIgnoringCase(StringImpl* a, const char* b)
898 {
899 if (!a)
900 return !b;
901 if (!b)
902 return !a;
903
904 unsigned length = a->length();
905 const UChar* as = a->characters();
906
907 // Do a faster loop for the case where all the characters are ASCII.
908 UChar ored = 0;
909 bool equal = true;
910 for (unsigned i = 0; i != length; ++i) {
911 char bc = b[i];
912 if (!bc)
913 return false;
914 UChar ac = as[i];
915 ored |= ac;
916 equal = equal && (toASCIILower(ac) == toASCIILower(bc));
917 }
918
919 // Do a slower implementation for cases that include non-ASCII characters.
920 if (ored & ~0x7F) {
921 equal = true;
922 for (unsigned i = 0; i != length; ++i) {
923 unsigned char bc = b[i];
924 equal = equal && (foldCase(as[i]) == foldCase(bc));
925 }
926 }
927
928 return equal && !b[length];
929 }
930
equalIgnoringNullity(StringImpl * a,StringImpl * b)931 bool equalIgnoringNullity(StringImpl* a, StringImpl* b)
932 {
933 if (StringHash::equal(a, b))
934 return true;
935 if (!a && b && !b->length())
936 return true;
937 if (!b && a && !a->length())
938 return true;
939
940 return false;
941 }
942
ascii()943 Vector<char> StringImpl::ascii()
944 {
945 Vector<char> buffer(m_length + 1);
946 for (unsigned i = 0; i != m_length; ++i) {
947 UChar c = m_data[i];
948 if ((c >= 0x20 && c < 0x7F) || c == 0x00)
949 buffer[i] = c;
950 else
951 buffer[i] = '?';
952 }
953 buffer[m_length] = '\0';
954 return buffer;
955 }
956
defaultWritingDirection()957 WTF::Unicode::Direction StringImpl::defaultWritingDirection()
958 {
959 for (unsigned i = 0; i < m_length; ++i) {
960 WTF::Unicode::Direction charDirection = WTF::Unicode::direction(m_data[i]);
961 if (charDirection == WTF::Unicode::LeftToRight)
962 return WTF::Unicode::LeftToRight;
963 if (charDirection == WTF::Unicode::RightToLeft || charDirection == WTF::Unicode::RightToLeftArabic)
964 return WTF::Unicode::RightToLeft;
965 }
966 return WTF::Unicode::LeftToRight;
967 }
968
969 // This is a hot function because it's used when parsing HTML.
createStrippingNullCharactersSlowCase(const UChar * characters,unsigned length)970 PassRefPtr<StringImpl> StringImpl::createStrippingNullCharactersSlowCase(const UChar* characters, unsigned length)
971 {
972 StringBuffer strippedCopy(length);
973 unsigned strippedLength = 0;
974 for (unsigned i = 0; i < length; i++) {
975 if (int c = characters[i])
976 strippedCopy[strippedLength++] = c;
977 }
978 ASSERT(strippedLength < length); // Only take the slow case when stripping.
979 strippedCopy.shrink(strippedLength);
980 return adopt(strippedCopy);
981 }
982
adopt(StringBuffer & buffer)983 PassRefPtr<StringImpl> StringImpl::adopt(StringBuffer& buffer)
984 {
985 unsigned length = buffer.length();
986 if (length == 0)
987 return empty();
988 return adoptRef(new StringImpl(buffer.release(), length, AdoptBuffer()));
989 }
990
adopt(Vector<UChar> & vector)991 PassRefPtr<StringImpl> StringImpl::adopt(Vector<UChar>& vector)
992 {
993 size_t size = vector.size();
994 if (size == 0)
995 return empty();
996 return adoptRef(new StringImpl(vector.releaseBuffer(), size, AdoptBuffer()));
997 }
998
createUninitialized(unsigned length,UChar * & data)999 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data)
1000 {
1001 if (!length) {
1002 data = 0;
1003 return empty();
1004 }
1005
1006 // Allocate a single buffer large enough to contain the StringImpl
1007 // struct as well as the data which it contains. This removes one
1008 // heap allocation from this call.
1009 size_t size = sizeof(StringImpl) + length * sizeof(UChar);
1010 char* buffer = static_cast<char*>(fastMalloc(size));
1011 data = reinterpret_cast<UChar*>(buffer + sizeof(StringImpl));
1012 StringImpl* string = new (buffer) StringImpl(data, length, AdoptBuffer());
1013 string->m_bufferIsInternal = true;
1014 return adoptRef(string);
1015 }
1016
create(const UChar * characters,unsigned length)1017 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length)
1018 {
1019 if (!characters || !length)
1020 return empty();
1021
1022 UChar* data;
1023 PassRefPtr<StringImpl> string = createUninitialized(length, data);
1024 memcpy(data, characters, length * sizeof(UChar));
1025 return string;
1026 }
1027
create(const char * characters,unsigned length)1028 PassRefPtr<StringImpl> StringImpl::create(const char* characters, unsigned length)
1029 {
1030 if (!characters || !length)
1031 return empty();
1032
1033 UChar* data;
1034 PassRefPtr<StringImpl> string = createUninitialized(length, data);
1035 for (unsigned i = 0; i != length; ++i) {
1036 unsigned char c = characters[i];
1037 data[i] = c;
1038 }
1039 return string;
1040 }
1041
create(const char * string)1042 PassRefPtr<StringImpl> StringImpl::create(const char* string)
1043 {
1044 if (!string)
1045 return empty();
1046 return create(string, strlen(string));
1047 }
1048
1049 #if USE(JSC)
create(const JSC::UString & str)1050 PassRefPtr<StringImpl> StringImpl::create(const JSC::UString& str)
1051 {
1052 SharedUChar* sharedBuffer = const_cast<JSC::UString*>(&str)->rep()->sharedBuffer();
1053 if (sharedBuffer) {
1054 PassRefPtr<StringImpl> impl = adoptRef(new StringImpl(const_cast<UChar*>(str.data()), str.size(), AdoptBuffer()));
1055 sharedBuffer->ref();
1056 impl->m_sharedBufferAndFlags.set(sharedBuffer);
1057 return impl;
1058 }
1059 return StringImpl::create(str.data(), str.size());
1060 }
1061
ustring()1062 JSC::UString StringImpl::ustring()
1063 {
1064 SharedUChar* sharedBuffer = this->sharedBuffer();
1065 if (sharedBuffer)
1066 return JSC::UString::Rep::create(const_cast<UChar*>(m_data), m_length, sharedBuffer);
1067
1068 return JSC::UString(m_data, m_length);
1069 }
1070 #endif
1071
createWithTerminatingNullCharacter(const StringImpl & string)1072 PassRefPtr<StringImpl> StringImpl::createWithTerminatingNullCharacter(const StringImpl& string)
1073 {
1074 return adoptRef(new StringImpl(string, WithTerminatingNullCharacter()));
1075 }
1076
copy()1077 PassRefPtr<StringImpl> StringImpl::copy()
1078 {
1079 // Using the constructor directly to make sure that per-thread empty string instance isn't returned.
1080 return adoptRef(new StringImpl(m_data, m_length));
1081 }
1082
sharedBuffer()1083 StringImpl::SharedUChar* StringImpl::sharedBuffer()
1084 {
1085 if (m_length < minLengthToShare || m_bufferIsInternal)
1086 return 0;
1087
1088 if (!m_sharedBufferAndFlags.get())
1089 m_sharedBufferAndFlags.set(SharedUChar::create(new OwnFastMallocPtr<UChar>(const_cast<UChar*>(m_data))).releaseRef());
1090 return m_sharedBufferAndFlags.get();
1091 }
1092
1093
1094 } // namespace WebCore
1095