1 /*
2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3 * (C) 1999 Antti Koivisto (koivisto@kde.org)
4 * (C) 2001 Dirk Mueller ( mueller@kde.org )
5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All rights reserved.
6 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Library General Public License for more details.
17 *
18 * You should have received a copy of the GNU Library General Public License
19 * along with this library; see the file COPYING.LIB. If not, write to
20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
22 *
23 */
24
25 #include "config.h"
26 #include "wtf/text/StringImpl.h"
27
28 #include "wtf/DynamicAnnotations.h"
29 #include "wtf/LeakAnnotations.h"
30 #include "wtf/MainThread.h"
31 #include "wtf/OwnPtr.h"
32 #include "wtf/PartitionAlloc.h"
33 #include "wtf/PassOwnPtr.h"
34 #include "wtf/StdLibExtras.h"
35 #include "wtf/WTF.h"
36 #include "wtf/text/AtomicString.h"
37 #include "wtf/text/StringBuffer.h"
38 #include "wtf/text/StringHash.h"
39 #include "wtf/unicode/CharacterNames.h"
40 #include <unicode/translit.h>
41 #include <unicode/unistr.h>
42
43 #ifdef STRING_STATS
44 #include "wtf/DataLog.h"
45 #include "wtf/HashMap.h"
46 #include "wtf/HashSet.h"
47 #include "wtf/ProcessID.h"
48 #include "wtf/RefCounted.h"
49 #include "wtf/ThreadingPrimitives.h"
50 #include <unistd.h>
51 #endif
52
53 using namespace std;
54
55 namespace WTF {
56
57 using namespace Unicode;
58
59 COMPILE_ASSERT(sizeof(StringImpl) == 3 * sizeof(int), StringImpl_should_stay_small);
60
61 #ifdef STRING_STATS
62
statsMutex()63 static Mutex& statsMutex()
64 {
65 DEFINE_STATIC_LOCAL(Mutex, mutex, ());
66 return mutex;
67 }
68
liveStrings()69 static HashSet<void*>& liveStrings()
70 {
71 // Notice that we can't use HashSet<StringImpl*> because then HashSet would dedup identical strings.
72 DEFINE_STATIC_LOCAL(HashSet<void*>, strings, ());
73 return strings;
74 }
75
addStringForStats(StringImpl * string)76 void addStringForStats(StringImpl* string)
77 {
78 MutexLocker locker(statsMutex());
79 liveStrings().add(string);
80 }
81
removeStringForStats(StringImpl * string)82 void removeStringForStats(StringImpl* string)
83 {
84 MutexLocker locker(statsMutex());
85 liveStrings().remove(string);
86 }
87
fillWithSnippet(const StringImpl * string,Vector<char> & snippet)88 static void fillWithSnippet(const StringImpl* string, Vector<char>& snippet)
89 {
90 const unsigned kMaxSnippetLength = 64;
91 snippet.clear();
92
93 size_t expectedLength = std::min(string->length(), kMaxSnippetLength);
94 if (expectedLength == kMaxSnippetLength)
95 expectedLength += 3; // For the "...".
96 ++expectedLength; // For the terminating '\0'.
97 snippet.reserveCapacity(expectedLength);
98
99 size_t i;
100 for (i = 0; i < string->length() && i < kMaxSnippetLength; ++i) {
101 UChar c = (*string)[i];
102 if (isASCIIPrintable(c))
103 snippet.append(c);
104 else
105 snippet.append('?');
106 }
107 if (i < string->length()) {
108 snippet.append('.');
109 snippet.append('.');
110 snippet.append('.');
111 }
112 snippet.append('\0');
113 }
114
isUnnecessarilyWide(const StringImpl * string)115 static bool isUnnecessarilyWide(const StringImpl* string)
116 {
117 if (string->is8Bit())
118 return false;
119 UChar c = 0;
120 for (unsigned i = 0; i < string->length(); ++i)
121 c |= (*string)[i] >> 8;
122 return !c;
123 }
124
125 class PerStringStats : public RefCounted<PerStringStats> {
126 public:
create()127 static PassRefPtr<PerStringStats> create()
128 {
129 return adoptRef(new PerStringStats);
130 }
131
add(const StringImpl * string)132 void add(const StringImpl* string)
133 {
134 ++m_numberOfCopies;
135 if (!m_length) {
136 m_length = string->length();
137 fillWithSnippet(string, m_snippet);
138 }
139 if (string->isAtomic())
140 ++m_numberOfAtomicCopies;
141 if (isUnnecessarilyWide(string))
142 m_unnecessarilyWide = true;
143 }
144
totalCharacters() const145 size_t totalCharacters() const
146 {
147 return m_numberOfCopies * m_length;
148 }
149
print()150 void print()
151 {
152 const char* status = "ok";
153 if (m_unnecessarilyWide)
154 status = "16";
155 dataLogF("%8u copies (%s) of length %8u %s\n", m_numberOfCopies, status, m_length, m_snippet.data());
156 }
157
158 bool m_unnecessarilyWide;
159 unsigned m_numberOfCopies;
160 unsigned m_length;
161 unsigned m_numberOfAtomicCopies;
162 Vector<char> m_snippet;
163
164 private:
PerStringStats()165 PerStringStats()
166 : m_unnecessarilyWide(false)
167 , m_numberOfCopies(0)
168 , m_length(0)
169 , m_numberOfAtomicCopies(0)
170 {
171 }
172 };
173
operator <(const RefPtr<PerStringStats> & a,const RefPtr<PerStringStats> & b)174 bool operator<(const RefPtr<PerStringStats>& a, const RefPtr<PerStringStats>& b)
175 {
176 if (a->m_unnecessarilyWide != b->m_unnecessarilyWide)
177 return !a->m_unnecessarilyWide && b->m_unnecessarilyWide;
178 if (a->totalCharacters() != b->totalCharacters())
179 return a->totalCharacters() < b->totalCharacters();
180 if (a->m_numberOfCopies != b->m_numberOfCopies)
181 return a->m_numberOfCopies < b->m_numberOfCopies;
182 if (a->m_length != b->m_length)
183 return a->m_length < b->m_length;
184 return a->m_numberOfAtomicCopies < b->m_numberOfAtomicCopies;
185 }
186
printLiveStringStats(void *)187 static void printLiveStringStats(void*)
188 {
189 MutexLocker locker(statsMutex());
190 HashSet<void*>& strings = liveStrings();
191
192 HashMap<StringImpl*, RefPtr<PerStringStats> > stats;
193 for (HashSet<void*>::iterator iter = strings.begin(); iter != strings.end(); ++iter) {
194 StringImpl* string = static_cast<StringImpl*>(*iter);
195 HashMap<StringImpl*, RefPtr<PerStringStats> >::iterator entry = stats.find(string);
196 RefPtr<PerStringStats> value = entry == stats.end() ? RefPtr<PerStringStats>(PerStringStats::create()) : entry->value;
197 value->add(string);
198 stats.set(string, value.release());
199 }
200
201 Vector<RefPtr<PerStringStats> > all;
202 for (HashMap<StringImpl*, RefPtr<PerStringStats> >::iterator iter = stats.begin(); iter != stats.end(); ++iter)
203 all.append(iter->value);
204
205 std::sort(all.begin(), all.end());
206 std::reverse(all.begin(), all.end());
207 for (size_t i = 0; i < 20 && i < all.size(); ++i)
208 all[i]->print();
209 }
210
211 StringStats StringImpl::m_stringStats;
212
213 unsigned StringStats::s_stringRemovesTillPrintStats = StringStats::s_printStringStatsFrequency;
214
removeString(StringImpl * string)215 void StringStats::removeString(StringImpl* string)
216 {
217 unsigned length = string->length();
218 --m_totalNumberStrings;
219
220 if (string->is8Bit()) {
221 --m_number8BitStrings;
222 m_total8BitData -= length;
223 } else {
224 --m_number16BitStrings;
225 m_total16BitData -= length;
226 }
227
228 if (!--s_stringRemovesTillPrintStats) {
229 s_stringRemovesTillPrintStats = s_printStringStatsFrequency;
230 printStats();
231 }
232 }
233
printStats()234 void StringStats::printStats()
235 {
236 dataLogF("String stats for process id %d:\n", getCurrentProcessID());
237
238 unsigned long long totalNumberCharacters = m_total8BitData + m_total16BitData;
239 double percent8Bit = m_totalNumberStrings ? ((double)m_number8BitStrings * 100) / (double)m_totalNumberStrings : 0.0;
240 double average8bitLength = m_number8BitStrings ? (double)m_total8BitData / (double)m_number8BitStrings : 0.0;
241 dataLogF("%8u (%5.2f%%) 8 bit %12llu chars %12llu bytes avg length %6.1f\n", m_number8BitStrings, percent8Bit, m_total8BitData, m_total8BitData, average8bitLength);
242
243 double percent16Bit = m_totalNumberStrings ? ((double)m_number16BitStrings * 100) / (double)m_totalNumberStrings : 0.0;
244 double average16bitLength = m_number16BitStrings ? (double)m_total16BitData / (double)m_number16BitStrings : 0.0;
245 dataLogF("%8u (%5.2f%%) 16 bit %12llu chars %12llu bytes avg length %6.1f\n", m_number16BitStrings, percent16Bit, m_total16BitData, m_total16BitData * 2, average16bitLength);
246
247 double averageLength = m_totalNumberStrings ? (double)totalNumberCharacters / (double)m_totalNumberStrings : 0.0;
248 unsigned long long totalDataBytes = m_total8BitData + m_total16BitData * 2;
249 dataLogF("%8u Total %12llu chars %12llu bytes avg length %6.1f\n", m_totalNumberStrings, totalNumberCharacters, totalDataBytes, averageLength);
250 unsigned long long totalSavedBytes = m_total8BitData;
251 double percentSavings = totalSavedBytes ? ((double)totalSavedBytes * 100) / (double)(totalDataBytes + totalSavedBytes) : 0.0;
252 dataLogF(" Total savings %12llu bytes (%5.2f%%)\n", totalSavedBytes, percentSavings);
253
254 unsigned totalOverhead = m_totalNumberStrings * sizeof(StringImpl);
255 double overheadPercent = (double)totalOverhead / (double)totalDataBytes * 100;
256 dataLogF(" StringImpl overheader: %8u (%5.2f%%)\n", totalOverhead, overheadPercent);
257
258 callOnMainThread(printLiveStringStats, 0);
259 }
260 #endif
261
operator new(size_t size)262 void* StringImpl::operator new(size_t size)
263 {
264 ASSERT(size == sizeof(StringImpl));
265 return partitionAllocGeneric(Partitions::getBufferPartition(), size);
266 }
267
operator delete(void * ptr)268 void StringImpl::operator delete(void* ptr)
269 {
270 partitionFreeGeneric(Partitions::getBufferPartition(), ptr);
271 }
272
~StringImpl()273 inline StringImpl::~StringImpl()
274 {
275 ASSERT(!isStatic());
276
277 STRING_STATS_REMOVE_STRING(this);
278
279 if (isAtomic())
280 AtomicString::remove(this);
281 }
282
destroyIfNotStatic()283 void StringImpl::destroyIfNotStatic()
284 {
285 if (!isStatic())
286 delete this;
287 }
288
createUninitialized(unsigned length,LChar * & data)289 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, LChar*& data)
290 {
291 if (!length) {
292 data = 0;
293 return empty();
294 }
295
296 // Allocate a single buffer large enough to contain the StringImpl
297 // struct as well as the data which it contains. This removes one
298 // heap allocation from this call.
299 StringImpl* string = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), allocationSize<LChar>(length)));
300
301 data = reinterpret_cast<LChar*>(string + 1);
302 return adoptRef(new (string) StringImpl(length, Force8BitConstructor));
303 }
304
createUninitialized(unsigned length,UChar * & data)305 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data)
306 {
307 if (!length) {
308 data = 0;
309 return empty();
310 }
311
312 // Allocate a single buffer large enough to contain the StringImpl
313 // struct as well as the data which it contains. This removes one
314 // heap allocation from this call.
315 StringImpl* string = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), allocationSize<UChar>(length)));
316
317 data = reinterpret_cast<UChar*>(string + 1);
318 return adoptRef(new (string) StringImpl(length));
319 }
320
reallocate(PassRefPtr<StringImpl> originalString,unsigned length)321 PassRefPtr<StringImpl> StringImpl::reallocate(PassRefPtr<StringImpl> originalString, unsigned length)
322 {
323 ASSERT(originalString->hasOneRef());
324
325 if (!length)
326 return empty();
327
328 bool is8Bit = originalString->is8Bit();
329 // Same as createUninitialized() except here we use realloc.
330 size_t size = is8Bit ? allocationSize<LChar>(length) : allocationSize<UChar>(length);
331 originalString->~StringImpl();
332 StringImpl* string = static_cast<StringImpl*>(partitionReallocGeneric(Partitions::getBufferPartition(), originalString.leakRef(), size));
333 if (is8Bit)
334 return adoptRef(new (string) StringImpl(length, Force8BitConstructor));
335 return adoptRef(new (string) StringImpl(length));
336 }
337
staticStrings()338 static StaticStringsTable& staticStrings()
339 {
340 DEFINE_STATIC_LOCAL(StaticStringsTable, staticStrings, ());
341 return staticStrings;
342 }
343
344 #if ENABLE(ASSERT)
345 static bool s_allowCreationOfStaticStrings = true;
346 #endif
347
allStaticStrings()348 const StaticStringsTable& StringImpl::allStaticStrings()
349 {
350 return staticStrings();
351 }
352
freezeStaticStrings()353 void StringImpl::freezeStaticStrings()
354 {
355 ASSERT(isMainThread());
356
357 #if ENABLE(ASSERT)
358 s_allowCreationOfStaticStrings = false;
359 #endif
360 }
361
362 unsigned StringImpl::m_highestStaticStringLength = 0;
363
createStatic(const char * string,unsigned length,unsigned hash)364 StringImpl* StringImpl::createStatic(const char* string, unsigned length, unsigned hash)
365 {
366 ASSERT(s_allowCreationOfStaticStrings);
367 ASSERT(string);
368 ASSERT(length);
369
370 StaticStringsTable::const_iterator it = staticStrings().find(hash);
371 if (it != staticStrings().end()) {
372 ASSERT(!memcmp(string, it->value + 1, length * sizeof(LChar)));
373 return it->value;
374 }
375
376 // Allocate a single buffer large enough to contain the StringImpl
377 // struct as well as the data which it contains. This removes one
378 // heap allocation from this call.
379 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(LChar)));
380 size_t size = sizeof(StringImpl) + length * sizeof(LChar);
381
382 WTF_ANNOTATE_SCOPED_MEMORY_LEAK;
383 StringImpl* impl = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), size));
384
385 LChar* data = reinterpret_cast<LChar*>(impl + 1);
386 impl = new (impl) StringImpl(length, hash, StaticString);
387 memcpy(data, string, length * sizeof(LChar));
388 #if ENABLE(ASSERT)
389 impl->assertHashIsCorrect();
390 #endif
391
392 ASSERT(isMainThread());
393 m_highestStaticStringLength = std::max(m_highestStaticStringLength, length);
394 staticStrings().add(hash, impl);
395 WTF_ANNOTATE_BENIGN_RACE(impl,
396 "Benign race on the reference counter of a static string created by StringImpl::createStatic");
397
398 return impl;
399 }
400
create(const UChar * characters,unsigned length)401 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length)
402 {
403 if (!characters || !length)
404 return empty();
405
406 UChar* data;
407 RefPtr<StringImpl> string = createUninitialized(length, data);
408 memcpy(data, characters, length * sizeof(UChar));
409 return string.release();
410 }
411
create(const LChar * characters,unsigned length)412 PassRefPtr<StringImpl> StringImpl::create(const LChar* characters, unsigned length)
413 {
414 if (!characters || !length)
415 return empty();
416
417 LChar* data;
418 RefPtr<StringImpl> string = createUninitialized(length, data);
419 memcpy(data, characters, length * sizeof(LChar));
420 return string.release();
421 }
422
create8BitIfPossible(const UChar * characters,unsigned length)423 PassRefPtr<StringImpl> StringImpl::create8BitIfPossible(const UChar* characters, unsigned length)
424 {
425 if (!characters || !length)
426 return empty();
427
428 LChar* data;
429 RefPtr<StringImpl> string = createUninitialized(length, data);
430
431 for (size_t i = 0; i < length; ++i) {
432 if (characters[i] & 0xff00)
433 return create(characters, length);
434 data[i] = static_cast<LChar>(characters[i]);
435 }
436
437 return string.release();
438 }
439
create(const LChar * string)440 PassRefPtr<StringImpl> StringImpl::create(const LChar* string)
441 {
442 if (!string)
443 return empty();
444 size_t length = strlen(reinterpret_cast<const char*>(string));
445 RELEASE_ASSERT(length <= numeric_limits<unsigned>::max());
446 return create(string, length);
447 }
448
containsOnlyWhitespace()449 bool StringImpl::containsOnlyWhitespace()
450 {
451 // FIXME: The definition of whitespace here includes a number of characters
452 // that are not whitespace from the point of view of RenderText; I wonder if
453 // that's a problem in practice.
454 if (is8Bit()) {
455 for (unsigned i = 0; i < m_length; ++i) {
456 UChar c = characters8()[i];
457 if (!isASCIISpace(c))
458 return false;
459 }
460
461 return true;
462 }
463
464 for (unsigned i = 0; i < m_length; ++i) {
465 UChar c = characters16()[i];
466 if (!isASCIISpace(c))
467 return false;
468 }
469 return true;
470 }
471
substring(unsigned start,unsigned length)472 PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length)
473 {
474 if (start >= m_length)
475 return empty();
476 unsigned maxLength = m_length - start;
477 if (length >= maxLength) {
478 if (!start)
479 return this;
480 length = maxLength;
481 }
482 if (is8Bit())
483 return create(characters8() + start, length);
484
485 return create(characters16() + start, length);
486 }
487
characterStartingAt(unsigned i)488 UChar32 StringImpl::characterStartingAt(unsigned i)
489 {
490 if (is8Bit())
491 return characters8()[i];
492 if (U16_IS_SINGLE(characters16()[i]))
493 return characters16()[i];
494 if (i + 1 < m_length && U16_IS_LEAD(characters16()[i]) && U16_IS_TRAIL(characters16()[i + 1]))
495 return U16_GET_SUPPLEMENTARY(characters16()[i], characters16()[i + 1]);
496 return 0;
497 }
498
lower()499 PassRefPtr<StringImpl> StringImpl::lower()
500 {
501 // Note: This is a hot function in the Dromaeo benchmark, specifically the
502 // no-op code path up through the first 'return' statement.
503
504 // First scan the string for uppercase and non-ASCII characters:
505 bool noUpper = true;
506 UChar ored = 0;
507 if (is8Bit()) {
508 const LChar* end = characters8() + m_length;
509 for (const LChar* chp = characters8(); chp != end; ++chp) {
510 if (UNLIKELY(isASCIIUpper(*chp)))
511 noUpper = false;
512 ored |= *chp;
513 }
514 // Nothing to do if the string is all ASCII with no uppercase.
515 if (noUpper && !(ored & ~0x7F))
516 return this;
517
518 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
519 int32_t length = m_length;
520
521 LChar* data8;
522 RefPtr<StringImpl> newImpl = createUninitialized(length, data8);
523
524 if (!(ored & ~0x7F)) {
525 for (int32_t i = 0; i < length; ++i)
526 data8[i] = toASCIILower(characters8()[i]);
527
528 return newImpl.release();
529 }
530
531 // Do a slower implementation for cases that include non-ASCII Latin-1 characters.
532 for (int32_t i = 0; i < length; ++i)
533 data8[i] = static_cast<LChar>(Unicode::toLower(characters8()[i]));
534
535 return newImpl.release();
536 }
537
538 const UChar* end = characters16() + m_length;
539 for (const UChar* chp = characters16(); chp != end; ++chp) {
540 if (UNLIKELY(isASCIIUpper(*chp)))
541 noUpper = false;
542 ored |= *chp;
543 }
544 // Nothing to do if the string is all ASCII with no uppercase.
545 if (noUpper && !(ored & ~0x7F))
546 return this;
547
548 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
549 int32_t length = m_length;
550
551 if (!(ored & ~0x7F)) {
552 UChar* data16;
553 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
554
555 for (int32_t i = 0; i < length; ++i) {
556 UChar c = characters16()[i];
557 data16[i] = toASCIILower(c);
558 }
559 return newImpl.release();
560 }
561
562 // Do a slower implementation for cases that include non-ASCII characters.
563 UChar* data16;
564 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
565
566 bool error;
567 int32_t realLength = Unicode::toLower(data16, length, characters16(), m_length, &error);
568 if (!error && realLength == length)
569 return newImpl.release();
570
571 newImpl = createUninitialized(realLength, data16);
572 Unicode::toLower(data16, realLength, characters16(), m_length, &error);
573 if (error)
574 return this;
575 return newImpl.release();
576 }
577
upper()578 PassRefPtr<StringImpl> StringImpl::upper()
579 {
580 // This function could be optimized for no-op cases the way lower() is,
581 // but in empirical testing, few actual calls to upper() are no-ops, so
582 // it wouldn't be worth the extra time for pre-scanning.
583
584 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
585 int32_t length = m_length;
586
587 if (is8Bit()) {
588 LChar* data8;
589 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8);
590
591 // Do a faster loop for the case where all the characters are ASCII.
592 LChar ored = 0;
593 for (int i = 0; i < length; ++i) {
594 LChar c = characters8()[i];
595 ored |= c;
596 data8[i] = toASCIIUpper(c);
597 }
598 if (!(ored & ~0x7F))
599 return newImpl.release();
600
601 // Do a slower implementation for cases that include non-ASCII Latin-1 characters.
602 int numberSharpSCharacters = 0;
603
604 // There are two special cases.
605 // 1. latin-1 characters when converted to upper case are 16 bit characters.
606 // 2. Lower case sharp-S converts to "SS" (two characters)
607 for (int32_t i = 0; i < length; ++i) {
608 LChar c = characters8()[i];
609 if (UNLIKELY(c == smallLetterSharpS))
610 ++numberSharpSCharacters;
611 UChar upper = Unicode::toUpper(c);
612 if (UNLIKELY(upper > 0xff)) {
613 // Since this upper-cased character does not fit in an 8-bit string, we need to take the 16-bit path.
614 goto upconvert;
615 }
616 data8[i] = static_cast<LChar>(upper);
617 }
618
619 if (!numberSharpSCharacters)
620 return newImpl.release();
621
622 // We have numberSSCharacters sharp-s characters, but none of the other special characters.
623 newImpl = createUninitialized(m_length + numberSharpSCharacters, data8);
624
625 LChar* dest = data8;
626
627 for (int32_t i = 0; i < length; ++i) {
628 LChar c = characters8()[i];
629 if (c == smallLetterSharpS) {
630 *dest++ = 'S';
631 *dest++ = 'S';
632 } else
633 *dest++ = static_cast<LChar>(Unicode::toUpper(c));
634 }
635
636 return newImpl.release();
637 }
638
639 upconvert:
640 RefPtr<StringImpl> upconverted = upconvertedString();
641 const UChar* source16 = upconverted->characters16();
642
643 UChar* data16;
644 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
645
646 // Do a faster loop for the case where all the characters are ASCII.
647 UChar ored = 0;
648 for (int i = 0; i < length; ++i) {
649 UChar c = source16[i];
650 ored |= c;
651 data16[i] = toASCIIUpper(c);
652 }
653 if (!(ored & ~0x7F))
654 return newImpl.release();
655
656 // Do a slower implementation for cases that include non-ASCII characters.
657 bool error;
658 int32_t realLength = Unicode::toUpper(data16, length, source16, m_length, &error);
659 if (!error && realLength == length)
660 return newImpl;
661 newImpl = createUninitialized(realLength, data16);
662 Unicode::toUpper(data16, realLength, source16, m_length, &error);
663 if (error)
664 return this;
665 return newImpl.release();
666 }
667
localeIdMatchesLang(const AtomicString & localeId,const char * lang)668 static bool inline localeIdMatchesLang(const AtomicString& localeId, const char* lang)
669 {
670 if (equalIgnoringCase(localeId, lang))
671 return true;
672 static char localeIdPrefix[4];
673 static const char delimeter[4] = "-_@";
674
675 size_t langLength = strlen(lang);
676 RELEASE_ASSERT(langLength >= 2 && langLength <= 3);
677 strncpy(localeIdPrefix, lang, langLength);
678 for (int i = 0; i < 3; ++i) {
679 localeIdPrefix[langLength] = delimeter[i];
680 // case-insensitive comparison
681 if (localeId.impl() && localeId.impl()->startsWith(localeIdPrefix, langLength + 1, false))
682 return true;
683 }
684 return false;
685 }
686
687 typedef int32_t (*icuCaseConverter)(UChar*, int32_t, const UChar*, int32_t, const char*, UErrorCode*);
688
caseConvert(const UChar * source16,size_t length,icuCaseConverter converter,const char * locale,StringImpl * originalString)689 static PassRefPtr<StringImpl> caseConvert(const UChar* source16, size_t length, icuCaseConverter converter, const char* locale, StringImpl* originalString)
690 {
691 UChar* data16;
692 int32_t targetLength = length;
693 RefPtr<StringImpl> output = StringImpl::createUninitialized(length, data16);
694 do {
695 UErrorCode status = U_ZERO_ERROR;
696 targetLength = converter(data16, targetLength, source16, length, locale, &status);
697 if (U_SUCCESS(status)) {
698 output->truncateAssumingIsolated(targetLength);
699 return output.release();
700 }
701 if (status != U_BUFFER_OVERFLOW_ERROR)
702 return originalString;
703 // Expand the buffer.
704 output = StringImpl::createUninitialized(targetLength, data16);
705 } while (true);
706 }
707
lower(const AtomicString & localeIdentifier)708 PassRefPtr<StringImpl> StringImpl::lower(const AtomicString& localeIdentifier)
709 {
710 // Use the more-optimized code path most of the time.
711 // Only Turkic (tr and az) languages and Lithuanian requires
712 // locale-specific lowercasing rules. Even though CLDR has el-Lower,
713 // it's identical to the locale-agnostic lowercasing. Context-dependent
714 // handling of Greek capital sigma is built into the common lowercasing
715 // function in ICU.
716 const char* localeForConversion = 0;
717 if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(localeIdentifier, "az"))
718 localeForConversion = "tr";
719 else if (localeIdMatchesLang(localeIdentifier, "lt"))
720 localeForConversion = "lt";
721 else
722 return lower();
723
724 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
725 CRASH();
726 int length = m_length;
727
728 RefPtr<StringImpl> upconverted = upconvertedString();
729 const UChar* source16 = upconverted->characters16();
730 return caseConvert(source16, length, u_strToLower, localeForConversion, this);
731 }
732
upper(const AtomicString & localeIdentifier)733 PassRefPtr<StringImpl> StringImpl::upper(const AtomicString& localeIdentifier)
734 {
735 // Use the more-optimized code path most of the time.
736 // Only Turkic (tr and az) languages and Greek require locale-specific
737 // lowercasing rules.
738 icu::UnicodeString transliteratorId;
739 const char* localeForConversion = 0;
740 if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(localeIdentifier, "az"))
741 localeForConversion = "tr";
742 else if (localeIdMatchesLang(localeIdentifier, "el"))
743 transliteratorId = UNICODE_STRING_SIMPLE("el-Upper");
744 else if (localeIdMatchesLang(localeIdentifier, "lt"))
745 localeForConversion = "lt";
746 else
747 return upper();
748
749 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
750 CRASH();
751 int length = m_length;
752
753 RefPtr<StringImpl> upconverted = upconvertedString();
754 const UChar* source16 = upconverted->characters16();
755
756 if (localeForConversion)
757 return caseConvert(source16, length, u_strToUpper, localeForConversion, this);
758
759 // TODO(jungshik): Cache transliterator if perf penaly warrants it for Greek.
760 UErrorCode status = U_ZERO_ERROR;
761 OwnPtr<icu::Transliterator> translit =
762 adoptPtr(icu::Transliterator::createInstance(transliteratorId, UTRANS_FORWARD, status));
763 if (U_FAILURE(status))
764 return upper();
765
766 // target will be copy-on-write.
767 icu::UnicodeString target(false, source16, length);
768 translit->transliterate(target);
769
770 return create(target.getBuffer(), target.length());
771 }
772
fill(UChar character)773 PassRefPtr<StringImpl> StringImpl::fill(UChar character)
774 {
775 if (!(character & ~0x7F)) {
776 LChar* data;
777 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
778 for (unsigned i = 0; i < m_length; ++i)
779 data[i] = character;
780 return newImpl.release();
781 }
782 UChar* data;
783 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
784 for (unsigned i = 0; i < m_length; ++i)
785 data[i] = character;
786 return newImpl.release();
787 }
788
foldCase()789 PassRefPtr<StringImpl> StringImpl::foldCase()
790 {
791 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
792 int32_t length = m_length;
793
794 if (is8Bit()) {
795 // Do a faster loop for the case where all the characters are ASCII.
796 LChar* data;
797 RefPtr <StringImpl>newImpl = createUninitialized(m_length, data);
798 LChar ored = 0;
799
800 for (int32_t i = 0; i < length; ++i) {
801 LChar c = characters8()[i];
802 data[i] = toASCIILower(c);
803 ored |= c;
804 }
805
806 if (!(ored & ~0x7F))
807 return newImpl.release();
808
809 // Do a slower implementation for cases that include non-ASCII Latin-1 characters.
810 for (int32_t i = 0; i < length; ++i)
811 data[i] = static_cast<LChar>(Unicode::toLower(characters8()[i]));
812
813 return newImpl.release();
814 }
815
816 // Do a faster loop for the case where all the characters are ASCII.
817 UChar* data;
818 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
819 UChar ored = 0;
820 for (int32_t i = 0; i < length; ++i) {
821 UChar c = characters16()[i];
822 ored |= c;
823 data[i] = toASCIILower(c);
824 }
825 if (!(ored & ~0x7F))
826 return newImpl.release();
827
828 // Do a slower implementation for cases that include non-ASCII characters.
829 bool error;
830 int32_t realLength = Unicode::foldCase(data, length, characters16(), m_length, &error);
831 if (!error && realLength == length)
832 return newImpl.release();
833 newImpl = createUninitialized(realLength, data);
834 Unicode::foldCase(data, realLength, characters16(), m_length, &error);
835 if (error)
836 return this;
837 return newImpl.release();
838 }
839
840 template <class UCharPredicate>
stripMatchedCharacters(UCharPredicate predicate)841 inline PassRefPtr<StringImpl> StringImpl::stripMatchedCharacters(UCharPredicate predicate)
842 {
843 if (!m_length)
844 return empty();
845
846 unsigned start = 0;
847 unsigned end = m_length - 1;
848
849 // skip white space from start
850 while (start <= end && predicate(is8Bit() ? characters8()[start] : characters16()[start]))
851 ++start;
852
853 // only white space
854 if (start > end)
855 return empty();
856
857 // skip white space from end
858 while (end && predicate(is8Bit() ? characters8()[end] : characters16()[end]))
859 --end;
860
861 if (!start && end == m_length - 1)
862 return this;
863 if (is8Bit())
864 return create(characters8() + start, end + 1 - start);
865 return create(characters16() + start, end + 1 - start);
866 }
867
868 class UCharPredicate {
869 public:
UCharPredicate(CharacterMatchFunctionPtr function)870 inline UCharPredicate(CharacterMatchFunctionPtr function): m_function(function) { }
871
operator ()(UChar ch) const872 inline bool operator()(UChar ch) const
873 {
874 return m_function(ch);
875 }
876
877 private:
878 const CharacterMatchFunctionPtr m_function;
879 };
880
881 class SpaceOrNewlinePredicate {
882 public:
operator ()(UChar ch) const883 inline bool operator()(UChar ch) const
884 {
885 return isSpaceOrNewline(ch);
886 }
887 };
888
stripWhiteSpace()889 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace()
890 {
891 return stripMatchedCharacters(SpaceOrNewlinePredicate());
892 }
893
stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace)894 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace)
895 {
896 return stripMatchedCharacters(UCharPredicate(isWhiteSpace));
897 }
898
899 template <typename CharType>
removeCharacters(const CharType * characters,CharacterMatchFunctionPtr findMatch)900 ALWAYS_INLINE PassRefPtr<StringImpl> StringImpl::removeCharacters(const CharType* characters, CharacterMatchFunctionPtr findMatch)
901 {
902 const CharType* from = characters;
903 const CharType* fromend = from + m_length;
904
905 // Assume the common case will not remove any characters
906 while (from != fromend && !findMatch(*from))
907 ++from;
908 if (from == fromend)
909 return this;
910
911 StringBuffer<CharType> data(m_length);
912 CharType* to = data.characters();
913 unsigned outc = from - characters;
914
915 if (outc)
916 memcpy(to, characters, outc * sizeof(CharType));
917
918 while (true) {
919 while (from != fromend && findMatch(*from))
920 ++from;
921 while (from != fromend && !findMatch(*from))
922 to[outc++] = *from++;
923 if (from == fromend)
924 break;
925 }
926
927 data.shrink(outc);
928
929 return data.release();
930 }
931
removeCharacters(CharacterMatchFunctionPtr findMatch)932 PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr findMatch)
933 {
934 if (is8Bit())
935 return removeCharacters(characters8(), findMatch);
936 return removeCharacters(characters16(), findMatch);
937 }
938
939 template <typename CharType, class UCharPredicate>
simplifyMatchedCharactersToSpace(UCharPredicate predicate,StripBehavior stripBehavior)940 inline PassRefPtr<StringImpl> StringImpl::simplifyMatchedCharactersToSpace(UCharPredicate predicate, StripBehavior stripBehavior)
941 {
942 StringBuffer<CharType> data(m_length);
943
944 const CharType* from = getCharacters<CharType>();
945 const CharType* fromend = from + m_length;
946 int outc = 0;
947 bool changedToSpace = false;
948
949 CharType* to = data.characters();
950
951 if (stripBehavior == StripExtraWhiteSpace) {
952 while (true) {
953 while (from != fromend && predicate(*from)) {
954 if (*from != ' ')
955 changedToSpace = true;
956 ++from;
957 }
958 while (from != fromend && !predicate(*from))
959 to[outc++] = *from++;
960 if (from != fromend)
961 to[outc++] = ' ';
962 else
963 break;
964 }
965
966 if (outc > 0 && to[outc - 1] == ' ')
967 --outc;
968 } else {
969 for (; from != fromend; ++from) {
970 if (predicate(*from)) {
971 if (*from != ' ')
972 changedToSpace = true;
973 to[outc++] = ' ';
974 } else {
975 to[outc++] = *from;
976 }
977 }
978 }
979
980 if (static_cast<unsigned>(outc) == m_length && !changedToSpace)
981 return this;
982
983 data.shrink(outc);
984
985 return data.release();
986 }
987
simplifyWhiteSpace(StripBehavior stripBehavior)988 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(StripBehavior stripBehavior)
989 {
990 if (is8Bit())
991 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(SpaceOrNewlinePredicate(), stripBehavior);
992 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(SpaceOrNewlinePredicate(), stripBehavior);
993 }
994
simplifyWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace,StripBehavior stripBehavior)995 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace, StripBehavior stripBehavior)
996 {
997 if (is8Bit())
998 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(UCharPredicate(isWhiteSpace), stripBehavior);
999 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(UCharPredicate(isWhiteSpace), stripBehavior);
1000 }
1001
toIntStrict(bool * ok,int base)1002 int StringImpl::toIntStrict(bool* ok, int base)
1003 {
1004 if (is8Bit())
1005 return charactersToIntStrict(characters8(), m_length, ok, base);
1006 return charactersToIntStrict(characters16(), m_length, ok, base);
1007 }
1008
toUIntStrict(bool * ok,int base)1009 unsigned StringImpl::toUIntStrict(bool* ok, int base)
1010 {
1011 if (is8Bit())
1012 return charactersToUIntStrict(characters8(), m_length, ok, base);
1013 return charactersToUIntStrict(characters16(), m_length, ok, base);
1014 }
1015
toInt64Strict(bool * ok,int base)1016 int64_t StringImpl::toInt64Strict(bool* ok, int base)
1017 {
1018 if (is8Bit())
1019 return charactersToInt64Strict(characters8(), m_length, ok, base);
1020 return charactersToInt64Strict(characters16(), m_length, ok, base);
1021 }
1022
toUInt64Strict(bool * ok,int base)1023 uint64_t StringImpl::toUInt64Strict(bool* ok, int base)
1024 {
1025 if (is8Bit())
1026 return charactersToUInt64Strict(characters8(), m_length, ok, base);
1027 return charactersToUInt64Strict(characters16(), m_length, ok, base);
1028 }
1029
toIntPtrStrict(bool * ok,int base)1030 intptr_t StringImpl::toIntPtrStrict(bool* ok, int base)
1031 {
1032 if (is8Bit())
1033 return charactersToIntPtrStrict(characters8(), m_length, ok, base);
1034 return charactersToIntPtrStrict(characters16(), m_length, ok, base);
1035 }
1036
toInt(bool * ok)1037 int StringImpl::toInt(bool* ok)
1038 {
1039 if (is8Bit())
1040 return charactersToInt(characters8(), m_length, ok);
1041 return charactersToInt(characters16(), m_length, ok);
1042 }
1043
toUInt(bool * ok)1044 unsigned StringImpl::toUInt(bool* ok)
1045 {
1046 if (is8Bit())
1047 return charactersToUInt(characters8(), m_length, ok);
1048 return charactersToUInt(characters16(), m_length, ok);
1049 }
1050
toInt64(bool * ok)1051 int64_t StringImpl::toInt64(bool* ok)
1052 {
1053 if (is8Bit())
1054 return charactersToInt64(characters8(), m_length, ok);
1055 return charactersToInt64(characters16(), m_length, ok);
1056 }
1057
toUInt64(bool * ok)1058 uint64_t StringImpl::toUInt64(bool* ok)
1059 {
1060 if (is8Bit())
1061 return charactersToUInt64(characters8(), m_length, ok);
1062 return charactersToUInt64(characters16(), m_length, ok);
1063 }
1064
toIntPtr(bool * ok)1065 intptr_t StringImpl::toIntPtr(bool* ok)
1066 {
1067 if (is8Bit())
1068 return charactersToIntPtr(characters8(), m_length, ok);
1069 return charactersToIntPtr(characters16(), m_length, ok);
1070 }
1071
toDouble(bool * ok)1072 double StringImpl::toDouble(bool* ok)
1073 {
1074 if (is8Bit())
1075 return charactersToDouble(characters8(), m_length, ok);
1076 return charactersToDouble(characters16(), m_length, ok);
1077 }
1078
toFloat(bool * ok)1079 float StringImpl::toFloat(bool* ok)
1080 {
1081 if (is8Bit())
1082 return charactersToFloat(characters8(), m_length, ok);
1083 return charactersToFloat(characters16(), m_length, ok);
1084 }
1085
equalIgnoringCase(const LChar * a,const LChar * b,unsigned length)1086 bool equalIgnoringCase(const LChar* a, const LChar* b, unsigned length)
1087 {
1088 while (length--) {
1089 LChar bc = *b++;
1090 if (foldCase(*a++) != foldCase(bc))
1091 return false;
1092 }
1093 return true;
1094 }
1095
equalIgnoringCase(const UChar * a,const LChar * b,unsigned length)1096 bool equalIgnoringCase(const UChar* a, const LChar* b, unsigned length)
1097 {
1098 while (length--) {
1099 LChar bc = *b++;
1100 if (foldCase(*a++) != foldCase(bc))
1101 return false;
1102 }
1103 return true;
1104 }
1105
find(CharacterMatchFunctionPtr matchFunction,unsigned start)1106 size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start)
1107 {
1108 if (is8Bit())
1109 return WTF::find(characters8(), m_length, matchFunction, start);
1110 return WTF::find(characters16(), m_length, matchFunction, start);
1111 }
1112
find(const LChar * matchString,unsigned index)1113 size_t StringImpl::find(const LChar* matchString, unsigned index)
1114 {
1115 // Check for null or empty string to match against
1116 if (!matchString)
1117 return kNotFound;
1118 size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString));
1119 RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max());
1120 unsigned matchLength = matchStringLength;
1121 if (!matchLength)
1122 return min(index, length());
1123
1124 // Optimization 1: fast case for strings of length 1.
1125 if (matchLength == 1)
1126 return WTF::find(characters16(), length(), *matchString, index);
1127
1128 // Check index & matchLength are in range.
1129 if (index > length())
1130 return kNotFound;
1131 unsigned searchLength = length() - index;
1132 if (matchLength > searchLength)
1133 return kNotFound;
1134 // delta is the number of additional times to test; delta == 0 means test only once.
1135 unsigned delta = searchLength - matchLength;
1136
1137 const UChar* searchCharacters = characters16() + index;
1138
1139 // Optimization 2: keep a running hash of the strings,
1140 // only call equal if the hashes match.
1141 unsigned searchHash = 0;
1142 unsigned matchHash = 0;
1143 for (unsigned i = 0; i < matchLength; ++i) {
1144 searchHash += searchCharacters[i];
1145 matchHash += matchString[i];
1146 }
1147
1148 unsigned i = 0;
1149 // keep looping until we match
1150 while (searchHash != matchHash || !equal(searchCharacters + i, matchString, matchLength)) {
1151 if (i == delta)
1152 return kNotFound;
1153 searchHash += searchCharacters[i + matchLength];
1154 searchHash -= searchCharacters[i];
1155 ++i;
1156 }
1157 return index + i;
1158 }
1159
1160 template<typename CharType>
findIgnoringCaseInternal(const CharType * searchCharacters,const LChar * matchString,unsigned index,unsigned searchLength,unsigned matchLength)1161 ALWAYS_INLINE size_t findIgnoringCaseInternal(const CharType* searchCharacters, const LChar* matchString, unsigned index, unsigned searchLength, unsigned matchLength)
1162 {
1163 // delta is the number of additional times to test; delta == 0 means test only once.
1164 unsigned delta = searchLength - matchLength;
1165
1166 unsigned i = 0;
1167 while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) {
1168 if (i == delta)
1169 return kNotFound;
1170 ++i;
1171 }
1172 return index + i;
1173 }
1174
findIgnoringCase(const LChar * matchString,unsigned index)1175 size_t StringImpl::findIgnoringCase(const LChar* matchString, unsigned index)
1176 {
1177 // Check for null or empty string to match against
1178 if (!matchString)
1179 return kNotFound;
1180 size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString));
1181 RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max());
1182 unsigned matchLength = matchStringLength;
1183 if (!matchLength)
1184 return min(index, length());
1185
1186 // Check index & matchLength are in range.
1187 if (index > length())
1188 return kNotFound;
1189 unsigned searchLength = length() - index;
1190 if (matchLength > searchLength)
1191 return kNotFound;
1192
1193 if (is8Bit())
1194 return findIgnoringCaseInternal(characters8() + index, matchString, index, searchLength, matchLength);
1195 return findIgnoringCaseInternal(characters16() + index, matchString, index, searchLength, matchLength);
1196 }
1197
1198 template <typename SearchCharacterType, typename MatchCharacterType>
findInternal(const SearchCharacterType * searchCharacters,const MatchCharacterType * matchCharacters,unsigned index,unsigned searchLength,unsigned matchLength)1199 ALWAYS_INLINE static size_t findInternal(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLength, unsigned matchLength)
1200 {
1201 // Optimization: keep a running hash of the strings,
1202 // only call equal() if the hashes match.
1203
1204 // delta is the number of additional times to test; delta == 0 means test only once.
1205 unsigned delta = searchLength - matchLength;
1206
1207 unsigned searchHash = 0;
1208 unsigned matchHash = 0;
1209
1210 for (unsigned i = 0; i < matchLength; ++i) {
1211 searchHash += searchCharacters[i];
1212 matchHash += matchCharacters[i];
1213 }
1214
1215 unsigned i = 0;
1216 // keep looping until we match
1217 while (searchHash != matchHash || !equal(searchCharacters + i, matchCharacters, matchLength)) {
1218 if (i == delta)
1219 return kNotFound;
1220 searchHash += searchCharacters[i + matchLength];
1221 searchHash -= searchCharacters[i];
1222 ++i;
1223 }
1224 return index + i;
1225 }
1226
find(StringImpl * matchString)1227 size_t StringImpl::find(StringImpl* matchString)
1228 {
1229 // Check for null string to match against
1230 if (UNLIKELY(!matchString))
1231 return kNotFound;
1232 unsigned matchLength = matchString->length();
1233
1234 // Optimization 1: fast case for strings of length 1.
1235 if (matchLength == 1) {
1236 if (is8Bit()) {
1237 if (matchString->is8Bit())
1238 return WTF::find(characters8(), length(), matchString->characters8()[0]);
1239 return WTF::find(characters8(), length(), matchString->characters16()[0]);
1240 }
1241 if (matchString->is8Bit())
1242 return WTF::find(characters16(), length(), matchString->characters8()[0]);
1243 return WTF::find(characters16(), length(), matchString->characters16()[0]);
1244 }
1245
1246 // Check matchLength is in range.
1247 if (matchLength > length())
1248 return kNotFound;
1249
1250 // Check for empty string to match against
1251 if (UNLIKELY(!matchLength))
1252 return 0;
1253
1254 if (is8Bit()) {
1255 if (matchString->is8Bit())
1256 return findInternal(characters8(), matchString->characters8(), 0, length(), matchLength);
1257 return findInternal(characters8(), matchString->characters16(), 0, length(), matchLength);
1258 }
1259
1260 if (matchString->is8Bit())
1261 return findInternal(characters16(), matchString->characters8(), 0, length(), matchLength);
1262
1263 return findInternal(characters16(), matchString->characters16(), 0, length(), matchLength);
1264 }
1265
find(StringImpl * matchString,unsigned index)1266 size_t StringImpl::find(StringImpl* matchString, unsigned index)
1267 {
1268 // Check for null or empty string to match against
1269 if (UNLIKELY(!matchString))
1270 return kNotFound;
1271
1272 unsigned matchLength = matchString->length();
1273
1274 // Optimization 1: fast case for strings of length 1.
1275 if (matchLength == 1) {
1276 if (is8Bit())
1277 return WTF::find(characters8(), length(), (*matchString)[0], index);
1278 return WTF::find(characters16(), length(), (*matchString)[0], index);
1279 }
1280
1281 if (UNLIKELY(!matchLength))
1282 return min(index, length());
1283
1284 // Check index & matchLength are in range.
1285 if (index > length())
1286 return kNotFound;
1287 unsigned searchLength = length() - index;
1288 if (matchLength > searchLength)
1289 return kNotFound;
1290
1291 if (is8Bit()) {
1292 if (matchString->is8Bit())
1293 return findInternal(characters8() + index, matchString->characters8(), index, searchLength, matchLength);
1294 return findInternal(characters8() + index, matchString->characters16(), index, searchLength, matchLength);
1295 }
1296
1297 if (matchString->is8Bit())
1298 return findInternal(characters16() + index, matchString->characters8(), index, searchLength, matchLength);
1299
1300 return findInternal(characters16() + index, matchString->characters16(), index, searchLength, matchLength);
1301 }
1302
1303 template <typename SearchCharacterType, typename MatchCharacterType>
findIgnoringCaseInner(const SearchCharacterType * searchCharacters,const MatchCharacterType * matchCharacters,unsigned index,unsigned searchLength,unsigned matchLength)1304 ALWAYS_INLINE static size_t findIgnoringCaseInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLength, unsigned matchLength)
1305 {
1306 // delta is the number of additional times to test; delta == 0 means test only once.
1307 unsigned delta = searchLength - matchLength;
1308
1309 unsigned i = 0;
1310 // keep looping until we match
1311 while (!equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength)) {
1312 if (i == delta)
1313 return kNotFound;
1314 ++i;
1315 }
1316 return index + i;
1317 }
1318
findIgnoringCase(StringImpl * matchString,unsigned index)1319 size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index)
1320 {
1321 // Check for null or empty string to match against
1322 if (!matchString)
1323 return kNotFound;
1324 unsigned matchLength = matchString->length();
1325 if (!matchLength)
1326 return min(index, length());
1327
1328 // Check index & matchLength are in range.
1329 if (index > length())
1330 return kNotFound;
1331 unsigned searchLength = length() - index;
1332 if (matchLength > searchLength)
1333 return kNotFound;
1334
1335 if (is8Bit()) {
1336 if (matchString->is8Bit())
1337 return findIgnoringCaseInner(characters8() + index, matchString->characters8(), index, searchLength, matchLength);
1338 return findIgnoringCaseInner(characters8() + index, matchString->characters16(), index, searchLength, matchLength);
1339 }
1340
1341 if (matchString->is8Bit())
1342 return findIgnoringCaseInner(characters16() + index, matchString->characters8(), index, searchLength, matchLength);
1343
1344 return findIgnoringCaseInner(characters16() + index, matchString->characters16(), index, searchLength, matchLength);
1345 }
1346
findNextLineStart(unsigned index)1347 size_t StringImpl::findNextLineStart(unsigned index)
1348 {
1349 if (is8Bit())
1350 return WTF::findNextLineStart(characters8(), m_length, index);
1351 return WTF::findNextLineStart(characters16(), m_length, index);
1352 }
1353
count(LChar c) const1354 size_t StringImpl::count(LChar c) const
1355 {
1356 int count = 0;
1357 if (is8Bit()) {
1358 for (size_t i = 0; i < m_length; ++i)
1359 count += characters8()[i] == c;
1360 } else {
1361 for (size_t i = 0; i < m_length; ++i)
1362 count += characters16()[i] == c;
1363 }
1364 return count;
1365 }
1366
reverseFind(UChar c,unsigned index)1367 size_t StringImpl::reverseFind(UChar c, unsigned index)
1368 {
1369 if (is8Bit())
1370 return WTF::reverseFind(characters8(), m_length, c, index);
1371 return WTF::reverseFind(characters16(), m_length, c, index);
1372 }
1373
1374 template <typename SearchCharacterType, typename MatchCharacterType>
reverseFindInner(const SearchCharacterType * searchCharacters,const MatchCharacterType * matchCharacters,unsigned index,unsigned length,unsigned matchLength)1375 ALWAYS_INLINE static size_t reverseFindInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned length, unsigned matchLength)
1376 {
1377 // Optimization: keep a running hash of the strings,
1378 // only call equal if the hashes match.
1379
1380 // delta is the number of additional times to test; delta == 0 means test only once.
1381 unsigned delta = min(index, length - matchLength);
1382
1383 unsigned searchHash = 0;
1384 unsigned matchHash = 0;
1385 for (unsigned i = 0; i < matchLength; ++i) {
1386 searchHash += searchCharacters[delta + i];
1387 matchHash += matchCharacters[i];
1388 }
1389
1390 // keep looping until we match
1391 while (searchHash != matchHash || !equal(searchCharacters + delta, matchCharacters, matchLength)) {
1392 if (!delta)
1393 return kNotFound;
1394 --delta;
1395 searchHash -= searchCharacters[delta + matchLength];
1396 searchHash += searchCharacters[delta];
1397 }
1398 return delta;
1399 }
1400
reverseFind(StringImpl * matchString,unsigned index)1401 size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index)
1402 {
1403 // Check for null or empty string to match against
1404 if (!matchString)
1405 return kNotFound;
1406 unsigned matchLength = matchString->length();
1407 unsigned ourLength = length();
1408 if (!matchLength)
1409 return min(index, ourLength);
1410
1411 // Optimization 1: fast case for strings of length 1.
1412 if (matchLength == 1) {
1413 if (is8Bit())
1414 return WTF::reverseFind(characters8(), ourLength, (*matchString)[0], index);
1415 return WTF::reverseFind(characters16(), ourLength, (*matchString)[0], index);
1416 }
1417
1418 // Check index & matchLength are in range.
1419 if (matchLength > ourLength)
1420 return kNotFound;
1421
1422 if (is8Bit()) {
1423 if (matchString->is8Bit())
1424 return reverseFindInner(characters8(), matchString->characters8(), index, ourLength, matchLength);
1425 return reverseFindInner(characters8(), matchString->characters16(), index, ourLength, matchLength);
1426 }
1427
1428 if (matchString->is8Bit())
1429 return reverseFindInner(characters16(), matchString->characters8(), index, ourLength, matchLength);
1430
1431 return reverseFindInner(characters16(), matchString->characters16(), index, ourLength, matchLength);
1432 }
1433
1434 template <typename SearchCharacterType, typename MatchCharacterType>
reverseFindIgnoringCaseInner(const SearchCharacterType * searchCharacters,const MatchCharacterType * matchCharacters,unsigned index,unsigned length,unsigned matchLength)1435 ALWAYS_INLINE static size_t reverseFindIgnoringCaseInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned length, unsigned matchLength)
1436 {
1437 // delta is the number of additional times to test; delta == 0 means test only once.
1438 unsigned delta = min(index, length - matchLength);
1439
1440 // keep looping until we match
1441 while (!equalIgnoringCase(searchCharacters + delta, matchCharacters, matchLength)) {
1442 if (!delta)
1443 return kNotFound;
1444 --delta;
1445 }
1446 return delta;
1447 }
1448
reverseFindIgnoringCase(StringImpl * matchString,unsigned index)1449 size_t StringImpl::reverseFindIgnoringCase(StringImpl* matchString, unsigned index)
1450 {
1451 // Check for null or empty string to match against
1452 if (!matchString)
1453 return kNotFound;
1454 unsigned matchLength = matchString->length();
1455 unsigned ourLength = length();
1456 if (!matchLength)
1457 return min(index, ourLength);
1458
1459 // Check index & matchLength are in range.
1460 if (matchLength > ourLength)
1461 return kNotFound;
1462
1463 if (is8Bit()) {
1464 if (matchString->is8Bit())
1465 return reverseFindIgnoringCaseInner(characters8(), matchString->characters8(), index, ourLength, matchLength);
1466 return reverseFindIgnoringCaseInner(characters8(), matchString->characters16(), index, ourLength, matchLength);
1467 }
1468
1469 if (matchString->is8Bit())
1470 return reverseFindIgnoringCaseInner(characters16(), matchString->characters8(), index, ourLength, matchLength);
1471
1472 return reverseFindIgnoringCaseInner(characters16(), matchString->characters16(), index, ourLength, matchLength);
1473 }
1474
equalInner(const StringImpl * stringImpl,unsigned startOffset,const char * matchString,unsigned matchLength,bool caseSensitive)1475 ALWAYS_INLINE static bool equalInner(const StringImpl* stringImpl, unsigned startOffset, const char* matchString, unsigned matchLength, bool caseSensitive)
1476 {
1477 ASSERT(stringImpl);
1478 ASSERT(matchLength <= stringImpl->length());
1479 ASSERT(startOffset + matchLength <= stringImpl->length());
1480
1481 if (caseSensitive) {
1482 if (stringImpl->is8Bit())
1483 return equal(stringImpl->characters8() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
1484 return equal(stringImpl->characters16() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
1485 }
1486 if (stringImpl->is8Bit())
1487 return equalIgnoringCase(stringImpl->characters8() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
1488 return equalIgnoringCase(stringImpl->characters16() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
1489 }
1490
startsWith(UChar character) const1491 bool StringImpl::startsWith(UChar character) const
1492 {
1493 return m_length && (*this)[0] == character;
1494 }
1495
startsWith(const char * matchString,unsigned matchLength,bool caseSensitive) const1496 bool StringImpl::startsWith(const char* matchString, unsigned matchLength, bool caseSensitive) const
1497 {
1498 ASSERT(matchLength);
1499 if (matchLength > length())
1500 return false;
1501 return equalInner(this, 0, matchString, matchLength, caseSensitive);
1502 }
1503
endsWith(StringImpl * matchString,bool caseSensitive)1504 bool StringImpl::endsWith(StringImpl* matchString, bool caseSensitive)
1505 {
1506 ASSERT(matchString);
1507 if (m_length >= matchString->m_length) {
1508 unsigned start = m_length - matchString->m_length;
1509 return (caseSensitive ? find(matchString, start) : findIgnoringCase(matchString, start)) == start;
1510 }
1511 return false;
1512 }
1513
endsWith(UChar character) const1514 bool StringImpl::endsWith(UChar character) const
1515 {
1516 return m_length && (*this)[m_length - 1] == character;
1517 }
1518
endsWith(const char * matchString,unsigned matchLength,bool caseSensitive) const1519 bool StringImpl::endsWith(const char* matchString, unsigned matchLength, bool caseSensitive) const
1520 {
1521 ASSERT(matchLength);
1522 if (matchLength > length())
1523 return false;
1524 unsigned startOffset = length() - matchLength;
1525 return equalInner(this, startOffset, matchString, matchLength, caseSensitive);
1526 }
1527
replace(UChar oldC,UChar newC)1528 PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC)
1529 {
1530 if (oldC == newC)
1531 return this;
1532
1533 if (find(oldC) == kNotFound)
1534 return this;
1535
1536 unsigned i;
1537 if (is8Bit()) {
1538 if (newC <= 0xff) {
1539 LChar* data;
1540 LChar oldChar = static_cast<LChar>(oldC);
1541 LChar newChar = static_cast<LChar>(newC);
1542
1543 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1544
1545 for (i = 0; i != m_length; ++i) {
1546 LChar ch = characters8()[i];
1547 if (ch == oldChar)
1548 ch = newChar;
1549 data[i] = ch;
1550 }
1551 return newImpl.release();
1552 }
1553
1554 // There is the possibility we need to up convert from 8 to 16 bit,
1555 // create a 16 bit string for the result.
1556 UChar* data;
1557 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1558
1559 for (i = 0; i != m_length; ++i) {
1560 UChar ch = characters8()[i];
1561 if (ch == oldC)
1562 ch = newC;
1563 data[i] = ch;
1564 }
1565
1566 return newImpl.release();
1567 }
1568
1569 UChar* data;
1570 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1571
1572 for (i = 0; i != m_length; ++i) {
1573 UChar ch = characters16()[i];
1574 if (ch == oldC)
1575 ch = newC;
1576 data[i] = ch;
1577 }
1578 return newImpl.release();
1579 }
1580
replace(unsigned position,unsigned lengthToReplace,StringImpl * str)1581 PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToReplace, StringImpl* str)
1582 {
1583 position = min(position, length());
1584 lengthToReplace = min(lengthToReplace, length() - position);
1585 unsigned lengthToInsert = str ? str->length() : 0;
1586 if (!lengthToReplace && !lengthToInsert)
1587 return this;
1588
1589 RELEASE_ASSERT((length() - lengthToReplace) < (numeric_limits<unsigned>::max() - lengthToInsert));
1590
1591 if (is8Bit() && (!str || str->is8Bit())) {
1592 LChar* data;
1593 RefPtr<StringImpl> newImpl =
1594 createUninitialized(length() - lengthToReplace + lengthToInsert, data);
1595 memcpy(data, characters8(), position * sizeof(LChar));
1596 if (str)
1597 memcpy(data + position, str->characters8(), lengthToInsert * sizeof(LChar));
1598 memcpy(data + position + lengthToInsert, characters8() + position + lengthToReplace,
1599 (length() - position - lengthToReplace) * sizeof(LChar));
1600 return newImpl.release();
1601 }
1602 UChar* data;
1603 RefPtr<StringImpl> newImpl =
1604 createUninitialized(length() - lengthToReplace + lengthToInsert, data);
1605 if (is8Bit())
1606 for (unsigned i = 0; i < position; ++i)
1607 data[i] = characters8()[i];
1608 else
1609 memcpy(data, characters16(), position * sizeof(UChar));
1610 if (str) {
1611 if (str->is8Bit())
1612 for (unsigned i = 0; i < lengthToInsert; ++i)
1613 data[i + position] = str->characters8()[i];
1614 else
1615 memcpy(data + position, str->characters16(), lengthToInsert * sizeof(UChar));
1616 }
1617 if (is8Bit()) {
1618 for (unsigned i = 0; i < length() - position - lengthToReplace; ++i)
1619 data[i + position + lengthToInsert] = characters8()[i + position + lengthToReplace];
1620 } else {
1621 memcpy(data + position + lengthToInsert, characters16() + position + lengthToReplace,
1622 (length() - position - lengthToReplace) * sizeof(UChar));
1623 }
1624 return newImpl.release();
1625 }
1626
replace(UChar pattern,StringImpl * replacement)1627 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacement)
1628 {
1629 if (!replacement)
1630 return this;
1631
1632 if (replacement->is8Bit())
1633 return replace(pattern, replacement->characters8(), replacement->length());
1634
1635 return replace(pattern, replacement->characters16(), replacement->length());
1636 }
1637
replace(UChar pattern,const LChar * replacement,unsigned repStrLength)1638 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const LChar* replacement, unsigned repStrLength)
1639 {
1640 ASSERT(replacement);
1641
1642 size_t srcSegmentStart = 0;
1643 unsigned matchCount = 0;
1644
1645 // Count the matches.
1646 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
1647 ++matchCount;
1648 ++srcSegmentStart;
1649 }
1650
1651 // If we have 0 matches then we don't have to do any more work.
1652 if (!matchCount)
1653 return this;
1654
1655 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength);
1656
1657 unsigned replaceSize = matchCount * repStrLength;
1658 unsigned newSize = m_length - matchCount;
1659 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize));
1660
1661 newSize += replaceSize;
1662
1663 // Construct the new data.
1664 size_t srcSegmentEnd;
1665 unsigned srcSegmentLength;
1666 srcSegmentStart = 0;
1667 unsigned dstOffset = 0;
1668
1669 if (is8Bit()) {
1670 LChar* data;
1671 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1672
1673 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1674 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1675 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1676 dstOffset += srcSegmentLength;
1677 memcpy(data + dstOffset, replacement, repStrLength * sizeof(LChar));
1678 dstOffset += repStrLength;
1679 srcSegmentStart = srcSegmentEnd + 1;
1680 }
1681
1682 srcSegmentLength = m_length - srcSegmentStart;
1683 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1684
1685 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1686
1687 return newImpl.release();
1688 }
1689
1690 UChar* data;
1691 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1692
1693 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1694 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1695 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1696
1697 dstOffset += srcSegmentLength;
1698 for (unsigned i = 0; i < repStrLength; ++i)
1699 data[i + dstOffset] = replacement[i];
1700
1701 dstOffset += repStrLength;
1702 srcSegmentStart = srcSegmentEnd + 1;
1703 }
1704
1705 srcSegmentLength = m_length - srcSegmentStart;
1706 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1707
1708 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1709
1710 return newImpl.release();
1711 }
1712
replace(UChar pattern,const UChar * replacement,unsigned repStrLength)1713 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const UChar* replacement, unsigned repStrLength)
1714 {
1715 ASSERT(replacement);
1716
1717 size_t srcSegmentStart = 0;
1718 unsigned matchCount = 0;
1719
1720 // Count the matches.
1721 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
1722 ++matchCount;
1723 ++srcSegmentStart;
1724 }
1725
1726 // If we have 0 matches then we don't have to do any more work.
1727 if (!matchCount)
1728 return this;
1729
1730 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength);
1731
1732 unsigned replaceSize = matchCount * repStrLength;
1733 unsigned newSize = m_length - matchCount;
1734 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize));
1735
1736 newSize += replaceSize;
1737
1738 // Construct the new data.
1739 size_t srcSegmentEnd;
1740 unsigned srcSegmentLength;
1741 srcSegmentStart = 0;
1742 unsigned dstOffset = 0;
1743
1744 if (is8Bit()) {
1745 UChar* data;
1746 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1747
1748 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1749 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1750 for (unsigned i = 0; i < srcSegmentLength; ++i)
1751 data[i + dstOffset] = characters8()[i + srcSegmentStart];
1752
1753 dstOffset += srcSegmentLength;
1754 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar));
1755
1756 dstOffset += repStrLength;
1757 srcSegmentStart = srcSegmentEnd + 1;
1758 }
1759
1760 srcSegmentLength = m_length - srcSegmentStart;
1761 for (unsigned i = 0; i < srcSegmentLength; ++i)
1762 data[i + dstOffset] = characters8()[i + srcSegmentStart];
1763
1764 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1765
1766 return newImpl.release();
1767 }
1768
1769 UChar* data;
1770 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1771
1772 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1773 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1774 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1775
1776 dstOffset += srcSegmentLength;
1777 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar));
1778
1779 dstOffset += repStrLength;
1780 srcSegmentStart = srcSegmentEnd + 1;
1781 }
1782
1783 srcSegmentLength = m_length - srcSegmentStart;
1784 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1785
1786 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1787
1788 return newImpl.release();
1789 }
1790
replace(StringImpl * pattern,StringImpl * replacement)1791 PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* replacement)
1792 {
1793 if (!pattern || !replacement)
1794 return this;
1795
1796 unsigned patternLength = pattern->length();
1797 if (!patternLength)
1798 return this;
1799
1800 unsigned repStrLength = replacement->length();
1801 size_t srcSegmentStart = 0;
1802 unsigned matchCount = 0;
1803
1804 // Count the matches.
1805 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
1806 ++matchCount;
1807 srcSegmentStart += patternLength;
1808 }
1809
1810 // If we have 0 matches, we don't have to do any more work
1811 if (!matchCount)
1812 return this;
1813
1814 unsigned newSize = m_length - matchCount * patternLength;
1815 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength);
1816
1817 RELEASE_ASSERT(newSize <= (numeric_limits<unsigned>::max() - matchCount * repStrLength));
1818
1819 newSize += matchCount * repStrLength;
1820
1821
1822 // Construct the new data
1823 size_t srcSegmentEnd;
1824 unsigned srcSegmentLength;
1825 srcSegmentStart = 0;
1826 unsigned dstOffset = 0;
1827 bool srcIs8Bit = is8Bit();
1828 bool replacementIs8Bit = replacement->is8Bit();
1829
1830 // There are 4 cases:
1831 // 1. This and replacement are both 8 bit.
1832 // 2. This and replacement are both 16 bit.
1833 // 3. This is 8 bit and replacement is 16 bit.
1834 // 4. This is 16 bit and replacement is 8 bit.
1835 if (srcIs8Bit && replacementIs8Bit) {
1836 // Case 1
1837 LChar* data;
1838 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1839 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1840 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1841 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1842 dstOffset += srcSegmentLength;
1843 memcpy(data + dstOffset, replacement->characters8(), repStrLength * sizeof(LChar));
1844 dstOffset += repStrLength;
1845 srcSegmentStart = srcSegmentEnd + patternLength;
1846 }
1847
1848 srcSegmentLength = m_length - srcSegmentStart;
1849 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1850
1851 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1852
1853 return newImpl.release();
1854 }
1855
1856 UChar* data;
1857 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1858 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1859 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1860 if (srcIs8Bit) {
1861 // Case 3.
1862 for (unsigned i = 0; i < srcSegmentLength; ++i)
1863 data[i + dstOffset] = characters8()[i + srcSegmentStart];
1864 } else {
1865 // Case 2 & 4.
1866 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1867 }
1868 dstOffset += srcSegmentLength;
1869 if (replacementIs8Bit) {
1870 // Cases 2 & 3.
1871 for (unsigned i = 0; i < repStrLength; ++i)
1872 data[i + dstOffset] = replacement->characters8()[i];
1873 } else {
1874 // Case 4
1875 memcpy(data + dstOffset, replacement->characters16(), repStrLength * sizeof(UChar));
1876 }
1877 dstOffset += repStrLength;
1878 srcSegmentStart = srcSegmentEnd + patternLength;
1879 }
1880
1881 srcSegmentLength = m_length - srcSegmentStart;
1882 if (srcIs8Bit) {
1883 // Case 3.
1884 for (unsigned i = 0; i < srcSegmentLength; ++i)
1885 data[i + dstOffset] = characters8()[i + srcSegmentStart];
1886 } else {
1887 // Cases 2 & 4.
1888 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1889 }
1890
1891 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1892
1893 return newImpl.release();
1894 }
1895
upconvertedString()1896 PassRefPtr<StringImpl> StringImpl::upconvertedString()
1897 {
1898 if (is8Bit())
1899 return String::make16BitFrom8BitSource(characters8(), m_length).releaseImpl();
1900 return this;
1901 }
1902
stringImplContentEqual(const StringImpl * a,const StringImpl * b)1903 static inline bool stringImplContentEqual(const StringImpl* a, const StringImpl* b)
1904 {
1905 unsigned aLength = a->length();
1906 unsigned bLength = b->length();
1907 if (aLength != bLength)
1908 return false;
1909
1910 if (a->is8Bit()) {
1911 if (b->is8Bit())
1912 return equal(a->characters8(), b->characters8(), aLength);
1913
1914 return equal(a->characters8(), b->characters16(), aLength);
1915 }
1916
1917 if (b->is8Bit())
1918 return equal(a->characters16(), b->characters8(), aLength);
1919
1920 return equal(a->characters16(), b->characters16(), aLength);
1921 }
1922
equal(const StringImpl * a,const StringImpl * b)1923 bool equal(const StringImpl* a, const StringImpl* b)
1924 {
1925 if (a == b)
1926 return true;
1927 if (!a || !b)
1928 return false;
1929 if (a->isAtomic() && b->isAtomic())
1930 return false;
1931
1932 return stringImplContentEqual(a, b);
1933 }
1934
1935 template <typename CharType>
equalInternal(const StringImpl * a,const CharType * b,unsigned length)1936 inline bool equalInternal(const StringImpl* a, const CharType* b, unsigned length)
1937 {
1938 if (!a)
1939 return !b;
1940 if (!b)
1941 return false;
1942
1943 if (a->length() != length)
1944 return false;
1945 if (a->is8Bit())
1946 return equal(a->characters8(), b, length);
1947 return equal(a->characters16(), b, length);
1948 }
1949
equal(const StringImpl * a,const LChar * b,unsigned length)1950 bool equal(const StringImpl* a, const LChar* b, unsigned length)
1951 {
1952 return equalInternal(a, b, length);
1953 }
1954
equal(const StringImpl * a,const UChar * b,unsigned length)1955 bool equal(const StringImpl* a, const UChar* b, unsigned length)
1956 {
1957 return equalInternal(a, b, length);
1958 }
1959
equal(const StringImpl * a,const LChar * b)1960 bool equal(const StringImpl* a, const LChar* b)
1961 {
1962 if (!a)
1963 return !b;
1964 if (!b)
1965 return !a;
1966
1967 unsigned length = a->length();
1968
1969 if (a->is8Bit()) {
1970 const LChar* aPtr = a->characters8();
1971 for (unsigned i = 0; i != length; ++i) {
1972 LChar bc = b[i];
1973 LChar ac = aPtr[i];
1974 if (!bc)
1975 return false;
1976 if (ac != bc)
1977 return false;
1978 }
1979
1980 return !b[length];
1981 }
1982
1983 const UChar* aPtr = a->characters16();
1984 for (unsigned i = 0; i != length; ++i) {
1985 LChar bc = b[i];
1986 if (!bc)
1987 return false;
1988 if (aPtr[i] != bc)
1989 return false;
1990 }
1991
1992 return !b[length];
1993 }
1994
equalNonNull(const StringImpl * a,const StringImpl * b)1995 bool equalNonNull(const StringImpl* a, const StringImpl* b)
1996 {
1997 ASSERT(a && b);
1998 if (a == b)
1999 return true;
2000
2001 return stringImplContentEqual(a, b);
2002 }
2003
equalIgnoringCase(const StringImpl * a,const StringImpl * b)2004 bool equalIgnoringCase(const StringImpl* a, const StringImpl* b)
2005 {
2006 if (a == b)
2007 return true;
2008 if (!a || !b)
2009 return false;
2010
2011 return CaseFoldingHash::equal(a, b);
2012 }
2013
equalIgnoringCase(const StringImpl * a,const LChar * b)2014 bool equalIgnoringCase(const StringImpl* a, const LChar* b)
2015 {
2016 if (!a)
2017 return !b;
2018 if (!b)
2019 return !a;
2020
2021 unsigned length = a->length();
2022
2023 // Do a faster loop for the case where all the characters are ASCII.
2024 UChar ored = 0;
2025 bool equal = true;
2026 if (a->is8Bit()) {
2027 const LChar* as = a->characters8();
2028 for (unsigned i = 0; i != length; ++i) {
2029 LChar bc = b[i];
2030 if (!bc)
2031 return false;
2032 UChar ac = as[i];
2033 ored |= ac;
2034 equal = equal && (toASCIILower(ac) == toASCIILower(bc));
2035 }
2036
2037 // Do a slower implementation for cases that include non-ASCII characters.
2038 if (ored & ~0x7F) {
2039 equal = true;
2040 for (unsigned i = 0; i != length; ++i)
2041 equal = equal && (foldCase(as[i]) == foldCase(b[i]));
2042 }
2043
2044 return equal && !b[length];
2045 }
2046
2047 const UChar* as = a->characters16();
2048 for (unsigned i = 0; i != length; ++i) {
2049 LChar bc = b[i];
2050 if (!bc)
2051 return false;
2052 UChar ac = as[i];
2053 ored |= ac;
2054 equal = equal && (toASCIILower(ac) == toASCIILower(bc));
2055 }
2056
2057 // Do a slower implementation for cases that include non-ASCII characters.
2058 if (ored & ~0x7F) {
2059 equal = true;
2060 for (unsigned i = 0; i != length; ++i) {
2061 equal = equal && (foldCase(as[i]) == foldCase(b[i]));
2062 }
2063 }
2064
2065 return equal && !b[length];
2066 }
2067
equalIgnoringCaseNonNull(const StringImpl * a,const StringImpl * b)2068 bool equalIgnoringCaseNonNull(const StringImpl* a, const StringImpl* b)
2069 {
2070 ASSERT(a && b);
2071 if (a == b)
2072 return true;
2073
2074 unsigned length = a->length();
2075 if (length != b->length())
2076 return false;
2077
2078 if (a->is8Bit()) {
2079 if (b->is8Bit())
2080 return equalIgnoringCase(a->characters8(), b->characters8(), length);
2081
2082 return equalIgnoringCase(b->characters16(), a->characters8(), length);
2083 }
2084
2085 if (b->is8Bit())
2086 return equalIgnoringCase(a->characters16(), b->characters8(), length);
2087
2088 return equalIgnoringCase(a->characters16(), b->characters16(), length);
2089 }
2090
equalIgnoringNullity(StringImpl * a,StringImpl * b)2091 bool equalIgnoringNullity(StringImpl* a, StringImpl* b)
2092 {
2093 if (!a && b && !b->length())
2094 return true;
2095 if (!b && a && !a->length())
2096 return true;
2097 return equal(a, b);
2098 }
2099
sizeInBytes() const2100 size_t StringImpl::sizeInBytes() const
2101 {
2102 size_t size = length();
2103 if (!is8Bit())
2104 size *= 2;
2105 return size + sizeof(*this);
2106 }
2107
2108 } // namespace WTF
2109