• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3  *           (C) 1999 Antti Koivisto (koivisto@kde.org)
4  *           (C) 2001 Dirk Mueller ( mueller@kde.org )
5  * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All rights reserved.
6  * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Library General Public
10  * License as published by the Free Software Foundation; either
11  * version 2 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Library General Public License for more details.
17  *
18  * You should have received a copy of the GNU Library General Public License
19  * along with this library; see the file COPYING.LIB.  If not, write to
20  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21  * Boston, MA 02110-1301, USA.
22  *
23  */
24 
25 #include "config.h"
26 #include "wtf/text/StringImpl.h"
27 
28 #include "wtf/DynamicAnnotations.h"
29 #include "wtf/LeakAnnotations.h"
30 #include "wtf/MainThread.h"
31 #include "wtf/PartitionAlloc.h"
32 #include "wtf/StdLibExtras.h"
33 #include "wtf/WTF.h"
34 #include "wtf/text/AtomicString.h"
35 #include "wtf/text/StringBuffer.h"
36 #include "wtf/text/StringHash.h"
37 #include "wtf/unicode/CharacterNames.h"
38 
39 #ifdef STRING_STATS
40 #include "wtf/DataLog.h"
41 #include "wtf/HashMap.h"
42 #include "wtf/HashSet.h"
43 #include "wtf/ProcessID.h"
44 #include "wtf/RefCounted.h"
45 #include "wtf/ThreadingPrimitives.h"
46 #include <unistd.h>
47 #endif
48 
49 using namespace std;
50 
51 namespace WTF {
52 
53 using namespace Unicode;
54 
55 COMPILE_ASSERT(sizeof(StringImpl) == 3 * sizeof(int), StringImpl_should_stay_small);
56 
57 #ifdef STRING_STATS
58 
statsMutex()59 static Mutex& statsMutex()
60 {
61     DEFINE_STATIC_LOCAL(Mutex, mutex, ());
62     return mutex;
63 }
64 
liveStrings()65 static HashSet<void*>& liveStrings()
66 {
67     // Notice that we can't use HashSet<StringImpl*> because then HashSet would dedup identical strings.
68     DEFINE_STATIC_LOCAL(HashSet<void*>, strings, ());
69     return strings;
70 }
71 
addStringForStats(StringImpl * string)72 void addStringForStats(StringImpl* string)
73 {
74     MutexLocker locker(statsMutex());
75     liveStrings().add(string);
76 }
77 
removeStringForStats(StringImpl * string)78 void removeStringForStats(StringImpl* string)
79 {
80     MutexLocker locker(statsMutex());
81     liveStrings().remove(string);
82 }
83 
fillWithSnippet(const StringImpl * string,Vector<char> & snippet)84 static void fillWithSnippet(const StringImpl* string, Vector<char>& snippet)
85 {
86     const unsigned kMaxSnippetLength = 64;
87     snippet.clear();
88 
89     size_t expectedLength = std::min(string->length(), kMaxSnippetLength);
90     if (expectedLength == kMaxSnippetLength)
91         expectedLength += 3; // For the "...".
92     ++expectedLength; // For the terminating '\0'.
93     snippet.reserveCapacity(expectedLength);
94 
95     size_t i;
96     for (i = 0; i < string->length() && i < kMaxSnippetLength; ++i) {
97         UChar c = (*string)[i];
98         if (isASCIIPrintable(c))
99             snippet.append(c);
100         else
101             snippet.append('?');
102     }
103     if (i < string->length()) {
104         snippet.append('.');
105         snippet.append('.');
106         snippet.append('.');
107     }
108     snippet.append('\0');
109 }
110 
isUnnecessarilyWide(const StringImpl * string)111 static bool isUnnecessarilyWide(const StringImpl* string)
112 {
113     if (string->is8Bit())
114         return false;
115     UChar c = 0;
116     for (unsigned i = 0; i < string->length(); ++i)
117         c |= (*string)[i] >> 8;
118     return !c;
119 }
120 
121 class PerStringStats : public RefCounted<PerStringStats> {
122 public:
create()123     static PassRefPtr<PerStringStats> create()
124     {
125         return adoptRef(new PerStringStats);
126     }
127 
add(const StringImpl * string)128     void add(const StringImpl* string)
129     {
130         ++m_numberOfCopies;
131         if (!m_length) {
132             m_length = string->length();
133             fillWithSnippet(string, m_snippet);
134         }
135         if (string->isAtomic())
136             ++m_numberOfAtomicCopies;
137         if (isUnnecessarilyWide(string))
138             m_unnecessarilyWide = true;
139     }
140 
totalCharacters() const141     size_t totalCharacters() const
142     {
143         return m_numberOfCopies * m_length;
144     }
145 
print()146     void print()
147     {
148         const char* status = "ok";
149         if (m_unnecessarilyWide)
150             status = "16";
151         dataLogF("%8u copies (%s) of length %8u %s\n", m_numberOfCopies, status, m_length, m_snippet.data());
152     }
153 
154     bool m_unnecessarilyWide;
155     unsigned m_numberOfCopies;
156     unsigned m_length;
157     unsigned m_numberOfAtomicCopies;
158     Vector<char> m_snippet;
159 
160 private:
PerStringStats()161     PerStringStats()
162         : m_unnecessarilyWide(false)
163         , m_numberOfCopies(0)
164         , m_length(0)
165         , m_numberOfAtomicCopies(0)
166     {
167     }
168 };
169 
operator <(const RefPtr<PerStringStats> & a,const RefPtr<PerStringStats> & b)170 bool operator<(const RefPtr<PerStringStats>& a, const RefPtr<PerStringStats>& b)
171 {
172     if (a->m_unnecessarilyWide != b->m_unnecessarilyWide)
173         return !a->m_unnecessarilyWide && b->m_unnecessarilyWide;
174     if (a->totalCharacters() != b->totalCharacters())
175         return a->totalCharacters() < b->totalCharacters();
176     if (a->m_numberOfCopies != b->m_numberOfCopies)
177         return a->m_numberOfCopies < b->m_numberOfCopies;
178     if (a->m_length != b->m_length)
179         return a->m_length < b->m_length;
180     return a->m_numberOfAtomicCopies < b->m_numberOfAtomicCopies;
181 }
182 
printLiveStringStats(void *)183 static void printLiveStringStats(void*)
184 {
185     MutexLocker locker(statsMutex());
186     HashSet<void*>& strings = liveStrings();
187 
188     HashMap<StringImpl*, RefPtr<PerStringStats> > stats;
189     for (HashSet<void*>::iterator iter = strings.begin(); iter != strings.end(); ++iter) {
190         StringImpl* string = static_cast<StringImpl*>(*iter);
191         HashMap<StringImpl*, RefPtr<PerStringStats> >::iterator entry = stats.find(string);
192         RefPtr<PerStringStats> value = entry == stats.end() ? RefPtr<PerStringStats>(PerStringStats::create()) : entry->value;
193         value->add(string);
194         stats.set(string, value.release());
195     }
196 
197     Vector<RefPtr<PerStringStats> > all;
198     for (HashMap<StringImpl*, RefPtr<PerStringStats> >::iterator iter = stats.begin(); iter != stats.end(); ++iter)
199         all.append(iter->value);
200 
201     std::sort(all.begin(), all.end());
202     std::reverse(all.begin(), all.end());
203     for (size_t i = 0; i < 20 && i < all.size(); ++i)
204         all[i]->print();
205 }
206 
207 StringStats StringImpl::m_stringStats;
208 
209 unsigned StringStats::s_stringRemovesTillPrintStats = StringStats::s_printStringStatsFrequency;
210 
removeString(StringImpl * string)211 void StringStats::removeString(StringImpl* string)
212 {
213     unsigned length = string->length();
214     --m_totalNumberStrings;
215 
216     if (string->is8Bit()) {
217         --m_number8BitStrings;
218         m_total8BitData -= length;
219     } else {
220         --m_number16BitStrings;
221         m_total16BitData -= length;
222     }
223 
224     if (!--s_stringRemovesTillPrintStats) {
225         s_stringRemovesTillPrintStats = s_printStringStatsFrequency;
226         printStats();
227     }
228 }
229 
printStats()230 void StringStats::printStats()
231 {
232     dataLogF("String stats for process id %d:\n", getCurrentProcessID());
233 
234     unsigned long long totalNumberCharacters = m_total8BitData + m_total16BitData;
235     double percent8Bit = m_totalNumberStrings ? ((double)m_number8BitStrings * 100) / (double)m_totalNumberStrings : 0.0;
236     double average8bitLength = m_number8BitStrings ? (double)m_total8BitData / (double)m_number8BitStrings : 0.0;
237     dataLogF("%8u (%5.2f%%) 8 bit        %12llu chars  %12llu bytes  avg length %6.1f\n", m_number8BitStrings, percent8Bit, m_total8BitData, m_total8BitData, average8bitLength);
238 
239     double percent16Bit = m_totalNumberStrings ? ((double)m_number16BitStrings * 100) / (double)m_totalNumberStrings : 0.0;
240     double average16bitLength = m_number16BitStrings ? (double)m_total16BitData / (double)m_number16BitStrings : 0.0;
241     dataLogF("%8u (%5.2f%%) 16 bit       %12llu chars  %12llu bytes  avg length %6.1f\n", m_number16BitStrings, percent16Bit, m_total16BitData, m_total16BitData * 2, average16bitLength);
242 
243     double averageLength = m_totalNumberStrings ? (double)totalNumberCharacters / (double)m_totalNumberStrings : 0.0;
244     unsigned long long totalDataBytes = m_total8BitData + m_total16BitData * 2;
245     dataLogF("%8u Total                 %12llu chars  %12llu bytes  avg length %6.1f\n", m_totalNumberStrings, totalNumberCharacters, totalDataBytes, averageLength);
246     unsigned long long totalSavedBytes = m_total8BitData;
247     double percentSavings = totalSavedBytes ? ((double)totalSavedBytes * 100) / (double)(totalDataBytes + totalSavedBytes) : 0.0;
248     dataLogF("         Total savings %12llu bytes (%5.2f%%)\n", totalSavedBytes, percentSavings);
249 
250     unsigned totalOverhead = m_totalNumberStrings * sizeof(StringImpl);
251     double overheadPercent = (double)totalOverhead / (double)totalDataBytes * 100;
252     dataLogF("         StringImpl overheader: %8u (%5.2f%%)\n", totalOverhead, overheadPercent);
253 
254     callOnMainThread(printLiveStringStats, 0);
255 }
256 #endif
257 
operator new(size_t size)258 void* StringImpl::operator new(size_t size)
259 {
260     ASSERT(size == sizeof(StringImpl));
261     return partitionAllocGeneric(Partitions::getBufferPartition(), size);
262 }
263 
operator delete(void * ptr)264 void StringImpl::operator delete(void* ptr)
265 {
266     partitionFreeGeneric(Partitions::getBufferPartition(), ptr);
267 }
268 
~StringImpl()269 inline StringImpl::~StringImpl()
270 {
271     ASSERT(!isStatic());
272 
273     STRING_STATS_REMOVE_STRING(this);
274 
275     if (isAtomic())
276         AtomicString::remove(this);
277 }
278 
destroyIfNotStatic()279 void StringImpl::destroyIfNotStatic()
280 {
281     if (!isStatic())
282         delete this;
283 }
284 
createUninitialized(unsigned length,LChar * & data)285 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, LChar*& data)
286 {
287     if (!length) {
288         data = 0;
289         return empty();
290     }
291 
292     // Allocate a single buffer large enough to contain the StringImpl
293     // struct as well as the data which it contains. This removes one
294     // heap allocation from this call.
295     RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(LChar)));
296     size_t size = sizeof(StringImpl) + length * sizeof(LChar);
297     StringImpl* string = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), size));
298 
299     data = reinterpret_cast<LChar*>(string + 1);
300     return adoptRef(new (string) StringImpl(length, Force8BitConstructor));
301 }
302 
createUninitialized(unsigned length,UChar * & data)303 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data)
304 {
305     if (!length) {
306         data = 0;
307         return empty();
308     }
309 
310     // Allocate a single buffer large enough to contain the StringImpl
311     // struct as well as the data which it contains. This removes one
312     // heap allocation from this call.
313     RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(UChar)));
314     size_t size = sizeof(StringImpl) + length * sizeof(UChar);
315     StringImpl* string = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), size));
316 
317     data = reinterpret_cast<UChar*>(string + 1);
318     return adoptRef(new (string) StringImpl(length));
319 }
320 
reallocate(PassRefPtr<StringImpl> originalString,unsigned length,LChar * & data)321 PassRefPtr<StringImpl> StringImpl::reallocate(PassRefPtr<StringImpl> originalString, unsigned length, LChar*& data)
322 {
323     ASSERT(originalString->is8Bit());
324     ASSERT(originalString->hasOneRef());
325 
326     if (!length) {
327         data = 0;
328         return empty();
329     }
330 
331     // Same as createUninitialized() except here we use realloc.
332     RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(LChar)));
333     size_t size = sizeof(StringImpl) + length * sizeof(LChar);
334     originalString->~StringImpl();
335     StringImpl* string = static_cast<StringImpl*>(partitionReallocGeneric(Partitions::getBufferPartition(), originalString.leakRef(), size));
336 
337     data = reinterpret_cast<LChar*>(string + 1);
338     return adoptRef(new (string) StringImpl(length, Force8BitConstructor));
339 }
340 
reallocate(PassRefPtr<StringImpl> originalString,unsigned length,UChar * & data)341 PassRefPtr<StringImpl> StringImpl::reallocate(PassRefPtr<StringImpl> originalString, unsigned length, UChar*& data)
342 {
343     ASSERT(!originalString->is8Bit());
344     ASSERT(originalString->hasOneRef());
345 
346     if (!length) {
347         data = 0;
348         return empty();
349     }
350 
351     // Same as createUninitialized() except here we use realloc.
352     RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(UChar)));
353     size_t size = sizeof(StringImpl) + length * sizeof(UChar);
354     originalString->~StringImpl();
355     StringImpl* string = static_cast<StringImpl*>(partitionReallocGeneric(Partitions::getBufferPartition(), originalString.leakRef(), size));
356 
357     data = reinterpret_cast<UChar*>(string + 1);
358     return adoptRef(new (string) StringImpl(length));
359 }
360 
staticStrings()361 static StaticStringsTable& staticStrings()
362 {
363     DEFINE_STATIC_LOCAL(StaticStringsTable, staticStrings, ());
364     return staticStrings;
365 }
366 
367 #ifndef NDEBUG
368 static bool s_allowCreationOfStaticStrings = true;
369 #endif
370 
allStaticStrings()371 const StaticStringsTable& StringImpl::allStaticStrings()
372 {
373     return staticStrings();
374 }
375 
freezeStaticStrings()376 void StringImpl::freezeStaticStrings()
377 {
378     ASSERT(isMainThread());
379 
380 #ifndef NDEBUG
381     s_allowCreationOfStaticStrings = false;
382 #endif
383 }
384 
385 unsigned StringImpl::m_highestStaticStringLength = 0;
386 
createStatic(const char * string,unsigned length,unsigned hash)387 StringImpl* StringImpl::createStatic(const char* string, unsigned length, unsigned hash)
388 {
389     ASSERT(s_allowCreationOfStaticStrings);
390     ASSERT(string);
391     ASSERT(length);
392 
393     StaticStringsTable::const_iterator it = staticStrings().find(hash);
394     if (it != staticStrings().end()) {
395         ASSERT(!memcmp(string, it->value + 1, length * sizeof(LChar)));
396         return it->value;
397     }
398 
399     // Allocate a single buffer large enough to contain the StringImpl
400     // struct as well as the data which it contains. This removes one
401     // heap allocation from this call.
402     RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(LChar)));
403     size_t size = sizeof(StringImpl) + length * sizeof(LChar);
404 
405     WTF_ANNOTATE_SCOPED_MEMORY_LEAK;
406     StringImpl* impl = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), size));
407 
408     LChar* data = reinterpret_cast<LChar*>(impl + 1);
409     impl = new (impl) StringImpl(length, hash, StaticString);
410     memcpy(data, string, length * sizeof(LChar));
411 #ifndef NDEBUG
412     impl->assertHashIsCorrect();
413 #endif
414 
415     ASSERT(isMainThread());
416     m_highestStaticStringLength = std::max(m_highestStaticStringLength, length);
417     staticStrings().add(hash, impl);
418     WTF_ANNOTATE_BENIGN_RACE(impl,
419         "Benign race on the reference counter of a static string created by StringImpl::createStatic");
420 
421     return impl;
422 }
423 
create(const UChar * characters,unsigned length)424 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length)
425 {
426     if (!characters || !length)
427         return empty();
428 
429     UChar* data;
430     RefPtr<StringImpl> string = createUninitialized(length, data);
431     memcpy(data, characters, length * sizeof(UChar));
432     return string.release();
433 }
434 
create(const LChar * characters,unsigned length)435 PassRefPtr<StringImpl> StringImpl::create(const LChar* characters, unsigned length)
436 {
437     if (!characters || !length)
438         return empty();
439 
440     LChar* data;
441     RefPtr<StringImpl> string = createUninitialized(length, data);
442     memcpy(data, characters, length * sizeof(LChar));
443     return string.release();
444 }
445 
create8BitIfPossible(const UChar * characters,unsigned length)446 PassRefPtr<StringImpl> StringImpl::create8BitIfPossible(const UChar* characters, unsigned length)
447 {
448     if (!characters || !length)
449         return empty();
450 
451     LChar* data;
452     RefPtr<StringImpl> string = createUninitialized(length, data);
453 
454     for (size_t i = 0; i < length; ++i) {
455         if (characters[i] & 0xff00)
456             return create(characters, length);
457         data[i] = static_cast<LChar>(characters[i]);
458     }
459 
460     return string.release();
461 }
462 
create(const LChar * string)463 PassRefPtr<StringImpl> StringImpl::create(const LChar* string)
464 {
465     if (!string)
466         return empty();
467     size_t length = strlen(reinterpret_cast<const char*>(string));
468     RELEASE_ASSERT(length <= numeric_limits<unsigned>::max());
469     return create(string, length);
470 }
471 
containsOnlyWhitespace()472 bool StringImpl::containsOnlyWhitespace()
473 {
474     // FIXME: The definition of whitespace here includes a number of characters
475     // that are not whitespace from the point of view of RenderText; I wonder if
476     // that's a problem in practice.
477     if (is8Bit()) {
478         for (unsigned i = 0; i < m_length; ++i) {
479             UChar c = characters8()[i];
480             if (!isASCIISpace(c))
481                 return false;
482         }
483 
484         return true;
485     }
486 
487     for (unsigned i = 0; i < m_length; ++i) {
488         UChar c = characters16()[i];
489         if (!isASCIISpace(c))
490             return false;
491     }
492     return true;
493 }
494 
substring(unsigned start,unsigned length)495 PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length)
496 {
497     if (start >= m_length)
498         return empty();
499     unsigned maxLength = m_length - start;
500     if (length >= maxLength) {
501         if (!start)
502             return this;
503         length = maxLength;
504     }
505     if (is8Bit())
506         return create(characters8() + start, length);
507 
508     return create(characters16() + start, length);
509 }
510 
characterStartingAt(unsigned i)511 UChar32 StringImpl::characterStartingAt(unsigned i)
512 {
513     if (is8Bit())
514         return characters8()[i];
515     if (U16_IS_SINGLE(characters16()[i]))
516         return characters16()[i];
517     if (i + 1 < m_length && U16_IS_LEAD(characters16()[i]) && U16_IS_TRAIL(characters16()[i + 1]))
518         return U16_GET_SUPPLEMENTARY(characters16()[i], characters16()[i + 1]);
519     return 0;
520 }
521 
lower()522 PassRefPtr<StringImpl> StringImpl::lower()
523 {
524     // Note: This is a hot function in the Dromaeo benchmark, specifically the
525     // no-op code path up through the first 'return' statement.
526 
527     // First scan the string for uppercase and non-ASCII characters:
528     bool noUpper = true;
529     UChar ored = 0;
530     if (is8Bit()) {
531         const LChar* end = characters8() + m_length;
532         for (const LChar* chp = characters8(); chp != end; ++chp) {
533             if (UNLIKELY(isASCIIUpper(*chp)))
534                 noUpper = false;
535             ored |= *chp;
536         }
537         // Nothing to do if the string is all ASCII with no uppercase.
538         if (noUpper && !(ored & ~0x7F))
539             return this;
540 
541         RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
542         int32_t length = m_length;
543 
544         LChar* data8;
545         RefPtr<StringImpl> newImpl = createUninitialized(length, data8);
546 
547         if (!(ored & ~0x7F)) {
548             for (int32_t i = 0; i < length; ++i)
549                 data8[i] = toASCIILower(characters8()[i]);
550 
551             return newImpl.release();
552         }
553 
554         // Do a slower implementation for cases that include non-ASCII Latin-1 characters.
555         for (int32_t i = 0; i < length; ++i)
556             data8[i] = static_cast<LChar>(Unicode::toLower(characters8()[i]));
557 
558         return newImpl.release();
559     }
560 
561     const UChar* end = characters16() + m_length;
562     for (const UChar* chp = characters16(); chp != end; ++chp) {
563         if (UNLIKELY(isASCIIUpper(*chp)))
564             noUpper = false;
565         ored |= *chp;
566     }
567     // Nothing to do if the string is all ASCII with no uppercase.
568     if (noUpper && !(ored & ~0x7F))
569         return this;
570 
571     RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
572     int32_t length = m_length;
573 
574     if (!(ored & ~0x7F)) {
575         UChar* data16;
576         RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
577 
578         for (int32_t i = 0; i < length; ++i) {
579             UChar c = characters16()[i];
580             data16[i] = toASCIILower(c);
581         }
582         return newImpl.release();
583     }
584 
585     // Do a slower implementation for cases that include non-ASCII characters.
586     UChar* data16;
587     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
588 
589     bool error;
590     int32_t realLength = Unicode::toLower(data16, length, characters16(), m_length, &error);
591     if (!error && realLength == length)
592         return newImpl.release();
593 
594     newImpl = createUninitialized(realLength, data16);
595     Unicode::toLower(data16, realLength, characters16(), m_length, &error);
596     if (error)
597         return this;
598     return newImpl.release();
599 }
600 
upper()601 PassRefPtr<StringImpl> StringImpl::upper()
602 {
603     // This function could be optimized for no-op cases the way lower() is,
604     // but in empirical testing, few actual calls to upper() are no-ops, so
605     // it wouldn't be worth the extra time for pre-scanning.
606 
607     RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
608     int32_t length = m_length;
609 
610     if (is8Bit()) {
611         LChar* data8;
612         RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8);
613 
614         // Do a faster loop for the case where all the characters are ASCII.
615         LChar ored = 0;
616         for (int i = 0; i < length; ++i) {
617             LChar c = characters8()[i];
618             ored |= c;
619             data8[i] = toASCIIUpper(c);
620         }
621         if (!(ored & ~0x7F))
622             return newImpl.release();
623 
624         // Do a slower implementation for cases that include non-ASCII Latin-1 characters.
625         int numberSharpSCharacters = 0;
626 
627         // There are two special cases.
628         //  1. latin-1 characters when converted to upper case are 16 bit characters.
629         //  2. Lower case sharp-S converts to "SS" (two characters)
630         for (int32_t i = 0; i < length; ++i) {
631             LChar c = characters8()[i];
632             if (UNLIKELY(c == smallLetterSharpS))
633                 ++numberSharpSCharacters;
634             UChar upper = Unicode::toUpper(c);
635             if (UNLIKELY(upper > 0xff)) {
636                 // Since this upper-cased character does not fit in an 8-bit string, we need to take the 16-bit path.
637                 goto upconvert;
638             }
639             data8[i] = static_cast<LChar>(upper);
640         }
641 
642         if (!numberSharpSCharacters)
643             return newImpl.release();
644 
645         // We have numberSSCharacters sharp-s characters, but none of the other special characters.
646         newImpl = createUninitialized(m_length + numberSharpSCharacters, data8);
647 
648         LChar* dest = data8;
649 
650         for (int32_t i = 0; i < length; ++i) {
651             LChar c = characters8()[i];
652             if (c == smallLetterSharpS) {
653                 *dest++ = 'S';
654                 *dest++ = 'S';
655             } else
656                 *dest++ = static_cast<LChar>(Unicode::toUpper(c));
657         }
658 
659         return newImpl.release();
660     }
661 
662 upconvert:
663     RefPtr<StringImpl> upconverted = upconvertedString();
664     const UChar* source16 = upconverted->characters16();
665 
666     UChar* data16;
667     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
668 
669     // Do a faster loop for the case where all the characters are ASCII.
670     UChar ored = 0;
671     for (int i = 0; i < length; ++i) {
672         UChar c = source16[i];
673         ored |= c;
674         data16[i] = toASCIIUpper(c);
675     }
676     if (!(ored & ~0x7F))
677         return newImpl.release();
678 
679     // Do a slower implementation for cases that include non-ASCII characters.
680     bool error;
681     int32_t realLength = Unicode::toUpper(data16, length, source16, m_length, &error);
682     if (!error && realLength == length)
683         return newImpl;
684     newImpl = createUninitialized(realLength, data16);
685     Unicode::toUpper(data16, realLength, source16, m_length, &error);
686     if (error)
687         return this;
688     return newImpl.release();
689 }
690 
lower(const AtomicString & localeIdentifier)691 PassRefPtr<StringImpl> StringImpl::lower(const AtomicString& localeIdentifier)
692 {
693     // Use the more-optimized code path most of the time.
694     // Note the assumption here that the only locale-specific lowercasing is
695     // in the "tr" and "az" locales.
696     // FIXME: Could possibly optimize further by looking for the specific sequences
697     // that have locale-specific lowercasing. There are only three of them.
698     if (!(localeIdentifier == "tr" || localeIdentifier == "az"))
699         return lower();
700 
701     if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
702         CRASH();
703     int length = m_length;
704 
705     // Below, we pass in the hardcoded locale "tr". Passing that is more efficient than
706     // allocating memory just to turn localeIdentifier into a C string, and there is no
707     // difference between the uppercasing for "tr" and "az" locales.
708     RefPtr<StringImpl> upconverted = upconvertedString();
709     const UChar* source16 = upconverted->characters16();
710     UChar* data16;
711     RefPtr<StringImpl> newString = createUninitialized(length, data16);
712     do {
713         UErrorCode status = U_ZERO_ERROR;
714         int realLength = u_strToLower(data16, length, source16, length, "tr", &status);
715         if (U_SUCCESS(status)) {
716             newString->truncateAssumingIsolated(realLength);
717             return newString.release();
718         }
719         if (status != U_BUFFER_OVERFLOW_ERROR)
720             return this;
721         // Expand the buffer.
722         newString = createUninitialized(realLength, data16);
723     } while (true);
724 }
725 
upper(const AtomicString & localeIdentifier)726 PassRefPtr<StringImpl> StringImpl::upper(const AtomicString& localeIdentifier)
727 {
728     // Use the more-optimized code path most of the time.
729     // Note the assumption here that the only locale-specific uppercasing is of the
730     // letter "i" in the "tr" and "az" locales.
731     if (!(localeIdentifier == "tr" || localeIdentifier == "az") || find('i') == kNotFound)
732         return upper();
733 
734     if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
735         CRASH();
736     int length = m_length;
737 
738     // Below, we pass in the hardcoded locale "tr". Passing that is more efficient than
739     // allocating memory just to turn localeIdentifier into a C string, and there is no
740     // difference between the uppercasing for "tr" and "az" locales.
741     RefPtr<StringImpl> upconverted = upconvertedString();
742     const UChar* source16 = upconverted->characters16();
743     UChar* data16;
744     RefPtr<StringImpl> newString = createUninitialized(length, data16);
745     do {
746         UErrorCode status = U_ZERO_ERROR;
747         int realLength = u_strToUpper(data16, length, source16, length, "tr", &status);
748         if (U_SUCCESS(status)) {
749             newString->truncateAssumingIsolated(realLength);
750             return newString.release();
751         }
752         if (status != U_BUFFER_OVERFLOW_ERROR)
753             return this;
754         // Expand the buffer.
755         newString = createUninitialized(realLength, data16);
756     } while (true);
757 }
758 
fill(UChar character)759 PassRefPtr<StringImpl> StringImpl::fill(UChar character)
760 {
761     if (!(character & ~0x7F)) {
762         LChar* data;
763         RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
764         for (unsigned i = 0; i < m_length; ++i)
765             data[i] = character;
766         return newImpl.release();
767     }
768     UChar* data;
769     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
770     for (unsigned i = 0; i < m_length; ++i)
771         data[i] = character;
772     return newImpl.release();
773 }
774 
foldCase()775 PassRefPtr<StringImpl> StringImpl::foldCase()
776 {
777     RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
778     int32_t length = m_length;
779 
780     if (is8Bit()) {
781         // Do a faster loop for the case where all the characters are ASCII.
782         LChar* data;
783         RefPtr <StringImpl>newImpl = createUninitialized(m_length, data);
784         LChar ored = 0;
785 
786         for (int32_t i = 0; i < length; ++i) {
787             LChar c = characters8()[i];
788             data[i] = toASCIILower(c);
789             ored |= c;
790         }
791 
792         if (!(ored & ~0x7F))
793             return newImpl.release();
794 
795         // Do a slower implementation for cases that include non-ASCII Latin-1 characters.
796         for (int32_t i = 0; i < length; ++i)
797             data[i] = static_cast<LChar>(Unicode::toLower(characters8()[i]));
798 
799         return newImpl.release();
800     }
801 
802     // Do a faster loop for the case where all the characters are ASCII.
803     UChar* data;
804     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
805     UChar ored = 0;
806     for (int32_t i = 0; i < length; ++i) {
807         UChar c = characters16()[i];
808         ored |= c;
809         data[i] = toASCIILower(c);
810     }
811     if (!(ored & ~0x7F))
812         return newImpl.release();
813 
814     // Do a slower implementation for cases that include non-ASCII characters.
815     bool error;
816     int32_t realLength = Unicode::foldCase(data, length, characters16(), m_length, &error);
817     if (!error && realLength == length)
818         return newImpl.release();
819     newImpl = createUninitialized(realLength, data);
820     Unicode::foldCase(data, realLength, characters16(), m_length, &error);
821     if (error)
822         return this;
823     return newImpl.release();
824 }
825 
826 template <class UCharPredicate>
stripMatchedCharacters(UCharPredicate predicate)827 inline PassRefPtr<StringImpl> StringImpl::stripMatchedCharacters(UCharPredicate predicate)
828 {
829     if (!m_length)
830         return empty();
831 
832     unsigned start = 0;
833     unsigned end = m_length - 1;
834 
835     // skip white space from start
836     while (start <= end && predicate(is8Bit() ? characters8()[start] : characters16()[start]))
837         ++start;
838 
839     // only white space
840     if (start > end)
841         return empty();
842 
843     // skip white space from end
844     while (end && predicate(is8Bit() ? characters8()[end] : characters16()[end]))
845         --end;
846 
847     if (!start && end == m_length - 1)
848         return this;
849     if (is8Bit())
850         return create(characters8() + start, end + 1 - start);
851     return create(characters16() + start, end + 1 - start);
852 }
853 
854 class UCharPredicate {
855 public:
UCharPredicate(CharacterMatchFunctionPtr function)856     inline UCharPredicate(CharacterMatchFunctionPtr function): m_function(function) { }
857 
operator ()(UChar ch) const858     inline bool operator()(UChar ch) const
859     {
860         return m_function(ch);
861     }
862 
863 private:
864     const CharacterMatchFunctionPtr m_function;
865 };
866 
867 class SpaceOrNewlinePredicate {
868 public:
operator ()(UChar ch) const869     inline bool operator()(UChar ch) const
870     {
871         return isSpaceOrNewline(ch);
872     }
873 };
874 
stripWhiteSpace()875 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace()
876 {
877     return stripMatchedCharacters(SpaceOrNewlinePredicate());
878 }
879 
stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace)880 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace)
881 {
882     return stripMatchedCharacters(UCharPredicate(isWhiteSpace));
883 }
884 
885 template <typename CharType>
removeCharacters(const CharType * characters,CharacterMatchFunctionPtr findMatch)886 ALWAYS_INLINE PassRefPtr<StringImpl> StringImpl::removeCharacters(const CharType* characters, CharacterMatchFunctionPtr findMatch)
887 {
888     const CharType* from = characters;
889     const CharType* fromend = from + m_length;
890 
891     // Assume the common case will not remove any characters
892     while (from != fromend && !findMatch(*from))
893         ++from;
894     if (from == fromend)
895         return this;
896 
897     StringBuffer<CharType> data(m_length);
898     CharType* to = data.characters();
899     unsigned outc = from - characters;
900 
901     if (outc)
902         memcpy(to, characters, outc * sizeof(CharType));
903 
904     while (true) {
905         while (from != fromend && findMatch(*from))
906             ++from;
907         while (from != fromend && !findMatch(*from))
908             to[outc++] = *from++;
909         if (from == fromend)
910             break;
911     }
912 
913     data.shrink(outc);
914 
915     return data.release();
916 }
917 
removeCharacters(CharacterMatchFunctionPtr findMatch)918 PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr findMatch)
919 {
920     if (is8Bit())
921         return removeCharacters(characters8(), findMatch);
922     return removeCharacters(characters16(), findMatch);
923 }
924 
925 template <typename CharType, class UCharPredicate>
simplifyMatchedCharactersToSpace(UCharPredicate predicate,StripBehavior stripBehavior)926 inline PassRefPtr<StringImpl> StringImpl::simplifyMatchedCharactersToSpace(UCharPredicate predicate, StripBehavior stripBehavior)
927 {
928     StringBuffer<CharType> data(m_length);
929 
930     const CharType* from = getCharacters<CharType>();
931     const CharType* fromend = from + m_length;
932     int outc = 0;
933     bool changedToSpace = false;
934 
935     CharType* to = data.characters();
936 
937     if (stripBehavior == StripExtraWhiteSpace) {
938         while (true) {
939             while (from != fromend && predicate(*from)) {
940                 if (*from != ' ')
941                     changedToSpace = true;
942                 ++from;
943             }
944             while (from != fromend && !predicate(*from))
945                 to[outc++] = *from++;
946             if (from != fromend)
947                 to[outc++] = ' ';
948             else
949                 break;
950         }
951 
952         if (outc > 0 && to[outc - 1] == ' ')
953             --outc;
954     } else {
955         for (; from != fromend; ++from) {
956             if (predicate(*from)) {
957                 if (*from != ' ')
958                     changedToSpace = true;
959                 to[outc++] = ' ';
960             } else {
961                 to[outc++] = *from;
962             }
963         }
964     }
965 
966     if (static_cast<unsigned>(outc) == m_length && !changedToSpace)
967         return this;
968 
969     data.shrink(outc);
970 
971     return data.release();
972 }
973 
simplifyWhiteSpace(StripBehavior stripBehavior)974 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(StripBehavior stripBehavior)
975 {
976     if (is8Bit())
977         return StringImpl::simplifyMatchedCharactersToSpace<LChar>(SpaceOrNewlinePredicate(), stripBehavior);
978     return StringImpl::simplifyMatchedCharactersToSpace<UChar>(SpaceOrNewlinePredicate(), stripBehavior);
979 }
980 
simplifyWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace,StripBehavior stripBehavior)981 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace, StripBehavior stripBehavior)
982 {
983     if (is8Bit())
984         return StringImpl::simplifyMatchedCharactersToSpace<LChar>(UCharPredicate(isWhiteSpace), stripBehavior);
985     return StringImpl::simplifyMatchedCharactersToSpace<UChar>(UCharPredicate(isWhiteSpace), stripBehavior);
986 }
987 
toIntStrict(bool * ok,int base)988 int StringImpl::toIntStrict(bool* ok, int base)
989 {
990     if (is8Bit())
991         return charactersToIntStrict(characters8(), m_length, ok, base);
992     return charactersToIntStrict(characters16(), m_length, ok, base);
993 }
994 
toUIntStrict(bool * ok,int base)995 unsigned StringImpl::toUIntStrict(bool* ok, int base)
996 {
997     if (is8Bit())
998         return charactersToUIntStrict(characters8(), m_length, ok, base);
999     return charactersToUIntStrict(characters16(), m_length, ok, base);
1000 }
1001 
toInt64Strict(bool * ok,int base)1002 int64_t StringImpl::toInt64Strict(bool* ok, int base)
1003 {
1004     if (is8Bit())
1005         return charactersToInt64Strict(characters8(), m_length, ok, base);
1006     return charactersToInt64Strict(characters16(), m_length, ok, base);
1007 }
1008 
toUInt64Strict(bool * ok,int base)1009 uint64_t StringImpl::toUInt64Strict(bool* ok, int base)
1010 {
1011     if (is8Bit())
1012         return charactersToUInt64Strict(characters8(), m_length, ok, base);
1013     return charactersToUInt64Strict(characters16(), m_length, ok, base);
1014 }
1015 
toIntPtrStrict(bool * ok,int base)1016 intptr_t StringImpl::toIntPtrStrict(bool* ok, int base)
1017 {
1018     if (is8Bit())
1019         return charactersToIntPtrStrict(characters8(), m_length, ok, base);
1020     return charactersToIntPtrStrict(characters16(), m_length, ok, base);
1021 }
1022 
toInt(bool * ok)1023 int StringImpl::toInt(bool* ok)
1024 {
1025     if (is8Bit())
1026         return charactersToInt(characters8(), m_length, ok);
1027     return charactersToInt(characters16(), m_length, ok);
1028 }
1029 
toUInt(bool * ok)1030 unsigned StringImpl::toUInt(bool* ok)
1031 {
1032     if (is8Bit())
1033         return charactersToUInt(characters8(), m_length, ok);
1034     return charactersToUInt(characters16(), m_length, ok);
1035 }
1036 
toInt64(bool * ok)1037 int64_t StringImpl::toInt64(bool* ok)
1038 {
1039     if (is8Bit())
1040         return charactersToInt64(characters8(), m_length, ok);
1041     return charactersToInt64(characters16(), m_length, ok);
1042 }
1043 
toUInt64(bool * ok)1044 uint64_t StringImpl::toUInt64(bool* ok)
1045 {
1046     if (is8Bit())
1047         return charactersToUInt64(characters8(), m_length, ok);
1048     return charactersToUInt64(characters16(), m_length, ok);
1049 }
1050 
toIntPtr(bool * ok)1051 intptr_t StringImpl::toIntPtr(bool* ok)
1052 {
1053     if (is8Bit())
1054         return charactersToIntPtr(characters8(), m_length, ok);
1055     return charactersToIntPtr(characters16(), m_length, ok);
1056 }
1057 
toDouble(bool * ok)1058 double StringImpl::toDouble(bool* ok)
1059 {
1060     if (is8Bit())
1061         return charactersToDouble(characters8(), m_length, ok);
1062     return charactersToDouble(characters16(), m_length, ok);
1063 }
1064 
toFloat(bool * ok)1065 float StringImpl::toFloat(bool* ok)
1066 {
1067     if (is8Bit())
1068         return charactersToFloat(characters8(), m_length, ok);
1069     return charactersToFloat(characters16(), m_length, ok);
1070 }
1071 
equalIgnoringCase(const LChar * a,const LChar * b,unsigned length)1072 bool equalIgnoringCase(const LChar* a, const LChar* b, unsigned length)
1073 {
1074     while (length--) {
1075         LChar bc = *b++;
1076         if (foldCase(*a++) != foldCase(bc))
1077             return false;
1078     }
1079     return true;
1080 }
1081 
equalIgnoringCase(const UChar * a,const LChar * b,unsigned length)1082 bool equalIgnoringCase(const UChar* a, const LChar* b, unsigned length)
1083 {
1084     while (length--) {
1085         LChar bc = *b++;
1086         if (foldCase(*a++) != foldCase(bc))
1087             return false;
1088     }
1089     return true;
1090 }
1091 
find(CharacterMatchFunctionPtr matchFunction,unsigned start)1092 size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start)
1093 {
1094     if (is8Bit())
1095         return WTF::find(characters8(), m_length, matchFunction, start);
1096     return WTF::find(characters16(), m_length, matchFunction, start);
1097 }
1098 
find(const LChar * matchString,unsigned index)1099 size_t StringImpl::find(const LChar* matchString, unsigned index)
1100 {
1101     // Check for null or empty string to match against
1102     if (!matchString)
1103         return kNotFound;
1104     size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString));
1105     RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max());
1106     unsigned matchLength = matchStringLength;
1107     if (!matchLength)
1108         return min(index, length());
1109 
1110     // Optimization 1: fast case for strings of length 1.
1111     if (matchLength == 1)
1112         return WTF::find(characters16(), length(), *matchString, index);
1113 
1114     // Check index & matchLength are in range.
1115     if (index > length())
1116         return kNotFound;
1117     unsigned searchLength = length() - index;
1118     if (matchLength > searchLength)
1119         return kNotFound;
1120     // delta is the number of additional times to test; delta == 0 means test only once.
1121     unsigned delta = searchLength - matchLength;
1122 
1123     const UChar* searchCharacters = characters16() + index;
1124 
1125     // Optimization 2: keep a running hash of the strings,
1126     // only call equal if the hashes match.
1127     unsigned searchHash = 0;
1128     unsigned matchHash = 0;
1129     for (unsigned i = 0; i < matchLength; ++i) {
1130         searchHash += searchCharacters[i];
1131         matchHash += matchString[i];
1132     }
1133 
1134     unsigned i = 0;
1135     // keep looping until we match
1136     while (searchHash != matchHash || !equal(searchCharacters + i, matchString, matchLength)) {
1137         if (i == delta)
1138             return kNotFound;
1139         searchHash += searchCharacters[i + matchLength];
1140         searchHash -= searchCharacters[i];
1141         ++i;
1142     }
1143     return index + i;
1144 }
1145 
1146 template<typename CharType>
findIgnoringCaseInternal(const CharType * searchCharacters,const LChar * matchString,unsigned index,unsigned searchLength,unsigned matchLength)1147 ALWAYS_INLINE size_t findIgnoringCaseInternal(const CharType* searchCharacters, const LChar* matchString, unsigned index, unsigned searchLength, unsigned matchLength)
1148 {
1149     // delta is the number of additional times to test; delta == 0 means test only once.
1150     unsigned delta = searchLength - matchLength;
1151 
1152     unsigned i = 0;
1153     while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) {
1154         if (i == delta)
1155             return kNotFound;
1156         ++i;
1157     }
1158     return index + i;
1159 }
1160 
findIgnoringCase(const LChar * matchString,unsigned index)1161 size_t StringImpl::findIgnoringCase(const LChar* matchString, unsigned index)
1162 {
1163     // Check for null or empty string to match against
1164     if (!matchString)
1165         return kNotFound;
1166     size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString));
1167     RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max());
1168     unsigned matchLength = matchStringLength;
1169     if (!matchLength)
1170         return min(index, length());
1171 
1172     // Check index & matchLength are in range.
1173     if (index > length())
1174         return kNotFound;
1175     unsigned searchLength = length() - index;
1176     if (matchLength > searchLength)
1177         return kNotFound;
1178 
1179     if (is8Bit())
1180         return findIgnoringCaseInternal(characters8() + index, matchString, index, searchLength, matchLength);
1181     return findIgnoringCaseInternal(characters16() + index, matchString, index, searchLength, matchLength);
1182 }
1183 
1184 template <typename SearchCharacterType, typename MatchCharacterType>
findInternal(const SearchCharacterType * searchCharacters,const MatchCharacterType * matchCharacters,unsigned index,unsigned searchLength,unsigned matchLength)1185 ALWAYS_INLINE static size_t findInternal(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLength, unsigned matchLength)
1186 {
1187     // Optimization: keep a running hash of the strings,
1188     // only call equal() if the hashes match.
1189 
1190     // delta is the number of additional times to test; delta == 0 means test only once.
1191     unsigned delta = searchLength - matchLength;
1192 
1193     unsigned searchHash = 0;
1194     unsigned matchHash = 0;
1195 
1196     for (unsigned i = 0; i < matchLength; ++i) {
1197         searchHash += searchCharacters[i];
1198         matchHash += matchCharacters[i];
1199     }
1200 
1201     unsigned i = 0;
1202     // keep looping until we match
1203     while (searchHash != matchHash || !equal(searchCharacters + i, matchCharacters, matchLength)) {
1204         if (i == delta)
1205             return kNotFound;
1206         searchHash += searchCharacters[i + matchLength];
1207         searchHash -= searchCharacters[i];
1208         ++i;
1209     }
1210     return index + i;
1211 }
1212 
find(StringImpl * matchString)1213 size_t StringImpl::find(StringImpl* matchString)
1214 {
1215     // Check for null string to match against
1216     if (UNLIKELY(!matchString))
1217         return kNotFound;
1218     unsigned matchLength = matchString->length();
1219 
1220     // Optimization 1: fast case for strings of length 1.
1221     if (matchLength == 1) {
1222         if (is8Bit()) {
1223             if (matchString->is8Bit())
1224                 return WTF::find(characters8(), length(), matchString->characters8()[0]);
1225             return WTF::find(characters8(), length(), matchString->characters16()[0]);
1226         }
1227         if (matchString->is8Bit())
1228             return WTF::find(characters16(), length(), matchString->characters8()[0]);
1229         return WTF::find(characters16(), length(), matchString->characters16()[0]);
1230     }
1231 
1232     // Check matchLength is in range.
1233     if (matchLength > length())
1234         return kNotFound;
1235 
1236     // Check for empty string to match against
1237     if (UNLIKELY(!matchLength))
1238         return 0;
1239 
1240     if (is8Bit()) {
1241         if (matchString->is8Bit())
1242             return findInternal(characters8(), matchString->characters8(), 0, length(), matchLength);
1243         return findInternal(characters8(), matchString->characters16(), 0, length(), matchLength);
1244     }
1245 
1246     if (matchString->is8Bit())
1247         return findInternal(characters16(), matchString->characters8(), 0, length(), matchLength);
1248 
1249     return findInternal(characters16(), matchString->characters16(), 0, length(), matchLength);
1250 }
1251 
find(StringImpl * matchString,unsigned index)1252 size_t StringImpl::find(StringImpl* matchString, unsigned index)
1253 {
1254     // Check for null or empty string to match against
1255     if (UNLIKELY(!matchString))
1256         return kNotFound;
1257 
1258     unsigned matchLength = matchString->length();
1259 
1260     // Optimization 1: fast case for strings of length 1.
1261     if (matchLength == 1) {
1262         if (is8Bit())
1263             return WTF::find(characters8(), length(), (*matchString)[0], index);
1264         return WTF::find(characters16(), length(), (*matchString)[0], index);
1265     }
1266 
1267     if (UNLIKELY(!matchLength))
1268         return min(index, length());
1269 
1270     // Check index & matchLength are in range.
1271     if (index > length())
1272         return kNotFound;
1273     unsigned searchLength = length() - index;
1274     if (matchLength > searchLength)
1275         return kNotFound;
1276 
1277     if (is8Bit()) {
1278         if (matchString->is8Bit())
1279             return findInternal(characters8() + index, matchString->characters8(), index, searchLength, matchLength);
1280         return findInternal(characters8() + index, matchString->characters16(), index, searchLength, matchLength);
1281     }
1282 
1283     if (matchString->is8Bit())
1284         return findInternal(characters16() + index, matchString->characters8(), index, searchLength, matchLength);
1285 
1286     return findInternal(characters16() + index, matchString->characters16(), index, searchLength, matchLength);
1287 }
1288 
1289 template <typename SearchCharacterType, typename MatchCharacterType>
findIgnoringCaseInner(const SearchCharacterType * searchCharacters,const MatchCharacterType * matchCharacters,unsigned index,unsigned searchLength,unsigned matchLength)1290 ALWAYS_INLINE static size_t findIgnoringCaseInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLength, unsigned matchLength)
1291 {
1292     // delta is the number of additional times to test; delta == 0 means test only once.
1293     unsigned delta = searchLength - matchLength;
1294 
1295     unsigned i = 0;
1296     // keep looping until we match
1297     while (!equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength)) {
1298         if (i == delta)
1299             return kNotFound;
1300         ++i;
1301     }
1302     return index + i;
1303 }
1304 
findIgnoringCase(StringImpl * matchString,unsigned index)1305 size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index)
1306 {
1307     // Check for null or empty string to match against
1308     if (!matchString)
1309         return kNotFound;
1310     unsigned matchLength = matchString->length();
1311     if (!matchLength)
1312         return min(index, length());
1313 
1314     // Check index & matchLength are in range.
1315     if (index > length())
1316         return kNotFound;
1317     unsigned searchLength = length() - index;
1318     if (matchLength > searchLength)
1319         return kNotFound;
1320 
1321     if (is8Bit()) {
1322         if (matchString->is8Bit())
1323             return findIgnoringCaseInner(characters8() + index, matchString->characters8(), index, searchLength, matchLength);
1324         return findIgnoringCaseInner(characters8() + index, matchString->characters16(), index, searchLength, matchLength);
1325     }
1326 
1327     if (matchString->is8Bit())
1328         return findIgnoringCaseInner(characters16() + index, matchString->characters8(), index, searchLength, matchLength);
1329 
1330     return findIgnoringCaseInner(characters16() + index, matchString->characters16(), index, searchLength, matchLength);
1331 }
1332 
findNextLineStart(unsigned index)1333 size_t StringImpl::findNextLineStart(unsigned index)
1334 {
1335     if (is8Bit())
1336         return WTF::findNextLineStart(characters8(), m_length, index);
1337     return WTF::findNextLineStart(characters16(), m_length, index);
1338 }
1339 
count(LChar c) const1340 size_t StringImpl::count(LChar c) const
1341 {
1342     int count = 0;
1343     if (is8Bit()) {
1344         for (size_t i = 0; i < m_length; ++i)
1345             count += characters8()[i] == c;
1346     } else {
1347         for (size_t i = 0; i < m_length; ++i)
1348             count += characters16()[i] == c;
1349     }
1350     return count;
1351 }
1352 
reverseFind(UChar c,unsigned index)1353 size_t StringImpl::reverseFind(UChar c, unsigned index)
1354 {
1355     if (is8Bit())
1356         return WTF::reverseFind(characters8(), m_length, c, index);
1357     return WTF::reverseFind(characters16(), m_length, c, index);
1358 }
1359 
1360 template <typename SearchCharacterType, typename MatchCharacterType>
reverseFindInner(const SearchCharacterType * searchCharacters,const MatchCharacterType * matchCharacters,unsigned index,unsigned length,unsigned matchLength)1361 ALWAYS_INLINE static size_t reverseFindInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned length, unsigned matchLength)
1362 {
1363     // Optimization: keep a running hash of the strings,
1364     // only call equal if the hashes match.
1365 
1366     // delta is the number of additional times to test; delta == 0 means test only once.
1367     unsigned delta = min(index, length - matchLength);
1368 
1369     unsigned searchHash = 0;
1370     unsigned matchHash = 0;
1371     for (unsigned i = 0; i < matchLength; ++i) {
1372         searchHash += searchCharacters[delta + i];
1373         matchHash += matchCharacters[i];
1374     }
1375 
1376     // keep looping until we match
1377     while (searchHash != matchHash || !equal(searchCharacters + delta, matchCharacters, matchLength)) {
1378         if (!delta)
1379             return kNotFound;
1380         --delta;
1381         searchHash -= searchCharacters[delta + matchLength];
1382         searchHash += searchCharacters[delta];
1383     }
1384     return delta;
1385 }
1386 
reverseFind(StringImpl * matchString,unsigned index)1387 size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index)
1388 {
1389     // Check for null or empty string to match against
1390     if (!matchString)
1391         return kNotFound;
1392     unsigned matchLength = matchString->length();
1393     unsigned ourLength = length();
1394     if (!matchLength)
1395         return min(index, ourLength);
1396 
1397     // Optimization 1: fast case for strings of length 1.
1398     if (matchLength == 1) {
1399         if (is8Bit())
1400             return WTF::reverseFind(characters8(), ourLength, (*matchString)[0], index);
1401         return WTF::reverseFind(characters16(), ourLength, (*matchString)[0], index);
1402     }
1403 
1404     // Check index & matchLength are in range.
1405     if (matchLength > ourLength)
1406         return kNotFound;
1407 
1408     if (is8Bit()) {
1409         if (matchString->is8Bit())
1410             return reverseFindInner(characters8(), matchString->characters8(), index, ourLength, matchLength);
1411         return reverseFindInner(characters8(), matchString->characters16(), index, ourLength, matchLength);
1412     }
1413 
1414     if (matchString->is8Bit())
1415         return reverseFindInner(characters16(), matchString->characters8(), index, ourLength, matchLength);
1416 
1417     return reverseFindInner(characters16(), matchString->characters16(), index, ourLength, matchLength);
1418 }
1419 
1420 template <typename SearchCharacterType, typename MatchCharacterType>
reverseFindIgnoringCaseInner(const SearchCharacterType * searchCharacters,const MatchCharacterType * matchCharacters,unsigned index,unsigned length,unsigned matchLength)1421 ALWAYS_INLINE static size_t reverseFindIgnoringCaseInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned length, unsigned matchLength)
1422 {
1423     // delta is the number of additional times to test; delta == 0 means test only once.
1424     unsigned delta = min(index, length - matchLength);
1425 
1426     // keep looping until we match
1427     while (!equalIgnoringCase(searchCharacters + delta, matchCharacters, matchLength)) {
1428         if (!delta)
1429             return kNotFound;
1430         --delta;
1431     }
1432     return delta;
1433 }
1434 
reverseFindIgnoringCase(StringImpl * matchString,unsigned index)1435 size_t StringImpl::reverseFindIgnoringCase(StringImpl* matchString, unsigned index)
1436 {
1437     // Check for null or empty string to match against
1438     if (!matchString)
1439         return kNotFound;
1440     unsigned matchLength = matchString->length();
1441     unsigned ourLength = length();
1442     if (!matchLength)
1443         return min(index, ourLength);
1444 
1445     // Check index & matchLength are in range.
1446     if (matchLength > ourLength)
1447         return kNotFound;
1448 
1449     if (is8Bit()) {
1450         if (matchString->is8Bit())
1451             return reverseFindIgnoringCaseInner(characters8(), matchString->characters8(), index, ourLength, matchLength);
1452         return reverseFindIgnoringCaseInner(characters8(), matchString->characters16(), index, ourLength, matchLength);
1453     }
1454 
1455     if (matchString->is8Bit())
1456         return reverseFindIgnoringCaseInner(characters16(), matchString->characters8(), index, ourLength, matchLength);
1457 
1458     return reverseFindIgnoringCaseInner(characters16(), matchString->characters16(), index, ourLength, matchLength);
1459 }
1460 
equalInner(const StringImpl * stringImpl,unsigned startOffset,const char * matchString,unsigned matchLength,bool caseSensitive)1461 ALWAYS_INLINE static bool equalInner(const StringImpl* stringImpl, unsigned startOffset, const char* matchString, unsigned matchLength, bool caseSensitive)
1462 {
1463     ASSERT(stringImpl);
1464     ASSERT(matchLength <= stringImpl->length());
1465     ASSERT(startOffset + matchLength <= stringImpl->length());
1466 
1467     if (caseSensitive) {
1468         if (stringImpl->is8Bit())
1469             return equal(stringImpl->characters8() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
1470         return equal(stringImpl->characters16() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
1471     }
1472     if (stringImpl->is8Bit())
1473         return equalIgnoringCase(stringImpl->characters8() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
1474     return equalIgnoringCase(stringImpl->characters16() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
1475 }
1476 
startsWith(UChar character) const1477 bool StringImpl::startsWith(UChar character) const
1478 {
1479     return m_length && (*this)[0] == character;
1480 }
1481 
startsWith(const char * matchString,unsigned matchLength,bool caseSensitive) const1482 bool StringImpl::startsWith(const char* matchString, unsigned matchLength, bool caseSensitive) const
1483 {
1484     ASSERT(matchLength);
1485     if (matchLength > length())
1486         return false;
1487     return equalInner(this, 0, matchString, matchLength, caseSensitive);
1488 }
1489 
endsWith(StringImpl * matchString,bool caseSensitive)1490 bool StringImpl::endsWith(StringImpl* matchString, bool caseSensitive)
1491 {
1492     ASSERT(matchString);
1493     if (m_length >= matchString->m_length) {
1494         unsigned start = m_length - matchString->m_length;
1495         return (caseSensitive ? find(matchString, start) : findIgnoringCase(matchString, start)) == start;
1496     }
1497     return false;
1498 }
1499 
endsWith(UChar character) const1500 bool StringImpl::endsWith(UChar character) const
1501 {
1502     return m_length && (*this)[m_length - 1] == character;
1503 }
1504 
endsWith(const char * matchString,unsigned matchLength,bool caseSensitive) const1505 bool StringImpl::endsWith(const char* matchString, unsigned matchLength, bool caseSensitive) const
1506 {
1507     ASSERT(matchLength);
1508     if (matchLength > length())
1509         return false;
1510     unsigned startOffset = length() - matchLength;
1511     return equalInner(this, startOffset, matchString, matchLength, caseSensitive);
1512 }
1513 
replace(UChar oldC,UChar newC)1514 PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC)
1515 {
1516     if (oldC == newC)
1517         return this;
1518     unsigned i;
1519     for (i = 0; i != m_length; ++i) {
1520         UChar c = is8Bit() ? characters8()[i] : characters16()[i];
1521         if (c == oldC)
1522             break;
1523     }
1524     if (i == m_length)
1525         return this;
1526 
1527     if (is8Bit()) {
1528         if (oldC > 0xff)
1529             // Looking for a 16 bit char in an 8 bit string, we're done.
1530             return this;
1531 
1532         if (newC <= 0xff) {
1533             LChar* data;
1534             LChar oldChar = static_cast<LChar>(oldC);
1535             LChar newChar = static_cast<LChar>(newC);
1536 
1537             RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1538 
1539             for (i = 0; i != m_length; ++i) {
1540                 LChar ch = characters8()[i];
1541                 if (ch == oldChar)
1542                     ch = newChar;
1543                 data[i] = ch;
1544             }
1545             return newImpl.release();
1546         }
1547 
1548         // There is the possibility we need to up convert from 8 to 16 bit,
1549         // create a 16 bit string for the result.
1550         UChar* data;
1551         RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1552 
1553         for (i = 0; i != m_length; ++i) {
1554             UChar ch = characters8()[i];
1555             if (ch == oldC)
1556                 ch = newC;
1557             data[i] = ch;
1558         }
1559 
1560         return newImpl.release();
1561     }
1562 
1563     UChar* data;
1564     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1565 
1566     for (i = 0; i != m_length; ++i) {
1567         UChar ch = characters16()[i];
1568         if (ch == oldC)
1569             ch = newC;
1570         data[i] = ch;
1571     }
1572     return newImpl.release();
1573 }
1574 
replace(unsigned position,unsigned lengthToReplace,StringImpl * str)1575 PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToReplace, StringImpl* str)
1576 {
1577     position = min(position, length());
1578     lengthToReplace = min(lengthToReplace, length() - position);
1579     unsigned lengthToInsert = str ? str->length() : 0;
1580     if (!lengthToReplace && !lengthToInsert)
1581         return this;
1582 
1583     RELEASE_ASSERT((length() - lengthToReplace) < (numeric_limits<unsigned>::max() - lengthToInsert));
1584 
1585     if (is8Bit() && (!str || str->is8Bit())) {
1586         LChar* data;
1587         RefPtr<StringImpl> newImpl =
1588         createUninitialized(length() - lengthToReplace + lengthToInsert, data);
1589         memcpy(data, characters8(), position * sizeof(LChar));
1590         if (str)
1591             memcpy(data + position, str->characters8(), lengthToInsert * sizeof(LChar));
1592         memcpy(data + position + lengthToInsert, characters8() + position + lengthToReplace,
1593                (length() - position - lengthToReplace) * sizeof(LChar));
1594         return newImpl.release();
1595     }
1596     UChar* data;
1597     RefPtr<StringImpl> newImpl =
1598         createUninitialized(length() - lengthToReplace + lengthToInsert, data);
1599     if (is8Bit())
1600         for (unsigned i = 0; i < position; ++i)
1601             data[i] = characters8()[i];
1602     else
1603         memcpy(data, characters16(), position * sizeof(UChar));
1604     if (str) {
1605         if (str->is8Bit())
1606             for (unsigned i = 0; i < lengthToInsert; ++i)
1607                 data[i + position] = str->characters8()[i];
1608         else
1609             memcpy(data + position, str->characters16(), lengthToInsert * sizeof(UChar));
1610     }
1611     if (is8Bit()) {
1612         for (unsigned i = 0; i < length() - position - lengthToReplace; ++i)
1613             data[i + position + lengthToInsert] = characters8()[i + position + lengthToReplace];
1614     } else {
1615         memcpy(data + position + lengthToInsert, characters16() + position + lengthToReplace,
1616             (length() - position - lengthToReplace) * sizeof(UChar));
1617     }
1618     return newImpl.release();
1619 }
1620 
replace(UChar pattern,StringImpl * replacement)1621 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacement)
1622 {
1623     if (!replacement)
1624         return this;
1625 
1626     if (replacement->is8Bit())
1627         return replace(pattern, replacement->characters8(), replacement->length());
1628 
1629     return replace(pattern, replacement->characters16(), replacement->length());
1630 }
1631 
replace(UChar pattern,const LChar * replacement,unsigned repStrLength)1632 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const LChar* replacement, unsigned repStrLength)
1633 {
1634     ASSERT(replacement);
1635 
1636     size_t srcSegmentStart = 0;
1637     unsigned matchCount = 0;
1638 
1639     // Count the matches.
1640     while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
1641         ++matchCount;
1642         ++srcSegmentStart;
1643     }
1644 
1645     // If we have 0 matches then we don't have to do any more work.
1646     if (!matchCount)
1647         return this;
1648 
1649     RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength);
1650 
1651     unsigned replaceSize = matchCount * repStrLength;
1652     unsigned newSize = m_length - matchCount;
1653     RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize));
1654 
1655     newSize += replaceSize;
1656 
1657     // Construct the new data.
1658     size_t srcSegmentEnd;
1659     unsigned srcSegmentLength;
1660     srcSegmentStart = 0;
1661     unsigned dstOffset = 0;
1662 
1663     if (is8Bit()) {
1664         LChar* data;
1665         RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1666 
1667         while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1668             srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1669             memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1670             dstOffset += srcSegmentLength;
1671             memcpy(data + dstOffset, replacement, repStrLength * sizeof(LChar));
1672             dstOffset += repStrLength;
1673             srcSegmentStart = srcSegmentEnd + 1;
1674         }
1675 
1676         srcSegmentLength = m_length - srcSegmentStart;
1677         memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1678 
1679         ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1680 
1681         return newImpl.release();
1682     }
1683 
1684     UChar* data;
1685     RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1686 
1687     while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1688         srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1689         memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1690 
1691         dstOffset += srcSegmentLength;
1692         for (unsigned i = 0; i < repStrLength; ++i)
1693             data[i + dstOffset] = replacement[i];
1694 
1695         dstOffset += repStrLength;
1696         srcSegmentStart = srcSegmentEnd + 1;
1697     }
1698 
1699     srcSegmentLength = m_length - srcSegmentStart;
1700     memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1701 
1702     ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1703 
1704     return newImpl.release();
1705 }
1706 
replace(UChar pattern,const UChar * replacement,unsigned repStrLength)1707 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const UChar* replacement, unsigned repStrLength)
1708 {
1709     ASSERT(replacement);
1710 
1711     size_t srcSegmentStart = 0;
1712     unsigned matchCount = 0;
1713 
1714     // Count the matches.
1715     while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
1716         ++matchCount;
1717         ++srcSegmentStart;
1718     }
1719 
1720     // If we have 0 matches then we don't have to do any more work.
1721     if (!matchCount)
1722         return this;
1723 
1724     RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength);
1725 
1726     unsigned replaceSize = matchCount * repStrLength;
1727     unsigned newSize = m_length - matchCount;
1728     RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize));
1729 
1730     newSize += replaceSize;
1731 
1732     // Construct the new data.
1733     size_t srcSegmentEnd;
1734     unsigned srcSegmentLength;
1735     srcSegmentStart = 0;
1736     unsigned dstOffset = 0;
1737 
1738     if (is8Bit()) {
1739         UChar* data;
1740         RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1741 
1742         while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1743             srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1744             for (unsigned i = 0; i < srcSegmentLength; ++i)
1745                 data[i + dstOffset] = characters8()[i + srcSegmentStart];
1746 
1747             dstOffset += srcSegmentLength;
1748             memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar));
1749 
1750             dstOffset += repStrLength;
1751             srcSegmentStart = srcSegmentEnd + 1;
1752         }
1753 
1754         srcSegmentLength = m_length - srcSegmentStart;
1755         for (unsigned i = 0; i < srcSegmentLength; ++i)
1756             data[i + dstOffset] = characters8()[i + srcSegmentStart];
1757 
1758         ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1759 
1760         return newImpl.release();
1761     }
1762 
1763     UChar* data;
1764     RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1765 
1766     while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1767         srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1768         memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1769 
1770         dstOffset += srcSegmentLength;
1771         memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar));
1772 
1773         dstOffset += repStrLength;
1774         srcSegmentStart = srcSegmentEnd + 1;
1775     }
1776 
1777     srcSegmentLength = m_length - srcSegmentStart;
1778     memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1779 
1780     ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1781 
1782     return newImpl.release();
1783 }
1784 
replace(StringImpl * pattern,StringImpl * replacement)1785 PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* replacement)
1786 {
1787     if (!pattern || !replacement)
1788         return this;
1789 
1790     unsigned patternLength = pattern->length();
1791     if (!patternLength)
1792         return this;
1793 
1794     unsigned repStrLength = replacement->length();
1795     size_t srcSegmentStart = 0;
1796     unsigned matchCount = 0;
1797 
1798     // Count the matches.
1799     while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
1800         ++matchCount;
1801         srcSegmentStart += patternLength;
1802     }
1803 
1804     // If we have 0 matches, we don't have to do any more work
1805     if (!matchCount)
1806         return this;
1807 
1808     unsigned newSize = m_length - matchCount * patternLength;
1809     RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength);
1810 
1811     RELEASE_ASSERT(newSize <= (numeric_limits<unsigned>::max() - matchCount * repStrLength));
1812 
1813     newSize += matchCount * repStrLength;
1814 
1815 
1816     // Construct the new data
1817     size_t srcSegmentEnd;
1818     unsigned srcSegmentLength;
1819     srcSegmentStart = 0;
1820     unsigned dstOffset = 0;
1821     bool srcIs8Bit = is8Bit();
1822     bool replacementIs8Bit = replacement->is8Bit();
1823 
1824     // There are 4 cases:
1825     // 1. This and replacement are both 8 bit.
1826     // 2. This and replacement are both 16 bit.
1827     // 3. This is 8 bit and replacement is 16 bit.
1828     // 4. This is 16 bit and replacement is 8 bit.
1829     if (srcIs8Bit && replacementIs8Bit) {
1830         // Case 1
1831         LChar* data;
1832         RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1833         while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1834             srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1835             memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1836             dstOffset += srcSegmentLength;
1837             memcpy(data + dstOffset, replacement->characters8(), repStrLength * sizeof(LChar));
1838             dstOffset += repStrLength;
1839             srcSegmentStart = srcSegmentEnd + patternLength;
1840         }
1841 
1842         srcSegmentLength = m_length - srcSegmentStart;
1843         memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1844 
1845         ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1846 
1847         return newImpl.release();
1848     }
1849 
1850     UChar* data;
1851     RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1852     while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1853         srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1854         if (srcIs8Bit) {
1855             // Case 3.
1856             for (unsigned i = 0; i < srcSegmentLength; ++i)
1857                 data[i + dstOffset] = characters8()[i + srcSegmentStart];
1858         } else {
1859             // Case 2 & 4.
1860             memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1861         }
1862         dstOffset += srcSegmentLength;
1863         if (replacementIs8Bit) {
1864             // Cases 2 & 3.
1865             for (unsigned i = 0; i < repStrLength; ++i)
1866                 data[i + dstOffset] = replacement->characters8()[i];
1867         } else {
1868             // Case 4
1869             memcpy(data + dstOffset, replacement->characters16(), repStrLength * sizeof(UChar));
1870         }
1871         dstOffset += repStrLength;
1872         srcSegmentStart = srcSegmentEnd + patternLength;
1873     }
1874 
1875     srcSegmentLength = m_length - srcSegmentStart;
1876     if (srcIs8Bit) {
1877         // Case 3.
1878         for (unsigned i = 0; i < srcSegmentLength; ++i)
1879             data[i + dstOffset] = characters8()[i + srcSegmentStart];
1880     } else {
1881         // Cases 2 & 4.
1882         memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1883     }
1884 
1885     ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1886 
1887     return newImpl.release();
1888 }
1889 
upconvertedString()1890 PassRefPtr<StringImpl> StringImpl::upconvertedString()
1891 {
1892     if (is8Bit())
1893         return String::make16BitFrom8BitSource(characters8(), m_length).releaseImpl();
1894     return this;
1895 }
1896 
stringImplContentEqual(const StringImpl * a,const StringImpl * b)1897 static inline bool stringImplContentEqual(const StringImpl* a, const StringImpl* b)
1898 {
1899     unsigned aLength = a->length();
1900     unsigned bLength = b->length();
1901     if (aLength != bLength)
1902         return false;
1903 
1904     if (a->is8Bit()) {
1905         if (b->is8Bit())
1906             return equal(a->characters8(), b->characters8(), aLength);
1907 
1908         return equal(a->characters8(), b->characters16(), aLength);
1909     }
1910 
1911     if (b->is8Bit())
1912         return equal(a->characters16(), b->characters8(), aLength);
1913 
1914     return equal(a->characters16(), b->characters16(), aLength);
1915 }
1916 
equal(const StringImpl * a,const StringImpl * b)1917 bool equal(const StringImpl* a, const StringImpl* b)
1918 {
1919     if (a == b)
1920         return true;
1921     if (!a || !b)
1922         return false;
1923 
1924     return stringImplContentEqual(a, b);
1925 }
1926 
1927 template <typename CharType>
equalInternal(const StringImpl * a,const CharType * b,unsigned length)1928 inline bool equalInternal(const StringImpl* a, const CharType* b, unsigned length)
1929 {
1930     if (!a)
1931         return !b;
1932     if (!b)
1933         return false;
1934 
1935     if (a->length() != length)
1936         return false;
1937     if (a->is8Bit())
1938         return equal(a->characters8(), b, length);
1939     return equal(a->characters16(), b, length);
1940 }
1941 
equal(const StringImpl * a,const LChar * b,unsigned length)1942 bool equal(const StringImpl* a, const LChar* b, unsigned length)
1943 {
1944     return equalInternal(a, b, length);
1945 }
1946 
equal(const StringImpl * a,const UChar * b,unsigned length)1947 bool equal(const StringImpl* a, const UChar* b, unsigned length)
1948 {
1949     return equalInternal(a, b, length);
1950 }
1951 
equal(const StringImpl * a,const LChar * b)1952 bool equal(const StringImpl* a, const LChar* b)
1953 {
1954     if (!a)
1955         return !b;
1956     if (!b)
1957         return !a;
1958 
1959     unsigned length = a->length();
1960 
1961     if (a->is8Bit()) {
1962         const LChar* aPtr = a->characters8();
1963         for (unsigned i = 0; i != length; ++i) {
1964             LChar bc = b[i];
1965             LChar ac = aPtr[i];
1966             if (!bc)
1967                 return false;
1968             if (ac != bc)
1969                 return false;
1970         }
1971 
1972         return !b[length];
1973     }
1974 
1975     const UChar* aPtr = a->characters16();
1976     for (unsigned i = 0; i != length; ++i) {
1977         LChar bc = b[i];
1978         if (!bc)
1979             return false;
1980         if (aPtr[i] != bc)
1981             return false;
1982     }
1983 
1984     return !b[length];
1985 }
1986 
equalNonNull(const StringImpl * a,const StringImpl * b)1987 bool equalNonNull(const StringImpl* a, const StringImpl* b)
1988 {
1989     ASSERT(a && b);
1990     if (a == b)
1991         return true;
1992 
1993     return stringImplContentEqual(a, b);
1994 }
1995 
equalIgnoringCase(const StringImpl * a,const StringImpl * b)1996 bool equalIgnoringCase(const StringImpl* a, const StringImpl* b)
1997 {
1998     if (a == b)
1999         return true;
2000     if (!a || !b)
2001         return false;
2002 
2003     return CaseFoldingHash::equal(a, b);
2004 }
2005 
equalIgnoringCase(const StringImpl * a,const LChar * b)2006 bool equalIgnoringCase(const StringImpl* a, const LChar* b)
2007 {
2008     if (!a)
2009         return !b;
2010     if (!b)
2011         return !a;
2012 
2013     unsigned length = a->length();
2014 
2015     // Do a faster loop for the case where all the characters are ASCII.
2016     UChar ored = 0;
2017     bool equal = true;
2018     if (a->is8Bit()) {
2019         const LChar* as = a->characters8();
2020         for (unsigned i = 0; i != length; ++i) {
2021             LChar bc = b[i];
2022             if (!bc)
2023                 return false;
2024             UChar ac = as[i];
2025             ored |= ac;
2026             equal = equal && (toASCIILower(ac) == toASCIILower(bc));
2027         }
2028 
2029         // Do a slower implementation for cases that include non-ASCII characters.
2030         if (ored & ~0x7F) {
2031             equal = true;
2032             for (unsigned i = 0; i != length; ++i)
2033                 equal = equal && (foldCase(as[i]) == foldCase(b[i]));
2034         }
2035 
2036         return equal && !b[length];
2037     }
2038 
2039     const UChar* as = a->characters16();
2040     for (unsigned i = 0; i != length; ++i) {
2041         LChar bc = b[i];
2042         if (!bc)
2043             return false;
2044         UChar ac = as[i];
2045         ored |= ac;
2046         equal = equal && (toASCIILower(ac) == toASCIILower(bc));
2047     }
2048 
2049     // Do a slower implementation for cases that include non-ASCII characters.
2050     if (ored & ~0x7F) {
2051         equal = true;
2052         for (unsigned i = 0; i != length; ++i) {
2053             equal = equal && (foldCase(as[i]) == foldCase(b[i]));
2054         }
2055     }
2056 
2057     return equal && !b[length];
2058 }
2059 
equalIgnoringCaseNonNull(const StringImpl * a,const StringImpl * b)2060 bool equalIgnoringCaseNonNull(const StringImpl* a, const StringImpl* b)
2061 {
2062     ASSERT(a && b);
2063     if (a == b)
2064         return true;
2065 
2066     unsigned length = a->length();
2067     if (length != b->length())
2068         return false;
2069 
2070     if (a->is8Bit()) {
2071         if (b->is8Bit())
2072             return equalIgnoringCase(a->characters8(), b->characters8(), length);
2073 
2074         return equalIgnoringCase(b->characters16(), a->characters8(), length);
2075     }
2076 
2077     if (b->is8Bit())
2078         return equalIgnoringCase(a->characters16(), b->characters8(), length);
2079 
2080     return equalIgnoringCase(a->characters16(), b->characters16(), length);
2081 }
2082 
equalIgnoringNullity(StringImpl * a,StringImpl * b)2083 bool equalIgnoringNullity(StringImpl* a, StringImpl* b)
2084 {
2085     if (!a && b && !b->length())
2086         return true;
2087     if (!b && a && !a->length())
2088         return true;
2089     return equal(a, b);
2090 }
2091 
sizeInBytes() const2092 size_t StringImpl::sizeInBytes() const
2093 {
2094     size_t size = length();
2095     if (!is8Bit())
2096         size *= 2;
2097     return size + sizeof(*this);
2098 }
2099 
2100 } // namespace WTF
2101