• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3  *           (C) 1999 Antti Koivisto (koivisto@kde.org)
4  *           (C) 2001 Dirk Mueller ( mueller@kde.org )
5  * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All rights reserved.
6  * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Library General Public
10  * License as published by the Free Software Foundation; either
11  * version 2 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Library General Public License for more details.
17  *
18  * You should have received a copy of the GNU Library General Public License
19  * along with this library; see the file COPYING.LIB.  If not, write to
20  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21  * Boston, MA 02110-1301, USA.
22  *
23  */
24 
25 #include "config.h"
26 #include "wtf/text/StringImpl.h"
27 
28 #include "wtf/DynamicAnnotations.h"
29 #include "wtf/LeakAnnotations.h"
30 #include "wtf/MainThread.h"
31 #include "wtf/OwnPtr.h"
32 #include "wtf/PartitionAlloc.h"
33 #include "wtf/PassOwnPtr.h"
34 #include "wtf/StdLibExtras.h"
35 #include "wtf/WTF.h"
36 #include "wtf/text/AtomicString.h"
37 #include "wtf/text/StringBuffer.h"
38 #include "wtf/text/StringHash.h"
39 #include "wtf/unicode/CharacterNames.h"
40 #include <unicode/translit.h>
41 #include <unicode/unistr.h>
42 
43 #ifdef STRING_STATS
44 #include "wtf/DataLog.h"
45 #include "wtf/HashMap.h"
46 #include "wtf/HashSet.h"
47 #include "wtf/ProcessID.h"
48 #include "wtf/RefCounted.h"
49 #include "wtf/ThreadingPrimitives.h"
50 #include <unistd.h>
51 #endif
52 
53 using namespace std;
54 
55 namespace WTF {
56 
57 using namespace Unicode;
58 
59 COMPILE_ASSERT(sizeof(StringImpl) == 3 * sizeof(int), StringImpl_should_stay_small);
60 
61 #ifdef STRING_STATS
62 
statsMutex()63 static Mutex& statsMutex()
64 {
65     DEFINE_STATIC_LOCAL(Mutex, mutex, ());
66     return mutex;
67 }
68 
liveStrings()69 static HashSet<void*>& liveStrings()
70 {
71     // Notice that we can't use HashSet<StringImpl*> because then HashSet would dedup identical strings.
72     DEFINE_STATIC_LOCAL(HashSet<void*>, strings, ());
73     return strings;
74 }
75 
addStringForStats(StringImpl * string)76 void addStringForStats(StringImpl* string)
77 {
78     MutexLocker locker(statsMutex());
79     liveStrings().add(string);
80 }
81 
removeStringForStats(StringImpl * string)82 void removeStringForStats(StringImpl* string)
83 {
84     MutexLocker locker(statsMutex());
85     liveStrings().remove(string);
86 }
87 
fillWithSnippet(const StringImpl * string,Vector<char> & snippet)88 static void fillWithSnippet(const StringImpl* string, Vector<char>& snippet)
89 {
90     const unsigned kMaxSnippetLength = 64;
91     snippet.clear();
92 
93     size_t expectedLength = std::min(string->length(), kMaxSnippetLength);
94     if (expectedLength == kMaxSnippetLength)
95         expectedLength += 3; // For the "...".
96     ++expectedLength; // For the terminating '\0'.
97     snippet.reserveCapacity(expectedLength);
98 
99     size_t i;
100     for (i = 0; i < string->length() && i < kMaxSnippetLength; ++i) {
101         UChar c = (*string)[i];
102         if (isASCIIPrintable(c))
103             snippet.append(c);
104         else
105             snippet.append('?');
106     }
107     if (i < string->length()) {
108         snippet.append('.');
109         snippet.append('.');
110         snippet.append('.');
111     }
112     snippet.append('\0');
113 }
114 
isUnnecessarilyWide(const StringImpl * string)115 static bool isUnnecessarilyWide(const StringImpl* string)
116 {
117     if (string->is8Bit())
118         return false;
119     UChar c = 0;
120     for (unsigned i = 0; i < string->length(); ++i)
121         c |= (*string)[i] >> 8;
122     return !c;
123 }
124 
125 class PerStringStats : public RefCounted<PerStringStats> {
126 public:
create()127     static PassRefPtr<PerStringStats> create()
128     {
129         return adoptRef(new PerStringStats);
130     }
131 
add(const StringImpl * string)132     void add(const StringImpl* string)
133     {
134         ++m_numberOfCopies;
135         if (!m_length) {
136             m_length = string->length();
137             fillWithSnippet(string, m_snippet);
138         }
139         if (string->isAtomic())
140             ++m_numberOfAtomicCopies;
141         if (isUnnecessarilyWide(string))
142             m_unnecessarilyWide = true;
143     }
144 
totalCharacters() const145     size_t totalCharacters() const
146     {
147         return m_numberOfCopies * m_length;
148     }
149 
print()150     void print()
151     {
152         const char* status = "ok";
153         if (m_unnecessarilyWide)
154             status = "16";
155         dataLogF("%8u copies (%s) of length %8u %s\n", m_numberOfCopies, status, m_length, m_snippet.data());
156     }
157 
158     bool m_unnecessarilyWide;
159     unsigned m_numberOfCopies;
160     unsigned m_length;
161     unsigned m_numberOfAtomicCopies;
162     Vector<char> m_snippet;
163 
164 private:
PerStringStats()165     PerStringStats()
166         : m_unnecessarilyWide(false)
167         , m_numberOfCopies(0)
168         , m_length(0)
169         , m_numberOfAtomicCopies(0)
170     {
171     }
172 };
173 
operator <(const RefPtr<PerStringStats> & a,const RefPtr<PerStringStats> & b)174 bool operator<(const RefPtr<PerStringStats>& a, const RefPtr<PerStringStats>& b)
175 {
176     if (a->m_unnecessarilyWide != b->m_unnecessarilyWide)
177         return !a->m_unnecessarilyWide && b->m_unnecessarilyWide;
178     if (a->totalCharacters() != b->totalCharacters())
179         return a->totalCharacters() < b->totalCharacters();
180     if (a->m_numberOfCopies != b->m_numberOfCopies)
181         return a->m_numberOfCopies < b->m_numberOfCopies;
182     if (a->m_length != b->m_length)
183         return a->m_length < b->m_length;
184     return a->m_numberOfAtomicCopies < b->m_numberOfAtomicCopies;
185 }
186 
printLiveStringStats(void *)187 static void printLiveStringStats(void*)
188 {
189     MutexLocker locker(statsMutex());
190     HashSet<void*>& strings = liveStrings();
191 
192     HashMap<StringImpl*, RefPtr<PerStringStats> > stats;
193     for (HashSet<void*>::iterator iter = strings.begin(); iter != strings.end(); ++iter) {
194         StringImpl* string = static_cast<StringImpl*>(*iter);
195         HashMap<StringImpl*, RefPtr<PerStringStats> >::iterator entry = stats.find(string);
196         RefPtr<PerStringStats> value = entry == stats.end() ? RefPtr<PerStringStats>(PerStringStats::create()) : entry->value;
197         value->add(string);
198         stats.set(string, value.release());
199     }
200 
201     Vector<RefPtr<PerStringStats> > all;
202     for (HashMap<StringImpl*, RefPtr<PerStringStats> >::iterator iter = stats.begin(); iter != stats.end(); ++iter)
203         all.append(iter->value);
204 
205     std::sort(all.begin(), all.end());
206     std::reverse(all.begin(), all.end());
207     for (size_t i = 0; i < 20 && i < all.size(); ++i)
208         all[i]->print();
209 }
210 
211 StringStats StringImpl::m_stringStats;
212 
213 unsigned StringStats::s_stringRemovesTillPrintStats = StringStats::s_printStringStatsFrequency;
214 
removeString(StringImpl * string)215 void StringStats::removeString(StringImpl* string)
216 {
217     unsigned length = string->length();
218     --m_totalNumberStrings;
219 
220     if (string->is8Bit()) {
221         --m_number8BitStrings;
222         m_total8BitData -= length;
223     } else {
224         --m_number16BitStrings;
225         m_total16BitData -= length;
226     }
227 
228     if (!--s_stringRemovesTillPrintStats) {
229         s_stringRemovesTillPrintStats = s_printStringStatsFrequency;
230         printStats();
231     }
232 }
233 
printStats()234 void StringStats::printStats()
235 {
236     dataLogF("String stats for process id %d:\n", getCurrentProcessID());
237 
238     unsigned long long totalNumberCharacters = m_total8BitData + m_total16BitData;
239     double percent8Bit = m_totalNumberStrings ? ((double)m_number8BitStrings * 100) / (double)m_totalNumberStrings : 0.0;
240     double average8bitLength = m_number8BitStrings ? (double)m_total8BitData / (double)m_number8BitStrings : 0.0;
241     dataLogF("%8u (%5.2f%%) 8 bit        %12llu chars  %12llu bytes  avg length %6.1f\n", m_number8BitStrings, percent8Bit, m_total8BitData, m_total8BitData, average8bitLength);
242 
243     double percent16Bit = m_totalNumberStrings ? ((double)m_number16BitStrings * 100) / (double)m_totalNumberStrings : 0.0;
244     double average16bitLength = m_number16BitStrings ? (double)m_total16BitData / (double)m_number16BitStrings : 0.0;
245     dataLogF("%8u (%5.2f%%) 16 bit       %12llu chars  %12llu bytes  avg length %6.1f\n", m_number16BitStrings, percent16Bit, m_total16BitData, m_total16BitData * 2, average16bitLength);
246 
247     double averageLength = m_totalNumberStrings ? (double)totalNumberCharacters / (double)m_totalNumberStrings : 0.0;
248     unsigned long long totalDataBytes = m_total8BitData + m_total16BitData * 2;
249     dataLogF("%8u Total                 %12llu chars  %12llu bytes  avg length %6.1f\n", m_totalNumberStrings, totalNumberCharacters, totalDataBytes, averageLength);
250     unsigned long long totalSavedBytes = m_total8BitData;
251     double percentSavings = totalSavedBytes ? ((double)totalSavedBytes * 100) / (double)(totalDataBytes + totalSavedBytes) : 0.0;
252     dataLogF("         Total savings %12llu bytes (%5.2f%%)\n", totalSavedBytes, percentSavings);
253 
254     unsigned totalOverhead = m_totalNumberStrings * sizeof(StringImpl);
255     double overheadPercent = (double)totalOverhead / (double)totalDataBytes * 100;
256     dataLogF("         StringImpl overheader: %8u (%5.2f%%)\n", totalOverhead, overheadPercent);
257 
258     callOnMainThread(printLiveStringStats, 0);
259 }
260 #endif
261 
operator new(size_t size)262 void* StringImpl::operator new(size_t size)
263 {
264     ASSERT(size == sizeof(StringImpl));
265     return partitionAllocGeneric(Partitions::getBufferPartition(), size);
266 }
267 
operator delete(void * ptr)268 void StringImpl::operator delete(void* ptr)
269 {
270     partitionFreeGeneric(Partitions::getBufferPartition(), ptr);
271 }
272 
~StringImpl()273 inline StringImpl::~StringImpl()
274 {
275     ASSERT(!isStatic());
276 
277     STRING_STATS_REMOVE_STRING(this);
278 
279     if (isAtomic())
280         AtomicString::remove(this);
281 }
282 
destroyIfNotStatic()283 void StringImpl::destroyIfNotStatic()
284 {
285     if (!isStatic())
286         delete this;
287 }
288 
createUninitialized(unsigned length,LChar * & data)289 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, LChar*& data)
290 {
291     if (!length) {
292         data = 0;
293         return empty();
294     }
295 
296     // Allocate a single buffer large enough to contain the StringImpl
297     // struct as well as the data which it contains. This removes one
298     // heap allocation from this call.
299     StringImpl* string = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), allocationSize<LChar>(length)));
300 
301     data = reinterpret_cast<LChar*>(string + 1);
302     return adoptRef(new (string) StringImpl(length, Force8BitConstructor));
303 }
304 
createUninitialized(unsigned length,UChar * & data)305 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data)
306 {
307     if (!length) {
308         data = 0;
309         return empty();
310     }
311 
312     // Allocate a single buffer large enough to contain the StringImpl
313     // struct as well as the data which it contains. This removes one
314     // heap allocation from this call.
315     StringImpl* string = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), allocationSize<UChar>(length)));
316 
317     data = reinterpret_cast<UChar*>(string + 1);
318     return adoptRef(new (string) StringImpl(length));
319 }
320 
reallocate(PassRefPtr<StringImpl> originalString,unsigned length)321 PassRefPtr<StringImpl> StringImpl::reallocate(PassRefPtr<StringImpl> originalString, unsigned length)
322 {
323     ASSERT(originalString->hasOneRef());
324 
325     if (!length)
326         return empty();
327 
328     bool is8Bit = originalString->is8Bit();
329     // Same as createUninitialized() except here we use realloc.
330     size_t size = is8Bit ? allocationSize<LChar>(length) : allocationSize<UChar>(length);
331     originalString->~StringImpl();
332     StringImpl* string = static_cast<StringImpl*>(partitionReallocGeneric(Partitions::getBufferPartition(), originalString.leakRef(), size));
333     if (is8Bit)
334         return adoptRef(new (string) StringImpl(length, Force8BitConstructor));
335     return adoptRef(new (string) StringImpl(length));
336 }
337 
staticStrings()338 static StaticStringsTable& staticStrings()
339 {
340     DEFINE_STATIC_LOCAL(StaticStringsTable, staticStrings, ());
341     return staticStrings;
342 }
343 
344 #ifndef NDEBUG
345 static bool s_allowCreationOfStaticStrings = true;
346 #endif
347 
allStaticStrings()348 const StaticStringsTable& StringImpl::allStaticStrings()
349 {
350     return staticStrings();
351 }
352 
freezeStaticStrings()353 void StringImpl::freezeStaticStrings()
354 {
355     ASSERT(isMainThread());
356 
357 #ifndef NDEBUG
358     s_allowCreationOfStaticStrings = false;
359 #endif
360 }
361 
362 unsigned StringImpl::m_highestStaticStringLength = 0;
363 
createStatic(const char * string,unsigned length,unsigned hash)364 StringImpl* StringImpl::createStatic(const char* string, unsigned length, unsigned hash)
365 {
366     ASSERT(s_allowCreationOfStaticStrings);
367     ASSERT(string);
368     ASSERT(length);
369 
370     StaticStringsTable::const_iterator it = staticStrings().find(hash);
371     if (it != staticStrings().end()) {
372         ASSERT(!memcmp(string, it->value + 1, length * sizeof(LChar)));
373         return it->value;
374     }
375 
376     // Allocate a single buffer large enough to contain the StringImpl
377     // struct as well as the data which it contains. This removes one
378     // heap allocation from this call.
379     RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(LChar)));
380     size_t size = sizeof(StringImpl) + length * sizeof(LChar);
381 
382     WTF_ANNOTATE_SCOPED_MEMORY_LEAK;
383     StringImpl* impl = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), size));
384 
385     LChar* data = reinterpret_cast<LChar*>(impl + 1);
386     impl = new (impl) StringImpl(length, hash, StaticString);
387     memcpy(data, string, length * sizeof(LChar));
388 #ifndef NDEBUG
389     impl->assertHashIsCorrect();
390 #endif
391 
392     ASSERT(isMainThread());
393     m_highestStaticStringLength = std::max(m_highestStaticStringLength, length);
394     staticStrings().add(hash, impl);
395     WTF_ANNOTATE_BENIGN_RACE(impl,
396         "Benign race on the reference counter of a static string created by StringImpl::createStatic");
397 
398     return impl;
399 }
400 
create(const UChar * characters,unsigned length)401 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length)
402 {
403     if (!characters || !length)
404         return empty();
405 
406     UChar* data;
407     RefPtr<StringImpl> string = createUninitialized(length, data);
408     memcpy(data, characters, length * sizeof(UChar));
409     return string.release();
410 }
411 
create(const LChar * characters,unsigned length)412 PassRefPtr<StringImpl> StringImpl::create(const LChar* characters, unsigned length)
413 {
414     if (!characters || !length)
415         return empty();
416 
417     LChar* data;
418     RefPtr<StringImpl> string = createUninitialized(length, data);
419     memcpy(data, characters, length * sizeof(LChar));
420     return string.release();
421 }
422 
create8BitIfPossible(const UChar * characters,unsigned length)423 PassRefPtr<StringImpl> StringImpl::create8BitIfPossible(const UChar* characters, unsigned length)
424 {
425     if (!characters || !length)
426         return empty();
427 
428     LChar* data;
429     RefPtr<StringImpl> string = createUninitialized(length, data);
430 
431     for (size_t i = 0; i < length; ++i) {
432         if (characters[i] & 0xff00)
433             return create(characters, length);
434         data[i] = static_cast<LChar>(characters[i]);
435     }
436 
437     return string.release();
438 }
439 
create(const LChar * string)440 PassRefPtr<StringImpl> StringImpl::create(const LChar* string)
441 {
442     if (!string)
443         return empty();
444     size_t length = strlen(reinterpret_cast<const char*>(string));
445     RELEASE_ASSERT(length <= numeric_limits<unsigned>::max());
446     return create(string, length);
447 }
448 
containsOnlyWhitespace()449 bool StringImpl::containsOnlyWhitespace()
450 {
451     // FIXME: The definition of whitespace here includes a number of characters
452     // that are not whitespace from the point of view of RenderText; I wonder if
453     // that's a problem in practice.
454     if (is8Bit()) {
455         for (unsigned i = 0; i < m_length; ++i) {
456             UChar c = characters8()[i];
457             if (!isASCIISpace(c))
458                 return false;
459         }
460 
461         return true;
462     }
463 
464     for (unsigned i = 0; i < m_length; ++i) {
465         UChar c = characters16()[i];
466         if (!isASCIISpace(c))
467             return false;
468     }
469     return true;
470 }
471 
substring(unsigned start,unsigned length)472 PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length)
473 {
474     if (start >= m_length)
475         return empty();
476     unsigned maxLength = m_length - start;
477     if (length >= maxLength) {
478         if (!start)
479             return this;
480         length = maxLength;
481     }
482     if (is8Bit())
483         return create(characters8() + start, length);
484 
485     return create(characters16() + start, length);
486 }
487 
characterStartingAt(unsigned i)488 UChar32 StringImpl::characterStartingAt(unsigned i)
489 {
490     if (is8Bit())
491         return characters8()[i];
492     if (U16_IS_SINGLE(characters16()[i]))
493         return characters16()[i];
494     if (i + 1 < m_length && U16_IS_LEAD(characters16()[i]) && U16_IS_TRAIL(characters16()[i + 1]))
495         return U16_GET_SUPPLEMENTARY(characters16()[i], characters16()[i + 1]);
496     return 0;
497 }
498 
lower()499 PassRefPtr<StringImpl> StringImpl::lower()
500 {
501     // Note: This is a hot function in the Dromaeo benchmark, specifically the
502     // no-op code path up through the first 'return' statement.
503 
504     // First scan the string for uppercase and non-ASCII characters:
505     bool noUpper = true;
506     UChar ored = 0;
507     if (is8Bit()) {
508         const LChar* end = characters8() + m_length;
509         for (const LChar* chp = characters8(); chp != end; ++chp) {
510             if (UNLIKELY(isASCIIUpper(*chp)))
511                 noUpper = false;
512             ored |= *chp;
513         }
514         // Nothing to do if the string is all ASCII with no uppercase.
515         if (noUpper && !(ored & ~0x7F))
516             return this;
517 
518         RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
519         int32_t length = m_length;
520 
521         LChar* data8;
522         RefPtr<StringImpl> newImpl = createUninitialized(length, data8);
523 
524         if (!(ored & ~0x7F)) {
525             for (int32_t i = 0; i < length; ++i)
526                 data8[i] = toASCIILower(characters8()[i]);
527 
528             return newImpl.release();
529         }
530 
531         // Do a slower implementation for cases that include non-ASCII Latin-1 characters.
532         for (int32_t i = 0; i < length; ++i)
533             data8[i] = static_cast<LChar>(Unicode::toLower(characters8()[i]));
534 
535         return newImpl.release();
536     }
537 
538     const UChar* end = characters16() + m_length;
539     for (const UChar* chp = characters16(); chp != end; ++chp) {
540         if (UNLIKELY(isASCIIUpper(*chp)))
541             noUpper = false;
542         ored |= *chp;
543     }
544     // Nothing to do if the string is all ASCII with no uppercase.
545     if (noUpper && !(ored & ~0x7F))
546         return this;
547 
548     RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
549     int32_t length = m_length;
550 
551     if (!(ored & ~0x7F)) {
552         UChar* data16;
553         RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
554 
555         for (int32_t i = 0; i < length; ++i) {
556             UChar c = characters16()[i];
557             data16[i] = toASCIILower(c);
558         }
559         return newImpl.release();
560     }
561 
562     // Do a slower implementation for cases that include non-ASCII characters.
563     UChar* data16;
564     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
565 
566     bool error;
567     int32_t realLength = Unicode::toLower(data16, length, characters16(), m_length, &error);
568     if (!error && realLength == length)
569         return newImpl.release();
570 
571     newImpl = createUninitialized(realLength, data16);
572     Unicode::toLower(data16, realLength, characters16(), m_length, &error);
573     if (error)
574         return this;
575     return newImpl.release();
576 }
577 
upper()578 PassRefPtr<StringImpl> StringImpl::upper()
579 {
580     // This function could be optimized for no-op cases the way lower() is,
581     // but in empirical testing, few actual calls to upper() are no-ops, so
582     // it wouldn't be worth the extra time for pre-scanning.
583 
584     RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
585     int32_t length = m_length;
586 
587     if (is8Bit()) {
588         LChar* data8;
589         RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8);
590 
591         // Do a faster loop for the case where all the characters are ASCII.
592         LChar ored = 0;
593         for (int i = 0; i < length; ++i) {
594             LChar c = characters8()[i];
595             ored |= c;
596             data8[i] = toASCIIUpper(c);
597         }
598         if (!(ored & ~0x7F))
599             return newImpl.release();
600 
601         // Do a slower implementation for cases that include non-ASCII Latin-1 characters.
602         int numberSharpSCharacters = 0;
603 
604         // There are two special cases.
605         //  1. latin-1 characters when converted to upper case are 16 bit characters.
606         //  2. Lower case sharp-S converts to "SS" (two characters)
607         for (int32_t i = 0; i < length; ++i) {
608             LChar c = characters8()[i];
609             if (UNLIKELY(c == smallLetterSharpS))
610                 ++numberSharpSCharacters;
611             UChar upper = Unicode::toUpper(c);
612             if (UNLIKELY(upper > 0xff)) {
613                 // Since this upper-cased character does not fit in an 8-bit string, we need to take the 16-bit path.
614                 goto upconvert;
615             }
616             data8[i] = static_cast<LChar>(upper);
617         }
618 
619         if (!numberSharpSCharacters)
620             return newImpl.release();
621 
622         // We have numberSSCharacters sharp-s characters, but none of the other special characters.
623         newImpl = createUninitialized(m_length + numberSharpSCharacters, data8);
624 
625         LChar* dest = data8;
626 
627         for (int32_t i = 0; i < length; ++i) {
628             LChar c = characters8()[i];
629             if (c == smallLetterSharpS) {
630                 *dest++ = 'S';
631                 *dest++ = 'S';
632             } else
633                 *dest++ = static_cast<LChar>(Unicode::toUpper(c));
634         }
635 
636         return newImpl.release();
637     }
638 
639 upconvert:
640     RefPtr<StringImpl> upconverted = upconvertedString();
641     const UChar* source16 = upconverted->characters16();
642 
643     UChar* data16;
644     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
645 
646     // Do a faster loop for the case where all the characters are ASCII.
647     UChar ored = 0;
648     for (int i = 0; i < length; ++i) {
649         UChar c = source16[i];
650         ored |= c;
651         data16[i] = toASCIIUpper(c);
652     }
653     if (!(ored & ~0x7F))
654         return newImpl.release();
655 
656     // Do a slower implementation for cases that include non-ASCII characters.
657     bool error;
658     int32_t realLength = Unicode::toUpper(data16, length, source16, m_length, &error);
659     if (!error && realLength == length)
660         return newImpl;
661     newImpl = createUninitialized(realLength, data16);
662     Unicode::toUpper(data16, realLength, source16, m_length, &error);
663     if (error)
664         return this;
665     return newImpl.release();
666 }
667 
localeIdMatchesLang(const AtomicString & localeId,const char * lang)668 static bool inline localeIdMatchesLang(const AtomicString& localeId, const char* lang)
669 {
670     if (equalIgnoringCase(localeId, lang))
671         return true;
672     static char localeIdPrefix[4];
673     static const char delimeter[4] = "-_@";
674 
675     size_t langLength = strlen(lang);
676     RELEASE_ASSERT(langLength >= 2 && langLength <= 3);
677     strncpy(localeIdPrefix, lang, langLength);
678     for (int i = 0; i < 3; ++i) {
679         localeIdPrefix[langLength] = delimeter[i];
680         // case-insensitive comparison
681         if (localeId.impl() && localeId.impl()->startsWith(localeIdPrefix, langLength + 1, false))
682             return true;
683     }
684     return false;
685 }
686 
687 typedef int32_t (*icuCaseConverter)(UChar*, int32_t, const UChar*, int32_t, const char*, UErrorCode*);
688 
caseConvert(const UChar * source16,size_t length,icuCaseConverter converter,const char * locale,StringImpl * originalString)689 static PassRefPtr<StringImpl> caseConvert(const UChar* source16, size_t length, icuCaseConverter converter, const char* locale, StringImpl* originalString)
690 {
691     UChar* data16;
692     int32_t targetLength = length;
693     RefPtr<StringImpl> output = StringImpl::createUninitialized(length, data16);
694     do {
695         UErrorCode status = U_ZERO_ERROR;
696         targetLength = converter(data16, targetLength, source16, length, locale, &status);
697         if (U_SUCCESS(status)) {
698             output->truncateAssumingIsolated(targetLength);
699             return output.release();
700         }
701         if (status != U_BUFFER_OVERFLOW_ERROR)
702             return originalString;
703         // Expand the buffer.
704         output = StringImpl::createUninitialized(targetLength, data16);
705     } while (true);
706 }
707 
lower(const AtomicString & localeIdentifier)708 PassRefPtr<StringImpl> StringImpl::lower(const AtomicString& localeIdentifier)
709 {
710     // Use the more-optimized code path most of the time.
711     // Only Turkic (tr and az) languages and Lithuanian requires
712     // locale-specific lowercasing rules. Even though CLDR has el-Lower,
713     // it's identical to the locale-agnostic lowercasing. Context-dependent
714     // handling of Greek capital sigma is built into the common lowercasing
715     // function in ICU.
716     const char* localeForConversion = 0;
717     if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(localeIdentifier, "az"))
718         localeForConversion = "tr";
719     else if (localeIdMatchesLang(localeIdentifier, "lt"))
720         localeForConversion = "lt";
721     else
722         return lower();
723 
724     if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
725         CRASH();
726     int length = m_length;
727 
728     RefPtr<StringImpl> upconverted = upconvertedString();
729     const UChar* source16 = upconverted->characters16();
730     return caseConvert(source16, length, u_strToLower, localeForConversion, this);
731 }
732 
upper(const AtomicString & localeIdentifier)733 PassRefPtr<StringImpl> StringImpl::upper(const AtomicString& localeIdentifier)
734 {
735     // Use the more-optimized code path most of the time.
736     // Only Turkic (tr and az) languages and Greek require locale-specific
737     // lowercasing rules.
738     icu::UnicodeString transliteratorId;
739     const char* localeForConversion = 0;
740     if (localeIdMatchesLang(localeIdentifier, "tr") || localeIdMatchesLang(localeIdentifier, "az"))
741         localeForConversion = "tr";
742     else if (localeIdMatchesLang(localeIdentifier, "el"))
743         transliteratorId = UNICODE_STRING_SIMPLE("el-Upper");
744     else if (localeIdMatchesLang(localeIdentifier, "lt"))
745         localeForConversion = "lt";
746     else
747         return upper();
748 
749     if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
750         CRASH();
751     int length = m_length;
752 
753     RefPtr<StringImpl> upconverted = upconvertedString();
754     const UChar* source16 = upconverted->characters16();
755 
756     if (localeForConversion)
757         return caseConvert(source16, length, u_strToUpper, localeForConversion, this);
758 
759     // TODO(jungshik): Cache transliterator if perf penaly warrants it for Greek.
760     UErrorCode status = U_ZERO_ERROR;
761     OwnPtr<icu::Transliterator> translit =
762         adoptPtr(icu::Transliterator::createInstance(transliteratorId, UTRANS_FORWARD, status));
763     if (U_FAILURE(status))
764         return upper();
765 
766     // target will be copy-on-write.
767     icu::UnicodeString target(false, source16, length);
768     translit->transliterate(target);
769 
770     return create(target.getBuffer(), target.length());
771 }
772 
fill(UChar character)773 PassRefPtr<StringImpl> StringImpl::fill(UChar character)
774 {
775     if (!(character & ~0x7F)) {
776         LChar* data;
777         RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
778         for (unsigned i = 0; i < m_length; ++i)
779             data[i] = character;
780         return newImpl.release();
781     }
782     UChar* data;
783     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
784     for (unsigned i = 0; i < m_length; ++i)
785         data[i] = character;
786     return newImpl.release();
787 }
788 
foldCase()789 PassRefPtr<StringImpl> StringImpl::foldCase()
790 {
791     RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
792     int32_t length = m_length;
793 
794     if (is8Bit()) {
795         // Do a faster loop for the case where all the characters are ASCII.
796         LChar* data;
797         RefPtr <StringImpl>newImpl = createUninitialized(m_length, data);
798         LChar ored = 0;
799 
800         for (int32_t i = 0; i < length; ++i) {
801             LChar c = characters8()[i];
802             data[i] = toASCIILower(c);
803             ored |= c;
804         }
805 
806         if (!(ored & ~0x7F))
807             return newImpl.release();
808 
809         // Do a slower implementation for cases that include non-ASCII Latin-1 characters.
810         for (int32_t i = 0; i < length; ++i)
811             data[i] = static_cast<LChar>(Unicode::toLower(characters8()[i]));
812 
813         return newImpl.release();
814     }
815 
816     // Do a faster loop for the case where all the characters are ASCII.
817     UChar* data;
818     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
819     UChar ored = 0;
820     for (int32_t i = 0; i < length; ++i) {
821         UChar c = characters16()[i];
822         ored |= c;
823         data[i] = toASCIILower(c);
824     }
825     if (!(ored & ~0x7F))
826         return newImpl.release();
827 
828     // Do a slower implementation for cases that include non-ASCII characters.
829     bool error;
830     int32_t realLength = Unicode::foldCase(data, length, characters16(), m_length, &error);
831     if (!error && realLength == length)
832         return newImpl.release();
833     newImpl = createUninitialized(realLength, data);
834     Unicode::foldCase(data, realLength, characters16(), m_length, &error);
835     if (error)
836         return this;
837     return newImpl.release();
838 }
839 
840 template <class UCharPredicate>
stripMatchedCharacters(UCharPredicate predicate)841 inline PassRefPtr<StringImpl> StringImpl::stripMatchedCharacters(UCharPredicate predicate)
842 {
843     if (!m_length)
844         return empty();
845 
846     unsigned start = 0;
847     unsigned end = m_length - 1;
848 
849     // skip white space from start
850     while (start <= end && predicate(is8Bit() ? characters8()[start] : characters16()[start]))
851         ++start;
852 
853     // only white space
854     if (start > end)
855         return empty();
856 
857     // skip white space from end
858     while (end && predicate(is8Bit() ? characters8()[end] : characters16()[end]))
859         --end;
860 
861     if (!start && end == m_length - 1)
862         return this;
863     if (is8Bit())
864         return create(characters8() + start, end + 1 - start);
865     return create(characters16() + start, end + 1 - start);
866 }
867 
868 class UCharPredicate {
869 public:
UCharPredicate(CharacterMatchFunctionPtr function)870     inline UCharPredicate(CharacterMatchFunctionPtr function): m_function(function) { }
871 
operator ()(UChar ch) const872     inline bool operator()(UChar ch) const
873     {
874         return m_function(ch);
875     }
876 
877 private:
878     const CharacterMatchFunctionPtr m_function;
879 };
880 
881 class SpaceOrNewlinePredicate {
882 public:
operator ()(UChar ch) const883     inline bool operator()(UChar ch) const
884     {
885         return isSpaceOrNewline(ch);
886     }
887 };
888 
stripWhiteSpace()889 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace()
890 {
891     return stripMatchedCharacters(SpaceOrNewlinePredicate());
892 }
893 
stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace)894 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace)
895 {
896     return stripMatchedCharacters(UCharPredicate(isWhiteSpace));
897 }
898 
899 template <typename CharType>
removeCharacters(const CharType * characters,CharacterMatchFunctionPtr findMatch)900 ALWAYS_INLINE PassRefPtr<StringImpl> StringImpl::removeCharacters(const CharType* characters, CharacterMatchFunctionPtr findMatch)
901 {
902     const CharType* from = characters;
903     const CharType* fromend = from + m_length;
904 
905     // Assume the common case will not remove any characters
906     while (from != fromend && !findMatch(*from))
907         ++from;
908     if (from == fromend)
909         return this;
910 
911     StringBuffer<CharType> data(m_length);
912     CharType* to = data.characters();
913     unsigned outc = from - characters;
914 
915     if (outc)
916         memcpy(to, characters, outc * sizeof(CharType));
917 
918     while (true) {
919         while (from != fromend && findMatch(*from))
920             ++from;
921         while (from != fromend && !findMatch(*from))
922             to[outc++] = *from++;
923         if (from == fromend)
924             break;
925     }
926 
927     data.shrink(outc);
928 
929     return data.release();
930 }
931 
removeCharacters(CharacterMatchFunctionPtr findMatch)932 PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr findMatch)
933 {
934     if (is8Bit())
935         return removeCharacters(characters8(), findMatch);
936     return removeCharacters(characters16(), findMatch);
937 }
938 
939 template <typename CharType, class UCharPredicate>
simplifyMatchedCharactersToSpace(UCharPredicate predicate,StripBehavior stripBehavior)940 inline PassRefPtr<StringImpl> StringImpl::simplifyMatchedCharactersToSpace(UCharPredicate predicate, StripBehavior stripBehavior)
941 {
942     StringBuffer<CharType> data(m_length);
943 
944     const CharType* from = getCharacters<CharType>();
945     const CharType* fromend = from + m_length;
946     int outc = 0;
947     bool changedToSpace = false;
948 
949     CharType* to = data.characters();
950 
951     if (stripBehavior == StripExtraWhiteSpace) {
952         while (true) {
953             while (from != fromend && predicate(*from)) {
954                 if (*from != ' ')
955                     changedToSpace = true;
956                 ++from;
957             }
958             while (from != fromend && !predicate(*from))
959                 to[outc++] = *from++;
960             if (from != fromend)
961                 to[outc++] = ' ';
962             else
963                 break;
964         }
965 
966         if (outc > 0 && to[outc - 1] == ' ')
967             --outc;
968     } else {
969         for (; from != fromend; ++from) {
970             if (predicate(*from)) {
971                 if (*from != ' ')
972                     changedToSpace = true;
973                 to[outc++] = ' ';
974             } else {
975                 to[outc++] = *from;
976             }
977         }
978     }
979 
980     if (static_cast<unsigned>(outc) == m_length && !changedToSpace)
981         return this;
982 
983     data.shrink(outc);
984 
985     return data.release();
986 }
987 
simplifyWhiteSpace(StripBehavior stripBehavior)988 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(StripBehavior stripBehavior)
989 {
990     if (is8Bit())
991         return StringImpl::simplifyMatchedCharactersToSpace<LChar>(SpaceOrNewlinePredicate(), stripBehavior);
992     return StringImpl::simplifyMatchedCharactersToSpace<UChar>(SpaceOrNewlinePredicate(), stripBehavior);
993 }
994 
simplifyWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace,StripBehavior stripBehavior)995 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace, StripBehavior stripBehavior)
996 {
997     if (is8Bit())
998         return StringImpl::simplifyMatchedCharactersToSpace<LChar>(UCharPredicate(isWhiteSpace), stripBehavior);
999     return StringImpl::simplifyMatchedCharactersToSpace<UChar>(UCharPredicate(isWhiteSpace), stripBehavior);
1000 }
1001 
toIntStrict(bool * ok,int base)1002 int StringImpl::toIntStrict(bool* ok, int base)
1003 {
1004     if (is8Bit())
1005         return charactersToIntStrict(characters8(), m_length, ok, base);
1006     return charactersToIntStrict(characters16(), m_length, ok, base);
1007 }
1008 
toUIntStrict(bool * ok,int base)1009 unsigned StringImpl::toUIntStrict(bool* ok, int base)
1010 {
1011     if (is8Bit())
1012         return charactersToUIntStrict(characters8(), m_length, ok, base);
1013     return charactersToUIntStrict(characters16(), m_length, ok, base);
1014 }
1015 
toInt64Strict(bool * ok,int base)1016 int64_t StringImpl::toInt64Strict(bool* ok, int base)
1017 {
1018     if (is8Bit())
1019         return charactersToInt64Strict(characters8(), m_length, ok, base);
1020     return charactersToInt64Strict(characters16(), m_length, ok, base);
1021 }
1022 
toUInt64Strict(bool * ok,int base)1023 uint64_t StringImpl::toUInt64Strict(bool* ok, int base)
1024 {
1025     if (is8Bit())
1026         return charactersToUInt64Strict(characters8(), m_length, ok, base);
1027     return charactersToUInt64Strict(characters16(), m_length, ok, base);
1028 }
1029 
toIntPtrStrict(bool * ok,int base)1030 intptr_t StringImpl::toIntPtrStrict(bool* ok, int base)
1031 {
1032     if (is8Bit())
1033         return charactersToIntPtrStrict(characters8(), m_length, ok, base);
1034     return charactersToIntPtrStrict(characters16(), m_length, ok, base);
1035 }
1036 
toInt(bool * ok)1037 int StringImpl::toInt(bool* ok)
1038 {
1039     if (is8Bit())
1040         return charactersToInt(characters8(), m_length, ok);
1041     return charactersToInt(characters16(), m_length, ok);
1042 }
1043 
toUInt(bool * ok)1044 unsigned StringImpl::toUInt(bool* ok)
1045 {
1046     if (is8Bit())
1047         return charactersToUInt(characters8(), m_length, ok);
1048     return charactersToUInt(characters16(), m_length, ok);
1049 }
1050 
toInt64(bool * ok)1051 int64_t StringImpl::toInt64(bool* ok)
1052 {
1053     if (is8Bit())
1054         return charactersToInt64(characters8(), m_length, ok);
1055     return charactersToInt64(characters16(), m_length, ok);
1056 }
1057 
toUInt64(bool * ok)1058 uint64_t StringImpl::toUInt64(bool* ok)
1059 {
1060     if (is8Bit())
1061         return charactersToUInt64(characters8(), m_length, ok);
1062     return charactersToUInt64(characters16(), m_length, ok);
1063 }
1064 
toIntPtr(bool * ok)1065 intptr_t StringImpl::toIntPtr(bool* ok)
1066 {
1067     if (is8Bit())
1068         return charactersToIntPtr(characters8(), m_length, ok);
1069     return charactersToIntPtr(characters16(), m_length, ok);
1070 }
1071 
toDouble(bool * ok)1072 double StringImpl::toDouble(bool* ok)
1073 {
1074     if (is8Bit())
1075         return charactersToDouble(characters8(), m_length, ok);
1076     return charactersToDouble(characters16(), m_length, ok);
1077 }
1078 
toFloat(bool * ok)1079 float StringImpl::toFloat(bool* ok)
1080 {
1081     if (is8Bit())
1082         return charactersToFloat(characters8(), m_length, ok);
1083     return charactersToFloat(characters16(), m_length, ok);
1084 }
1085 
equalIgnoringCase(const LChar * a,const LChar * b,unsigned length)1086 bool equalIgnoringCase(const LChar* a, const LChar* b, unsigned length)
1087 {
1088     while (length--) {
1089         LChar bc = *b++;
1090         if (foldCase(*a++) != foldCase(bc))
1091             return false;
1092     }
1093     return true;
1094 }
1095 
equalIgnoringCase(const UChar * a,const LChar * b,unsigned length)1096 bool equalIgnoringCase(const UChar* a, const LChar* b, unsigned length)
1097 {
1098     while (length--) {
1099         LChar bc = *b++;
1100         if (foldCase(*a++) != foldCase(bc))
1101             return false;
1102     }
1103     return true;
1104 }
1105 
find(CharacterMatchFunctionPtr matchFunction,unsigned start)1106 size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start)
1107 {
1108     if (is8Bit())
1109         return WTF::find(characters8(), m_length, matchFunction, start);
1110     return WTF::find(characters16(), m_length, matchFunction, start);
1111 }
1112 
find(const LChar * matchString,unsigned index)1113 size_t StringImpl::find(const LChar* matchString, unsigned index)
1114 {
1115     // Check for null or empty string to match against
1116     if (!matchString)
1117         return kNotFound;
1118     size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString));
1119     RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max());
1120     unsigned matchLength = matchStringLength;
1121     if (!matchLength)
1122         return min(index, length());
1123 
1124     // Optimization 1: fast case for strings of length 1.
1125     if (matchLength == 1)
1126         return WTF::find(characters16(), length(), *matchString, index);
1127 
1128     // Check index & matchLength are in range.
1129     if (index > length())
1130         return kNotFound;
1131     unsigned searchLength = length() - index;
1132     if (matchLength > searchLength)
1133         return kNotFound;
1134     // delta is the number of additional times to test; delta == 0 means test only once.
1135     unsigned delta = searchLength - matchLength;
1136 
1137     const UChar* searchCharacters = characters16() + index;
1138 
1139     // Optimization 2: keep a running hash of the strings,
1140     // only call equal if the hashes match.
1141     unsigned searchHash = 0;
1142     unsigned matchHash = 0;
1143     for (unsigned i = 0; i < matchLength; ++i) {
1144         searchHash += searchCharacters[i];
1145         matchHash += matchString[i];
1146     }
1147 
1148     unsigned i = 0;
1149     // keep looping until we match
1150     while (searchHash != matchHash || !equal(searchCharacters + i, matchString, matchLength)) {
1151         if (i == delta)
1152             return kNotFound;
1153         searchHash += searchCharacters[i + matchLength];
1154         searchHash -= searchCharacters[i];
1155         ++i;
1156     }
1157     return index + i;
1158 }
1159 
1160 template<typename CharType>
findIgnoringCaseInternal(const CharType * searchCharacters,const LChar * matchString,unsigned index,unsigned searchLength,unsigned matchLength)1161 ALWAYS_INLINE size_t findIgnoringCaseInternal(const CharType* searchCharacters, const LChar* matchString, unsigned index, unsigned searchLength, unsigned matchLength)
1162 {
1163     // delta is the number of additional times to test; delta == 0 means test only once.
1164     unsigned delta = searchLength - matchLength;
1165 
1166     unsigned i = 0;
1167     while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) {
1168         if (i == delta)
1169             return kNotFound;
1170         ++i;
1171     }
1172     return index + i;
1173 }
1174 
findIgnoringCase(const LChar * matchString,unsigned index)1175 size_t StringImpl::findIgnoringCase(const LChar* matchString, unsigned index)
1176 {
1177     // Check for null or empty string to match against
1178     if (!matchString)
1179         return kNotFound;
1180     size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString));
1181     RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max());
1182     unsigned matchLength = matchStringLength;
1183     if (!matchLength)
1184         return min(index, length());
1185 
1186     // Check index & matchLength are in range.
1187     if (index > length())
1188         return kNotFound;
1189     unsigned searchLength = length() - index;
1190     if (matchLength > searchLength)
1191         return kNotFound;
1192 
1193     if (is8Bit())
1194         return findIgnoringCaseInternal(characters8() + index, matchString, index, searchLength, matchLength);
1195     return findIgnoringCaseInternal(characters16() + index, matchString, index, searchLength, matchLength);
1196 }
1197 
1198 template <typename SearchCharacterType, typename MatchCharacterType>
findInternal(const SearchCharacterType * searchCharacters,const MatchCharacterType * matchCharacters,unsigned index,unsigned searchLength,unsigned matchLength)1199 ALWAYS_INLINE static size_t findInternal(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLength, unsigned matchLength)
1200 {
1201     // Optimization: keep a running hash of the strings,
1202     // only call equal() if the hashes match.
1203 
1204     // delta is the number of additional times to test; delta == 0 means test only once.
1205     unsigned delta = searchLength - matchLength;
1206 
1207     unsigned searchHash = 0;
1208     unsigned matchHash = 0;
1209 
1210     for (unsigned i = 0; i < matchLength; ++i) {
1211         searchHash += searchCharacters[i];
1212         matchHash += matchCharacters[i];
1213     }
1214 
1215     unsigned i = 0;
1216     // keep looping until we match
1217     while (searchHash != matchHash || !equal(searchCharacters + i, matchCharacters, matchLength)) {
1218         if (i == delta)
1219             return kNotFound;
1220         searchHash += searchCharacters[i + matchLength];
1221         searchHash -= searchCharacters[i];
1222         ++i;
1223     }
1224     return index + i;
1225 }
1226 
find(StringImpl * matchString)1227 size_t StringImpl::find(StringImpl* matchString)
1228 {
1229     // Check for null string to match against
1230     if (UNLIKELY(!matchString))
1231         return kNotFound;
1232     unsigned matchLength = matchString->length();
1233 
1234     // Optimization 1: fast case for strings of length 1.
1235     if (matchLength == 1) {
1236         if (is8Bit()) {
1237             if (matchString->is8Bit())
1238                 return WTF::find(characters8(), length(), matchString->characters8()[0]);
1239             return WTF::find(characters8(), length(), matchString->characters16()[0]);
1240         }
1241         if (matchString->is8Bit())
1242             return WTF::find(characters16(), length(), matchString->characters8()[0]);
1243         return WTF::find(characters16(), length(), matchString->characters16()[0]);
1244     }
1245 
1246     // Check matchLength is in range.
1247     if (matchLength > length())
1248         return kNotFound;
1249 
1250     // Check for empty string to match against
1251     if (UNLIKELY(!matchLength))
1252         return 0;
1253 
1254     if (is8Bit()) {
1255         if (matchString->is8Bit())
1256             return findInternal(characters8(), matchString->characters8(), 0, length(), matchLength);
1257         return findInternal(characters8(), matchString->characters16(), 0, length(), matchLength);
1258     }
1259 
1260     if (matchString->is8Bit())
1261         return findInternal(characters16(), matchString->characters8(), 0, length(), matchLength);
1262 
1263     return findInternal(characters16(), matchString->characters16(), 0, length(), matchLength);
1264 }
1265 
find(StringImpl * matchString,unsigned index)1266 size_t StringImpl::find(StringImpl* matchString, unsigned index)
1267 {
1268     // Check for null or empty string to match against
1269     if (UNLIKELY(!matchString))
1270         return kNotFound;
1271 
1272     unsigned matchLength = matchString->length();
1273 
1274     // Optimization 1: fast case for strings of length 1.
1275     if (matchLength == 1) {
1276         if (is8Bit())
1277             return WTF::find(characters8(), length(), (*matchString)[0], index);
1278         return WTF::find(characters16(), length(), (*matchString)[0], index);
1279     }
1280 
1281     if (UNLIKELY(!matchLength))
1282         return min(index, length());
1283 
1284     // Check index & matchLength are in range.
1285     if (index > length())
1286         return kNotFound;
1287     unsigned searchLength = length() - index;
1288     if (matchLength > searchLength)
1289         return kNotFound;
1290 
1291     if (is8Bit()) {
1292         if (matchString->is8Bit())
1293             return findInternal(characters8() + index, matchString->characters8(), index, searchLength, matchLength);
1294         return findInternal(characters8() + index, matchString->characters16(), index, searchLength, matchLength);
1295     }
1296 
1297     if (matchString->is8Bit())
1298         return findInternal(characters16() + index, matchString->characters8(), index, searchLength, matchLength);
1299 
1300     return findInternal(characters16() + index, matchString->characters16(), index, searchLength, matchLength);
1301 }
1302 
1303 template <typename SearchCharacterType, typename MatchCharacterType>
findIgnoringCaseInner(const SearchCharacterType * searchCharacters,const MatchCharacterType * matchCharacters,unsigned index,unsigned searchLength,unsigned matchLength)1304 ALWAYS_INLINE static size_t findIgnoringCaseInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLength, unsigned matchLength)
1305 {
1306     // delta is the number of additional times to test; delta == 0 means test only once.
1307     unsigned delta = searchLength - matchLength;
1308 
1309     unsigned i = 0;
1310     // keep looping until we match
1311     while (!equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength)) {
1312         if (i == delta)
1313             return kNotFound;
1314         ++i;
1315     }
1316     return index + i;
1317 }
1318 
findIgnoringCase(StringImpl * matchString,unsigned index)1319 size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index)
1320 {
1321     // Check for null or empty string to match against
1322     if (!matchString)
1323         return kNotFound;
1324     unsigned matchLength = matchString->length();
1325     if (!matchLength)
1326         return min(index, length());
1327 
1328     // Check index & matchLength are in range.
1329     if (index > length())
1330         return kNotFound;
1331     unsigned searchLength = length() - index;
1332     if (matchLength > searchLength)
1333         return kNotFound;
1334 
1335     if (is8Bit()) {
1336         if (matchString->is8Bit())
1337             return findIgnoringCaseInner(characters8() + index, matchString->characters8(), index, searchLength, matchLength);
1338         return findIgnoringCaseInner(characters8() + index, matchString->characters16(), index, searchLength, matchLength);
1339     }
1340 
1341     if (matchString->is8Bit())
1342         return findIgnoringCaseInner(characters16() + index, matchString->characters8(), index, searchLength, matchLength);
1343 
1344     return findIgnoringCaseInner(characters16() + index, matchString->characters16(), index, searchLength, matchLength);
1345 }
1346 
findNextLineStart(unsigned index)1347 size_t StringImpl::findNextLineStart(unsigned index)
1348 {
1349     if (is8Bit())
1350         return WTF::findNextLineStart(characters8(), m_length, index);
1351     return WTF::findNextLineStart(characters16(), m_length, index);
1352 }
1353 
count(LChar c) const1354 size_t StringImpl::count(LChar c) const
1355 {
1356     int count = 0;
1357     if (is8Bit()) {
1358         for (size_t i = 0; i < m_length; ++i)
1359             count += characters8()[i] == c;
1360     } else {
1361         for (size_t i = 0; i < m_length; ++i)
1362             count += characters16()[i] == c;
1363     }
1364     return count;
1365 }
1366 
reverseFind(UChar c,unsigned index)1367 size_t StringImpl::reverseFind(UChar c, unsigned index)
1368 {
1369     if (is8Bit())
1370         return WTF::reverseFind(characters8(), m_length, c, index);
1371     return WTF::reverseFind(characters16(), m_length, c, index);
1372 }
1373 
1374 template <typename SearchCharacterType, typename MatchCharacterType>
reverseFindInner(const SearchCharacterType * searchCharacters,const MatchCharacterType * matchCharacters,unsigned index,unsigned length,unsigned matchLength)1375 ALWAYS_INLINE static size_t reverseFindInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned length, unsigned matchLength)
1376 {
1377     // Optimization: keep a running hash of the strings,
1378     // only call equal if the hashes match.
1379 
1380     // delta is the number of additional times to test; delta == 0 means test only once.
1381     unsigned delta = min(index, length - matchLength);
1382 
1383     unsigned searchHash = 0;
1384     unsigned matchHash = 0;
1385     for (unsigned i = 0; i < matchLength; ++i) {
1386         searchHash += searchCharacters[delta + i];
1387         matchHash += matchCharacters[i];
1388     }
1389 
1390     // keep looping until we match
1391     while (searchHash != matchHash || !equal(searchCharacters + delta, matchCharacters, matchLength)) {
1392         if (!delta)
1393             return kNotFound;
1394         --delta;
1395         searchHash -= searchCharacters[delta + matchLength];
1396         searchHash += searchCharacters[delta];
1397     }
1398     return delta;
1399 }
1400 
reverseFind(StringImpl * matchString,unsigned index)1401 size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index)
1402 {
1403     // Check for null or empty string to match against
1404     if (!matchString)
1405         return kNotFound;
1406     unsigned matchLength = matchString->length();
1407     unsigned ourLength = length();
1408     if (!matchLength)
1409         return min(index, ourLength);
1410 
1411     // Optimization 1: fast case for strings of length 1.
1412     if (matchLength == 1) {
1413         if (is8Bit())
1414             return WTF::reverseFind(characters8(), ourLength, (*matchString)[0], index);
1415         return WTF::reverseFind(characters16(), ourLength, (*matchString)[0], index);
1416     }
1417 
1418     // Check index & matchLength are in range.
1419     if (matchLength > ourLength)
1420         return kNotFound;
1421 
1422     if (is8Bit()) {
1423         if (matchString->is8Bit())
1424             return reverseFindInner(characters8(), matchString->characters8(), index, ourLength, matchLength);
1425         return reverseFindInner(characters8(), matchString->characters16(), index, ourLength, matchLength);
1426     }
1427 
1428     if (matchString->is8Bit())
1429         return reverseFindInner(characters16(), matchString->characters8(), index, ourLength, matchLength);
1430 
1431     return reverseFindInner(characters16(), matchString->characters16(), index, ourLength, matchLength);
1432 }
1433 
1434 template <typename SearchCharacterType, typename MatchCharacterType>
reverseFindIgnoringCaseInner(const SearchCharacterType * searchCharacters,const MatchCharacterType * matchCharacters,unsigned index,unsigned length,unsigned matchLength)1435 ALWAYS_INLINE static size_t reverseFindIgnoringCaseInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned length, unsigned matchLength)
1436 {
1437     // delta is the number of additional times to test; delta == 0 means test only once.
1438     unsigned delta = min(index, length - matchLength);
1439 
1440     // keep looping until we match
1441     while (!equalIgnoringCase(searchCharacters + delta, matchCharacters, matchLength)) {
1442         if (!delta)
1443             return kNotFound;
1444         --delta;
1445     }
1446     return delta;
1447 }
1448 
reverseFindIgnoringCase(StringImpl * matchString,unsigned index)1449 size_t StringImpl::reverseFindIgnoringCase(StringImpl* matchString, unsigned index)
1450 {
1451     // Check for null or empty string to match against
1452     if (!matchString)
1453         return kNotFound;
1454     unsigned matchLength = matchString->length();
1455     unsigned ourLength = length();
1456     if (!matchLength)
1457         return min(index, ourLength);
1458 
1459     // Check index & matchLength are in range.
1460     if (matchLength > ourLength)
1461         return kNotFound;
1462 
1463     if (is8Bit()) {
1464         if (matchString->is8Bit())
1465             return reverseFindIgnoringCaseInner(characters8(), matchString->characters8(), index, ourLength, matchLength);
1466         return reverseFindIgnoringCaseInner(characters8(), matchString->characters16(), index, ourLength, matchLength);
1467     }
1468 
1469     if (matchString->is8Bit())
1470         return reverseFindIgnoringCaseInner(characters16(), matchString->characters8(), index, ourLength, matchLength);
1471 
1472     return reverseFindIgnoringCaseInner(characters16(), matchString->characters16(), index, ourLength, matchLength);
1473 }
1474 
equalInner(const StringImpl * stringImpl,unsigned startOffset,const char * matchString,unsigned matchLength,bool caseSensitive)1475 ALWAYS_INLINE static bool equalInner(const StringImpl* stringImpl, unsigned startOffset, const char* matchString, unsigned matchLength, bool caseSensitive)
1476 {
1477     ASSERT(stringImpl);
1478     ASSERT(matchLength <= stringImpl->length());
1479     ASSERT(startOffset + matchLength <= stringImpl->length());
1480 
1481     if (caseSensitive) {
1482         if (stringImpl->is8Bit())
1483             return equal(stringImpl->characters8() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
1484         return equal(stringImpl->characters16() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
1485     }
1486     if (stringImpl->is8Bit())
1487         return equalIgnoringCase(stringImpl->characters8() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
1488     return equalIgnoringCase(stringImpl->characters16() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
1489 }
1490 
startsWith(UChar character) const1491 bool StringImpl::startsWith(UChar character) const
1492 {
1493     return m_length && (*this)[0] == character;
1494 }
1495 
startsWith(const char * matchString,unsigned matchLength,bool caseSensitive) const1496 bool StringImpl::startsWith(const char* matchString, unsigned matchLength, bool caseSensitive) const
1497 {
1498     ASSERT(matchLength);
1499     if (matchLength > length())
1500         return false;
1501     return equalInner(this, 0, matchString, matchLength, caseSensitive);
1502 }
1503 
endsWith(StringImpl * matchString,bool caseSensitive)1504 bool StringImpl::endsWith(StringImpl* matchString, bool caseSensitive)
1505 {
1506     ASSERT(matchString);
1507     if (m_length >= matchString->m_length) {
1508         unsigned start = m_length - matchString->m_length;
1509         return (caseSensitive ? find(matchString, start) : findIgnoringCase(matchString, start)) == start;
1510     }
1511     return false;
1512 }
1513 
endsWith(UChar character) const1514 bool StringImpl::endsWith(UChar character) const
1515 {
1516     return m_length && (*this)[m_length - 1] == character;
1517 }
1518 
endsWith(const char * matchString,unsigned matchLength,bool caseSensitive) const1519 bool StringImpl::endsWith(const char* matchString, unsigned matchLength, bool caseSensitive) const
1520 {
1521     ASSERT(matchLength);
1522     if (matchLength > length())
1523         return false;
1524     unsigned startOffset = length() - matchLength;
1525     return equalInner(this, startOffset, matchString, matchLength, caseSensitive);
1526 }
1527 
replace(UChar oldC,UChar newC)1528 PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC)
1529 {
1530     if (oldC == newC)
1531         return this;
1532 
1533     if (find(oldC) == kNotFound)
1534         return this;
1535 
1536     unsigned i;
1537     if (is8Bit()) {
1538         if (newC <= 0xff) {
1539             LChar* data;
1540             LChar oldChar = static_cast<LChar>(oldC);
1541             LChar newChar = static_cast<LChar>(newC);
1542 
1543             RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1544 
1545             for (i = 0; i != m_length; ++i) {
1546                 LChar ch = characters8()[i];
1547                 if (ch == oldChar)
1548                     ch = newChar;
1549                 data[i] = ch;
1550             }
1551             return newImpl.release();
1552         }
1553 
1554         // There is the possibility we need to up convert from 8 to 16 bit,
1555         // create a 16 bit string for the result.
1556         UChar* data;
1557         RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1558 
1559         for (i = 0; i != m_length; ++i) {
1560             UChar ch = characters8()[i];
1561             if (ch == oldC)
1562                 ch = newC;
1563             data[i] = ch;
1564         }
1565 
1566         return newImpl.release();
1567     }
1568 
1569     UChar* data;
1570     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1571 
1572     for (i = 0; i != m_length; ++i) {
1573         UChar ch = characters16()[i];
1574         if (ch == oldC)
1575             ch = newC;
1576         data[i] = ch;
1577     }
1578     return newImpl.release();
1579 }
1580 
replace(unsigned position,unsigned lengthToReplace,StringImpl * str)1581 PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToReplace, StringImpl* str)
1582 {
1583     position = min(position, length());
1584     lengthToReplace = min(lengthToReplace, length() - position);
1585     unsigned lengthToInsert = str ? str->length() : 0;
1586     if (!lengthToReplace && !lengthToInsert)
1587         return this;
1588 
1589     RELEASE_ASSERT((length() - lengthToReplace) < (numeric_limits<unsigned>::max() - lengthToInsert));
1590 
1591     if (is8Bit() && (!str || str->is8Bit())) {
1592         LChar* data;
1593         RefPtr<StringImpl> newImpl =
1594         createUninitialized(length() - lengthToReplace + lengthToInsert, data);
1595         memcpy(data, characters8(), position * sizeof(LChar));
1596         if (str)
1597             memcpy(data + position, str->characters8(), lengthToInsert * sizeof(LChar));
1598         memcpy(data + position + lengthToInsert, characters8() + position + lengthToReplace,
1599                (length() - position - lengthToReplace) * sizeof(LChar));
1600         return newImpl.release();
1601     }
1602     UChar* data;
1603     RefPtr<StringImpl> newImpl =
1604         createUninitialized(length() - lengthToReplace + lengthToInsert, data);
1605     if (is8Bit())
1606         for (unsigned i = 0; i < position; ++i)
1607             data[i] = characters8()[i];
1608     else
1609         memcpy(data, characters16(), position * sizeof(UChar));
1610     if (str) {
1611         if (str->is8Bit())
1612             for (unsigned i = 0; i < lengthToInsert; ++i)
1613                 data[i + position] = str->characters8()[i];
1614         else
1615             memcpy(data + position, str->characters16(), lengthToInsert * sizeof(UChar));
1616     }
1617     if (is8Bit()) {
1618         for (unsigned i = 0; i < length() - position - lengthToReplace; ++i)
1619             data[i + position + lengthToInsert] = characters8()[i + position + lengthToReplace];
1620     } else {
1621         memcpy(data + position + lengthToInsert, characters16() + position + lengthToReplace,
1622             (length() - position - lengthToReplace) * sizeof(UChar));
1623     }
1624     return newImpl.release();
1625 }
1626 
replace(UChar pattern,StringImpl * replacement)1627 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacement)
1628 {
1629     if (!replacement)
1630         return this;
1631 
1632     if (replacement->is8Bit())
1633         return replace(pattern, replacement->characters8(), replacement->length());
1634 
1635     return replace(pattern, replacement->characters16(), replacement->length());
1636 }
1637 
replace(UChar pattern,const LChar * replacement,unsigned repStrLength)1638 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const LChar* replacement, unsigned repStrLength)
1639 {
1640     ASSERT(replacement);
1641 
1642     size_t srcSegmentStart = 0;
1643     unsigned matchCount = 0;
1644 
1645     // Count the matches.
1646     while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
1647         ++matchCount;
1648         ++srcSegmentStart;
1649     }
1650 
1651     // If we have 0 matches then we don't have to do any more work.
1652     if (!matchCount)
1653         return this;
1654 
1655     RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength);
1656 
1657     unsigned replaceSize = matchCount * repStrLength;
1658     unsigned newSize = m_length - matchCount;
1659     RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize));
1660 
1661     newSize += replaceSize;
1662 
1663     // Construct the new data.
1664     size_t srcSegmentEnd;
1665     unsigned srcSegmentLength;
1666     srcSegmentStart = 0;
1667     unsigned dstOffset = 0;
1668 
1669     if (is8Bit()) {
1670         LChar* data;
1671         RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1672 
1673         while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1674             srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1675             memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1676             dstOffset += srcSegmentLength;
1677             memcpy(data + dstOffset, replacement, repStrLength * sizeof(LChar));
1678             dstOffset += repStrLength;
1679             srcSegmentStart = srcSegmentEnd + 1;
1680         }
1681 
1682         srcSegmentLength = m_length - srcSegmentStart;
1683         memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1684 
1685         ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1686 
1687         return newImpl.release();
1688     }
1689 
1690     UChar* data;
1691     RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1692 
1693     while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1694         srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1695         memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1696 
1697         dstOffset += srcSegmentLength;
1698         for (unsigned i = 0; i < repStrLength; ++i)
1699             data[i + dstOffset] = replacement[i];
1700 
1701         dstOffset += repStrLength;
1702         srcSegmentStart = srcSegmentEnd + 1;
1703     }
1704 
1705     srcSegmentLength = m_length - srcSegmentStart;
1706     memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1707 
1708     ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1709 
1710     return newImpl.release();
1711 }
1712 
replace(UChar pattern,const UChar * replacement,unsigned repStrLength)1713 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const UChar* replacement, unsigned repStrLength)
1714 {
1715     ASSERT(replacement);
1716 
1717     size_t srcSegmentStart = 0;
1718     unsigned matchCount = 0;
1719 
1720     // Count the matches.
1721     while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
1722         ++matchCount;
1723         ++srcSegmentStart;
1724     }
1725 
1726     // If we have 0 matches then we don't have to do any more work.
1727     if (!matchCount)
1728         return this;
1729 
1730     RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength);
1731 
1732     unsigned replaceSize = matchCount * repStrLength;
1733     unsigned newSize = m_length - matchCount;
1734     RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize));
1735 
1736     newSize += replaceSize;
1737 
1738     // Construct the new data.
1739     size_t srcSegmentEnd;
1740     unsigned srcSegmentLength;
1741     srcSegmentStart = 0;
1742     unsigned dstOffset = 0;
1743 
1744     if (is8Bit()) {
1745         UChar* data;
1746         RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1747 
1748         while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1749             srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1750             for (unsigned i = 0; i < srcSegmentLength; ++i)
1751                 data[i + dstOffset] = characters8()[i + srcSegmentStart];
1752 
1753             dstOffset += srcSegmentLength;
1754             memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar));
1755 
1756             dstOffset += repStrLength;
1757             srcSegmentStart = srcSegmentEnd + 1;
1758         }
1759 
1760         srcSegmentLength = m_length - srcSegmentStart;
1761         for (unsigned i = 0; i < srcSegmentLength; ++i)
1762             data[i + dstOffset] = characters8()[i + srcSegmentStart];
1763 
1764         ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1765 
1766         return newImpl.release();
1767     }
1768 
1769     UChar* data;
1770     RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1771 
1772     while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1773         srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1774         memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1775 
1776         dstOffset += srcSegmentLength;
1777         memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar));
1778 
1779         dstOffset += repStrLength;
1780         srcSegmentStart = srcSegmentEnd + 1;
1781     }
1782 
1783     srcSegmentLength = m_length - srcSegmentStart;
1784     memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1785 
1786     ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1787 
1788     return newImpl.release();
1789 }
1790 
replace(StringImpl * pattern,StringImpl * replacement)1791 PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* replacement)
1792 {
1793     if (!pattern || !replacement)
1794         return this;
1795 
1796     unsigned patternLength = pattern->length();
1797     if (!patternLength)
1798         return this;
1799 
1800     unsigned repStrLength = replacement->length();
1801     size_t srcSegmentStart = 0;
1802     unsigned matchCount = 0;
1803 
1804     // Count the matches.
1805     while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
1806         ++matchCount;
1807         srcSegmentStart += patternLength;
1808     }
1809 
1810     // If we have 0 matches, we don't have to do any more work
1811     if (!matchCount)
1812         return this;
1813 
1814     unsigned newSize = m_length - matchCount * patternLength;
1815     RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength);
1816 
1817     RELEASE_ASSERT(newSize <= (numeric_limits<unsigned>::max() - matchCount * repStrLength));
1818 
1819     newSize += matchCount * repStrLength;
1820 
1821 
1822     // Construct the new data
1823     size_t srcSegmentEnd;
1824     unsigned srcSegmentLength;
1825     srcSegmentStart = 0;
1826     unsigned dstOffset = 0;
1827     bool srcIs8Bit = is8Bit();
1828     bool replacementIs8Bit = replacement->is8Bit();
1829 
1830     // There are 4 cases:
1831     // 1. This and replacement are both 8 bit.
1832     // 2. This and replacement are both 16 bit.
1833     // 3. This is 8 bit and replacement is 16 bit.
1834     // 4. This is 16 bit and replacement is 8 bit.
1835     if (srcIs8Bit && replacementIs8Bit) {
1836         // Case 1
1837         LChar* data;
1838         RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1839         while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1840             srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1841             memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1842             dstOffset += srcSegmentLength;
1843             memcpy(data + dstOffset, replacement->characters8(), repStrLength * sizeof(LChar));
1844             dstOffset += repStrLength;
1845             srcSegmentStart = srcSegmentEnd + patternLength;
1846         }
1847 
1848         srcSegmentLength = m_length - srcSegmentStart;
1849         memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1850 
1851         ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1852 
1853         return newImpl.release();
1854     }
1855 
1856     UChar* data;
1857     RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1858     while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1859         srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1860         if (srcIs8Bit) {
1861             // Case 3.
1862             for (unsigned i = 0; i < srcSegmentLength; ++i)
1863                 data[i + dstOffset] = characters8()[i + srcSegmentStart];
1864         } else {
1865             // Case 2 & 4.
1866             memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1867         }
1868         dstOffset += srcSegmentLength;
1869         if (replacementIs8Bit) {
1870             // Cases 2 & 3.
1871             for (unsigned i = 0; i < repStrLength; ++i)
1872                 data[i + dstOffset] = replacement->characters8()[i];
1873         } else {
1874             // Case 4
1875             memcpy(data + dstOffset, replacement->characters16(), repStrLength * sizeof(UChar));
1876         }
1877         dstOffset += repStrLength;
1878         srcSegmentStart = srcSegmentEnd + patternLength;
1879     }
1880 
1881     srcSegmentLength = m_length - srcSegmentStart;
1882     if (srcIs8Bit) {
1883         // Case 3.
1884         for (unsigned i = 0; i < srcSegmentLength; ++i)
1885             data[i + dstOffset] = characters8()[i + srcSegmentStart];
1886     } else {
1887         // Cases 2 & 4.
1888         memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1889     }
1890 
1891     ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1892 
1893     return newImpl.release();
1894 }
1895 
upconvertedString()1896 PassRefPtr<StringImpl> StringImpl::upconvertedString()
1897 {
1898     if (is8Bit())
1899         return String::make16BitFrom8BitSource(characters8(), m_length).releaseImpl();
1900     return this;
1901 }
1902 
stringImplContentEqual(const StringImpl * a,const StringImpl * b)1903 static inline bool stringImplContentEqual(const StringImpl* a, const StringImpl* b)
1904 {
1905     unsigned aLength = a->length();
1906     unsigned bLength = b->length();
1907     if (aLength != bLength)
1908         return false;
1909 
1910     if (a->is8Bit()) {
1911         if (b->is8Bit())
1912             return equal(a->characters8(), b->characters8(), aLength);
1913 
1914         return equal(a->characters8(), b->characters16(), aLength);
1915     }
1916 
1917     if (b->is8Bit())
1918         return equal(a->characters16(), b->characters8(), aLength);
1919 
1920     return equal(a->characters16(), b->characters16(), aLength);
1921 }
1922 
equal(const StringImpl * a,const StringImpl * b)1923 bool equal(const StringImpl* a, const StringImpl* b)
1924 {
1925     if (a == b)
1926         return true;
1927     if (!a || !b)
1928         return false;
1929     if (a->isAtomic() && b->isAtomic())
1930         return false;
1931 
1932     return stringImplContentEqual(a, b);
1933 }
1934 
1935 template <typename CharType>
equalInternal(const StringImpl * a,const CharType * b,unsigned length)1936 inline bool equalInternal(const StringImpl* a, const CharType* b, unsigned length)
1937 {
1938     if (!a)
1939         return !b;
1940     if (!b)
1941         return false;
1942 
1943     if (a->length() != length)
1944         return false;
1945     if (a->is8Bit())
1946         return equal(a->characters8(), b, length);
1947     return equal(a->characters16(), b, length);
1948 }
1949 
equal(const StringImpl * a,const LChar * b,unsigned length)1950 bool equal(const StringImpl* a, const LChar* b, unsigned length)
1951 {
1952     return equalInternal(a, b, length);
1953 }
1954 
equal(const StringImpl * a,const UChar * b,unsigned length)1955 bool equal(const StringImpl* a, const UChar* b, unsigned length)
1956 {
1957     return equalInternal(a, b, length);
1958 }
1959 
equal(const StringImpl * a,const LChar * b)1960 bool equal(const StringImpl* a, const LChar* b)
1961 {
1962     if (!a)
1963         return !b;
1964     if (!b)
1965         return !a;
1966 
1967     unsigned length = a->length();
1968 
1969     if (a->is8Bit()) {
1970         const LChar* aPtr = a->characters8();
1971         for (unsigned i = 0; i != length; ++i) {
1972             LChar bc = b[i];
1973             LChar ac = aPtr[i];
1974             if (!bc)
1975                 return false;
1976             if (ac != bc)
1977                 return false;
1978         }
1979 
1980         return !b[length];
1981     }
1982 
1983     const UChar* aPtr = a->characters16();
1984     for (unsigned i = 0; i != length; ++i) {
1985         LChar bc = b[i];
1986         if (!bc)
1987             return false;
1988         if (aPtr[i] != bc)
1989             return false;
1990     }
1991 
1992     return !b[length];
1993 }
1994 
equalNonNull(const StringImpl * a,const StringImpl * b)1995 bool equalNonNull(const StringImpl* a, const StringImpl* b)
1996 {
1997     ASSERT(a && b);
1998     if (a == b)
1999         return true;
2000 
2001     return stringImplContentEqual(a, b);
2002 }
2003 
equalIgnoringCase(const StringImpl * a,const StringImpl * b)2004 bool equalIgnoringCase(const StringImpl* a, const StringImpl* b)
2005 {
2006     if (a == b)
2007         return true;
2008     if (!a || !b)
2009         return false;
2010 
2011     return CaseFoldingHash::equal(a, b);
2012 }
2013 
equalIgnoringCase(const StringImpl * a,const LChar * b)2014 bool equalIgnoringCase(const StringImpl* a, const LChar* b)
2015 {
2016     if (!a)
2017         return !b;
2018     if (!b)
2019         return !a;
2020 
2021     unsigned length = a->length();
2022 
2023     // Do a faster loop for the case where all the characters are ASCII.
2024     UChar ored = 0;
2025     bool equal = true;
2026     if (a->is8Bit()) {
2027         const LChar* as = a->characters8();
2028         for (unsigned i = 0; i != length; ++i) {
2029             LChar bc = b[i];
2030             if (!bc)
2031                 return false;
2032             UChar ac = as[i];
2033             ored |= ac;
2034             equal = equal && (toASCIILower(ac) == toASCIILower(bc));
2035         }
2036 
2037         // Do a slower implementation for cases that include non-ASCII characters.
2038         if (ored & ~0x7F) {
2039             equal = true;
2040             for (unsigned i = 0; i != length; ++i)
2041                 equal = equal && (foldCase(as[i]) == foldCase(b[i]));
2042         }
2043 
2044         return equal && !b[length];
2045     }
2046 
2047     const UChar* as = a->characters16();
2048     for (unsigned i = 0; i != length; ++i) {
2049         LChar bc = b[i];
2050         if (!bc)
2051             return false;
2052         UChar ac = as[i];
2053         ored |= ac;
2054         equal = equal && (toASCIILower(ac) == toASCIILower(bc));
2055     }
2056 
2057     // Do a slower implementation for cases that include non-ASCII characters.
2058     if (ored & ~0x7F) {
2059         equal = true;
2060         for (unsigned i = 0; i != length; ++i) {
2061             equal = equal && (foldCase(as[i]) == foldCase(b[i]));
2062         }
2063     }
2064 
2065     return equal && !b[length];
2066 }
2067 
equalIgnoringCaseNonNull(const StringImpl * a,const StringImpl * b)2068 bool equalIgnoringCaseNonNull(const StringImpl* a, const StringImpl* b)
2069 {
2070     ASSERT(a && b);
2071     if (a == b)
2072         return true;
2073 
2074     unsigned length = a->length();
2075     if (length != b->length())
2076         return false;
2077 
2078     if (a->is8Bit()) {
2079         if (b->is8Bit())
2080             return equalIgnoringCase(a->characters8(), b->characters8(), length);
2081 
2082         return equalIgnoringCase(b->characters16(), a->characters8(), length);
2083     }
2084 
2085     if (b->is8Bit())
2086         return equalIgnoringCase(a->characters16(), b->characters8(), length);
2087 
2088     return equalIgnoringCase(a->characters16(), b->characters16(), length);
2089 }
2090 
equalIgnoringNullity(StringImpl * a,StringImpl * b)2091 bool equalIgnoringNullity(StringImpl* a, StringImpl* b)
2092 {
2093     if (!a && b && !b->length())
2094         return true;
2095     if (!b && a && !a->length())
2096         return true;
2097     return equal(a, b);
2098 }
2099 
sizeInBytes() const2100 size_t StringImpl::sizeInBytes() const
2101 {
2102     size_t size = length();
2103     if (!is8Bit())
2104         size *= 2;
2105     return size + sizeof(*this);
2106 }
2107 
2108 } // namespace WTF
2109