1 /*
2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3 * (C) 1999 Antti Koivisto (koivisto@kde.org)
4 * (C) 2001 Dirk Mueller ( mueller@kde.org )
5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2013 Apple Inc. All rights reserved.
6 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Library General Public License for more details.
17 *
18 * You should have received a copy of the GNU Library General Public License
19 * along with this library; see the file COPYING.LIB. If not, write to
20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
22 *
23 */
24
25 #include "config.h"
26 #include "wtf/text/StringImpl.h"
27
28 #include "wtf/DynamicAnnotations.h"
29 #include "wtf/LeakAnnotations.h"
30 #include "wtf/MainThread.h"
31 #include "wtf/PartitionAlloc.h"
32 #include "wtf/StdLibExtras.h"
33 #include "wtf/WTF.h"
34 #include "wtf/text/AtomicString.h"
35 #include "wtf/text/StringBuffer.h"
36 #include "wtf/text/StringHash.h"
37 #include "wtf/unicode/CharacterNames.h"
38
39 #ifdef STRING_STATS
40 #include "wtf/DataLog.h"
41 #include "wtf/HashMap.h"
42 #include "wtf/HashSet.h"
43 #include "wtf/ProcessID.h"
44 #include "wtf/RefCounted.h"
45 #include "wtf/ThreadingPrimitives.h"
46 #include <unistd.h>
47 #endif
48
49 using namespace std;
50
51 namespace WTF {
52
53 using namespace Unicode;
54
55 COMPILE_ASSERT(sizeof(StringImpl) == 3 * sizeof(int), StringImpl_should_stay_small);
56
57 #ifdef STRING_STATS
58
statsMutex()59 static Mutex& statsMutex()
60 {
61 DEFINE_STATIC_LOCAL(Mutex, mutex, ());
62 return mutex;
63 }
64
liveStrings()65 static HashSet<void*>& liveStrings()
66 {
67 // Notice that we can't use HashSet<StringImpl*> because then HashSet would dedup identical strings.
68 DEFINE_STATIC_LOCAL(HashSet<void*>, strings, ());
69 return strings;
70 }
71
addStringForStats(StringImpl * string)72 void addStringForStats(StringImpl* string)
73 {
74 MutexLocker locker(statsMutex());
75 liveStrings().add(string);
76 }
77
removeStringForStats(StringImpl * string)78 void removeStringForStats(StringImpl* string)
79 {
80 MutexLocker locker(statsMutex());
81 liveStrings().remove(string);
82 }
83
fillWithSnippet(const StringImpl * string,Vector<char> & snippet)84 static void fillWithSnippet(const StringImpl* string, Vector<char>& snippet)
85 {
86 const unsigned kMaxSnippetLength = 64;
87 snippet.clear();
88
89 size_t expectedLength = std::min(string->length(), kMaxSnippetLength);
90 if (expectedLength == kMaxSnippetLength)
91 expectedLength += 3; // For the "...".
92 ++expectedLength; // For the terminating '\0'.
93 snippet.reserveCapacity(expectedLength);
94
95 size_t i;
96 for (i = 0; i < string->length() && i < kMaxSnippetLength; ++i) {
97 UChar c = (*string)[i];
98 if (isASCIIPrintable(c))
99 snippet.append(c);
100 else
101 snippet.append('?');
102 }
103 if (i < string->length()) {
104 snippet.append('.');
105 snippet.append('.');
106 snippet.append('.');
107 }
108 snippet.append('\0');
109 }
110
isUnnecessarilyWide(const StringImpl * string)111 static bool isUnnecessarilyWide(const StringImpl* string)
112 {
113 if (string->is8Bit())
114 return false;
115 UChar c = 0;
116 for (unsigned i = 0; i < string->length(); ++i)
117 c |= (*string)[i] >> 8;
118 return !c;
119 }
120
121 class PerStringStats : public RefCounted<PerStringStats> {
122 public:
create()123 static PassRefPtr<PerStringStats> create()
124 {
125 return adoptRef(new PerStringStats);
126 }
127
add(const StringImpl * string)128 void add(const StringImpl* string)
129 {
130 ++m_numberOfCopies;
131 if (!m_length) {
132 m_length = string->length();
133 fillWithSnippet(string, m_snippet);
134 }
135 if (string->isAtomic())
136 ++m_numberOfAtomicCopies;
137 if (isUnnecessarilyWide(string))
138 m_unnecessarilyWide = true;
139 }
140
totalCharacters() const141 size_t totalCharacters() const
142 {
143 return m_numberOfCopies * m_length;
144 }
145
print()146 void print()
147 {
148 const char* status = "ok";
149 if (m_unnecessarilyWide)
150 status = "16";
151 dataLogF("%8u copies (%s) of length %8u %s\n", m_numberOfCopies, status, m_length, m_snippet.data());
152 }
153
154 bool m_unnecessarilyWide;
155 unsigned m_numberOfCopies;
156 unsigned m_length;
157 unsigned m_numberOfAtomicCopies;
158 Vector<char> m_snippet;
159
160 private:
PerStringStats()161 PerStringStats()
162 : m_unnecessarilyWide(false)
163 , m_numberOfCopies(0)
164 , m_length(0)
165 , m_numberOfAtomicCopies(0)
166 {
167 }
168 };
169
operator <(const RefPtr<PerStringStats> & a,const RefPtr<PerStringStats> & b)170 bool operator<(const RefPtr<PerStringStats>& a, const RefPtr<PerStringStats>& b)
171 {
172 if (a->m_unnecessarilyWide != b->m_unnecessarilyWide)
173 return !a->m_unnecessarilyWide && b->m_unnecessarilyWide;
174 if (a->totalCharacters() != b->totalCharacters())
175 return a->totalCharacters() < b->totalCharacters();
176 if (a->m_numberOfCopies != b->m_numberOfCopies)
177 return a->m_numberOfCopies < b->m_numberOfCopies;
178 if (a->m_length != b->m_length)
179 return a->m_length < b->m_length;
180 return a->m_numberOfAtomicCopies < b->m_numberOfAtomicCopies;
181 }
182
printLiveStringStats(void *)183 static void printLiveStringStats(void*)
184 {
185 MutexLocker locker(statsMutex());
186 HashSet<void*>& strings = liveStrings();
187
188 HashMap<StringImpl*, RefPtr<PerStringStats> > stats;
189 for (HashSet<void*>::iterator iter = strings.begin(); iter != strings.end(); ++iter) {
190 StringImpl* string = static_cast<StringImpl*>(*iter);
191 HashMap<StringImpl*, RefPtr<PerStringStats> >::iterator entry = stats.find(string);
192 RefPtr<PerStringStats> value = entry == stats.end() ? RefPtr<PerStringStats>(PerStringStats::create()) : entry->value;
193 value->add(string);
194 stats.set(string, value.release());
195 }
196
197 Vector<RefPtr<PerStringStats> > all;
198 for (HashMap<StringImpl*, RefPtr<PerStringStats> >::iterator iter = stats.begin(); iter != stats.end(); ++iter)
199 all.append(iter->value);
200
201 std::sort(all.begin(), all.end());
202 std::reverse(all.begin(), all.end());
203 for (size_t i = 0; i < 20 && i < all.size(); ++i)
204 all[i]->print();
205 }
206
207 StringStats StringImpl::m_stringStats;
208
209 unsigned StringStats::s_stringRemovesTillPrintStats = StringStats::s_printStringStatsFrequency;
210
removeString(StringImpl * string)211 void StringStats::removeString(StringImpl* string)
212 {
213 unsigned length = string->length();
214 --m_totalNumberStrings;
215
216 if (string->is8Bit()) {
217 --m_number8BitStrings;
218 m_total8BitData -= length;
219 } else {
220 --m_number16BitStrings;
221 m_total16BitData -= length;
222 }
223
224 if (!--s_stringRemovesTillPrintStats) {
225 s_stringRemovesTillPrintStats = s_printStringStatsFrequency;
226 printStats();
227 }
228 }
229
printStats()230 void StringStats::printStats()
231 {
232 dataLogF("String stats for process id %d:\n", getCurrentProcessID());
233
234 unsigned long long totalNumberCharacters = m_total8BitData + m_total16BitData;
235 double percent8Bit = m_totalNumberStrings ? ((double)m_number8BitStrings * 100) / (double)m_totalNumberStrings : 0.0;
236 double average8bitLength = m_number8BitStrings ? (double)m_total8BitData / (double)m_number8BitStrings : 0.0;
237 dataLogF("%8u (%5.2f%%) 8 bit %12llu chars %12llu bytes avg length %6.1f\n", m_number8BitStrings, percent8Bit, m_total8BitData, m_total8BitData, average8bitLength);
238
239 double percent16Bit = m_totalNumberStrings ? ((double)m_number16BitStrings * 100) / (double)m_totalNumberStrings : 0.0;
240 double average16bitLength = m_number16BitStrings ? (double)m_total16BitData / (double)m_number16BitStrings : 0.0;
241 dataLogF("%8u (%5.2f%%) 16 bit %12llu chars %12llu bytes avg length %6.1f\n", m_number16BitStrings, percent16Bit, m_total16BitData, m_total16BitData * 2, average16bitLength);
242
243 double averageLength = m_totalNumberStrings ? (double)totalNumberCharacters / (double)m_totalNumberStrings : 0.0;
244 unsigned long long totalDataBytes = m_total8BitData + m_total16BitData * 2;
245 dataLogF("%8u Total %12llu chars %12llu bytes avg length %6.1f\n", m_totalNumberStrings, totalNumberCharacters, totalDataBytes, averageLength);
246 unsigned long long totalSavedBytes = m_total8BitData;
247 double percentSavings = totalSavedBytes ? ((double)totalSavedBytes * 100) / (double)(totalDataBytes + totalSavedBytes) : 0.0;
248 dataLogF(" Total savings %12llu bytes (%5.2f%%)\n", totalSavedBytes, percentSavings);
249
250 unsigned totalOverhead = m_totalNumberStrings * sizeof(StringImpl);
251 double overheadPercent = (double)totalOverhead / (double)totalDataBytes * 100;
252 dataLogF(" StringImpl overheader: %8u (%5.2f%%)\n", totalOverhead, overheadPercent);
253
254 callOnMainThread(printLiveStringStats, 0);
255 }
256 #endif
257
operator new(size_t size)258 void* StringImpl::operator new(size_t size)
259 {
260 ASSERT(size == sizeof(StringImpl));
261 return partitionAllocGeneric(Partitions::getBufferPartition(), size);
262 }
263
operator delete(void * ptr)264 void StringImpl::operator delete(void* ptr)
265 {
266 partitionFreeGeneric(Partitions::getBufferPartition(), ptr);
267 }
268
~StringImpl()269 inline StringImpl::~StringImpl()
270 {
271 ASSERT(!isStatic());
272
273 STRING_STATS_REMOVE_STRING(this);
274
275 if (isAtomic())
276 AtomicString::remove(this);
277 }
278
destroyIfNotStatic()279 void StringImpl::destroyIfNotStatic()
280 {
281 if (!isStatic())
282 delete this;
283 }
284
createUninitialized(unsigned length,LChar * & data)285 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, LChar*& data)
286 {
287 if (!length) {
288 data = 0;
289 return empty();
290 }
291
292 // Allocate a single buffer large enough to contain the StringImpl
293 // struct as well as the data which it contains. This removes one
294 // heap allocation from this call.
295 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(LChar)));
296 size_t size = sizeof(StringImpl) + length * sizeof(LChar);
297 StringImpl* string = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), size));
298
299 data = reinterpret_cast<LChar*>(string + 1);
300 return adoptRef(new (string) StringImpl(length, Force8BitConstructor));
301 }
302
createUninitialized(unsigned length,UChar * & data)303 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data)
304 {
305 if (!length) {
306 data = 0;
307 return empty();
308 }
309
310 // Allocate a single buffer large enough to contain the StringImpl
311 // struct as well as the data which it contains. This removes one
312 // heap allocation from this call.
313 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(UChar)));
314 size_t size = sizeof(StringImpl) + length * sizeof(UChar);
315 StringImpl* string = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), size));
316
317 data = reinterpret_cast<UChar*>(string + 1);
318 return adoptRef(new (string) StringImpl(length));
319 }
320
reallocate(PassRefPtr<StringImpl> originalString,unsigned length,LChar * & data)321 PassRefPtr<StringImpl> StringImpl::reallocate(PassRefPtr<StringImpl> originalString, unsigned length, LChar*& data)
322 {
323 ASSERT(originalString->is8Bit());
324 ASSERT(originalString->hasOneRef());
325
326 if (!length) {
327 data = 0;
328 return empty();
329 }
330
331 // Same as createUninitialized() except here we use realloc.
332 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(LChar)));
333 size_t size = sizeof(StringImpl) + length * sizeof(LChar);
334 originalString->~StringImpl();
335 StringImpl* string = static_cast<StringImpl*>(partitionReallocGeneric(Partitions::getBufferPartition(), originalString.leakRef(), size));
336
337 data = reinterpret_cast<LChar*>(string + 1);
338 return adoptRef(new (string) StringImpl(length, Force8BitConstructor));
339 }
340
reallocate(PassRefPtr<StringImpl> originalString,unsigned length,UChar * & data)341 PassRefPtr<StringImpl> StringImpl::reallocate(PassRefPtr<StringImpl> originalString, unsigned length, UChar*& data)
342 {
343 ASSERT(!originalString->is8Bit());
344 ASSERT(originalString->hasOneRef());
345
346 if (!length) {
347 data = 0;
348 return empty();
349 }
350
351 // Same as createUninitialized() except here we use realloc.
352 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(UChar)));
353 size_t size = sizeof(StringImpl) + length * sizeof(UChar);
354 originalString->~StringImpl();
355 StringImpl* string = static_cast<StringImpl*>(partitionReallocGeneric(Partitions::getBufferPartition(), originalString.leakRef(), size));
356
357 data = reinterpret_cast<UChar*>(string + 1);
358 return adoptRef(new (string) StringImpl(length));
359 }
360
staticStrings()361 static StaticStringsTable& staticStrings()
362 {
363 DEFINE_STATIC_LOCAL(StaticStringsTable, staticStrings, ());
364 return staticStrings;
365 }
366
367 #ifndef NDEBUG
368 static bool s_allowCreationOfStaticStrings = true;
369 #endif
370
allStaticStrings()371 const StaticStringsTable& StringImpl::allStaticStrings()
372 {
373 return staticStrings();
374 }
375
freezeStaticStrings()376 void StringImpl::freezeStaticStrings()
377 {
378 ASSERT(isMainThread());
379
380 #ifndef NDEBUG
381 s_allowCreationOfStaticStrings = false;
382 #endif
383 }
384
385 unsigned StringImpl::m_highestStaticStringLength = 0;
386
createStatic(const char * string,unsigned length,unsigned hash)387 StringImpl* StringImpl::createStatic(const char* string, unsigned length, unsigned hash)
388 {
389 ASSERT(s_allowCreationOfStaticStrings);
390 ASSERT(string);
391 ASSERT(length);
392
393 StaticStringsTable::const_iterator it = staticStrings().find(hash);
394 if (it != staticStrings().end()) {
395 ASSERT(!memcmp(string, it->value + 1, length * sizeof(LChar)));
396 return it->value;
397 }
398
399 // Allocate a single buffer large enough to contain the StringImpl
400 // struct as well as the data which it contains. This removes one
401 // heap allocation from this call.
402 RELEASE_ASSERT(length <= ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(LChar)));
403 size_t size = sizeof(StringImpl) + length * sizeof(LChar);
404
405 WTF_ANNOTATE_SCOPED_MEMORY_LEAK;
406 StringImpl* impl = static_cast<StringImpl*>(partitionAllocGeneric(Partitions::getBufferPartition(), size));
407
408 LChar* data = reinterpret_cast<LChar*>(impl + 1);
409 impl = new (impl) StringImpl(length, hash, StaticString);
410 memcpy(data, string, length * sizeof(LChar));
411 #ifndef NDEBUG
412 impl->assertHashIsCorrect();
413 #endif
414
415 ASSERT(isMainThread());
416 m_highestStaticStringLength = std::max(m_highestStaticStringLength, length);
417 staticStrings().add(hash, impl);
418 WTF_ANNOTATE_BENIGN_RACE(impl,
419 "Benign race on the reference counter of a static string created by StringImpl::createStatic");
420
421 return impl;
422 }
423
create(const UChar * characters,unsigned length)424 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length)
425 {
426 if (!characters || !length)
427 return empty();
428
429 UChar* data;
430 RefPtr<StringImpl> string = createUninitialized(length, data);
431 memcpy(data, characters, length * sizeof(UChar));
432 return string.release();
433 }
434
create(const LChar * characters,unsigned length)435 PassRefPtr<StringImpl> StringImpl::create(const LChar* characters, unsigned length)
436 {
437 if (!characters || !length)
438 return empty();
439
440 LChar* data;
441 RefPtr<StringImpl> string = createUninitialized(length, data);
442 memcpy(data, characters, length * sizeof(LChar));
443 return string.release();
444 }
445
create8BitIfPossible(const UChar * characters,unsigned length)446 PassRefPtr<StringImpl> StringImpl::create8BitIfPossible(const UChar* characters, unsigned length)
447 {
448 if (!characters || !length)
449 return empty();
450
451 LChar* data;
452 RefPtr<StringImpl> string = createUninitialized(length, data);
453
454 for (size_t i = 0; i < length; ++i) {
455 if (characters[i] & 0xff00)
456 return create(characters, length);
457 data[i] = static_cast<LChar>(characters[i]);
458 }
459
460 return string.release();
461 }
462
create(const LChar * string)463 PassRefPtr<StringImpl> StringImpl::create(const LChar* string)
464 {
465 if (!string)
466 return empty();
467 size_t length = strlen(reinterpret_cast<const char*>(string));
468 RELEASE_ASSERT(length <= numeric_limits<unsigned>::max());
469 return create(string, length);
470 }
471
containsOnlyWhitespace()472 bool StringImpl::containsOnlyWhitespace()
473 {
474 // FIXME: The definition of whitespace here includes a number of characters
475 // that are not whitespace from the point of view of RenderText; I wonder if
476 // that's a problem in practice.
477 if (is8Bit()) {
478 for (unsigned i = 0; i < m_length; ++i) {
479 UChar c = characters8()[i];
480 if (!isASCIISpace(c))
481 return false;
482 }
483
484 return true;
485 }
486
487 for (unsigned i = 0; i < m_length; ++i) {
488 UChar c = characters16()[i];
489 if (!isASCIISpace(c))
490 return false;
491 }
492 return true;
493 }
494
substring(unsigned start,unsigned length)495 PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length)
496 {
497 if (start >= m_length)
498 return empty();
499 unsigned maxLength = m_length - start;
500 if (length >= maxLength) {
501 if (!start)
502 return this;
503 length = maxLength;
504 }
505 if (is8Bit())
506 return create(characters8() + start, length);
507
508 return create(characters16() + start, length);
509 }
510
characterStartingAt(unsigned i)511 UChar32 StringImpl::characterStartingAt(unsigned i)
512 {
513 if (is8Bit())
514 return characters8()[i];
515 if (U16_IS_SINGLE(characters16()[i]))
516 return characters16()[i];
517 if (i + 1 < m_length && U16_IS_LEAD(characters16()[i]) && U16_IS_TRAIL(characters16()[i + 1]))
518 return U16_GET_SUPPLEMENTARY(characters16()[i], characters16()[i + 1]);
519 return 0;
520 }
521
lower()522 PassRefPtr<StringImpl> StringImpl::lower()
523 {
524 // Note: This is a hot function in the Dromaeo benchmark, specifically the
525 // no-op code path up through the first 'return' statement.
526
527 // First scan the string for uppercase and non-ASCII characters:
528 bool noUpper = true;
529 UChar ored = 0;
530 if (is8Bit()) {
531 const LChar* end = characters8() + m_length;
532 for (const LChar* chp = characters8(); chp != end; ++chp) {
533 if (UNLIKELY(isASCIIUpper(*chp)))
534 noUpper = false;
535 ored |= *chp;
536 }
537 // Nothing to do if the string is all ASCII with no uppercase.
538 if (noUpper && !(ored & ~0x7F))
539 return this;
540
541 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
542 int32_t length = m_length;
543
544 LChar* data8;
545 RefPtr<StringImpl> newImpl = createUninitialized(length, data8);
546
547 if (!(ored & ~0x7F)) {
548 for (int32_t i = 0; i < length; ++i)
549 data8[i] = toASCIILower(characters8()[i]);
550
551 return newImpl.release();
552 }
553
554 // Do a slower implementation for cases that include non-ASCII Latin-1 characters.
555 for (int32_t i = 0; i < length; ++i)
556 data8[i] = static_cast<LChar>(Unicode::toLower(characters8()[i]));
557
558 return newImpl.release();
559 }
560
561 const UChar* end = characters16() + m_length;
562 for (const UChar* chp = characters16(); chp != end; ++chp) {
563 if (UNLIKELY(isASCIIUpper(*chp)))
564 noUpper = false;
565 ored |= *chp;
566 }
567 // Nothing to do if the string is all ASCII with no uppercase.
568 if (noUpper && !(ored & ~0x7F))
569 return this;
570
571 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
572 int32_t length = m_length;
573
574 if (!(ored & ~0x7F)) {
575 UChar* data16;
576 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
577
578 for (int32_t i = 0; i < length; ++i) {
579 UChar c = characters16()[i];
580 data16[i] = toASCIILower(c);
581 }
582 return newImpl.release();
583 }
584
585 // Do a slower implementation for cases that include non-ASCII characters.
586 UChar* data16;
587 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
588
589 bool error;
590 int32_t realLength = Unicode::toLower(data16, length, characters16(), m_length, &error);
591 if (!error && realLength == length)
592 return newImpl.release();
593
594 newImpl = createUninitialized(realLength, data16);
595 Unicode::toLower(data16, realLength, characters16(), m_length, &error);
596 if (error)
597 return this;
598 return newImpl.release();
599 }
600
upper()601 PassRefPtr<StringImpl> StringImpl::upper()
602 {
603 // This function could be optimized for no-op cases the way lower() is,
604 // but in empirical testing, few actual calls to upper() are no-ops, so
605 // it wouldn't be worth the extra time for pre-scanning.
606
607 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
608 int32_t length = m_length;
609
610 if (is8Bit()) {
611 LChar* data8;
612 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data8);
613
614 // Do a faster loop for the case where all the characters are ASCII.
615 LChar ored = 0;
616 for (int i = 0; i < length; ++i) {
617 LChar c = characters8()[i];
618 ored |= c;
619 data8[i] = toASCIIUpper(c);
620 }
621 if (!(ored & ~0x7F))
622 return newImpl.release();
623
624 // Do a slower implementation for cases that include non-ASCII Latin-1 characters.
625 int numberSharpSCharacters = 0;
626
627 // There are two special cases.
628 // 1. latin-1 characters when converted to upper case are 16 bit characters.
629 // 2. Lower case sharp-S converts to "SS" (two characters)
630 for (int32_t i = 0; i < length; ++i) {
631 LChar c = characters8()[i];
632 if (UNLIKELY(c == smallLetterSharpS))
633 ++numberSharpSCharacters;
634 UChar upper = Unicode::toUpper(c);
635 if (UNLIKELY(upper > 0xff)) {
636 // Since this upper-cased character does not fit in an 8-bit string, we need to take the 16-bit path.
637 goto upconvert;
638 }
639 data8[i] = static_cast<LChar>(upper);
640 }
641
642 if (!numberSharpSCharacters)
643 return newImpl.release();
644
645 // We have numberSSCharacters sharp-s characters, but none of the other special characters.
646 newImpl = createUninitialized(m_length + numberSharpSCharacters, data8);
647
648 LChar* dest = data8;
649
650 for (int32_t i = 0; i < length; ++i) {
651 LChar c = characters8()[i];
652 if (c == smallLetterSharpS) {
653 *dest++ = 'S';
654 *dest++ = 'S';
655 } else
656 *dest++ = static_cast<LChar>(Unicode::toUpper(c));
657 }
658
659 return newImpl.release();
660 }
661
662 upconvert:
663 RefPtr<StringImpl> upconverted = upconvertedString();
664 const UChar* source16 = upconverted->characters16();
665
666 UChar* data16;
667 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
668
669 // Do a faster loop for the case where all the characters are ASCII.
670 UChar ored = 0;
671 for (int i = 0; i < length; ++i) {
672 UChar c = source16[i];
673 ored |= c;
674 data16[i] = toASCIIUpper(c);
675 }
676 if (!(ored & ~0x7F))
677 return newImpl.release();
678
679 // Do a slower implementation for cases that include non-ASCII characters.
680 bool error;
681 int32_t realLength = Unicode::toUpper(data16, length, source16, m_length, &error);
682 if (!error && realLength == length)
683 return newImpl;
684 newImpl = createUninitialized(realLength, data16);
685 Unicode::toUpper(data16, realLength, source16, m_length, &error);
686 if (error)
687 return this;
688 return newImpl.release();
689 }
690
lower(const AtomicString & localeIdentifier)691 PassRefPtr<StringImpl> StringImpl::lower(const AtomicString& localeIdentifier)
692 {
693 // Use the more-optimized code path most of the time.
694 // Note the assumption here that the only locale-specific lowercasing is
695 // in the "tr" and "az" locales.
696 // FIXME: Could possibly optimize further by looking for the specific sequences
697 // that have locale-specific lowercasing. There are only three of them.
698 if (!(localeIdentifier == "tr" || localeIdentifier == "az"))
699 return lower();
700
701 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
702 CRASH();
703 int length = m_length;
704
705 // Below, we pass in the hardcoded locale "tr". Passing that is more efficient than
706 // allocating memory just to turn localeIdentifier into a C string, and there is no
707 // difference between the uppercasing for "tr" and "az" locales.
708 RefPtr<StringImpl> upconverted = upconvertedString();
709 const UChar* source16 = upconverted->characters16();
710 UChar* data16;
711 RefPtr<StringImpl> newString = createUninitialized(length, data16);
712 do {
713 UErrorCode status = U_ZERO_ERROR;
714 int realLength = u_strToLower(data16, length, source16, length, "tr", &status);
715 if (U_SUCCESS(status)) {
716 newString->truncateAssumingIsolated(realLength);
717 return newString.release();
718 }
719 if (status != U_BUFFER_OVERFLOW_ERROR)
720 return this;
721 // Expand the buffer.
722 newString = createUninitialized(realLength, data16);
723 } while (true);
724 }
725
upper(const AtomicString & localeIdentifier)726 PassRefPtr<StringImpl> StringImpl::upper(const AtomicString& localeIdentifier)
727 {
728 // Use the more-optimized code path most of the time.
729 // Note the assumption here that the only locale-specific uppercasing is of the
730 // letter "i" in the "tr" and "az" locales.
731 if (!(localeIdentifier == "tr" || localeIdentifier == "az") || find('i') == kNotFound)
732 return upper();
733
734 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
735 CRASH();
736 int length = m_length;
737
738 // Below, we pass in the hardcoded locale "tr". Passing that is more efficient than
739 // allocating memory just to turn localeIdentifier into a C string, and there is no
740 // difference between the uppercasing for "tr" and "az" locales.
741 RefPtr<StringImpl> upconverted = upconvertedString();
742 const UChar* source16 = upconverted->characters16();
743 UChar* data16;
744 RefPtr<StringImpl> newString = createUninitialized(length, data16);
745 do {
746 UErrorCode status = U_ZERO_ERROR;
747 int realLength = u_strToUpper(data16, length, source16, length, "tr", &status);
748 if (U_SUCCESS(status)) {
749 newString->truncateAssumingIsolated(realLength);
750 return newString.release();
751 }
752 if (status != U_BUFFER_OVERFLOW_ERROR)
753 return this;
754 // Expand the buffer.
755 newString = createUninitialized(realLength, data16);
756 } while (true);
757 }
758
fill(UChar character)759 PassRefPtr<StringImpl> StringImpl::fill(UChar character)
760 {
761 if (!(character & ~0x7F)) {
762 LChar* data;
763 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
764 for (unsigned i = 0; i < m_length; ++i)
765 data[i] = character;
766 return newImpl.release();
767 }
768 UChar* data;
769 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
770 for (unsigned i = 0; i < m_length; ++i)
771 data[i] = character;
772 return newImpl.release();
773 }
774
foldCase()775 PassRefPtr<StringImpl> StringImpl::foldCase()
776 {
777 RELEASE_ASSERT(m_length <= static_cast<unsigned>(numeric_limits<int32_t>::max()));
778 int32_t length = m_length;
779
780 if (is8Bit()) {
781 // Do a faster loop for the case where all the characters are ASCII.
782 LChar* data;
783 RefPtr <StringImpl>newImpl = createUninitialized(m_length, data);
784 LChar ored = 0;
785
786 for (int32_t i = 0; i < length; ++i) {
787 LChar c = characters8()[i];
788 data[i] = toASCIILower(c);
789 ored |= c;
790 }
791
792 if (!(ored & ~0x7F))
793 return newImpl.release();
794
795 // Do a slower implementation for cases that include non-ASCII Latin-1 characters.
796 for (int32_t i = 0; i < length; ++i)
797 data[i] = static_cast<LChar>(Unicode::toLower(characters8()[i]));
798
799 return newImpl.release();
800 }
801
802 // Do a faster loop for the case where all the characters are ASCII.
803 UChar* data;
804 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
805 UChar ored = 0;
806 for (int32_t i = 0; i < length; ++i) {
807 UChar c = characters16()[i];
808 ored |= c;
809 data[i] = toASCIILower(c);
810 }
811 if (!(ored & ~0x7F))
812 return newImpl.release();
813
814 // Do a slower implementation for cases that include non-ASCII characters.
815 bool error;
816 int32_t realLength = Unicode::foldCase(data, length, characters16(), m_length, &error);
817 if (!error && realLength == length)
818 return newImpl.release();
819 newImpl = createUninitialized(realLength, data);
820 Unicode::foldCase(data, realLength, characters16(), m_length, &error);
821 if (error)
822 return this;
823 return newImpl.release();
824 }
825
826 template <class UCharPredicate>
stripMatchedCharacters(UCharPredicate predicate)827 inline PassRefPtr<StringImpl> StringImpl::stripMatchedCharacters(UCharPredicate predicate)
828 {
829 if (!m_length)
830 return empty();
831
832 unsigned start = 0;
833 unsigned end = m_length - 1;
834
835 // skip white space from start
836 while (start <= end && predicate(is8Bit() ? characters8()[start] : characters16()[start]))
837 ++start;
838
839 // only white space
840 if (start > end)
841 return empty();
842
843 // skip white space from end
844 while (end && predicate(is8Bit() ? characters8()[end] : characters16()[end]))
845 --end;
846
847 if (!start && end == m_length - 1)
848 return this;
849 if (is8Bit())
850 return create(characters8() + start, end + 1 - start);
851 return create(characters16() + start, end + 1 - start);
852 }
853
854 class UCharPredicate {
855 public:
UCharPredicate(CharacterMatchFunctionPtr function)856 inline UCharPredicate(CharacterMatchFunctionPtr function): m_function(function) { }
857
operator ()(UChar ch) const858 inline bool operator()(UChar ch) const
859 {
860 return m_function(ch);
861 }
862
863 private:
864 const CharacterMatchFunctionPtr m_function;
865 };
866
867 class SpaceOrNewlinePredicate {
868 public:
operator ()(UChar ch) const869 inline bool operator()(UChar ch) const
870 {
871 return isSpaceOrNewline(ch);
872 }
873 };
874
stripWhiteSpace()875 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace()
876 {
877 return stripMatchedCharacters(SpaceOrNewlinePredicate());
878 }
879
stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace)880 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace)
881 {
882 return stripMatchedCharacters(UCharPredicate(isWhiteSpace));
883 }
884
885 template <typename CharType>
removeCharacters(const CharType * characters,CharacterMatchFunctionPtr findMatch)886 ALWAYS_INLINE PassRefPtr<StringImpl> StringImpl::removeCharacters(const CharType* characters, CharacterMatchFunctionPtr findMatch)
887 {
888 const CharType* from = characters;
889 const CharType* fromend = from + m_length;
890
891 // Assume the common case will not remove any characters
892 while (from != fromend && !findMatch(*from))
893 ++from;
894 if (from == fromend)
895 return this;
896
897 StringBuffer<CharType> data(m_length);
898 CharType* to = data.characters();
899 unsigned outc = from - characters;
900
901 if (outc)
902 memcpy(to, characters, outc * sizeof(CharType));
903
904 while (true) {
905 while (from != fromend && findMatch(*from))
906 ++from;
907 while (from != fromend && !findMatch(*from))
908 to[outc++] = *from++;
909 if (from == fromend)
910 break;
911 }
912
913 data.shrink(outc);
914
915 return data.release();
916 }
917
removeCharacters(CharacterMatchFunctionPtr findMatch)918 PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr findMatch)
919 {
920 if (is8Bit())
921 return removeCharacters(characters8(), findMatch);
922 return removeCharacters(characters16(), findMatch);
923 }
924
925 template <typename CharType, class UCharPredicate>
simplifyMatchedCharactersToSpace(UCharPredicate predicate,StripBehavior stripBehavior)926 inline PassRefPtr<StringImpl> StringImpl::simplifyMatchedCharactersToSpace(UCharPredicate predicate, StripBehavior stripBehavior)
927 {
928 StringBuffer<CharType> data(m_length);
929
930 const CharType* from = getCharacters<CharType>();
931 const CharType* fromend = from + m_length;
932 int outc = 0;
933 bool changedToSpace = false;
934
935 CharType* to = data.characters();
936
937 if (stripBehavior == StripExtraWhiteSpace) {
938 while (true) {
939 while (from != fromend && predicate(*from)) {
940 if (*from != ' ')
941 changedToSpace = true;
942 ++from;
943 }
944 while (from != fromend && !predicate(*from))
945 to[outc++] = *from++;
946 if (from != fromend)
947 to[outc++] = ' ';
948 else
949 break;
950 }
951
952 if (outc > 0 && to[outc - 1] == ' ')
953 --outc;
954 } else {
955 for (; from != fromend; ++from) {
956 if (predicate(*from)) {
957 if (*from != ' ')
958 changedToSpace = true;
959 to[outc++] = ' ';
960 } else {
961 to[outc++] = *from;
962 }
963 }
964 }
965
966 if (static_cast<unsigned>(outc) == m_length && !changedToSpace)
967 return this;
968
969 data.shrink(outc);
970
971 return data.release();
972 }
973
simplifyWhiteSpace(StripBehavior stripBehavior)974 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(StripBehavior stripBehavior)
975 {
976 if (is8Bit())
977 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(SpaceOrNewlinePredicate(), stripBehavior);
978 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(SpaceOrNewlinePredicate(), stripBehavior);
979 }
980
simplifyWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace,StripBehavior stripBehavior)981 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace(IsWhiteSpaceFunctionPtr isWhiteSpace, StripBehavior stripBehavior)
982 {
983 if (is8Bit())
984 return StringImpl::simplifyMatchedCharactersToSpace<LChar>(UCharPredicate(isWhiteSpace), stripBehavior);
985 return StringImpl::simplifyMatchedCharactersToSpace<UChar>(UCharPredicate(isWhiteSpace), stripBehavior);
986 }
987
toIntStrict(bool * ok,int base)988 int StringImpl::toIntStrict(bool* ok, int base)
989 {
990 if (is8Bit())
991 return charactersToIntStrict(characters8(), m_length, ok, base);
992 return charactersToIntStrict(characters16(), m_length, ok, base);
993 }
994
toUIntStrict(bool * ok,int base)995 unsigned StringImpl::toUIntStrict(bool* ok, int base)
996 {
997 if (is8Bit())
998 return charactersToUIntStrict(characters8(), m_length, ok, base);
999 return charactersToUIntStrict(characters16(), m_length, ok, base);
1000 }
1001
toInt64Strict(bool * ok,int base)1002 int64_t StringImpl::toInt64Strict(bool* ok, int base)
1003 {
1004 if (is8Bit())
1005 return charactersToInt64Strict(characters8(), m_length, ok, base);
1006 return charactersToInt64Strict(characters16(), m_length, ok, base);
1007 }
1008
toUInt64Strict(bool * ok,int base)1009 uint64_t StringImpl::toUInt64Strict(bool* ok, int base)
1010 {
1011 if (is8Bit())
1012 return charactersToUInt64Strict(characters8(), m_length, ok, base);
1013 return charactersToUInt64Strict(characters16(), m_length, ok, base);
1014 }
1015
toIntPtrStrict(bool * ok,int base)1016 intptr_t StringImpl::toIntPtrStrict(bool* ok, int base)
1017 {
1018 if (is8Bit())
1019 return charactersToIntPtrStrict(characters8(), m_length, ok, base);
1020 return charactersToIntPtrStrict(characters16(), m_length, ok, base);
1021 }
1022
toInt(bool * ok)1023 int StringImpl::toInt(bool* ok)
1024 {
1025 if (is8Bit())
1026 return charactersToInt(characters8(), m_length, ok);
1027 return charactersToInt(characters16(), m_length, ok);
1028 }
1029
toUInt(bool * ok)1030 unsigned StringImpl::toUInt(bool* ok)
1031 {
1032 if (is8Bit())
1033 return charactersToUInt(characters8(), m_length, ok);
1034 return charactersToUInt(characters16(), m_length, ok);
1035 }
1036
toInt64(bool * ok)1037 int64_t StringImpl::toInt64(bool* ok)
1038 {
1039 if (is8Bit())
1040 return charactersToInt64(characters8(), m_length, ok);
1041 return charactersToInt64(characters16(), m_length, ok);
1042 }
1043
toUInt64(bool * ok)1044 uint64_t StringImpl::toUInt64(bool* ok)
1045 {
1046 if (is8Bit())
1047 return charactersToUInt64(characters8(), m_length, ok);
1048 return charactersToUInt64(characters16(), m_length, ok);
1049 }
1050
toIntPtr(bool * ok)1051 intptr_t StringImpl::toIntPtr(bool* ok)
1052 {
1053 if (is8Bit())
1054 return charactersToIntPtr(characters8(), m_length, ok);
1055 return charactersToIntPtr(characters16(), m_length, ok);
1056 }
1057
toDouble(bool * ok)1058 double StringImpl::toDouble(bool* ok)
1059 {
1060 if (is8Bit())
1061 return charactersToDouble(characters8(), m_length, ok);
1062 return charactersToDouble(characters16(), m_length, ok);
1063 }
1064
toFloat(bool * ok)1065 float StringImpl::toFloat(bool* ok)
1066 {
1067 if (is8Bit())
1068 return charactersToFloat(characters8(), m_length, ok);
1069 return charactersToFloat(characters16(), m_length, ok);
1070 }
1071
equalIgnoringCase(const LChar * a,const LChar * b,unsigned length)1072 bool equalIgnoringCase(const LChar* a, const LChar* b, unsigned length)
1073 {
1074 while (length--) {
1075 LChar bc = *b++;
1076 if (foldCase(*a++) != foldCase(bc))
1077 return false;
1078 }
1079 return true;
1080 }
1081
equalIgnoringCase(const UChar * a,const LChar * b,unsigned length)1082 bool equalIgnoringCase(const UChar* a, const LChar* b, unsigned length)
1083 {
1084 while (length--) {
1085 LChar bc = *b++;
1086 if (foldCase(*a++) != foldCase(bc))
1087 return false;
1088 }
1089 return true;
1090 }
1091
find(CharacterMatchFunctionPtr matchFunction,unsigned start)1092 size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start)
1093 {
1094 if (is8Bit())
1095 return WTF::find(characters8(), m_length, matchFunction, start);
1096 return WTF::find(characters16(), m_length, matchFunction, start);
1097 }
1098
find(const LChar * matchString,unsigned index)1099 size_t StringImpl::find(const LChar* matchString, unsigned index)
1100 {
1101 // Check for null or empty string to match against
1102 if (!matchString)
1103 return kNotFound;
1104 size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString));
1105 RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max());
1106 unsigned matchLength = matchStringLength;
1107 if (!matchLength)
1108 return min(index, length());
1109
1110 // Optimization 1: fast case for strings of length 1.
1111 if (matchLength == 1)
1112 return WTF::find(characters16(), length(), *matchString, index);
1113
1114 // Check index & matchLength are in range.
1115 if (index > length())
1116 return kNotFound;
1117 unsigned searchLength = length() - index;
1118 if (matchLength > searchLength)
1119 return kNotFound;
1120 // delta is the number of additional times to test; delta == 0 means test only once.
1121 unsigned delta = searchLength - matchLength;
1122
1123 const UChar* searchCharacters = characters16() + index;
1124
1125 // Optimization 2: keep a running hash of the strings,
1126 // only call equal if the hashes match.
1127 unsigned searchHash = 0;
1128 unsigned matchHash = 0;
1129 for (unsigned i = 0; i < matchLength; ++i) {
1130 searchHash += searchCharacters[i];
1131 matchHash += matchString[i];
1132 }
1133
1134 unsigned i = 0;
1135 // keep looping until we match
1136 while (searchHash != matchHash || !equal(searchCharacters + i, matchString, matchLength)) {
1137 if (i == delta)
1138 return kNotFound;
1139 searchHash += searchCharacters[i + matchLength];
1140 searchHash -= searchCharacters[i];
1141 ++i;
1142 }
1143 return index + i;
1144 }
1145
1146 template<typename CharType>
findIgnoringCaseInternal(const CharType * searchCharacters,const LChar * matchString,unsigned index,unsigned searchLength,unsigned matchLength)1147 ALWAYS_INLINE size_t findIgnoringCaseInternal(const CharType* searchCharacters, const LChar* matchString, unsigned index, unsigned searchLength, unsigned matchLength)
1148 {
1149 // delta is the number of additional times to test; delta == 0 means test only once.
1150 unsigned delta = searchLength - matchLength;
1151
1152 unsigned i = 0;
1153 while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) {
1154 if (i == delta)
1155 return kNotFound;
1156 ++i;
1157 }
1158 return index + i;
1159 }
1160
findIgnoringCase(const LChar * matchString,unsigned index)1161 size_t StringImpl::findIgnoringCase(const LChar* matchString, unsigned index)
1162 {
1163 // Check for null or empty string to match against
1164 if (!matchString)
1165 return kNotFound;
1166 size_t matchStringLength = strlen(reinterpret_cast<const char*>(matchString));
1167 RELEASE_ASSERT(matchStringLength <= numeric_limits<unsigned>::max());
1168 unsigned matchLength = matchStringLength;
1169 if (!matchLength)
1170 return min(index, length());
1171
1172 // Check index & matchLength are in range.
1173 if (index > length())
1174 return kNotFound;
1175 unsigned searchLength = length() - index;
1176 if (matchLength > searchLength)
1177 return kNotFound;
1178
1179 if (is8Bit())
1180 return findIgnoringCaseInternal(characters8() + index, matchString, index, searchLength, matchLength);
1181 return findIgnoringCaseInternal(characters16() + index, matchString, index, searchLength, matchLength);
1182 }
1183
1184 template <typename SearchCharacterType, typename MatchCharacterType>
findInternal(const SearchCharacterType * searchCharacters,const MatchCharacterType * matchCharacters,unsigned index,unsigned searchLength,unsigned matchLength)1185 ALWAYS_INLINE static size_t findInternal(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLength, unsigned matchLength)
1186 {
1187 // Optimization: keep a running hash of the strings,
1188 // only call equal() if the hashes match.
1189
1190 // delta is the number of additional times to test; delta == 0 means test only once.
1191 unsigned delta = searchLength - matchLength;
1192
1193 unsigned searchHash = 0;
1194 unsigned matchHash = 0;
1195
1196 for (unsigned i = 0; i < matchLength; ++i) {
1197 searchHash += searchCharacters[i];
1198 matchHash += matchCharacters[i];
1199 }
1200
1201 unsigned i = 0;
1202 // keep looping until we match
1203 while (searchHash != matchHash || !equal(searchCharacters + i, matchCharacters, matchLength)) {
1204 if (i == delta)
1205 return kNotFound;
1206 searchHash += searchCharacters[i + matchLength];
1207 searchHash -= searchCharacters[i];
1208 ++i;
1209 }
1210 return index + i;
1211 }
1212
find(StringImpl * matchString)1213 size_t StringImpl::find(StringImpl* matchString)
1214 {
1215 // Check for null string to match against
1216 if (UNLIKELY(!matchString))
1217 return kNotFound;
1218 unsigned matchLength = matchString->length();
1219
1220 // Optimization 1: fast case for strings of length 1.
1221 if (matchLength == 1) {
1222 if (is8Bit()) {
1223 if (matchString->is8Bit())
1224 return WTF::find(characters8(), length(), matchString->characters8()[0]);
1225 return WTF::find(characters8(), length(), matchString->characters16()[0]);
1226 }
1227 if (matchString->is8Bit())
1228 return WTF::find(characters16(), length(), matchString->characters8()[0]);
1229 return WTF::find(characters16(), length(), matchString->characters16()[0]);
1230 }
1231
1232 // Check matchLength is in range.
1233 if (matchLength > length())
1234 return kNotFound;
1235
1236 // Check for empty string to match against
1237 if (UNLIKELY(!matchLength))
1238 return 0;
1239
1240 if (is8Bit()) {
1241 if (matchString->is8Bit())
1242 return findInternal(characters8(), matchString->characters8(), 0, length(), matchLength);
1243 return findInternal(characters8(), matchString->characters16(), 0, length(), matchLength);
1244 }
1245
1246 if (matchString->is8Bit())
1247 return findInternal(characters16(), matchString->characters8(), 0, length(), matchLength);
1248
1249 return findInternal(characters16(), matchString->characters16(), 0, length(), matchLength);
1250 }
1251
find(StringImpl * matchString,unsigned index)1252 size_t StringImpl::find(StringImpl* matchString, unsigned index)
1253 {
1254 // Check for null or empty string to match against
1255 if (UNLIKELY(!matchString))
1256 return kNotFound;
1257
1258 unsigned matchLength = matchString->length();
1259
1260 // Optimization 1: fast case for strings of length 1.
1261 if (matchLength == 1) {
1262 if (is8Bit())
1263 return WTF::find(characters8(), length(), (*matchString)[0], index);
1264 return WTF::find(characters16(), length(), (*matchString)[0], index);
1265 }
1266
1267 if (UNLIKELY(!matchLength))
1268 return min(index, length());
1269
1270 // Check index & matchLength are in range.
1271 if (index > length())
1272 return kNotFound;
1273 unsigned searchLength = length() - index;
1274 if (matchLength > searchLength)
1275 return kNotFound;
1276
1277 if (is8Bit()) {
1278 if (matchString->is8Bit())
1279 return findInternal(characters8() + index, matchString->characters8(), index, searchLength, matchLength);
1280 return findInternal(characters8() + index, matchString->characters16(), index, searchLength, matchLength);
1281 }
1282
1283 if (matchString->is8Bit())
1284 return findInternal(characters16() + index, matchString->characters8(), index, searchLength, matchLength);
1285
1286 return findInternal(characters16() + index, matchString->characters16(), index, searchLength, matchLength);
1287 }
1288
1289 template <typename SearchCharacterType, typename MatchCharacterType>
findIgnoringCaseInner(const SearchCharacterType * searchCharacters,const MatchCharacterType * matchCharacters,unsigned index,unsigned searchLength,unsigned matchLength)1290 ALWAYS_INLINE static size_t findIgnoringCaseInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned searchLength, unsigned matchLength)
1291 {
1292 // delta is the number of additional times to test; delta == 0 means test only once.
1293 unsigned delta = searchLength - matchLength;
1294
1295 unsigned i = 0;
1296 // keep looping until we match
1297 while (!equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength)) {
1298 if (i == delta)
1299 return kNotFound;
1300 ++i;
1301 }
1302 return index + i;
1303 }
1304
findIgnoringCase(StringImpl * matchString,unsigned index)1305 size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index)
1306 {
1307 // Check for null or empty string to match against
1308 if (!matchString)
1309 return kNotFound;
1310 unsigned matchLength = matchString->length();
1311 if (!matchLength)
1312 return min(index, length());
1313
1314 // Check index & matchLength are in range.
1315 if (index > length())
1316 return kNotFound;
1317 unsigned searchLength = length() - index;
1318 if (matchLength > searchLength)
1319 return kNotFound;
1320
1321 if (is8Bit()) {
1322 if (matchString->is8Bit())
1323 return findIgnoringCaseInner(characters8() + index, matchString->characters8(), index, searchLength, matchLength);
1324 return findIgnoringCaseInner(characters8() + index, matchString->characters16(), index, searchLength, matchLength);
1325 }
1326
1327 if (matchString->is8Bit())
1328 return findIgnoringCaseInner(characters16() + index, matchString->characters8(), index, searchLength, matchLength);
1329
1330 return findIgnoringCaseInner(characters16() + index, matchString->characters16(), index, searchLength, matchLength);
1331 }
1332
findNextLineStart(unsigned index)1333 size_t StringImpl::findNextLineStart(unsigned index)
1334 {
1335 if (is8Bit())
1336 return WTF::findNextLineStart(characters8(), m_length, index);
1337 return WTF::findNextLineStart(characters16(), m_length, index);
1338 }
1339
count(LChar c) const1340 size_t StringImpl::count(LChar c) const
1341 {
1342 int count = 0;
1343 if (is8Bit()) {
1344 for (size_t i = 0; i < m_length; ++i)
1345 count += characters8()[i] == c;
1346 } else {
1347 for (size_t i = 0; i < m_length; ++i)
1348 count += characters16()[i] == c;
1349 }
1350 return count;
1351 }
1352
reverseFind(UChar c,unsigned index)1353 size_t StringImpl::reverseFind(UChar c, unsigned index)
1354 {
1355 if (is8Bit())
1356 return WTF::reverseFind(characters8(), m_length, c, index);
1357 return WTF::reverseFind(characters16(), m_length, c, index);
1358 }
1359
1360 template <typename SearchCharacterType, typename MatchCharacterType>
reverseFindInner(const SearchCharacterType * searchCharacters,const MatchCharacterType * matchCharacters,unsigned index,unsigned length,unsigned matchLength)1361 ALWAYS_INLINE static size_t reverseFindInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned length, unsigned matchLength)
1362 {
1363 // Optimization: keep a running hash of the strings,
1364 // only call equal if the hashes match.
1365
1366 // delta is the number of additional times to test; delta == 0 means test only once.
1367 unsigned delta = min(index, length - matchLength);
1368
1369 unsigned searchHash = 0;
1370 unsigned matchHash = 0;
1371 for (unsigned i = 0; i < matchLength; ++i) {
1372 searchHash += searchCharacters[delta + i];
1373 matchHash += matchCharacters[i];
1374 }
1375
1376 // keep looping until we match
1377 while (searchHash != matchHash || !equal(searchCharacters + delta, matchCharacters, matchLength)) {
1378 if (!delta)
1379 return kNotFound;
1380 --delta;
1381 searchHash -= searchCharacters[delta + matchLength];
1382 searchHash += searchCharacters[delta];
1383 }
1384 return delta;
1385 }
1386
reverseFind(StringImpl * matchString,unsigned index)1387 size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index)
1388 {
1389 // Check for null or empty string to match against
1390 if (!matchString)
1391 return kNotFound;
1392 unsigned matchLength = matchString->length();
1393 unsigned ourLength = length();
1394 if (!matchLength)
1395 return min(index, ourLength);
1396
1397 // Optimization 1: fast case for strings of length 1.
1398 if (matchLength == 1) {
1399 if (is8Bit())
1400 return WTF::reverseFind(characters8(), ourLength, (*matchString)[0], index);
1401 return WTF::reverseFind(characters16(), ourLength, (*matchString)[0], index);
1402 }
1403
1404 // Check index & matchLength are in range.
1405 if (matchLength > ourLength)
1406 return kNotFound;
1407
1408 if (is8Bit()) {
1409 if (matchString->is8Bit())
1410 return reverseFindInner(characters8(), matchString->characters8(), index, ourLength, matchLength);
1411 return reverseFindInner(characters8(), matchString->characters16(), index, ourLength, matchLength);
1412 }
1413
1414 if (matchString->is8Bit())
1415 return reverseFindInner(characters16(), matchString->characters8(), index, ourLength, matchLength);
1416
1417 return reverseFindInner(characters16(), matchString->characters16(), index, ourLength, matchLength);
1418 }
1419
1420 template <typename SearchCharacterType, typename MatchCharacterType>
reverseFindIgnoringCaseInner(const SearchCharacterType * searchCharacters,const MatchCharacterType * matchCharacters,unsigned index,unsigned length,unsigned matchLength)1421 ALWAYS_INLINE static size_t reverseFindIgnoringCaseInner(const SearchCharacterType* searchCharacters, const MatchCharacterType* matchCharacters, unsigned index, unsigned length, unsigned matchLength)
1422 {
1423 // delta is the number of additional times to test; delta == 0 means test only once.
1424 unsigned delta = min(index, length - matchLength);
1425
1426 // keep looping until we match
1427 while (!equalIgnoringCase(searchCharacters + delta, matchCharacters, matchLength)) {
1428 if (!delta)
1429 return kNotFound;
1430 --delta;
1431 }
1432 return delta;
1433 }
1434
reverseFindIgnoringCase(StringImpl * matchString,unsigned index)1435 size_t StringImpl::reverseFindIgnoringCase(StringImpl* matchString, unsigned index)
1436 {
1437 // Check for null or empty string to match against
1438 if (!matchString)
1439 return kNotFound;
1440 unsigned matchLength = matchString->length();
1441 unsigned ourLength = length();
1442 if (!matchLength)
1443 return min(index, ourLength);
1444
1445 // Check index & matchLength are in range.
1446 if (matchLength > ourLength)
1447 return kNotFound;
1448
1449 if (is8Bit()) {
1450 if (matchString->is8Bit())
1451 return reverseFindIgnoringCaseInner(characters8(), matchString->characters8(), index, ourLength, matchLength);
1452 return reverseFindIgnoringCaseInner(characters8(), matchString->characters16(), index, ourLength, matchLength);
1453 }
1454
1455 if (matchString->is8Bit())
1456 return reverseFindIgnoringCaseInner(characters16(), matchString->characters8(), index, ourLength, matchLength);
1457
1458 return reverseFindIgnoringCaseInner(characters16(), matchString->characters16(), index, ourLength, matchLength);
1459 }
1460
equalInner(const StringImpl * stringImpl,unsigned startOffset,const char * matchString,unsigned matchLength,bool caseSensitive)1461 ALWAYS_INLINE static bool equalInner(const StringImpl* stringImpl, unsigned startOffset, const char* matchString, unsigned matchLength, bool caseSensitive)
1462 {
1463 ASSERT(stringImpl);
1464 ASSERT(matchLength <= stringImpl->length());
1465 ASSERT(startOffset + matchLength <= stringImpl->length());
1466
1467 if (caseSensitive) {
1468 if (stringImpl->is8Bit())
1469 return equal(stringImpl->characters8() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
1470 return equal(stringImpl->characters16() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
1471 }
1472 if (stringImpl->is8Bit())
1473 return equalIgnoringCase(stringImpl->characters8() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
1474 return equalIgnoringCase(stringImpl->characters16() + startOffset, reinterpret_cast<const LChar*>(matchString), matchLength);
1475 }
1476
startsWith(UChar character) const1477 bool StringImpl::startsWith(UChar character) const
1478 {
1479 return m_length && (*this)[0] == character;
1480 }
1481
startsWith(const char * matchString,unsigned matchLength,bool caseSensitive) const1482 bool StringImpl::startsWith(const char* matchString, unsigned matchLength, bool caseSensitive) const
1483 {
1484 ASSERT(matchLength);
1485 if (matchLength > length())
1486 return false;
1487 return equalInner(this, 0, matchString, matchLength, caseSensitive);
1488 }
1489
endsWith(StringImpl * matchString,bool caseSensitive)1490 bool StringImpl::endsWith(StringImpl* matchString, bool caseSensitive)
1491 {
1492 ASSERT(matchString);
1493 if (m_length >= matchString->m_length) {
1494 unsigned start = m_length - matchString->m_length;
1495 return (caseSensitive ? find(matchString, start) : findIgnoringCase(matchString, start)) == start;
1496 }
1497 return false;
1498 }
1499
endsWith(UChar character) const1500 bool StringImpl::endsWith(UChar character) const
1501 {
1502 return m_length && (*this)[m_length - 1] == character;
1503 }
1504
endsWith(const char * matchString,unsigned matchLength,bool caseSensitive) const1505 bool StringImpl::endsWith(const char* matchString, unsigned matchLength, bool caseSensitive) const
1506 {
1507 ASSERT(matchLength);
1508 if (matchLength > length())
1509 return false;
1510 unsigned startOffset = length() - matchLength;
1511 return equalInner(this, startOffset, matchString, matchLength, caseSensitive);
1512 }
1513
replace(UChar oldC,UChar newC)1514 PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC)
1515 {
1516 if (oldC == newC)
1517 return this;
1518 unsigned i;
1519 for (i = 0; i != m_length; ++i) {
1520 UChar c = is8Bit() ? characters8()[i] : characters16()[i];
1521 if (c == oldC)
1522 break;
1523 }
1524 if (i == m_length)
1525 return this;
1526
1527 if (is8Bit()) {
1528 if (oldC > 0xff)
1529 // Looking for a 16 bit char in an 8 bit string, we're done.
1530 return this;
1531
1532 if (newC <= 0xff) {
1533 LChar* data;
1534 LChar oldChar = static_cast<LChar>(oldC);
1535 LChar newChar = static_cast<LChar>(newC);
1536
1537 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1538
1539 for (i = 0; i != m_length; ++i) {
1540 LChar ch = characters8()[i];
1541 if (ch == oldChar)
1542 ch = newChar;
1543 data[i] = ch;
1544 }
1545 return newImpl.release();
1546 }
1547
1548 // There is the possibility we need to up convert from 8 to 16 bit,
1549 // create a 16 bit string for the result.
1550 UChar* data;
1551 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1552
1553 for (i = 0; i != m_length; ++i) {
1554 UChar ch = characters8()[i];
1555 if (ch == oldC)
1556 ch = newC;
1557 data[i] = ch;
1558 }
1559
1560 return newImpl.release();
1561 }
1562
1563 UChar* data;
1564 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
1565
1566 for (i = 0; i != m_length; ++i) {
1567 UChar ch = characters16()[i];
1568 if (ch == oldC)
1569 ch = newC;
1570 data[i] = ch;
1571 }
1572 return newImpl.release();
1573 }
1574
replace(unsigned position,unsigned lengthToReplace,StringImpl * str)1575 PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToReplace, StringImpl* str)
1576 {
1577 position = min(position, length());
1578 lengthToReplace = min(lengthToReplace, length() - position);
1579 unsigned lengthToInsert = str ? str->length() : 0;
1580 if (!lengthToReplace && !lengthToInsert)
1581 return this;
1582
1583 RELEASE_ASSERT((length() - lengthToReplace) < (numeric_limits<unsigned>::max() - lengthToInsert));
1584
1585 if (is8Bit() && (!str || str->is8Bit())) {
1586 LChar* data;
1587 RefPtr<StringImpl> newImpl =
1588 createUninitialized(length() - lengthToReplace + lengthToInsert, data);
1589 memcpy(data, characters8(), position * sizeof(LChar));
1590 if (str)
1591 memcpy(data + position, str->characters8(), lengthToInsert * sizeof(LChar));
1592 memcpy(data + position + lengthToInsert, characters8() + position + lengthToReplace,
1593 (length() - position - lengthToReplace) * sizeof(LChar));
1594 return newImpl.release();
1595 }
1596 UChar* data;
1597 RefPtr<StringImpl> newImpl =
1598 createUninitialized(length() - lengthToReplace + lengthToInsert, data);
1599 if (is8Bit())
1600 for (unsigned i = 0; i < position; ++i)
1601 data[i] = characters8()[i];
1602 else
1603 memcpy(data, characters16(), position * sizeof(UChar));
1604 if (str) {
1605 if (str->is8Bit())
1606 for (unsigned i = 0; i < lengthToInsert; ++i)
1607 data[i + position] = str->characters8()[i];
1608 else
1609 memcpy(data + position, str->characters16(), lengthToInsert * sizeof(UChar));
1610 }
1611 if (is8Bit()) {
1612 for (unsigned i = 0; i < length() - position - lengthToReplace; ++i)
1613 data[i + position + lengthToInsert] = characters8()[i + position + lengthToReplace];
1614 } else {
1615 memcpy(data + position + lengthToInsert, characters16() + position + lengthToReplace,
1616 (length() - position - lengthToReplace) * sizeof(UChar));
1617 }
1618 return newImpl.release();
1619 }
1620
replace(UChar pattern,StringImpl * replacement)1621 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacement)
1622 {
1623 if (!replacement)
1624 return this;
1625
1626 if (replacement->is8Bit())
1627 return replace(pattern, replacement->characters8(), replacement->length());
1628
1629 return replace(pattern, replacement->characters16(), replacement->length());
1630 }
1631
replace(UChar pattern,const LChar * replacement,unsigned repStrLength)1632 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const LChar* replacement, unsigned repStrLength)
1633 {
1634 ASSERT(replacement);
1635
1636 size_t srcSegmentStart = 0;
1637 unsigned matchCount = 0;
1638
1639 // Count the matches.
1640 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
1641 ++matchCount;
1642 ++srcSegmentStart;
1643 }
1644
1645 // If we have 0 matches then we don't have to do any more work.
1646 if (!matchCount)
1647 return this;
1648
1649 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength);
1650
1651 unsigned replaceSize = matchCount * repStrLength;
1652 unsigned newSize = m_length - matchCount;
1653 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize));
1654
1655 newSize += replaceSize;
1656
1657 // Construct the new data.
1658 size_t srcSegmentEnd;
1659 unsigned srcSegmentLength;
1660 srcSegmentStart = 0;
1661 unsigned dstOffset = 0;
1662
1663 if (is8Bit()) {
1664 LChar* data;
1665 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1666
1667 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1668 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1669 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1670 dstOffset += srcSegmentLength;
1671 memcpy(data + dstOffset, replacement, repStrLength * sizeof(LChar));
1672 dstOffset += repStrLength;
1673 srcSegmentStart = srcSegmentEnd + 1;
1674 }
1675
1676 srcSegmentLength = m_length - srcSegmentStart;
1677 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1678
1679 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1680
1681 return newImpl.release();
1682 }
1683
1684 UChar* data;
1685 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1686
1687 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1688 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1689 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1690
1691 dstOffset += srcSegmentLength;
1692 for (unsigned i = 0; i < repStrLength; ++i)
1693 data[i + dstOffset] = replacement[i];
1694
1695 dstOffset += repStrLength;
1696 srcSegmentStart = srcSegmentEnd + 1;
1697 }
1698
1699 srcSegmentLength = m_length - srcSegmentStart;
1700 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1701
1702 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1703
1704 return newImpl.release();
1705 }
1706
replace(UChar pattern,const UChar * replacement,unsigned repStrLength)1707 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, const UChar* replacement, unsigned repStrLength)
1708 {
1709 ASSERT(replacement);
1710
1711 size_t srcSegmentStart = 0;
1712 unsigned matchCount = 0;
1713
1714 // Count the matches.
1715 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
1716 ++matchCount;
1717 ++srcSegmentStart;
1718 }
1719
1720 // If we have 0 matches then we don't have to do any more work.
1721 if (!matchCount)
1722 return this;
1723
1724 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength);
1725
1726 unsigned replaceSize = matchCount * repStrLength;
1727 unsigned newSize = m_length - matchCount;
1728 RELEASE_ASSERT(newSize < (numeric_limits<unsigned>::max() - replaceSize));
1729
1730 newSize += replaceSize;
1731
1732 // Construct the new data.
1733 size_t srcSegmentEnd;
1734 unsigned srcSegmentLength;
1735 srcSegmentStart = 0;
1736 unsigned dstOffset = 0;
1737
1738 if (is8Bit()) {
1739 UChar* data;
1740 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1741
1742 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1743 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1744 for (unsigned i = 0; i < srcSegmentLength; ++i)
1745 data[i + dstOffset] = characters8()[i + srcSegmentStart];
1746
1747 dstOffset += srcSegmentLength;
1748 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar));
1749
1750 dstOffset += repStrLength;
1751 srcSegmentStart = srcSegmentEnd + 1;
1752 }
1753
1754 srcSegmentLength = m_length - srcSegmentStart;
1755 for (unsigned i = 0; i < srcSegmentLength; ++i)
1756 data[i + dstOffset] = characters8()[i + srcSegmentStart];
1757
1758 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1759
1760 return newImpl.release();
1761 }
1762
1763 UChar* data;
1764 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1765
1766 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1767 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1768 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1769
1770 dstOffset += srcSegmentLength;
1771 memcpy(data + dstOffset, replacement, repStrLength * sizeof(UChar));
1772
1773 dstOffset += repStrLength;
1774 srcSegmentStart = srcSegmentEnd + 1;
1775 }
1776
1777 srcSegmentLength = m_length - srcSegmentStart;
1778 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1779
1780 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1781
1782 return newImpl.release();
1783 }
1784
replace(StringImpl * pattern,StringImpl * replacement)1785 PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* replacement)
1786 {
1787 if (!pattern || !replacement)
1788 return this;
1789
1790 unsigned patternLength = pattern->length();
1791 if (!patternLength)
1792 return this;
1793
1794 unsigned repStrLength = replacement->length();
1795 size_t srcSegmentStart = 0;
1796 unsigned matchCount = 0;
1797
1798 // Count the matches.
1799 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != kNotFound) {
1800 ++matchCount;
1801 srcSegmentStart += patternLength;
1802 }
1803
1804 // If we have 0 matches, we don't have to do any more work
1805 if (!matchCount)
1806 return this;
1807
1808 unsigned newSize = m_length - matchCount * patternLength;
1809 RELEASE_ASSERT(!repStrLength || matchCount <= numeric_limits<unsigned>::max() / repStrLength);
1810
1811 RELEASE_ASSERT(newSize <= (numeric_limits<unsigned>::max() - matchCount * repStrLength));
1812
1813 newSize += matchCount * repStrLength;
1814
1815
1816 // Construct the new data
1817 size_t srcSegmentEnd;
1818 unsigned srcSegmentLength;
1819 srcSegmentStart = 0;
1820 unsigned dstOffset = 0;
1821 bool srcIs8Bit = is8Bit();
1822 bool replacementIs8Bit = replacement->is8Bit();
1823
1824 // There are 4 cases:
1825 // 1. This and replacement are both 8 bit.
1826 // 2. This and replacement are both 16 bit.
1827 // 3. This is 8 bit and replacement is 16 bit.
1828 // 4. This is 16 bit and replacement is 8 bit.
1829 if (srcIs8Bit && replacementIs8Bit) {
1830 // Case 1
1831 LChar* data;
1832 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1833 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1834 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1835 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1836 dstOffset += srcSegmentLength;
1837 memcpy(data + dstOffset, replacement->characters8(), repStrLength * sizeof(LChar));
1838 dstOffset += repStrLength;
1839 srcSegmentStart = srcSegmentEnd + patternLength;
1840 }
1841
1842 srcSegmentLength = m_length - srcSegmentStart;
1843 memcpy(data + dstOffset, characters8() + srcSegmentStart, srcSegmentLength * sizeof(LChar));
1844
1845 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1846
1847 return newImpl.release();
1848 }
1849
1850 UChar* data;
1851 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
1852 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != kNotFound) {
1853 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
1854 if (srcIs8Bit) {
1855 // Case 3.
1856 for (unsigned i = 0; i < srcSegmentLength; ++i)
1857 data[i + dstOffset] = characters8()[i + srcSegmentStart];
1858 } else {
1859 // Case 2 & 4.
1860 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1861 }
1862 dstOffset += srcSegmentLength;
1863 if (replacementIs8Bit) {
1864 // Cases 2 & 3.
1865 for (unsigned i = 0; i < repStrLength; ++i)
1866 data[i + dstOffset] = replacement->characters8()[i];
1867 } else {
1868 // Case 4
1869 memcpy(data + dstOffset, replacement->characters16(), repStrLength * sizeof(UChar));
1870 }
1871 dstOffset += repStrLength;
1872 srcSegmentStart = srcSegmentEnd + patternLength;
1873 }
1874
1875 srcSegmentLength = m_length - srcSegmentStart;
1876 if (srcIs8Bit) {
1877 // Case 3.
1878 for (unsigned i = 0; i < srcSegmentLength; ++i)
1879 data[i + dstOffset] = characters8()[i + srcSegmentStart];
1880 } else {
1881 // Cases 2 & 4.
1882 memcpy(data + dstOffset, characters16() + srcSegmentStart, srcSegmentLength * sizeof(UChar));
1883 }
1884
1885 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
1886
1887 return newImpl.release();
1888 }
1889
upconvertedString()1890 PassRefPtr<StringImpl> StringImpl::upconvertedString()
1891 {
1892 if (is8Bit())
1893 return String::make16BitFrom8BitSource(characters8(), m_length).releaseImpl();
1894 return this;
1895 }
1896
stringImplContentEqual(const StringImpl * a,const StringImpl * b)1897 static inline bool stringImplContentEqual(const StringImpl* a, const StringImpl* b)
1898 {
1899 unsigned aLength = a->length();
1900 unsigned bLength = b->length();
1901 if (aLength != bLength)
1902 return false;
1903
1904 if (a->is8Bit()) {
1905 if (b->is8Bit())
1906 return equal(a->characters8(), b->characters8(), aLength);
1907
1908 return equal(a->characters8(), b->characters16(), aLength);
1909 }
1910
1911 if (b->is8Bit())
1912 return equal(a->characters16(), b->characters8(), aLength);
1913
1914 return equal(a->characters16(), b->characters16(), aLength);
1915 }
1916
equal(const StringImpl * a,const StringImpl * b)1917 bool equal(const StringImpl* a, const StringImpl* b)
1918 {
1919 if (a == b)
1920 return true;
1921 if (!a || !b)
1922 return false;
1923
1924 return stringImplContentEqual(a, b);
1925 }
1926
1927 template <typename CharType>
equalInternal(const StringImpl * a,const CharType * b,unsigned length)1928 inline bool equalInternal(const StringImpl* a, const CharType* b, unsigned length)
1929 {
1930 if (!a)
1931 return !b;
1932 if (!b)
1933 return false;
1934
1935 if (a->length() != length)
1936 return false;
1937 if (a->is8Bit())
1938 return equal(a->characters8(), b, length);
1939 return equal(a->characters16(), b, length);
1940 }
1941
equal(const StringImpl * a,const LChar * b,unsigned length)1942 bool equal(const StringImpl* a, const LChar* b, unsigned length)
1943 {
1944 return equalInternal(a, b, length);
1945 }
1946
equal(const StringImpl * a,const UChar * b,unsigned length)1947 bool equal(const StringImpl* a, const UChar* b, unsigned length)
1948 {
1949 return equalInternal(a, b, length);
1950 }
1951
equal(const StringImpl * a,const LChar * b)1952 bool equal(const StringImpl* a, const LChar* b)
1953 {
1954 if (!a)
1955 return !b;
1956 if (!b)
1957 return !a;
1958
1959 unsigned length = a->length();
1960
1961 if (a->is8Bit()) {
1962 const LChar* aPtr = a->characters8();
1963 for (unsigned i = 0; i != length; ++i) {
1964 LChar bc = b[i];
1965 LChar ac = aPtr[i];
1966 if (!bc)
1967 return false;
1968 if (ac != bc)
1969 return false;
1970 }
1971
1972 return !b[length];
1973 }
1974
1975 const UChar* aPtr = a->characters16();
1976 for (unsigned i = 0; i != length; ++i) {
1977 LChar bc = b[i];
1978 if (!bc)
1979 return false;
1980 if (aPtr[i] != bc)
1981 return false;
1982 }
1983
1984 return !b[length];
1985 }
1986
equalNonNull(const StringImpl * a,const StringImpl * b)1987 bool equalNonNull(const StringImpl* a, const StringImpl* b)
1988 {
1989 ASSERT(a && b);
1990 if (a == b)
1991 return true;
1992
1993 return stringImplContentEqual(a, b);
1994 }
1995
equalIgnoringCase(const StringImpl * a,const StringImpl * b)1996 bool equalIgnoringCase(const StringImpl* a, const StringImpl* b)
1997 {
1998 if (a == b)
1999 return true;
2000 if (!a || !b)
2001 return false;
2002
2003 return CaseFoldingHash::equal(a, b);
2004 }
2005
equalIgnoringCase(const StringImpl * a,const LChar * b)2006 bool equalIgnoringCase(const StringImpl* a, const LChar* b)
2007 {
2008 if (!a)
2009 return !b;
2010 if (!b)
2011 return !a;
2012
2013 unsigned length = a->length();
2014
2015 // Do a faster loop for the case where all the characters are ASCII.
2016 UChar ored = 0;
2017 bool equal = true;
2018 if (a->is8Bit()) {
2019 const LChar* as = a->characters8();
2020 for (unsigned i = 0; i != length; ++i) {
2021 LChar bc = b[i];
2022 if (!bc)
2023 return false;
2024 UChar ac = as[i];
2025 ored |= ac;
2026 equal = equal && (toASCIILower(ac) == toASCIILower(bc));
2027 }
2028
2029 // Do a slower implementation for cases that include non-ASCII characters.
2030 if (ored & ~0x7F) {
2031 equal = true;
2032 for (unsigned i = 0; i != length; ++i)
2033 equal = equal && (foldCase(as[i]) == foldCase(b[i]));
2034 }
2035
2036 return equal && !b[length];
2037 }
2038
2039 const UChar* as = a->characters16();
2040 for (unsigned i = 0; i != length; ++i) {
2041 LChar bc = b[i];
2042 if (!bc)
2043 return false;
2044 UChar ac = as[i];
2045 ored |= ac;
2046 equal = equal && (toASCIILower(ac) == toASCIILower(bc));
2047 }
2048
2049 // Do a slower implementation for cases that include non-ASCII characters.
2050 if (ored & ~0x7F) {
2051 equal = true;
2052 for (unsigned i = 0; i != length; ++i) {
2053 equal = equal && (foldCase(as[i]) == foldCase(b[i]));
2054 }
2055 }
2056
2057 return equal && !b[length];
2058 }
2059
equalIgnoringCaseNonNull(const StringImpl * a,const StringImpl * b)2060 bool equalIgnoringCaseNonNull(const StringImpl* a, const StringImpl* b)
2061 {
2062 ASSERT(a && b);
2063 if (a == b)
2064 return true;
2065
2066 unsigned length = a->length();
2067 if (length != b->length())
2068 return false;
2069
2070 if (a->is8Bit()) {
2071 if (b->is8Bit())
2072 return equalIgnoringCase(a->characters8(), b->characters8(), length);
2073
2074 return equalIgnoringCase(b->characters16(), a->characters8(), length);
2075 }
2076
2077 if (b->is8Bit())
2078 return equalIgnoringCase(a->characters16(), b->characters8(), length);
2079
2080 return equalIgnoringCase(a->characters16(), b->characters16(), length);
2081 }
2082
equalIgnoringNullity(StringImpl * a,StringImpl * b)2083 bool equalIgnoringNullity(StringImpl* a, StringImpl* b)
2084 {
2085 if (!a && b && !b->length())
2086 return true;
2087 if (!b && a && !a->length())
2088 return true;
2089 return equal(a, b);
2090 }
2091
sizeInBytes() const2092 size_t StringImpl::sizeInBytes() const
2093 {
2094 size_t size = length();
2095 if (!is8Bit())
2096 size *= 2;
2097 return size + sizeof(*this);
2098 }
2099
2100 } // namespace WTF
2101