1 /*
2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3 * (C) 1999 Antti Koivisto (koivisto@kde.org)
4 * (C) 2001 Dirk Mueller ( mueller@kde.org )
5 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
6 * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Library General Public License for more details.
17 *
18 * You should have received a copy of the GNU Library General Public License
19 * along with this library; see the file COPYING.LIB. If not, write to
20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
22 *
23 */
24
25 #include "config.h"
26 #include "StringImpl.h"
27
28 #include "AtomicString.h"
29 #include "StringBuffer.h"
30 #include "StringHash.h"
31 #include <wtf/StdLibExtras.h>
32 #include <wtf/WTFThreadData.h>
33
34 using namespace std;
35
36 namespace WTF {
37
38 using namespace Unicode;
39
40 static const unsigned minLengthToShare = 20;
41
42 COMPILE_ASSERT(sizeof(StringImpl) == 2 * sizeof(int) + 3 * sizeof(void*), StringImpl_should_stay_small);
43
~StringImpl()44 StringImpl::~StringImpl()
45 {
46 ASSERT(!isStatic());
47
48 if (isAtomic())
49 AtomicString::remove(this);
50 #if USE(JSC)
51 if (isIdentifier()) {
52 if (!wtfThreadData().currentIdentifierTable()->remove(this))
53 CRASH();
54 }
55 #endif
56
57 BufferOwnership ownership = bufferOwnership();
58 if (ownership != BufferInternal) {
59 if (ownership == BufferOwned) {
60 ASSERT(!m_sharedBuffer);
61 ASSERT(m_data);
62 fastFree(const_cast<UChar*>(m_data));
63 } else if (ownership == BufferSubstring) {
64 ASSERT(m_substringBuffer);
65 m_substringBuffer->deref();
66 } else {
67 ASSERT(ownership == BufferShared);
68 ASSERT(m_sharedBuffer);
69 m_sharedBuffer->deref();
70 }
71 }
72 }
73
createUninitialized(unsigned length,UChar * & data)74 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data)
75 {
76 if (!length) {
77 data = 0;
78 return empty();
79 }
80
81 // Allocate a single buffer large enough to contain the StringImpl
82 // struct as well as the data which it contains. This removes one
83 // heap allocation from this call.
84 if (length > ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(UChar)))
85 CRASH();
86 size_t size = sizeof(StringImpl) + length * sizeof(UChar);
87 StringImpl* string = static_cast<StringImpl*>(fastMalloc(size));
88
89 data = reinterpret_cast<UChar*>(string + 1);
90 return adoptRef(new (string) StringImpl(length));
91 }
92
create(const UChar * characters,unsigned length)93 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length)
94 {
95 if (!characters || !length)
96 return empty();
97
98 UChar* data;
99 RefPtr<StringImpl> string = createUninitialized(length, data);
100 memcpy(data, characters, length * sizeof(UChar));
101 return string.release();
102 }
103
create(const char * characters,unsigned length)104 PassRefPtr<StringImpl> StringImpl::create(const char* characters, unsigned length)
105 {
106 if (!characters || !length)
107 return empty();
108
109 UChar* data;
110 RefPtr<StringImpl> string = createUninitialized(length, data);
111 for (unsigned i = 0; i != length; ++i) {
112 unsigned char c = characters[i];
113 data[i] = c;
114 }
115 return string.release();
116 }
117
create(const char * string)118 PassRefPtr<StringImpl> StringImpl::create(const char* string)
119 {
120 if (!string)
121 return empty();
122 size_t length = strlen(string);
123 if (length > numeric_limits<unsigned>::max())
124 CRASH();
125 return create(string, length);
126 }
127
create(const UChar * characters,unsigned length,PassRefPtr<SharedUChar> sharedBuffer)128 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length, PassRefPtr<SharedUChar> sharedBuffer)
129 {
130 ASSERT(characters);
131 ASSERT(minLengthToShare && length >= minLengthToShare);
132 return adoptRef(new StringImpl(characters, length, sharedBuffer));
133 }
134
sharedBuffer()135 SharedUChar* StringImpl::sharedBuffer()
136 {
137 if (m_length < minLengthToShare)
138 return 0;
139 // All static strings are smaller that the minimim length to share.
140 ASSERT(!isStatic());
141
142 BufferOwnership ownership = bufferOwnership();
143
144 if (ownership == BufferInternal)
145 return 0;
146 if (ownership == BufferSubstring)
147 return m_substringBuffer->sharedBuffer();
148 if (ownership == BufferOwned) {
149 ASSERT(!m_sharedBuffer);
150 m_sharedBuffer = SharedUChar::create(new SharableUChar(m_data)).leakRef();
151 m_refCountAndFlags = (m_refCountAndFlags & ~s_refCountMaskBufferOwnership) | BufferShared;
152 }
153
154 ASSERT(bufferOwnership() == BufferShared);
155 ASSERT(m_sharedBuffer);
156 return m_sharedBuffer;
157 }
158
containsOnlyWhitespace()159 bool StringImpl::containsOnlyWhitespace()
160 {
161 // FIXME: The definition of whitespace here includes a number of characters
162 // that are not whitespace from the point of view of RenderText; I wonder if
163 // that's a problem in practice.
164 for (unsigned i = 0; i < m_length; i++)
165 if (!isASCIISpace(m_data[i]))
166 return false;
167 return true;
168 }
169
substring(unsigned start,unsigned length)170 PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length)
171 {
172 if (start >= m_length)
173 return empty();
174 unsigned maxLength = m_length - start;
175 if (length >= maxLength) {
176 if (!start)
177 return this;
178 length = maxLength;
179 }
180 return create(m_data + start, length);
181 }
182
characterStartingAt(unsigned i)183 UChar32 StringImpl::characterStartingAt(unsigned i)
184 {
185 if (U16_IS_SINGLE(m_data[i]))
186 return m_data[i];
187 if (i + 1 < m_length && U16_IS_LEAD(m_data[i]) && U16_IS_TRAIL(m_data[i + 1]))
188 return U16_GET_SUPPLEMENTARY(m_data[i], m_data[i + 1]);
189 return 0;
190 }
191
lower()192 PassRefPtr<StringImpl> StringImpl::lower()
193 {
194 // Note: This is a hot function in the Dromaeo benchmark, specifically the
195 // no-op code path up through the first 'return' statement.
196
197 // First scan the string for uppercase and non-ASCII characters:
198 UChar ored = 0;
199 bool noUpper = true;
200 const UChar *end = m_data + m_length;
201 for (const UChar* chp = m_data; chp != end; chp++) {
202 if (UNLIKELY(isASCIIUpper(*chp)))
203 noUpper = false;
204 ored |= *chp;
205 }
206
207 // Nothing to do if the string is all ASCII with no uppercase.
208 if (noUpper && !(ored & ~0x7F))
209 return this;
210
211 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
212 CRASH();
213 int32_t length = m_length;
214
215 UChar* data;
216 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
217
218 if (!(ored & ~0x7F)) {
219 // Do a faster loop for the case where all the characters are ASCII.
220 for (int i = 0; i < length; i++) {
221 UChar c = m_data[i];
222 data[i] = toASCIILower(c);
223 }
224 return newImpl;
225 }
226
227 // Do a slower implementation for cases that include non-ASCII characters.
228 bool error;
229 int32_t realLength = Unicode::toLower(data, length, m_data, m_length, &error);
230 if (!error && realLength == length)
231 return newImpl;
232 newImpl = createUninitialized(realLength, data);
233 Unicode::toLower(data, realLength, m_data, m_length, &error);
234 if (error)
235 return this;
236 return newImpl;
237 }
238
upper()239 PassRefPtr<StringImpl> StringImpl::upper()
240 {
241 // This function could be optimized for no-op cases the way lower() is,
242 // but in empirical testing, few actual calls to upper() are no-ops, so
243 // it wouldn't be worth the extra time for pre-scanning.
244 UChar* data;
245 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
246
247 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
248 CRASH();
249 int32_t length = m_length;
250
251 // Do a faster loop for the case where all the characters are ASCII.
252 UChar ored = 0;
253 for (int i = 0; i < length; i++) {
254 UChar c = m_data[i];
255 ored |= c;
256 data[i] = toASCIIUpper(c);
257 }
258 if (!(ored & ~0x7F))
259 return newImpl.release();
260
261 // Do a slower implementation for cases that include non-ASCII characters.
262 bool error;
263 int32_t realLength = Unicode::toUpper(data, length, m_data, m_length, &error);
264 if (!error && realLength == length)
265 return newImpl;
266 newImpl = createUninitialized(realLength, data);
267 Unicode::toUpper(data, realLength, m_data, m_length, &error);
268 if (error)
269 return this;
270 return newImpl.release();
271 }
272
secure(UChar character,LastCharacterBehavior behavior)273 PassRefPtr<StringImpl> StringImpl::secure(UChar character, LastCharacterBehavior behavior)
274 {
275 if (!m_length)
276 return this;
277
278 UChar* data;
279 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
280 unsigned lastCharacterIndex = m_length - 1;
281 for (unsigned i = 0; i < lastCharacterIndex; ++i)
282 data[i] = character;
283 data[lastCharacterIndex] = (behavior == ObscureLastCharacter) ? character : m_data[lastCharacterIndex];
284 return newImpl.release();
285 }
286
foldCase()287 PassRefPtr<StringImpl> StringImpl::foldCase()
288 {
289 UChar* data;
290 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
291
292 if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
293 CRASH();
294 int32_t length = m_length;
295
296 // Do a faster loop for the case where all the characters are ASCII.
297 UChar ored = 0;
298 for (int32_t i = 0; i < length; i++) {
299 UChar c = m_data[i];
300 ored |= c;
301 data[i] = toASCIILower(c);
302 }
303 if (!(ored & ~0x7F))
304 return newImpl.release();
305
306 // Do a slower implementation for cases that include non-ASCII characters.
307 bool error;
308 int32_t realLength = Unicode::foldCase(data, length, m_data, m_length, &error);
309 if (!error && realLength == length)
310 return newImpl.release();
311 newImpl = createUninitialized(realLength, data);
312 Unicode::foldCase(data, realLength, m_data, m_length, &error);
313 if (error)
314 return this;
315 return newImpl.release();
316 }
317
stripWhiteSpace()318 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace()
319 {
320 if (!m_length)
321 return empty();
322
323 unsigned start = 0;
324 unsigned end = m_length - 1;
325
326 // skip white space from start
327 while (start <= end && isSpaceOrNewline(m_data[start]))
328 start++;
329
330 // only white space
331 if (start > end)
332 return empty();
333
334 // skip white space from end
335 while (end && isSpaceOrNewline(m_data[end]))
336 end--;
337
338 if (!start && end == m_length - 1)
339 return this;
340 return create(m_data + start, end + 1 - start);
341 }
342
removeCharacters(CharacterMatchFunctionPtr findMatch)343 PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr findMatch)
344 {
345 const UChar* from = m_data;
346 const UChar* fromend = from + m_length;
347
348 // Assume the common case will not remove any characters
349 while (from != fromend && !findMatch(*from))
350 from++;
351 if (from == fromend)
352 return this;
353
354 StringBuffer data(m_length);
355 UChar* to = data.characters();
356 unsigned outc = from - m_data;
357
358 if (outc)
359 memcpy(to, m_data, outc * sizeof(UChar));
360
361 while (true) {
362 while (from != fromend && findMatch(*from))
363 from++;
364 while (from != fromend && !findMatch(*from))
365 to[outc++] = *from++;
366 if (from == fromend)
367 break;
368 }
369
370 data.shrink(outc);
371
372 return adopt(data);
373 }
374
simplifyWhiteSpace()375 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace()
376 {
377 StringBuffer data(m_length);
378
379 const UChar* from = m_data;
380 const UChar* fromend = from + m_length;
381 int outc = 0;
382 bool changedToSpace = false;
383
384 UChar* to = data.characters();
385
386 while (true) {
387 while (from != fromend && isSpaceOrNewline(*from)) {
388 if (*from != ' ')
389 changedToSpace = true;
390 from++;
391 }
392 while (from != fromend && !isSpaceOrNewline(*from))
393 to[outc++] = *from++;
394 if (from != fromend)
395 to[outc++] = ' ';
396 else
397 break;
398 }
399
400 if (outc > 0 && to[outc - 1] == ' ')
401 outc--;
402
403 if (static_cast<unsigned>(outc) == m_length && !changedToSpace)
404 return this;
405
406 data.shrink(outc);
407
408 return adopt(data);
409 }
410
toIntStrict(bool * ok,int base)411 int StringImpl::toIntStrict(bool* ok, int base)
412 {
413 return charactersToIntStrict(m_data, m_length, ok, base);
414 }
415
toUIntStrict(bool * ok,int base)416 unsigned StringImpl::toUIntStrict(bool* ok, int base)
417 {
418 return charactersToUIntStrict(m_data, m_length, ok, base);
419 }
420
toInt64Strict(bool * ok,int base)421 int64_t StringImpl::toInt64Strict(bool* ok, int base)
422 {
423 return charactersToInt64Strict(m_data, m_length, ok, base);
424 }
425
toUInt64Strict(bool * ok,int base)426 uint64_t StringImpl::toUInt64Strict(bool* ok, int base)
427 {
428 return charactersToUInt64Strict(m_data, m_length, ok, base);
429 }
430
toIntPtrStrict(bool * ok,int base)431 intptr_t StringImpl::toIntPtrStrict(bool* ok, int base)
432 {
433 return charactersToIntPtrStrict(m_data, m_length, ok, base);
434 }
435
toInt(bool * ok)436 int StringImpl::toInt(bool* ok)
437 {
438 return charactersToInt(m_data, m_length, ok);
439 }
440
toUInt(bool * ok)441 unsigned StringImpl::toUInt(bool* ok)
442 {
443 return charactersToUInt(m_data, m_length, ok);
444 }
445
toInt64(bool * ok)446 int64_t StringImpl::toInt64(bool* ok)
447 {
448 return charactersToInt64(m_data, m_length, ok);
449 }
450
toUInt64(bool * ok)451 uint64_t StringImpl::toUInt64(bool* ok)
452 {
453 return charactersToUInt64(m_data, m_length, ok);
454 }
455
toIntPtr(bool * ok)456 intptr_t StringImpl::toIntPtr(bool* ok)
457 {
458 return charactersToIntPtr(m_data, m_length, ok);
459 }
460
toDouble(bool * ok,bool * didReadNumber)461 double StringImpl::toDouble(bool* ok, bool* didReadNumber)
462 {
463 return charactersToDouble(m_data, m_length, ok, didReadNumber);
464 }
465
toFloat(bool * ok,bool * didReadNumber)466 float StringImpl::toFloat(bool* ok, bool* didReadNumber)
467 {
468 return charactersToFloat(m_data, m_length, ok, didReadNumber);
469 }
470
equal(const UChar * a,const char * b,int length)471 static bool equal(const UChar* a, const char* b, int length)
472 {
473 ASSERT(length >= 0);
474 while (length--) {
475 unsigned char bc = *b++;
476 if (*a++ != bc)
477 return false;
478 }
479 return true;
480 }
481
equalIgnoringCase(const UChar * a,const char * b,unsigned length)482 bool equalIgnoringCase(const UChar* a, const char* b, unsigned length)
483 {
484 while (length--) {
485 unsigned char bc = *b++;
486 if (foldCase(*a++) != foldCase(bc))
487 return false;
488 }
489 return true;
490 }
491
equalIgnoringCase(const UChar * a,const UChar * b,int length)492 static inline bool equalIgnoringCase(const UChar* a, const UChar* b, int length)
493 {
494 ASSERT(length >= 0);
495 return umemcasecmp(a, b, length) == 0;
496 }
497
codePointCompare(const StringImpl * s1,const StringImpl * s2)498 int codePointCompare(const StringImpl* s1, const StringImpl* s2)
499 {
500 const unsigned l1 = s1 ? s1->length() : 0;
501 const unsigned l2 = s2 ? s2->length() : 0;
502 const unsigned lmin = l1 < l2 ? l1 : l2;
503 const UChar* c1 = s1 ? s1->characters() : 0;
504 const UChar* c2 = s2 ? s2->characters() : 0;
505 unsigned pos = 0;
506 while (pos < lmin && *c1 == *c2) {
507 c1++;
508 c2++;
509 pos++;
510 }
511
512 if (pos < lmin)
513 return (c1[0] > c2[0]) ? 1 : -1;
514
515 if (l1 == l2)
516 return 0;
517
518 return (l1 > l2) ? 1 : -1;
519 }
520
find(UChar c,unsigned start)521 size_t StringImpl::find(UChar c, unsigned start)
522 {
523 return WTF::find(m_data, m_length, c, start);
524 }
525
find(CharacterMatchFunctionPtr matchFunction,unsigned start)526 size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start)
527 {
528 return WTF::find(m_data, m_length, matchFunction, start);
529 }
530
find(const char * matchString,unsigned index)531 size_t StringImpl::find(const char* matchString, unsigned index)
532 {
533 // Check for null or empty string to match against
534 if (!matchString)
535 return notFound;
536 size_t matchStringLength = strlen(matchString);
537 if (matchStringLength > numeric_limits<unsigned>::max())
538 CRASH();
539 unsigned matchLength = matchStringLength;
540 if (!matchLength)
541 return min(index, length());
542
543 // Optimization 1: fast case for strings of length 1.
544 if (matchLength == 1)
545 return WTF::find(characters(), length(), *(const unsigned char*)matchString, index);
546
547 // Check index & matchLength are in range.
548 if (index > length())
549 return notFound;
550 unsigned searchLength = length() - index;
551 if (matchLength > searchLength)
552 return notFound;
553 // delta is the number of additional times to test; delta == 0 means test only once.
554 unsigned delta = searchLength - matchLength;
555
556 const UChar* searchCharacters = characters() + index;
557 const unsigned char* matchCharacters = (const unsigned char*)matchString;
558
559 // Optimization 2: keep a running hash of the strings,
560 // only call memcmp if the hashes match.
561 unsigned searchHash = 0;
562 unsigned matchHash = 0;
563 for (unsigned i = 0; i < matchLength; ++i) {
564 searchHash += searchCharacters[i];
565 matchHash += matchCharacters[i];
566 }
567
568 unsigned i = 0;
569 // keep looping until we match
570 while (searchHash != matchHash || !equal(searchCharacters + i, matchString, matchLength)) {
571 if (i == delta)
572 return notFound;
573 searchHash += searchCharacters[i + matchLength];
574 searchHash -= searchCharacters[i];
575 ++i;
576 }
577 return index + i;
578 }
579
findIgnoringCase(const char * matchString,unsigned index)580 size_t StringImpl::findIgnoringCase(const char* matchString, unsigned index)
581 {
582 // Check for null or empty string to match against
583 if (!matchString)
584 return notFound;
585 size_t matchStringLength = strlen(matchString);
586 if (matchStringLength > numeric_limits<unsigned>::max())
587 CRASH();
588 unsigned matchLength = matchStringLength;
589 if (!matchLength)
590 return min(index, length());
591
592 // Check index & matchLength are in range.
593 if (index > length())
594 return notFound;
595 unsigned searchLength = length() - index;
596 if (matchLength > searchLength)
597 return notFound;
598 // delta is the number of additional times to test; delta == 0 means test only once.
599 unsigned delta = searchLength - matchLength;
600
601 const UChar* searchCharacters = characters() + index;
602
603 unsigned i = 0;
604 // keep looping until we match
605 while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) {
606 if (i == delta)
607 return notFound;
608 ++i;
609 }
610 return index + i;
611 }
612
find(StringImpl * matchString,unsigned index)613 size_t StringImpl::find(StringImpl* matchString, unsigned index)
614 {
615 // Check for null or empty string to match against
616 if (!matchString)
617 return notFound;
618 unsigned matchLength = matchString->length();
619 if (!matchLength)
620 return min(index, length());
621
622 // Optimization 1: fast case for strings of length 1.
623 if (matchLength == 1)
624 return WTF::find(characters(), length(), matchString->characters()[0], index);
625
626 // Check index & matchLength are in range.
627 if (index > length())
628 return notFound;
629 unsigned searchLength = length() - index;
630 if (matchLength > searchLength)
631 return notFound;
632 // delta is the number of additional times to test; delta == 0 means test only once.
633 unsigned delta = searchLength - matchLength;
634
635 const UChar* searchCharacters = characters() + index;
636 const UChar* matchCharacters = matchString->characters();
637
638 // Optimization 2: keep a running hash of the strings,
639 // only call memcmp if the hashes match.
640 unsigned searchHash = 0;
641 unsigned matchHash = 0;
642 for (unsigned i = 0; i < matchLength; ++i) {
643 searchHash += searchCharacters[i];
644 matchHash += matchCharacters[i];
645 }
646
647 unsigned i = 0;
648 // keep looping until we match
649 while (searchHash != matchHash || memcmp(searchCharacters + i, matchCharacters, matchLength * sizeof(UChar))) {
650 if (i == delta)
651 return notFound;
652 searchHash += searchCharacters[i + matchLength];
653 searchHash -= searchCharacters[i];
654 ++i;
655 }
656 return index + i;
657 }
658
findIgnoringCase(StringImpl * matchString,unsigned index)659 size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index)
660 {
661 // Check for null or empty string to match against
662 if (!matchString)
663 return notFound;
664 unsigned matchLength = matchString->length();
665 if (!matchLength)
666 return min(index, length());
667
668 // Check index & matchLength are in range.
669 if (index > length())
670 return notFound;
671 unsigned searchLength = length() - index;
672 if (matchLength > searchLength)
673 return notFound;
674 // delta is the number of additional times to test; delta == 0 means test only once.
675 unsigned delta = searchLength - matchLength;
676
677 const UChar* searchCharacters = characters() + index;
678 const UChar* matchCharacters = matchString->characters();
679
680 unsigned i = 0;
681 // keep looping until we match
682 while (!equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength)) {
683 if (i == delta)
684 return notFound;
685 ++i;
686 }
687 return index + i;
688 }
689
reverseFind(UChar c,unsigned index)690 size_t StringImpl::reverseFind(UChar c, unsigned index)
691 {
692 return WTF::reverseFind(m_data, m_length, c, index);
693 }
694
reverseFind(StringImpl * matchString,unsigned index)695 size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index)
696 {
697 // Check for null or empty string to match against
698 if (!matchString)
699 return notFound;
700 unsigned matchLength = matchString->length();
701 if (!matchLength)
702 return min(index, length());
703
704 // Optimization 1: fast case for strings of length 1.
705 if (matchLength == 1)
706 return WTF::reverseFind(characters(), length(), matchString->characters()[0], index);
707
708 // Check index & matchLength are in range.
709 if (matchLength > length())
710 return notFound;
711 // delta is the number of additional times to test; delta == 0 means test only once.
712 unsigned delta = min(index, length() - matchLength);
713
714 const UChar *searchCharacters = characters();
715 const UChar *matchCharacters = matchString->characters();
716
717 // Optimization 2: keep a running hash of the strings,
718 // only call memcmp if the hashes match.
719 unsigned searchHash = 0;
720 unsigned matchHash = 0;
721 for (unsigned i = 0; i < matchLength; ++i) {
722 searchHash += searchCharacters[delta + i];
723 matchHash += matchCharacters[i];
724 }
725
726 // keep looping until we match
727 while (searchHash != matchHash || memcmp(searchCharacters + delta, matchCharacters, matchLength * sizeof(UChar))) {
728 if (!delta)
729 return notFound;
730 delta--;
731 searchHash -= searchCharacters[delta + matchLength];
732 searchHash += searchCharacters[delta];
733 }
734 return delta;
735 }
736
reverseFindIgnoringCase(StringImpl * matchString,unsigned index)737 size_t StringImpl::reverseFindIgnoringCase(StringImpl* matchString, unsigned index)
738 {
739 // Check for null or empty string to match against
740 if (!matchString)
741 return notFound;
742 unsigned matchLength = matchString->length();
743 if (!matchLength)
744 return min(index, length());
745
746 // Check index & matchLength are in range.
747 if (matchLength > length())
748 return notFound;
749 // delta is the number of additional times to test; delta == 0 means test only once.
750 unsigned delta = min(index, length() - matchLength);
751
752 const UChar *searchCharacters = characters();
753 const UChar *matchCharacters = matchString->characters();
754
755 // keep looping until we match
756 while (!equalIgnoringCase(searchCharacters + delta, matchCharacters, matchLength)) {
757 if (!delta)
758 return notFound;
759 delta--;
760 }
761 return delta;
762 }
763
endsWith(StringImpl * m_data,bool caseSensitive)764 bool StringImpl::endsWith(StringImpl* m_data, bool caseSensitive)
765 {
766 ASSERT(m_data);
767 if (m_length >= m_data->m_length) {
768 unsigned start = m_length - m_data->m_length;
769 return (caseSensitive ? find(m_data, start) : findIgnoringCase(m_data, start)) == start;
770 }
771 return false;
772 }
773
replace(UChar oldC,UChar newC)774 PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC)
775 {
776 if (oldC == newC)
777 return this;
778 unsigned i;
779 for (i = 0; i != m_length; ++i)
780 if (m_data[i] == oldC)
781 break;
782 if (i == m_length)
783 return this;
784
785 UChar* data;
786 RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
787
788 for (i = 0; i != m_length; ++i) {
789 UChar ch = m_data[i];
790 if (ch == oldC)
791 ch = newC;
792 data[i] = ch;
793 }
794 return newImpl.release();
795 }
796
replace(unsigned position,unsigned lengthToReplace,StringImpl * str)797 PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToReplace, StringImpl* str)
798 {
799 position = min(position, length());
800 lengthToReplace = min(lengthToReplace, length() - position);
801 unsigned lengthToInsert = str ? str->length() : 0;
802 if (!lengthToReplace && !lengthToInsert)
803 return this;
804 UChar* data;
805
806 if ((length() - lengthToReplace) >= (numeric_limits<unsigned>::max() - lengthToInsert))
807 CRASH();
808
809 RefPtr<StringImpl> newImpl =
810 createUninitialized(length() - lengthToReplace + lengthToInsert, data);
811 memcpy(data, characters(), position * sizeof(UChar));
812 if (str)
813 memcpy(data + position, str->characters(), lengthToInsert * sizeof(UChar));
814 memcpy(data + position + lengthToInsert, characters() + position + lengthToReplace,
815 (length() - position - lengthToReplace) * sizeof(UChar));
816 return newImpl.release();
817 }
818
replace(UChar pattern,StringImpl * replacement)819 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacement)
820 {
821 if (!replacement)
822 return this;
823
824 unsigned repStrLength = replacement->length();
825 size_t srcSegmentStart = 0;
826 unsigned matchCount = 0;
827
828 // Count the matches
829 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) {
830 ++matchCount;
831 ++srcSegmentStart;
832 }
833
834 // If we have 0 matches, we don't have to do any more work
835 if (!matchCount)
836 return this;
837
838 if (repStrLength && matchCount > numeric_limits<unsigned>::max() / repStrLength)
839 CRASH();
840
841 unsigned replaceSize = matchCount * repStrLength;
842 unsigned newSize = m_length - matchCount;
843 if (newSize >= (numeric_limits<unsigned>::max() - replaceSize))
844 CRASH();
845
846 newSize += replaceSize;
847
848 UChar* data;
849 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
850
851 // Construct the new data
852 size_t srcSegmentEnd;
853 unsigned srcSegmentLength;
854 srcSegmentStart = 0;
855 unsigned dstOffset = 0;
856
857 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) {
858 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
859 memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
860 dstOffset += srcSegmentLength;
861 memcpy(data + dstOffset, replacement->m_data, repStrLength * sizeof(UChar));
862 dstOffset += repStrLength;
863 srcSegmentStart = srcSegmentEnd + 1;
864 }
865
866 srcSegmentLength = m_length - srcSegmentStart;
867 memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
868
869 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
870
871 return newImpl.release();
872 }
873
replace(StringImpl * pattern,StringImpl * replacement)874 PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* replacement)
875 {
876 if (!pattern || !replacement)
877 return this;
878
879 unsigned patternLength = pattern->length();
880 if (!patternLength)
881 return this;
882
883 unsigned repStrLength = replacement->length();
884 size_t srcSegmentStart = 0;
885 unsigned matchCount = 0;
886
887 // Count the matches
888 while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) {
889 ++matchCount;
890 srcSegmentStart += patternLength;
891 }
892
893 // If we have 0 matches, we don't have to do any more work
894 if (!matchCount)
895 return this;
896
897 unsigned newSize = m_length - matchCount * patternLength;
898 if (repStrLength && matchCount > numeric_limits<unsigned>::max() / repStrLength)
899 CRASH();
900
901 if (newSize > (numeric_limits<unsigned>::max() - matchCount * repStrLength))
902 CRASH();
903
904 newSize += matchCount * repStrLength;
905
906 UChar* data;
907 RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
908
909 // Construct the new data
910 size_t srcSegmentEnd;
911 unsigned srcSegmentLength;
912 srcSegmentStart = 0;
913 unsigned dstOffset = 0;
914
915 while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) {
916 srcSegmentLength = srcSegmentEnd - srcSegmentStart;
917 memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
918 dstOffset += srcSegmentLength;
919 memcpy(data + dstOffset, replacement->m_data, repStrLength * sizeof(UChar));
920 dstOffset += repStrLength;
921 srcSegmentStart = srcSegmentEnd + patternLength;
922 }
923
924 srcSegmentLength = m_length - srcSegmentStart;
925 memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
926
927 ASSERT(dstOffset + srcSegmentLength == newImpl->length());
928
929 return newImpl.release();
930 }
931
equal(const StringImpl * a,const StringImpl * b)932 bool equal(const StringImpl* a, const StringImpl* b)
933 {
934 return StringHash::equal(a, b);
935 }
936
equal(const StringImpl * a,const char * b)937 bool equal(const StringImpl* a, const char* b)
938 {
939 if (!a)
940 return !b;
941 if (!b)
942 return !a;
943
944 unsigned length = a->length();
945 const UChar* as = a->characters();
946 for (unsigned i = 0; i != length; ++i) {
947 unsigned char bc = b[i];
948 if (!bc)
949 return false;
950 if (as[i] != bc)
951 return false;
952 }
953
954 return !b[length];
955 }
956
equalIgnoringCase(StringImpl * a,StringImpl * b)957 bool equalIgnoringCase(StringImpl* a, StringImpl* b)
958 {
959 return CaseFoldingHash::equal(a, b);
960 }
961
equalIgnoringCase(StringImpl * a,const char * b)962 bool equalIgnoringCase(StringImpl* a, const char* b)
963 {
964 if (!a)
965 return !b;
966 if (!b)
967 return !a;
968
969 unsigned length = a->length();
970 const UChar* as = a->characters();
971
972 // Do a faster loop for the case where all the characters are ASCII.
973 UChar ored = 0;
974 bool equal = true;
975 for (unsigned i = 0; i != length; ++i) {
976 char bc = b[i];
977 if (!bc)
978 return false;
979 UChar ac = as[i];
980 ored |= ac;
981 equal = equal && (toASCIILower(ac) == toASCIILower(bc));
982 }
983
984 // Do a slower implementation for cases that include non-ASCII characters.
985 if (ored & ~0x7F) {
986 equal = true;
987 for (unsigned i = 0; i != length; ++i) {
988 unsigned char bc = b[i];
989 equal = equal && (foldCase(as[i]) == foldCase(bc));
990 }
991 }
992
993 return equal && !b[length];
994 }
995
equalIgnoringNullity(StringImpl * a,StringImpl * b)996 bool equalIgnoringNullity(StringImpl* a, StringImpl* b)
997 {
998 if (StringHash::equal(a, b))
999 return true;
1000 if (!a && b && !b->length())
1001 return true;
1002 if (!b && a && !a->length())
1003 return true;
1004
1005 return false;
1006 }
1007
defaultWritingDirection(bool * hasStrongDirectionality)1008 WTF::Unicode::Direction StringImpl::defaultWritingDirection(bool* hasStrongDirectionality)
1009 {
1010 for (unsigned i = 0; i < m_length; ++i) {
1011 WTF::Unicode::Direction charDirection = WTF::Unicode::direction(m_data[i]);
1012 if (charDirection == WTF::Unicode::LeftToRight) {
1013 if (hasStrongDirectionality)
1014 *hasStrongDirectionality = true;
1015 return WTF::Unicode::LeftToRight;
1016 }
1017 if (charDirection == WTF::Unicode::RightToLeft || charDirection == WTF::Unicode::RightToLeftArabic) {
1018 if (hasStrongDirectionality)
1019 *hasStrongDirectionality = true;
1020 return WTF::Unicode::RightToLeft;
1021 }
1022 }
1023 if (hasStrongDirectionality)
1024 *hasStrongDirectionality = false;
1025 return WTF::Unicode::LeftToRight;
1026 }
1027
1028 // This is a hot function because it's used when parsing HTML.
createStrippingNullCharactersSlowCase(const UChar * characters,unsigned length)1029 PassRefPtr<StringImpl> StringImpl::createStrippingNullCharactersSlowCase(const UChar* characters, unsigned length)
1030 {
1031 StringBuffer strippedCopy(length);
1032 unsigned strippedLength = 0;
1033 for (unsigned i = 0; i < length; i++) {
1034 if (int c = characters[i])
1035 strippedCopy[strippedLength++] = c;
1036 }
1037 ASSERT(strippedLength < length); // Only take the slow case when stripping.
1038 strippedCopy.shrink(strippedLength);
1039 return adopt(strippedCopy);
1040 }
1041
adopt(StringBuffer & buffer)1042 PassRefPtr<StringImpl> StringImpl::adopt(StringBuffer& buffer)
1043 {
1044 unsigned length = buffer.length();
1045 if (length == 0)
1046 return empty();
1047 return adoptRef(new StringImpl(buffer.release(), length));
1048 }
1049
createWithTerminatingNullCharacter(const StringImpl & string)1050 PassRefPtr<StringImpl> StringImpl::createWithTerminatingNullCharacter(const StringImpl& string)
1051 {
1052 // Use createUninitialized instead of 'new StringImpl' so that the string and its buffer
1053 // get allocated in a single memory block.
1054 UChar* data;
1055 unsigned length = string.m_length;
1056 if (length >= numeric_limits<unsigned>::max())
1057 CRASH();
1058 RefPtr<StringImpl> terminatedString = createUninitialized(length + 1, data);
1059 memcpy(data, string.m_data, length * sizeof(UChar));
1060 data[length] = 0;
1061 terminatedString->m_length--;
1062 terminatedString->m_hash = string.m_hash;
1063 terminatedString->m_refCountAndFlags |= s_refCountFlagHasTerminatingNullCharacter;
1064 return terminatedString.release();
1065 }
1066
threadsafeCopy() const1067 PassRefPtr<StringImpl> StringImpl::threadsafeCopy() const
1068 {
1069 return create(m_data, m_length);
1070 }
1071
crossThreadString()1072 PassRefPtr<StringImpl> StringImpl::crossThreadString()
1073 {
1074 if (SharedUChar* sharedBuffer = this->sharedBuffer())
1075 return adoptRef(new StringImpl(m_data, m_length, sharedBuffer->crossThreadCopy()));
1076
1077 // If no shared buffer is available, create a copy.
1078 return threadsafeCopy();
1079 }
1080
1081 } // namespace WTF
1082