• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3  *           (C) 1999 Antti Koivisto (koivisto@kde.org)
4  *           (C) 2001 Dirk Mueller ( mueller@kde.org )
5  * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
6  * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net)
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Library General Public
10  * License as published by the Free Software Foundation; either
11  * version 2 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Library General Public License for more details.
17  *
18  * You should have received a copy of the GNU Library General Public License
19  * along with this library; see the file COPYING.LIB.  If not, write to
20  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21  * Boston, MA 02110-1301, USA.
22  *
23  */
24 
25 #include "config.h"
26 #include "StringImpl.h"
27 
28 #include "AtomicString.h"
29 #include "StringBuffer.h"
30 #include "StringHash.h"
31 #include <wtf/StdLibExtras.h>
32 #include <wtf/WTFThreadData.h>
33 
34 using namespace std;
35 
36 namespace WTF {
37 
38 using namespace Unicode;
39 
40 static const unsigned minLengthToShare = 20;
41 
42 COMPILE_ASSERT(sizeof(StringImpl) == 2 * sizeof(int) + 3 * sizeof(void*), StringImpl_should_stay_small);
43 
~StringImpl()44 StringImpl::~StringImpl()
45 {
46     ASSERT(!isStatic());
47 
48     if (isAtomic())
49         AtomicString::remove(this);
50 #if USE(JSC)
51     if (isIdentifier()) {
52         if (!wtfThreadData().currentIdentifierTable()->remove(this))
53             CRASH();
54     }
55 #endif
56 
57     BufferOwnership ownership = bufferOwnership();
58     if (ownership != BufferInternal) {
59         if (ownership == BufferOwned) {
60             ASSERT(!m_sharedBuffer);
61             ASSERT(m_data);
62             fastFree(const_cast<UChar*>(m_data));
63         } else if (ownership == BufferSubstring) {
64             ASSERT(m_substringBuffer);
65             m_substringBuffer->deref();
66         } else {
67             ASSERT(ownership == BufferShared);
68             ASSERT(m_sharedBuffer);
69             m_sharedBuffer->deref();
70         }
71     }
72 }
73 
createUninitialized(unsigned length,UChar * & data)74 PassRefPtr<StringImpl> StringImpl::createUninitialized(unsigned length, UChar*& data)
75 {
76     if (!length) {
77         data = 0;
78         return empty();
79     }
80 
81     // Allocate a single buffer large enough to contain the StringImpl
82     // struct as well as the data which it contains. This removes one
83     // heap allocation from this call.
84     if (length > ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(UChar)))
85         CRASH();
86     size_t size = sizeof(StringImpl) + length * sizeof(UChar);
87     StringImpl* string = static_cast<StringImpl*>(fastMalloc(size));
88 
89     data = reinterpret_cast<UChar*>(string + 1);
90     return adoptRef(new (string) StringImpl(length));
91 }
92 
create(const UChar * characters,unsigned length)93 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length)
94 {
95     if (!characters || !length)
96         return empty();
97 
98     UChar* data;
99     RefPtr<StringImpl> string = createUninitialized(length, data);
100     memcpy(data, characters, length * sizeof(UChar));
101     return string.release();
102 }
103 
create(const char * characters,unsigned length)104 PassRefPtr<StringImpl> StringImpl::create(const char* characters, unsigned length)
105 {
106     if (!characters || !length)
107         return empty();
108 
109     UChar* data;
110     RefPtr<StringImpl> string = createUninitialized(length, data);
111     for (unsigned i = 0; i != length; ++i) {
112         unsigned char c = characters[i];
113         data[i] = c;
114     }
115     return string.release();
116 }
117 
create(const char * string)118 PassRefPtr<StringImpl> StringImpl::create(const char* string)
119 {
120     if (!string)
121         return empty();
122     size_t length = strlen(string);
123     if (length > numeric_limits<unsigned>::max())
124         CRASH();
125     return create(string, length);
126 }
127 
create(const UChar * characters,unsigned length,PassRefPtr<SharedUChar> sharedBuffer)128 PassRefPtr<StringImpl> StringImpl::create(const UChar* characters, unsigned length, PassRefPtr<SharedUChar> sharedBuffer)
129 {
130     ASSERT(characters);
131     ASSERT(minLengthToShare && length >= minLengthToShare);
132     return adoptRef(new StringImpl(characters, length, sharedBuffer));
133 }
134 
sharedBuffer()135 SharedUChar* StringImpl::sharedBuffer()
136 {
137     if (m_length < minLengthToShare)
138         return 0;
139     // All static strings are smaller that the minimim length to share.
140     ASSERT(!isStatic());
141 
142     BufferOwnership ownership = bufferOwnership();
143 
144     if (ownership == BufferInternal)
145         return 0;
146     if (ownership == BufferSubstring)
147         return m_substringBuffer->sharedBuffer();
148     if (ownership == BufferOwned) {
149         ASSERT(!m_sharedBuffer);
150         m_sharedBuffer = SharedUChar::create(new SharableUChar(m_data)).leakRef();
151         m_refCountAndFlags = (m_refCountAndFlags & ~s_refCountMaskBufferOwnership) | BufferShared;
152     }
153 
154     ASSERT(bufferOwnership() == BufferShared);
155     ASSERT(m_sharedBuffer);
156     return m_sharedBuffer;
157 }
158 
containsOnlyWhitespace()159 bool StringImpl::containsOnlyWhitespace()
160 {
161     // FIXME: The definition of whitespace here includes a number of characters
162     // that are not whitespace from the point of view of RenderText; I wonder if
163     // that's a problem in practice.
164     for (unsigned i = 0; i < m_length; i++)
165         if (!isASCIISpace(m_data[i]))
166             return false;
167     return true;
168 }
169 
substring(unsigned start,unsigned length)170 PassRefPtr<StringImpl> StringImpl::substring(unsigned start, unsigned length)
171 {
172     if (start >= m_length)
173         return empty();
174     unsigned maxLength = m_length - start;
175     if (length >= maxLength) {
176         if (!start)
177             return this;
178         length = maxLength;
179     }
180     return create(m_data + start, length);
181 }
182 
characterStartingAt(unsigned i)183 UChar32 StringImpl::characterStartingAt(unsigned i)
184 {
185     if (U16_IS_SINGLE(m_data[i]))
186         return m_data[i];
187     if (i + 1 < m_length && U16_IS_LEAD(m_data[i]) && U16_IS_TRAIL(m_data[i + 1]))
188         return U16_GET_SUPPLEMENTARY(m_data[i], m_data[i + 1]);
189     return 0;
190 }
191 
lower()192 PassRefPtr<StringImpl> StringImpl::lower()
193 {
194     // Note: This is a hot function in the Dromaeo benchmark, specifically the
195     // no-op code path up through the first 'return' statement.
196 
197     // First scan the string for uppercase and non-ASCII characters:
198     UChar ored = 0;
199     bool noUpper = true;
200     const UChar *end = m_data + m_length;
201     for (const UChar* chp = m_data; chp != end; chp++) {
202         if (UNLIKELY(isASCIIUpper(*chp)))
203             noUpper = false;
204         ored |= *chp;
205     }
206 
207     // Nothing to do if the string is all ASCII with no uppercase.
208     if (noUpper && !(ored & ~0x7F))
209         return this;
210 
211     if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
212         CRASH();
213     int32_t length = m_length;
214 
215     UChar* data;
216     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
217 
218     if (!(ored & ~0x7F)) {
219         // Do a faster loop for the case where all the characters are ASCII.
220         for (int i = 0; i < length; i++) {
221             UChar c = m_data[i];
222             data[i] = toASCIILower(c);
223         }
224         return newImpl;
225     }
226 
227     // Do a slower implementation for cases that include non-ASCII characters.
228     bool error;
229     int32_t realLength = Unicode::toLower(data, length, m_data, m_length, &error);
230     if (!error && realLength == length)
231         return newImpl;
232     newImpl = createUninitialized(realLength, data);
233     Unicode::toLower(data, realLength, m_data, m_length, &error);
234     if (error)
235         return this;
236     return newImpl;
237 }
238 
upper()239 PassRefPtr<StringImpl> StringImpl::upper()
240 {
241     // This function could be optimized for no-op cases the way lower() is,
242     // but in empirical testing, few actual calls to upper() are no-ops, so
243     // it wouldn't be worth the extra time for pre-scanning.
244     UChar* data;
245     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
246 
247     if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
248         CRASH();
249     int32_t length = m_length;
250 
251     // Do a faster loop for the case where all the characters are ASCII.
252     UChar ored = 0;
253     for (int i = 0; i < length; i++) {
254         UChar c = m_data[i];
255         ored |= c;
256         data[i] = toASCIIUpper(c);
257     }
258     if (!(ored & ~0x7F))
259         return newImpl.release();
260 
261     // Do a slower implementation for cases that include non-ASCII characters.
262     bool error;
263     int32_t realLength = Unicode::toUpper(data, length, m_data, m_length, &error);
264     if (!error && realLength == length)
265         return newImpl;
266     newImpl = createUninitialized(realLength, data);
267     Unicode::toUpper(data, realLength, m_data, m_length, &error);
268     if (error)
269         return this;
270     return newImpl.release();
271 }
272 
secure(UChar character,LastCharacterBehavior behavior)273 PassRefPtr<StringImpl> StringImpl::secure(UChar character, LastCharacterBehavior behavior)
274 {
275     if (!m_length)
276         return this;
277 
278     UChar* data;
279     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
280     unsigned lastCharacterIndex = m_length - 1;
281     for (unsigned i = 0; i < lastCharacterIndex; ++i)
282         data[i] = character;
283     data[lastCharacterIndex] = (behavior == ObscureLastCharacter) ? character : m_data[lastCharacterIndex];
284     return newImpl.release();
285 }
286 
foldCase()287 PassRefPtr<StringImpl> StringImpl::foldCase()
288 {
289     UChar* data;
290     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
291 
292     if (m_length > static_cast<unsigned>(numeric_limits<int32_t>::max()))
293         CRASH();
294     int32_t length = m_length;
295 
296     // Do a faster loop for the case where all the characters are ASCII.
297     UChar ored = 0;
298     for (int32_t i = 0; i < length; i++) {
299         UChar c = m_data[i];
300         ored |= c;
301         data[i] = toASCIILower(c);
302     }
303     if (!(ored & ~0x7F))
304         return newImpl.release();
305 
306     // Do a slower implementation for cases that include non-ASCII characters.
307     bool error;
308     int32_t realLength = Unicode::foldCase(data, length, m_data, m_length, &error);
309     if (!error && realLength == length)
310         return newImpl.release();
311     newImpl = createUninitialized(realLength, data);
312     Unicode::foldCase(data, realLength, m_data, m_length, &error);
313     if (error)
314         return this;
315     return newImpl.release();
316 }
317 
stripWhiteSpace()318 PassRefPtr<StringImpl> StringImpl::stripWhiteSpace()
319 {
320     if (!m_length)
321         return empty();
322 
323     unsigned start = 0;
324     unsigned end = m_length - 1;
325 
326     // skip white space from start
327     while (start <= end && isSpaceOrNewline(m_data[start]))
328         start++;
329 
330     // only white space
331     if (start > end)
332         return empty();
333 
334     // skip white space from end
335     while (end && isSpaceOrNewline(m_data[end]))
336         end--;
337 
338     if (!start && end == m_length - 1)
339         return this;
340     return create(m_data + start, end + 1 - start);
341 }
342 
removeCharacters(CharacterMatchFunctionPtr findMatch)343 PassRefPtr<StringImpl> StringImpl::removeCharacters(CharacterMatchFunctionPtr findMatch)
344 {
345     const UChar* from = m_data;
346     const UChar* fromend = from + m_length;
347 
348     // Assume the common case will not remove any characters
349     while (from != fromend && !findMatch(*from))
350         from++;
351     if (from == fromend)
352         return this;
353 
354     StringBuffer data(m_length);
355     UChar* to = data.characters();
356     unsigned outc = from - m_data;
357 
358     if (outc)
359         memcpy(to, m_data, outc * sizeof(UChar));
360 
361     while (true) {
362         while (from != fromend && findMatch(*from))
363             from++;
364         while (from != fromend && !findMatch(*from))
365             to[outc++] = *from++;
366         if (from == fromend)
367             break;
368     }
369 
370     data.shrink(outc);
371 
372     return adopt(data);
373 }
374 
simplifyWhiteSpace()375 PassRefPtr<StringImpl> StringImpl::simplifyWhiteSpace()
376 {
377     StringBuffer data(m_length);
378 
379     const UChar* from = m_data;
380     const UChar* fromend = from + m_length;
381     int outc = 0;
382     bool changedToSpace = false;
383 
384     UChar* to = data.characters();
385 
386     while (true) {
387         while (from != fromend && isSpaceOrNewline(*from)) {
388             if (*from != ' ')
389                 changedToSpace = true;
390             from++;
391         }
392         while (from != fromend && !isSpaceOrNewline(*from))
393             to[outc++] = *from++;
394         if (from != fromend)
395             to[outc++] = ' ';
396         else
397             break;
398     }
399 
400     if (outc > 0 && to[outc - 1] == ' ')
401         outc--;
402 
403     if (static_cast<unsigned>(outc) == m_length && !changedToSpace)
404         return this;
405 
406     data.shrink(outc);
407 
408     return adopt(data);
409 }
410 
toIntStrict(bool * ok,int base)411 int StringImpl::toIntStrict(bool* ok, int base)
412 {
413     return charactersToIntStrict(m_data, m_length, ok, base);
414 }
415 
toUIntStrict(bool * ok,int base)416 unsigned StringImpl::toUIntStrict(bool* ok, int base)
417 {
418     return charactersToUIntStrict(m_data, m_length, ok, base);
419 }
420 
toInt64Strict(bool * ok,int base)421 int64_t StringImpl::toInt64Strict(bool* ok, int base)
422 {
423     return charactersToInt64Strict(m_data, m_length, ok, base);
424 }
425 
toUInt64Strict(bool * ok,int base)426 uint64_t StringImpl::toUInt64Strict(bool* ok, int base)
427 {
428     return charactersToUInt64Strict(m_data, m_length, ok, base);
429 }
430 
toIntPtrStrict(bool * ok,int base)431 intptr_t StringImpl::toIntPtrStrict(bool* ok, int base)
432 {
433     return charactersToIntPtrStrict(m_data, m_length, ok, base);
434 }
435 
toInt(bool * ok)436 int StringImpl::toInt(bool* ok)
437 {
438     return charactersToInt(m_data, m_length, ok);
439 }
440 
toUInt(bool * ok)441 unsigned StringImpl::toUInt(bool* ok)
442 {
443     return charactersToUInt(m_data, m_length, ok);
444 }
445 
toInt64(bool * ok)446 int64_t StringImpl::toInt64(bool* ok)
447 {
448     return charactersToInt64(m_data, m_length, ok);
449 }
450 
toUInt64(bool * ok)451 uint64_t StringImpl::toUInt64(bool* ok)
452 {
453     return charactersToUInt64(m_data, m_length, ok);
454 }
455 
toIntPtr(bool * ok)456 intptr_t StringImpl::toIntPtr(bool* ok)
457 {
458     return charactersToIntPtr(m_data, m_length, ok);
459 }
460 
toDouble(bool * ok,bool * didReadNumber)461 double StringImpl::toDouble(bool* ok, bool* didReadNumber)
462 {
463     return charactersToDouble(m_data, m_length, ok, didReadNumber);
464 }
465 
toFloat(bool * ok,bool * didReadNumber)466 float StringImpl::toFloat(bool* ok, bool* didReadNumber)
467 {
468     return charactersToFloat(m_data, m_length, ok, didReadNumber);
469 }
470 
equal(const UChar * a,const char * b,int length)471 static bool equal(const UChar* a, const char* b, int length)
472 {
473     ASSERT(length >= 0);
474     while (length--) {
475         unsigned char bc = *b++;
476         if (*a++ != bc)
477             return false;
478     }
479     return true;
480 }
481 
equalIgnoringCase(const UChar * a,const char * b,unsigned length)482 bool equalIgnoringCase(const UChar* a, const char* b, unsigned length)
483 {
484     while (length--) {
485         unsigned char bc = *b++;
486         if (foldCase(*a++) != foldCase(bc))
487             return false;
488     }
489     return true;
490 }
491 
equalIgnoringCase(const UChar * a,const UChar * b,int length)492 static inline bool equalIgnoringCase(const UChar* a, const UChar* b, int length)
493 {
494     ASSERT(length >= 0);
495     return umemcasecmp(a, b, length) == 0;
496 }
497 
codePointCompare(const StringImpl * s1,const StringImpl * s2)498 int codePointCompare(const StringImpl* s1, const StringImpl* s2)
499 {
500     const unsigned l1 = s1 ? s1->length() : 0;
501     const unsigned l2 = s2 ? s2->length() : 0;
502     const unsigned lmin = l1 < l2 ? l1 : l2;
503     const UChar* c1 = s1 ? s1->characters() : 0;
504     const UChar* c2 = s2 ? s2->characters() : 0;
505     unsigned pos = 0;
506     while (pos < lmin && *c1 == *c2) {
507         c1++;
508         c2++;
509         pos++;
510     }
511 
512     if (pos < lmin)
513         return (c1[0] > c2[0]) ? 1 : -1;
514 
515     if (l1 == l2)
516         return 0;
517 
518     return (l1 > l2) ? 1 : -1;
519 }
520 
find(UChar c,unsigned start)521 size_t StringImpl::find(UChar c, unsigned start)
522 {
523     return WTF::find(m_data, m_length, c, start);
524 }
525 
find(CharacterMatchFunctionPtr matchFunction,unsigned start)526 size_t StringImpl::find(CharacterMatchFunctionPtr matchFunction, unsigned start)
527 {
528     return WTF::find(m_data, m_length, matchFunction, start);
529 }
530 
find(const char * matchString,unsigned index)531 size_t StringImpl::find(const char* matchString, unsigned index)
532 {
533     // Check for null or empty string to match against
534     if (!matchString)
535         return notFound;
536     size_t matchStringLength = strlen(matchString);
537     if (matchStringLength > numeric_limits<unsigned>::max())
538         CRASH();
539     unsigned matchLength = matchStringLength;
540     if (!matchLength)
541         return min(index, length());
542 
543     // Optimization 1: fast case for strings of length 1.
544     if (matchLength == 1)
545         return WTF::find(characters(), length(), *(const unsigned char*)matchString, index);
546 
547     // Check index & matchLength are in range.
548     if (index > length())
549         return notFound;
550     unsigned searchLength = length() - index;
551     if (matchLength > searchLength)
552         return notFound;
553     // delta is the number of additional times to test; delta == 0 means test only once.
554     unsigned delta = searchLength - matchLength;
555 
556     const UChar* searchCharacters = characters() + index;
557     const unsigned char* matchCharacters = (const unsigned char*)matchString;
558 
559     // Optimization 2: keep a running hash of the strings,
560     // only call memcmp if the hashes match.
561     unsigned searchHash = 0;
562     unsigned matchHash = 0;
563     for (unsigned i = 0; i < matchLength; ++i) {
564         searchHash += searchCharacters[i];
565         matchHash += matchCharacters[i];
566     }
567 
568     unsigned i = 0;
569     // keep looping until we match
570     while (searchHash != matchHash || !equal(searchCharacters + i, matchString, matchLength)) {
571         if (i == delta)
572             return notFound;
573         searchHash += searchCharacters[i + matchLength];
574         searchHash -= searchCharacters[i];
575         ++i;
576     }
577     return index + i;
578 }
579 
findIgnoringCase(const char * matchString,unsigned index)580 size_t StringImpl::findIgnoringCase(const char* matchString, unsigned index)
581 {
582     // Check for null or empty string to match against
583     if (!matchString)
584         return notFound;
585     size_t matchStringLength = strlen(matchString);
586     if (matchStringLength > numeric_limits<unsigned>::max())
587         CRASH();
588     unsigned matchLength = matchStringLength;
589     if (!matchLength)
590         return min(index, length());
591 
592     // Check index & matchLength are in range.
593     if (index > length())
594         return notFound;
595     unsigned searchLength = length() - index;
596     if (matchLength > searchLength)
597         return notFound;
598     // delta is the number of additional times to test; delta == 0 means test only once.
599     unsigned delta = searchLength - matchLength;
600 
601     const UChar* searchCharacters = characters() + index;
602 
603     unsigned i = 0;
604     // keep looping until we match
605     while (!equalIgnoringCase(searchCharacters + i, matchString, matchLength)) {
606         if (i == delta)
607             return notFound;
608         ++i;
609     }
610     return index + i;
611 }
612 
find(StringImpl * matchString,unsigned index)613 size_t StringImpl::find(StringImpl* matchString, unsigned index)
614 {
615     // Check for null or empty string to match against
616     if (!matchString)
617         return notFound;
618     unsigned matchLength = matchString->length();
619     if (!matchLength)
620         return min(index, length());
621 
622     // Optimization 1: fast case for strings of length 1.
623     if (matchLength == 1)
624         return WTF::find(characters(), length(), matchString->characters()[0], index);
625 
626     // Check index & matchLength are in range.
627     if (index > length())
628         return notFound;
629     unsigned searchLength = length() - index;
630     if (matchLength > searchLength)
631         return notFound;
632     // delta is the number of additional times to test; delta == 0 means test only once.
633     unsigned delta = searchLength - matchLength;
634 
635     const UChar* searchCharacters = characters() + index;
636     const UChar* matchCharacters = matchString->characters();
637 
638     // Optimization 2: keep a running hash of the strings,
639     // only call memcmp if the hashes match.
640     unsigned searchHash = 0;
641     unsigned matchHash = 0;
642     for (unsigned i = 0; i < matchLength; ++i) {
643         searchHash += searchCharacters[i];
644         matchHash += matchCharacters[i];
645     }
646 
647     unsigned i = 0;
648     // keep looping until we match
649     while (searchHash != matchHash || memcmp(searchCharacters + i, matchCharacters, matchLength * sizeof(UChar))) {
650         if (i == delta)
651             return notFound;
652         searchHash += searchCharacters[i + matchLength];
653         searchHash -= searchCharacters[i];
654         ++i;
655     }
656     return index + i;
657 }
658 
findIgnoringCase(StringImpl * matchString,unsigned index)659 size_t StringImpl::findIgnoringCase(StringImpl* matchString, unsigned index)
660 {
661     // Check for null or empty string to match against
662     if (!matchString)
663         return notFound;
664     unsigned matchLength = matchString->length();
665     if (!matchLength)
666         return min(index, length());
667 
668     // Check index & matchLength are in range.
669     if (index > length())
670         return notFound;
671     unsigned searchLength = length() - index;
672     if (matchLength > searchLength)
673         return notFound;
674     // delta is the number of additional times to test; delta == 0 means test only once.
675     unsigned delta = searchLength - matchLength;
676 
677     const UChar* searchCharacters = characters() + index;
678     const UChar* matchCharacters = matchString->characters();
679 
680     unsigned i = 0;
681     // keep looping until we match
682     while (!equalIgnoringCase(searchCharacters + i, matchCharacters, matchLength)) {
683         if (i == delta)
684             return notFound;
685         ++i;
686     }
687     return index + i;
688 }
689 
reverseFind(UChar c,unsigned index)690 size_t StringImpl::reverseFind(UChar c, unsigned index)
691 {
692     return WTF::reverseFind(m_data, m_length, c, index);
693 }
694 
reverseFind(StringImpl * matchString,unsigned index)695 size_t StringImpl::reverseFind(StringImpl* matchString, unsigned index)
696 {
697     // Check for null or empty string to match against
698     if (!matchString)
699         return notFound;
700     unsigned matchLength = matchString->length();
701     if (!matchLength)
702         return min(index, length());
703 
704     // Optimization 1: fast case for strings of length 1.
705     if (matchLength == 1)
706         return WTF::reverseFind(characters(), length(), matchString->characters()[0], index);
707 
708     // Check index & matchLength are in range.
709     if (matchLength > length())
710         return notFound;
711     // delta is the number of additional times to test; delta == 0 means test only once.
712     unsigned delta = min(index, length() - matchLength);
713 
714     const UChar *searchCharacters = characters();
715     const UChar *matchCharacters = matchString->characters();
716 
717     // Optimization 2: keep a running hash of the strings,
718     // only call memcmp if the hashes match.
719     unsigned searchHash = 0;
720     unsigned matchHash = 0;
721     for (unsigned i = 0; i < matchLength; ++i) {
722         searchHash += searchCharacters[delta + i];
723         matchHash += matchCharacters[i];
724     }
725 
726     // keep looping until we match
727     while (searchHash != matchHash || memcmp(searchCharacters + delta, matchCharacters, matchLength * sizeof(UChar))) {
728         if (!delta)
729             return notFound;
730         delta--;
731         searchHash -= searchCharacters[delta + matchLength];
732         searchHash += searchCharacters[delta];
733     }
734     return delta;
735 }
736 
reverseFindIgnoringCase(StringImpl * matchString,unsigned index)737 size_t StringImpl::reverseFindIgnoringCase(StringImpl* matchString, unsigned index)
738 {
739     // Check for null or empty string to match against
740     if (!matchString)
741         return notFound;
742     unsigned matchLength = matchString->length();
743     if (!matchLength)
744         return min(index, length());
745 
746     // Check index & matchLength are in range.
747     if (matchLength > length())
748         return notFound;
749     // delta is the number of additional times to test; delta == 0 means test only once.
750     unsigned delta = min(index, length() - matchLength);
751 
752     const UChar *searchCharacters = characters();
753     const UChar *matchCharacters = matchString->characters();
754 
755     // keep looping until we match
756     while (!equalIgnoringCase(searchCharacters + delta, matchCharacters, matchLength)) {
757         if (!delta)
758             return notFound;
759         delta--;
760     }
761     return delta;
762 }
763 
endsWith(StringImpl * m_data,bool caseSensitive)764 bool StringImpl::endsWith(StringImpl* m_data, bool caseSensitive)
765 {
766     ASSERT(m_data);
767     if (m_length >= m_data->m_length) {
768         unsigned start = m_length - m_data->m_length;
769         return (caseSensitive ? find(m_data, start) : findIgnoringCase(m_data, start)) == start;
770     }
771     return false;
772 }
773 
replace(UChar oldC,UChar newC)774 PassRefPtr<StringImpl> StringImpl::replace(UChar oldC, UChar newC)
775 {
776     if (oldC == newC)
777         return this;
778     unsigned i;
779     for (i = 0; i != m_length; ++i)
780         if (m_data[i] == oldC)
781             break;
782     if (i == m_length)
783         return this;
784 
785     UChar* data;
786     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data);
787 
788     for (i = 0; i != m_length; ++i) {
789         UChar ch = m_data[i];
790         if (ch == oldC)
791             ch = newC;
792         data[i] = ch;
793     }
794     return newImpl.release();
795 }
796 
replace(unsigned position,unsigned lengthToReplace,StringImpl * str)797 PassRefPtr<StringImpl> StringImpl::replace(unsigned position, unsigned lengthToReplace, StringImpl* str)
798 {
799     position = min(position, length());
800     lengthToReplace = min(lengthToReplace, length() - position);
801     unsigned lengthToInsert = str ? str->length() : 0;
802     if (!lengthToReplace && !lengthToInsert)
803         return this;
804     UChar* data;
805 
806     if ((length() - lengthToReplace) >= (numeric_limits<unsigned>::max() - lengthToInsert))
807         CRASH();
808 
809     RefPtr<StringImpl> newImpl =
810         createUninitialized(length() - lengthToReplace + lengthToInsert, data);
811     memcpy(data, characters(), position * sizeof(UChar));
812     if (str)
813         memcpy(data + position, str->characters(), lengthToInsert * sizeof(UChar));
814     memcpy(data + position + lengthToInsert, characters() + position + lengthToReplace,
815         (length() - position - lengthToReplace) * sizeof(UChar));
816     return newImpl.release();
817 }
818 
replace(UChar pattern,StringImpl * replacement)819 PassRefPtr<StringImpl> StringImpl::replace(UChar pattern, StringImpl* replacement)
820 {
821     if (!replacement)
822         return this;
823 
824     unsigned repStrLength = replacement->length();
825     size_t srcSegmentStart = 0;
826     unsigned matchCount = 0;
827 
828     // Count the matches
829     while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) {
830         ++matchCount;
831         ++srcSegmentStart;
832     }
833 
834     // If we have 0 matches, we don't have to do any more work
835     if (!matchCount)
836         return this;
837 
838     if (repStrLength && matchCount > numeric_limits<unsigned>::max() / repStrLength)
839         CRASH();
840 
841     unsigned replaceSize = matchCount * repStrLength;
842     unsigned newSize = m_length - matchCount;
843     if (newSize >= (numeric_limits<unsigned>::max() - replaceSize))
844         CRASH();
845 
846     newSize += replaceSize;
847 
848     UChar* data;
849     RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
850 
851     // Construct the new data
852     size_t srcSegmentEnd;
853     unsigned srcSegmentLength;
854     srcSegmentStart = 0;
855     unsigned dstOffset = 0;
856 
857     while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) {
858         srcSegmentLength = srcSegmentEnd - srcSegmentStart;
859         memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
860         dstOffset += srcSegmentLength;
861         memcpy(data + dstOffset, replacement->m_data, repStrLength * sizeof(UChar));
862         dstOffset += repStrLength;
863         srcSegmentStart = srcSegmentEnd + 1;
864     }
865 
866     srcSegmentLength = m_length - srcSegmentStart;
867     memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
868 
869     ASSERT(dstOffset + srcSegmentLength == newImpl->length());
870 
871     return newImpl.release();
872 }
873 
replace(StringImpl * pattern,StringImpl * replacement)874 PassRefPtr<StringImpl> StringImpl::replace(StringImpl* pattern, StringImpl* replacement)
875 {
876     if (!pattern || !replacement)
877         return this;
878 
879     unsigned patternLength = pattern->length();
880     if (!patternLength)
881         return this;
882 
883     unsigned repStrLength = replacement->length();
884     size_t srcSegmentStart = 0;
885     unsigned matchCount = 0;
886 
887     // Count the matches
888     while ((srcSegmentStart = find(pattern, srcSegmentStart)) != notFound) {
889         ++matchCount;
890         srcSegmentStart += patternLength;
891     }
892 
893     // If we have 0 matches, we don't have to do any more work
894     if (!matchCount)
895         return this;
896 
897     unsigned newSize = m_length - matchCount * patternLength;
898     if (repStrLength && matchCount > numeric_limits<unsigned>::max() / repStrLength)
899         CRASH();
900 
901     if (newSize > (numeric_limits<unsigned>::max() - matchCount * repStrLength))
902         CRASH();
903 
904     newSize += matchCount * repStrLength;
905 
906     UChar* data;
907     RefPtr<StringImpl> newImpl = createUninitialized(newSize, data);
908 
909     // Construct the new data
910     size_t srcSegmentEnd;
911     unsigned srcSegmentLength;
912     srcSegmentStart = 0;
913     unsigned dstOffset = 0;
914 
915     while ((srcSegmentEnd = find(pattern, srcSegmentStart)) != notFound) {
916         srcSegmentLength = srcSegmentEnd - srcSegmentStart;
917         memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
918         dstOffset += srcSegmentLength;
919         memcpy(data + dstOffset, replacement->m_data, repStrLength * sizeof(UChar));
920         dstOffset += repStrLength;
921         srcSegmentStart = srcSegmentEnd + patternLength;
922     }
923 
924     srcSegmentLength = m_length - srcSegmentStart;
925     memcpy(data + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar));
926 
927     ASSERT(dstOffset + srcSegmentLength == newImpl->length());
928 
929     return newImpl.release();
930 }
931 
equal(const StringImpl * a,const StringImpl * b)932 bool equal(const StringImpl* a, const StringImpl* b)
933 {
934     return StringHash::equal(a, b);
935 }
936 
equal(const StringImpl * a,const char * b)937 bool equal(const StringImpl* a, const char* b)
938 {
939     if (!a)
940         return !b;
941     if (!b)
942         return !a;
943 
944     unsigned length = a->length();
945     const UChar* as = a->characters();
946     for (unsigned i = 0; i != length; ++i) {
947         unsigned char bc = b[i];
948         if (!bc)
949             return false;
950         if (as[i] != bc)
951             return false;
952     }
953 
954     return !b[length];
955 }
956 
equalIgnoringCase(StringImpl * a,StringImpl * b)957 bool equalIgnoringCase(StringImpl* a, StringImpl* b)
958 {
959     return CaseFoldingHash::equal(a, b);
960 }
961 
equalIgnoringCase(StringImpl * a,const char * b)962 bool equalIgnoringCase(StringImpl* a, const char* b)
963 {
964     if (!a)
965         return !b;
966     if (!b)
967         return !a;
968 
969     unsigned length = a->length();
970     const UChar* as = a->characters();
971 
972     // Do a faster loop for the case where all the characters are ASCII.
973     UChar ored = 0;
974     bool equal = true;
975     for (unsigned i = 0; i != length; ++i) {
976         char bc = b[i];
977         if (!bc)
978             return false;
979         UChar ac = as[i];
980         ored |= ac;
981         equal = equal && (toASCIILower(ac) == toASCIILower(bc));
982     }
983 
984     // Do a slower implementation for cases that include non-ASCII characters.
985     if (ored & ~0x7F) {
986         equal = true;
987         for (unsigned i = 0; i != length; ++i) {
988             unsigned char bc = b[i];
989             equal = equal && (foldCase(as[i]) == foldCase(bc));
990         }
991     }
992 
993     return equal && !b[length];
994 }
995 
equalIgnoringNullity(StringImpl * a,StringImpl * b)996 bool equalIgnoringNullity(StringImpl* a, StringImpl* b)
997 {
998     if (StringHash::equal(a, b))
999         return true;
1000     if (!a && b && !b->length())
1001         return true;
1002     if (!b && a && !a->length())
1003         return true;
1004 
1005     return false;
1006 }
1007 
defaultWritingDirection(bool * hasStrongDirectionality)1008 WTF::Unicode::Direction StringImpl::defaultWritingDirection(bool* hasStrongDirectionality)
1009 {
1010     for (unsigned i = 0; i < m_length; ++i) {
1011         WTF::Unicode::Direction charDirection = WTF::Unicode::direction(m_data[i]);
1012         if (charDirection == WTF::Unicode::LeftToRight) {
1013             if (hasStrongDirectionality)
1014                 *hasStrongDirectionality = true;
1015             return WTF::Unicode::LeftToRight;
1016         }
1017         if (charDirection == WTF::Unicode::RightToLeft || charDirection == WTF::Unicode::RightToLeftArabic) {
1018             if (hasStrongDirectionality)
1019                 *hasStrongDirectionality = true;
1020             return WTF::Unicode::RightToLeft;
1021         }
1022     }
1023     if (hasStrongDirectionality)
1024         *hasStrongDirectionality = false;
1025     return WTF::Unicode::LeftToRight;
1026 }
1027 
1028 // This is a hot function because it's used when parsing HTML.
createStrippingNullCharactersSlowCase(const UChar * characters,unsigned length)1029 PassRefPtr<StringImpl> StringImpl::createStrippingNullCharactersSlowCase(const UChar* characters, unsigned length)
1030 {
1031     StringBuffer strippedCopy(length);
1032     unsigned strippedLength = 0;
1033     for (unsigned i = 0; i < length; i++) {
1034         if (int c = characters[i])
1035             strippedCopy[strippedLength++] = c;
1036     }
1037     ASSERT(strippedLength < length);  // Only take the slow case when stripping.
1038     strippedCopy.shrink(strippedLength);
1039     return adopt(strippedCopy);
1040 }
1041 
adopt(StringBuffer & buffer)1042 PassRefPtr<StringImpl> StringImpl::adopt(StringBuffer& buffer)
1043 {
1044     unsigned length = buffer.length();
1045     if (length == 0)
1046         return empty();
1047     return adoptRef(new StringImpl(buffer.release(), length));
1048 }
1049 
createWithTerminatingNullCharacter(const StringImpl & string)1050 PassRefPtr<StringImpl> StringImpl::createWithTerminatingNullCharacter(const StringImpl& string)
1051 {
1052     // Use createUninitialized instead of 'new StringImpl' so that the string and its buffer
1053     // get allocated in a single memory block.
1054     UChar* data;
1055     unsigned length = string.m_length;
1056     if (length >= numeric_limits<unsigned>::max())
1057         CRASH();
1058     RefPtr<StringImpl> terminatedString = createUninitialized(length + 1, data);
1059     memcpy(data, string.m_data, length * sizeof(UChar));
1060     data[length] = 0;
1061     terminatedString->m_length--;
1062     terminatedString->m_hash = string.m_hash;
1063     terminatedString->m_refCountAndFlags |= s_refCountFlagHasTerminatingNullCharacter;
1064     return terminatedString.release();
1065 }
1066 
threadsafeCopy() const1067 PassRefPtr<StringImpl> StringImpl::threadsafeCopy() const
1068 {
1069     return create(m_data, m_length);
1070 }
1071 
crossThreadString()1072 PassRefPtr<StringImpl> StringImpl::crossThreadString()
1073 {
1074     if (SharedUChar* sharedBuffer = this->sharedBuffer())
1075         return adoptRef(new StringImpl(m_data, m_length, sharedBuffer->crossThreadCopy()));
1076 
1077     // If no shared buffer is available, create a copy.
1078     return threadsafeCopy();
1079 }
1080 
1081 } // namespace WTF
1082