• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3  *  Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4  *  Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5  *  Copyright (C) 2009 Google Inc. All rights reserved.
6  *
7  *  This library is free software; you can redistribute it and/or
8  *  modify it under the terms of the GNU Library General Public
9  *  License as published by the Free Software Foundation; either
10  *  version 2 of the License, or (at your option) any later version.
11  *
12  *  This library is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  *  Library General Public License for more details.
16  *
17  *  You should have received a copy of the GNU Library General Public License
18  *  along with this library; see the file COPYING.LIB.  If not, write to
19  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20  *  Boston, MA 02110-1301, USA.
21  *
22  */
23 
24 #include "config.h"
25 #include "UString.h"
26 
27 #include "JSGlobalObjectFunctions.h"
28 #include "Collector.h"
29 #include "dtoa.h"
30 #include "Identifier.h"
31 #include "Operations.h"
32 #include <ctype.h>
33 #include <limits.h>
34 #include <limits>
35 #include <math.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <wtf/ASCIICType.h>
40 #include <wtf/Assertions.h>
41 #include <wtf/MathExtras.h>
42 #include <wtf/StringExtras.h>
43 #include <wtf/Vector.h>
44 #include <wtf/unicode/UTF8.h>
45 #include <wtf/StringExtras.h>
46 
47 #if HAVE(STRINGS_H)
48 #include <strings.h>
49 #endif
50 
51 using namespace WTF;
52 using namespace WTF::Unicode;
53 using namespace std;
54 
55 namespace JSC {
56 
57 extern const double NaN;
58 extern const double Inf;
59 
CString(const char * c)60 CString::CString(const char* c)
61     : m_length(strlen(c))
62     , m_data(new char[m_length + 1])
63 {
64     memcpy(m_data, c, m_length + 1);
65 }
66 
CString(const char * c,size_t length)67 CString::CString(const char* c, size_t length)
68     : m_length(length)
69     , m_data(new char[length + 1])
70 {
71     memcpy(m_data, c, m_length);
72     m_data[m_length] = 0;
73 }
74 
// Copy constructor: duplicates the buffer of |b|. A CString without a buffer
// (null m_data) is copied as a CString without a buffer.
CString::CString(const CString& b)
{
    m_length = b.m_length;
    m_data = 0;
    if (!b.m_data)
        return;

    m_data = new char[m_length + 1];
    memcpy(m_data, b.m_data, m_length + 1);
}
84 
~CString()85 CString::~CString()
86 {
87     delete [] m_data;
88 }
89 
adopt(char * c,size_t length)90 CString CString::adopt(char* c, size_t length)
91 {
92     CString s;
93     s.m_data = c;
94     s.m_length = length;
95     return s;
96 }
97 
// Appends the contents of |t| to this string and returns *this.
// A new buffer is always allocated; the old one is released afterwards.
CString& CString::append(const CString& t)
{
    const size_t oldLength = m_length;
    const size_t addedLength = t.m_length;
    const size_t combinedLength = oldLength + addedLength;

    char* combined = new char[combinedLength + 1];
    // Skip the copies for empty parts, whose data pointers may be null.
    if (oldLength)
        memcpy(combined, m_data, oldLength);
    if (addedLength)
        memcpy(combined + oldLength, t.m_data, addedLength);
    combined[combinedLength] = '\0';

    delete[] m_data;
    m_data = combined;
    m_length = combinedLength;

    return *this;
}
114 
operator =(const char * c)115 CString& CString::operator=(const char* c)
116 {
117     if (m_data)
118         delete [] m_data;
119     m_length = strlen(c);
120     m_data = new char[m_length + 1];
121     memcpy(m_data, c, m_length + 1);
122 
123     return *this;
124 }
125 
// Copy assignment: replaces the contents with a copy of |str| and returns
// *this. A CString without a buffer is copied as a CString without a buffer.
CString& CString::operator=(const CString& str)
{
    if (this == &str)
        return *this;

    // Allocate and fill the replacement before freeing the current buffer, so
    // that m_data never dangles (and is never deleted twice) if the allocation
    // throws.
    char* newData = 0;
    if (str.m_data) {
        newData = new char[str.m_length + 1];
        memcpy(newData, str.m_data, str.m_length + 1);
    }

    delete[] m_data;
    m_data = newData;
    m_length = str.m_length;

    return *this;
}
142 
operator ==(const CString & c1,const CString & c2)143 bool operator==(const CString& c1, const CString& c2)
144 {
145     size_t len = c1.size();
146     return len == c2.size() && (len == 0 || memcmp(c1.c_str(), c2.c_str(), len) == 0);
147 }
148 
149 // These static strings are immutable, except for rc, whose initial value is chosen to
150 // reduce the possibility of it becoming zero due to ref/deref not being thread-safe.
151 static UChar sharedEmptyChar;
152 UStringImpl* UStringImpl::s_empty;
153 
154 UString::Rep* UString::s_nullRep;
155 UString* UString::s_nullUString;
156 
initializeUString()157 void initializeUString()
158 {
159     UStringImpl::s_empty = new UStringImpl(&sharedEmptyChar, 0, UStringImpl::ConstructStaticString);
160 
161     UString::s_nullRep = new UStringImpl(0, 0, UStringImpl::ConstructStaticString);
162     UString::s_nullUString = new UString;
163 }
164 
UString(const char * c)165 UString::UString(const char* c)
166     : m_rep(Rep::create(c))
167 {
168 }
169 
UString(const char * c,int length)170 UString::UString(const char* c, int length)
171     : m_rep(Rep::create(c, length))
172 {
173 }
174 
UString(const UChar * c,int length)175 UString::UString(const UChar* c, int length)
176 {
177     if (length == 0)
178         m_rep = &Rep::empty();
179     else
180         m_rep = Rep::create(c, length);
181 }
182 
from(int i)183 UString UString::from(int i)
184 {
185     UChar buf[1 + sizeof(i) * 3];
186     UChar* end = buf + sizeof(buf) / sizeof(UChar);
187     UChar* p = end;
188 
189     if (i == 0)
190         *--p = '0';
191     else if (i == INT_MIN) {
192         char minBuf[1 + sizeof(i) * 3];
193         sprintf(minBuf, "%d", INT_MIN);
194         return UString(minBuf);
195     } else {
196         bool negative = false;
197         if (i < 0) {
198             negative = true;
199             i = -i;
200         }
201         while (i) {
202             *--p = static_cast<unsigned short>((i % 10) + '0');
203             i /= 10;
204         }
205         if (negative)
206             *--p = '-';
207     }
208 
209     return UString(p, static_cast<int>(end - p));
210 }
211 
from(long long i)212 UString UString::from(long long i)
213 {
214     UChar buf[1 + sizeof(i) * 3];
215     UChar* end = buf + sizeof(buf) / sizeof(UChar);
216     UChar* p = end;
217 
218     if (i == 0)
219         *--p = '0';
220     else if (i == std::numeric_limits<long long>::min()) {
221         char minBuf[1 + sizeof(i) * 3];
222 #if OS(WINDOWS)
223         snprintf(minBuf, sizeof(minBuf) - 1, "%I64d", std::numeric_limits<long long>::min());
224 #else
225         snprintf(minBuf, sizeof(minBuf) - 1, "%lld", std::numeric_limits<long long>::min());
226 #endif
227         return UString(minBuf);
228     } else {
229         bool negative = false;
230         if (i < 0) {
231             negative = true;
232             i = -i;
233         }
234         while (i) {
235             *--p = static_cast<unsigned short>((i % 10) + '0');
236             i /= 10;
237         }
238         if (negative)
239             *--p = '-';
240     }
241 
242     return UString(p, static_cast<int>(end - p));
243 }
244 
from(unsigned int u)245 UString UString::from(unsigned int u)
246 {
247     UChar buf[sizeof(u) * 3];
248     UChar* end = buf + sizeof(buf) / sizeof(UChar);
249     UChar* p = end;
250 
251     if (u == 0)
252         *--p = '0';
253     else {
254         while (u) {
255             *--p = static_cast<unsigned short>((u % 10) + '0');
256             u /= 10;
257         }
258     }
259 
260     return UString(p, static_cast<int>(end - p));
261 }
262 
from(long l)263 UString UString::from(long l)
264 {
265     UChar buf[1 + sizeof(l) * 3];
266     UChar* end = buf + sizeof(buf) / sizeof(UChar);
267     UChar* p = end;
268 
269     if (l == 0)
270         *--p = '0';
271     else if (l == LONG_MIN) {
272         char minBuf[1 + sizeof(l) * 3];
273         sprintf(minBuf, "%ld", LONG_MIN);
274         return UString(minBuf);
275     } else {
276         bool negative = false;
277         if (l < 0) {
278             negative = true;
279             l = -l;
280         }
281         while (l) {
282             *--p = static_cast<unsigned short>((l % 10) + '0');
283             l /= 10;
284         }
285         if (negative)
286             *--p = '-';
287     }
288 
289     return UString(p, static_cast<int>(end - p));
290 }
291 
from(double d)292 UString UString::from(double d)
293 {
294     DtoaBuffer buffer;
295     unsigned length;
296     doubleToStringInJavaScriptFormat(d, buffer, &length);
297     return UString(buffer, length);
298 }
299 
getCString(CStringBuffer & buffer) const300 bool UString::getCString(CStringBuffer& buffer) const
301 {
302     int length = size();
303     int neededSize = length + 1;
304     buffer.resize(neededSize);
305     char* buf = buffer.data();
306 
307     UChar ored = 0;
308     const UChar* p = data();
309     char* q = buf;
310     const UChar* limit = p + length;
311     while (p != limit) {
312         UChar c = p[0];
313         ored |= c;
314         *q = static_cast<char>(c);
315         ++p;
316         ++q;
317     }
318     *q = '\0';
319 
320     return !(ored & 0xFF00);
321 }
322 
ascii() const323 char* UString::ascii() const
324 {
325     static char* asciiBuffer = 0;
326 
327     int length = size();
328     int neededSize = length + 1;
329     delete[] asciiBuffer;
330     asciiBuffer = new char[neededSize];
331 
332     const UChar* p = data();
333     char* q = asciiBuffer;
334     const UChar* limit = p + length;
335     while (p != limit) {
336         *q = static_cast<char>(p[0]);
337         ++p;
338         ++q;
339     }
340     *q = '\0';
341 
342     return asciiBuffer;
343 }
344 
is8Bit() const345 bool UString::is8Bit() const
346 {
347     const UChar* u = data();
348     const UChar* limit = u + size();
349     while (u < limit) {
350         if (u[0] > 0xFF)
351             return false;
352         ++u;
353     }
354 
355     return true;
356 }
357 
operator [](int pos) const358 UChar UString::operator[](int pos) const
359 {
360     if (pos >= size())
361         return '\0';
362     return data()[pos];
363 }
364 
toDouble(bool tolerateTrailingJunk,bool tolerateEmptyString) const365 double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const
366 {
367     if (size() == 1) {
368         UChar c = data()[0];
369         if (isASCIIDigit(c))
370             return c - '0';
371         if (isASCIISpace(c) && tolerateEmptyString)
372             return 0;
373         return NaN;
374     }
375 
376     // FIXME: If tolerateTrailingJunk is true, then we want to tolerate non-8-bit junk
377     // after the number, so this is too strict a check.
378     CStringBuffer s;
379     if (!getCString(s))
380         return NaN;
381     const char* c = s.data();
382 
383     // skip leading white space
384     while (isASCIISpace(*c))
385         c++;
386 
387     // empty string ?
388     if (*c == '\0')
389         return tolerateEmptyString ? 0.0 : NaN;
390 
391     double d;
392 
393     // hex number ?
394     if (*c == '0' && (*(c + 1) == 'x' || *(c + 1) == 'X')) {
395         const char* firstDigitPosition = c + 2;
396         c++;
397         d = 0.0;
398         while (*(++c)) {
399             if (*c >= '0' && *c <= '9')
400                 d = d * 16.0 + *c - '0';
401             else if ((*c >= 'A' && *c <= 'F') || (*c >= 'a' && *c <= 'f'))
402                 d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0;
403             else
404                 break;
405         }
406 
407         if (d >= mantissaOverflowLowerBound)
408             d = parseIntOverflow(firstDigitPosition, c - firstDigitPosition, 16);
409     } else {
410         // regular number ?
411         char* end;
412         d = WTF::strtod(c, &end);
413         if ((d != 0.0 || end != c) && d != Inf && d != -Inf) {
414             c = end;
415         } else {
416             double sign = 1.0;
417 
418             if (*c == '+')
419                 c++;
420             else if (*c == '-') {
421                 sign = -1.0;
422                 c++;
423             }
424 
425             // We used strtod() to do the conversion. However, strtod() handles
426             // infinite values slightly differently than JavaScript in that it
427             // converts the string "inf" with any capitalization to infinity,
428             // whereas the ECMA spec requires that it be converted to NaN.
429 
430             if (c[0] == 'I' && c[1] == 'n' && c[2] == 'f' && c[3] == 'i' && c[4] == 'n' && c[5] == 'i' && c[6] == 't' && c[7] == 'y') {
431                 d = sign * Inf;
432                 c += 8;
433             } else if ((d == Inf || d == -Inf) && *c != 'I' && *c != 'i')
434                 c = end;
435             else
436                 return NaN;
437         }
438     }
439 
440     // allow trailing white space
441     while (isASCIISpace(*c))
442         c++;
443     // don't allow anything after - unless tolerant=true
444     if (!tolerateTrailingJunk && *c != '\0')
445         d = NaN;
446 
447     return d;
448 }
449 
toDouble(bool tolerateTrailingJunk) const450 double UString::toDouble(bool tolerateTrailingJunk) const
451 {
452     return toDouble(tolerateTrailingJunk, true);
453 }
454 
toDouble() const455 double UString::toDouble() const
456 {
457     return toDouble(false, true);
458 }
459 
toUInt32(bool * ok) const460 uint32_t UString::toUInt32(bool* ok) const
461 {
462     double d = toDouble();
463     bool b = true;
464 
465     if (d != static_cast<uint32_t>(d)) {
466         b = false;
467         d = 0;
468     }
469 
470     if (ok)
471         *ok = b;
472 
473     return static_cast<uint32_t>(d);
474 }
475 
toUInt32(bool * ok,bool tolerateEmptyString) const476 uint32_t UString::toUInt32(bool* ok, bool tolerateEmptyString) const
477 {
478     double d = toDouble(false, tolerateEmptyString);
479     bool b = true;
480 
481     if (d != static_cast<uint32_t>(d)) {
482         b = false;
483         d = 0;
484     }
485 
486     if (ok)
487         *ok = b;
488 
489     return static_cast<uint32_t>(d);
490 }
491 
toStrictUInt32(bool * ok) const492 uint32_t UString::toStrictUInt32(bool* ok) const
493 {
494     if (ok)
495         *ok = false;
496 
497     // Empty string is not OK.
498     int len = m_rep->size();
499     if (len == 0)
500         return 0;
501     const UChar* p = m_rep->data();
502     unsigned short c = p[0];
503 
504     // If the first digit is 0, only 0 itself is OK.
505     if (c == '0') {
506         if (len == 1 && ok)
507             *ok = true;
508         return 0;
509     }
510 
511     // Convert to UInt32, checking for overflow.
512     uint32_t i = 0;
513     while (1) {
514         // Process character, turning it into a digit.
515         if (c < '0' || c > '9')
516             return 0;
517         const unsigned d = c - '0';
518 
519         // Multiply by 10, checking for overflow out of 32 bits.
520         if (i > 0xFFFFFFFFU / 10)
521             return 0;
522         i *= 10;
523 
524         // Add in the digit, checking for overflow out of 32 bits.
525         const unsigned max = 0xFFFFFFFFU - d;
526         if (i > max)
527             return 0;
528         i += d;
529 
530         // Handle end of string.
531         if (--len == 0) {
532             if (ok)
533                 *ok = true;
534             return i;
535         }
536 
537         // Get next character.
538         c = *(++p);
539     }
540 }
541 
find(const UString & f,int pos) const542 int UString::find(const UString& f, int pos) const
543 {
544     int fsz = f.size();
545 
546     if (pos < 0)
547         pos = 0;
548 
549     if (fsz == 1) {
550         UChar ch = f[0];
551         const UChar* end = data() + size();
552         for (const UChar* c = data() + pos; c < end; c++) {
553             if (*c == ch)
554                 return static_cast<int>(c - data());
555         }
556         return -1;
557     }
558 
559     int sz = size();
560     if (sz < fsz)
561         return -1;
562     if (fsz == 0)
563         return pos;
564     const UChar* end = data() + sz - fsz;
565     int fsizeminusone = (fsz - 1) * sizeof(UChar);
566     const UChar* fdata = f.data();
567     unsigned short fchar = fdata[0];
568     ++fdata;
569     for (const UChar* c = data() + pos; c <= end; c++) {
570         if (c[0] == fchar && !memcmp(c + 1, fdata, fsizeminusone))
571             return static_cast<int>(c - data());
572     }
573 
574     return -1;
575 }
576 
find(UChar ch,int pos) const577 int UString::find(UChar ch, int pos) const
578 {
579     if (pos < 0)
580         pos = 0;
581     const UChar* end = data() + size();
582     for (const UChar* c = data() + pos; c < end; c++) {
583         if (*c == ch)
584             return static_cast<int>(c - data());
585     }
586 
587     return -1;
588 }
589 
rfind(const UString & f,int pos) const590 int UString::rfind(const UString& f, int pos) const
591 {
592     int sz = size();
593     int fsz = f.size();
594     if (sz < fsz)
595         return -1;
596     if (pos < 0)
597         pos = 0;
598     if (pos > sz - fsz)
599         pos = sz - fsz;
600     if (fsz == 0)
601         return pos;
602     int fsizeminusone = (fsz - 1) * sizeof(UChar);
603     const UChar* fdata = f.data();
604     for (const UChar* c = data() + pos; c >= data(); c--) {
605         if (*c == *fdata && !memcmp(c + 1, fdata + 1, fsizeminusone))
606             return static_cast<int>(c - data());
607     }
608 
609     return -1;
610 }
611 
rfind(UChar ch,int pos) const612 int UString::rfind(UChar ch, int pos) const
613 {
614     if (isEmpty())
615         return -1;
616     if (pos + 1 >= size())
617         pos = size() - 1;
618     for (const UChar* c = data() + pos; c >= data(); c--) {
619         if (*c == ch)
620             return static_cast<int>(c - data());
621     }
622 
623     return -1;
624 }
625 
substr(int pos,int len) const626 UString UString::substr(int pos, int len) const
627 {
628     int s = size();
629 
630     if (pos < 0)
631         pos = 0;
632     else if (pos >= s)
633         pos = s;
634     if (len < 0)
635         len = s;
636     if (pos + len >= s)
637         len = s - pos;
638 
639     if (pos == 0 && len == s)
640         return *this;
641 
642     return UString(Rep::create(m_rep, pos, len));
643 }
644 
operator ==(const UString & s1,const char * s2)645 bool operator==(const UString& s1, const char *s2)
646 {
647     if (s2 == 0)
648         return s1.isEmpty();
649 
650     const UChar* u = s1.data();
651     const UChar* uend = u + s1.size();
652     while (u != uend && *s2) {
653         if (u[0] != (unsigned char)*s2)
654             return false;
655         s2++;
656         u++;
657     }
658 
659     return u == uend && *s2 == 0;
660 }
661 
operator <(const UString & s1,const UString & s2)662 bool operator<(const UString& s1, const UString& s2)
663 {
664     const int l1 = s1.size();
665     const int l2 = s2.size();
666     const int lmin = l1 < l2 ? l1 : l2;
667     const UChar* c1 = s1.data();
668     const UChar* c2 = s2.data();
669     int l = 0;
670     while (l < lmin && *c1 == *c2) {
671         c1++;
672         c2++;
673         l++;
674     }
675     if (l < lmin)
676         return (c1[0] < c2[0]);
677 
678     return (l1 < l2);
679 }
680 
operator >(const UString & s1,const UString & s2)681 bool operator>(const UString& s1, const UString& s2)
682 {
683     const int l1 = s1.size();
684     const int l2 = s2.size();
685     const int lmin = l1 < l2 ? l1 : l2;
686     const UChar* c1 = s1.data();
687     const UChar* c2 = s2.data();
688     int l = 0;
689     while (l < lmin && *c1 == *c2) {
690         c1++;
691         c2++;
692         l++;
693     }
694     if (l < lmin)
695         return (c1[0] > c2[0]);
696 
697     return (l1 > l2);
698 }
699 
compare(const UString & s1,const UString & s2)700 int compare(const UString& s1, const UString& s2)
701 {
702     const int l1 = s1.size();
703     const int l2 = s2.size();
704     const int lmin = l1 < l2 ? l1 : l2;
705     const UChar* c1 = s1.data();
706     const UChar* c2 = s2.data();
707     int l = 0;
708     while (l < lmin && *c1 == *c2) {
709         c1++;
710         c2++;
711         l++;
712     }
713 
714     if (l < lmin)
715         return (c1[0] > c2[0]) ? 1 : -1;
716 
717     if (l1 == l2)
718         return 0;
719 
720     return (l1 > l2) ? 1 : -1;
721 }
722 
equal(const UString::Rep * r,const UString::Rep * b)723 bool equal(const UString::Rep* r, const UString::Rep* b)
724 {
725     int length = r->size();
726     if (length != b->size())
727         return false;
728     const UChar* d = r->data();
729     const UChar* s = b->data();
730     for (int i = 0; i != length; ++i) {
731         if (d[i] != s[i])
732             return false;
733     }
734     return true;
735 }
736 
UTF8String(bool strict) const737 CString UString::UTF8String(bool strict) const
738 {
739     // Allocate a buffer big enough to hold all the characters.
740     const int length = size();
741     Vector<char, 1024> buffer(length * 3);
742 
743     // Convert to runs of 8-bit characters.
744     char* p = buffer.data();
745     const UChar* d = reinterpret_cast<const UChar*>(&data()[0]);
746     ConversionResult result = convertUTF16ToUTF8(&d, d + length, &p, p + buffer.size(), strict);
747     if (result != conversionOK)
748         return CString();
749 
750     return CString(buffer.data(), p - buffer.data());
751 }
752 
753 } // namespace JSC
754