• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3  *  Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4  *  Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5  *  Copyright (C) 2009 Google Inc. All rights reserved.
6  *
7  *  This library is free software; you can redistribute it and/or
8  *  modify it under the terms of the GNU Library General Public
9  *  License as published by the Free Software Foundation; either
10  *  version 2 of the License, or (at your option) any later version.
11  *
12  *  This library is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  *  Library General Public License for more details.
16  *
17  *  You should have received a copy of the GNU Library General Public License
18  *  along with this library; see the file COPYING.LIB.  If not, write to
19  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20  *  Boston, MA 02110-1301, USA.
21  *
22  */
23 
24 #include "config.h"
25 #include "UString.h"
26 
27 #include "JSGlobalObjectFunctions.h"
28 #include "Heap.h"
29 #include "Identifier.h"
30 #include "Operations.h"
31 #include <ctype.h>
32 #include <limits.h>
33 #include <limits>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <wtf/ASCIICType.h>
37 #include <wtf/Assertions.h>
38 #include <wtf/DecimalNumber.h>
39 #include <wtf/MathExtras.h>
40 #include <wtf/StringExtras.h>
41 #include <wtf/Vector.h>
42 #include <wtf/unicode/UTF8.h>
43 
44 #if HAVE(STRINGS_H)
45 #include <strings.h>
46 #endif
47 
48 using namespace WTF;
49 using namespace WTF::Unicode;
50 using namespace std;
51 
52 namespace JSC {
53 
// Declarations only: NaN and Inf are defined in another translation unit.
// NOTE(review): neither constant is referenced in the visible part of this file — confirm before removing.
54 extern const double NaN;
55 extern const double Inf;
56 
// Build-time check (presumably via the assertion macro from wtf/Assertions.h) that a
// UString is exactly the size of one pointer.
57 COMPILE_ASSERT(sizeof(UString) == sizeof(void*), UString_should_stay_small);
58 
59 // Construct a string with UTF-16 data.
UString(const UChar * characters,unsigned length)60 UString::UString(const UChar* characters, unsigned length)
61     : m_impl(characters ? StringImpl::create(characters, length) : 0)
62 {
63 }
64 
65 // Construct a string with UTF-16 data, from a null-terminated source.
UString(const UChar * characters)66 UString::UString(const UChar* characters)
67 {
68     if (!characters)
69         return;
70 
71     int length = 0;
72     while (characters[length] != UChar(0))
73         ++length;
74 
75     m_impl = StringImpl::create(characters, length);
76 }
77 
78 // Construct a string with latin1 data.
UString(const char * characters,unsigned length)79 UString::UString(const char* characters, unsigned length)
80     : m_impl(characters ? StringImpl::create(characters, length) : 0)
81 {
82 }
83 
84 // Construct a string with latin1 data, from a null-terminated source.
UString(const char * characters)85 UString::UString(const char* characters)
86     : m_impl(characters ? StringImpl::create(characters) : 0)
87 {
88 }
89 
number(int i)90 UString UString::number(int i)
91 {
92     UChar buf[1 + sizeof(i) * 3];
93     UChar* end = buf + WTF_ARRAY_LENGTH(buf);
94     UChar* p = end;
95 
96     if (i == 0)
97         *--p = '0';
98     else if (i == INT_MIN) {
99         char minBuf[1 + sizeof(i) * 3];
100         snprintf(minBuf, sizeof(minBuf), "%d", INT_MIN);
101         return UString(minBuf);
102     } else {
103         bool negative = false;
104         if (i < 0) {
105             negative = true;
106             i = -i;
107         }
108         while (i) {
109             *--p = static_cast<unsigned short>((i % 10) + '0');
110             i /= 10;
111         }
112         if (negative)
113             *--p = '-';
114     }
115 
116     return UString(p, static_cast<unsigned>(end - p));
117 }
118 
number(long long i)119 UString UString::number(long long i)
120 {
121     UChar buf[1 + sizeof(i) * 3];
122     UChar* end = buf + WTF_ARRAY_LENGTH(buf);
123     UChar* p = end;
124 
125     if (i == 0)
126         *--p = '0';
127     else if (i == std::numeric_limits<long long>::min()) {
128         char minBuf[1 + sizeof(i) * 3];
129 #if OS(WINDOWS)
130         snprintf(minBuf, sizeof(minBuf), "%I64d", std::numeric_limits<long long>::min());
131 #else
132         snprintf(minBuf, sizeof(minBuf), "%lld", std::numeric_limits<long long>::min());
133 #endif
134         return UString(minBuf);
135     } else {
136         bool negative = false;
137         if (i < 0) {
138             negative = true;
139             i = -i;
140         }
141         while (i) {
142             *--p = static_cast<unsigned short>((i % 10) + '0');
143             i /= 10;
144         }
145         if (negative)
146             *--p = '-';
147     }
148 
149     return UString(p, static_cast<unsigned>(end - p));
150 }
151 
number(unsigned u)152 UString UString::number(unsigned u)
153 {
154     UChar buf[sizeof(u) * 3];
155     UChar* end = buf + WTF_ARRAY_LENGTH(buf);
156     UChar* p = end;
157 
158     if (u == 0)
159         *--p = '0';
160     else {
161         while (u) {
162             *--p = static_cast<unsigned short>((u % 10) + '0');
163             u /= 10;
164         }
165     }
166 
167     return UString(p, static_cast<unsigned>(end - p));
168 }
169 
number(long l)170 UString UString::number(long l)
171 {
172     UChar buf[1 + sizeof(l) * 3];
173     UChar* end = buf + WTF_ARRAY_LENGTH(buf);
174     UChar* p = end;
175 
176     if (l == 0)
177         *--p = '0';
178     else if (l == LONG_MIN) {
179         char minBuf[1 + sizeof(l) * 3];
180         snprintf(minBuf, sizeof(minBuf), "%ld", LONG_MIN);
181         return UString(minBuf);
182     } else {
183         bool negative = false;
184         if (l < 0) {
185             negative = true;
186             l = -l;
187         }
188         while (l) {
189             *--p = static_cast<unsigned short>((l % 10) + '0');
190             l /= 10;
191         }
192         if (negative)
193             *--p = '-';
194     }
195 
196     return UString(p, end - p);
197 }
198 
number(double d)199 UString UString::number(double d)
200 {
201     NumberToStringBuffer buffer;
202     unsigned length = numberToString(d, buffer);
203     return UString(buffer, length);
204 }
205 
substringSharingImpl(unsigned offset,unsigned length) const206 UString UString::substringSharingImpl(unsigned offset, unsigned length) const
207 {
208     // FIXME: We used to check against a limit of Heap::minExtraCost / sizeof(UChar).
209 
210     unsigned stringLength = this->length();
211     offset = min(offset, stringLength);
212     length = min(length, stringLength - offset);
213 
214     if (!offset && length == stringLength)
215         return *this;
216     return UString(StringImpl::create(m_impl, offset, length));
217 }
218 
operator ==(const UString & s1,const char * s2)219 bool operator==(const UString& s1, const char *s2)
220 {
221     if (s2 == 0)
222         return s1.isEmpty();
223 
224     const UChar* u = s1.characters();
225     const UChar* uend = u + s1.length();
226     while (u != uend && *s2) {
227         if (u[0] != (unsigned char)*s2)
228             return false;
229         s2++;
230         u++;
231     }
232 
233     return u == uend && *s2 == 0;
234 }
235 
operator <(const UString & s1,const UString & s2)236 bool operator<(const UString& s1, const UString& s2)
237 {
238     const unsigned l1 = s1.length();
239     const unsigned l2 = s2.length();
240     const unsigned lmin = l1 < l2 ? l1 : l2;
241     const UChar* c1 = s1.characters();
242     const UChar* c2 = s2.characters();
243     unsigned l = 0;
244     while (l < lmin && *c1 == *c2) {
245         c1++;
246         c2++;
247         l++;
248     }
249     if (l < lmin)
250         return (c1[0] < c2[0]);
251 
252     return (l1 < l2);
253 }
254 
operator >(const UString & s1,const UString & s2)255 bool operator>(const UString& s1, const UString& s2)
256 {
257     const unsigned l1 = s1.length();
258     const unsigned l2 = s2.length();
259     const unsigned lmin = l1 < l2 ? l1 : l2;
260     const UChar* c1 = s1.characters();
261     const UChar* c2 = s2.characters();
262     unsigned l = 0;
263     while (l < lmin && *c1 == *c2) {
264         c1++;
265         c2++;
266         l++;
267     }
268     if (l < lmin)
269         return (c1[0] > c2[0]);
270 
271     return (l1 > l2);
272 }
273 
ascii() const274 CString UString::ascii() const
275 {
276     // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are
277     // preserved, characters outside of this range are converted to '?'.
278 
279     unsigned length = this->length();
280     const UChar* characters = this->characters();
281 
282     char* characterBuffer;
283     CString result = CString::newUninitialized(length, characterBuffer);
284 
285     for (unsigned i = 0; i < length; ++i) {
286         UChar ch = characters[i];
287         characterBuffer[i] = ch && (ch < 0x20 || ch >= 0x7f) ? '?' : ch;
288     }
289 
290     return result;
291 }
292 
latin1() const293 CString UString::latin1() const
294 {
295     // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are
296     // preserved, characters outside of this range are converted to '?'.
297 
298     unsigned length = this->length();
299     const UChar* characters = this->characters();
300 
301     char* characterBuffer;
302     CString result = CString::newUninitialized(length, characterBuffer);
303 
304     for (unsigned i = 0; i < length; ++i) {
305         UChar ch = characters[i];
306         characterBuffer[i] = ch > 0xff ? '?' : ch;
307     }
308 
309     return result;
310 }
311 
312 // Helper to write a three-byte UTF-8 code point to the buffer, caller must check room is available.
putUTF8Triple(char * & buffer,UChar ch)313 static inline void putUTF8Triple(char*& buffer, UChar ch)
314 {
315     ASSERT(ch >= 0x0800);
316     *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0);
317     *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80);
318     *buffer++ = static_cast<char>((ch & 0x3F) | 0x80);
319 }
320 
utf8(bool strict) const321 CString UString::utf8(bool strict) const
322 {
323     unsigned length = this->length();
324     const UChar* characters = this->characters();
325 
326     // Allocate a buffer big enough to hold all the characters
327     // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).
328     // Optimization ideas, if we find this function is hot:
329     //  * We could speculatively create a CStringBuffer to contain 'length'
330     //    characters, and resize if necessary (i.e. if the buffer contains
331     //    non-ascii characters). (Alternatively, scan the buffer first for
332     //    ascii characters, so we know this will be sufficient).
333     //  * We could allocate a CStringBuffer with an appropriate size to
334     //    have a good chance of being able to write the string into the
335     //    buffer without reallocing (say, 1.5 x length).
336     if (length > numeric_limits<unsigned>::max() / 3)
337         return CString();
338     Vector<char, 1024> bufferVector(length * 3);
339 
340     char* buffer = bufferVector.data();
341     ConversionResult result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size(), strict);
342     ASSERT(result != targetExhausted); // (length * 3) should be sufficient for any conversion
343 
344     // Only produced from strict conversion.
345     if (result == sourceIllegal)
346         return CString();
347 
348     // Check for an unconverted high surrogate.
349     if (result == sourceExhausted) {
350         if (strict)
351             return CString();
352         // This should be one unpaired high surrogate. Treat it the same
353         // was as an unpaired high surrogate would have been handled in
354         // the middle of a string with non-strict conversion - which is
355         // to say, simply encode it to UTF-8.
356         ASSERT((characters + 1) == (this->characters() + length));
357         ASSERT((*characters >= 0xD800) && (*characters <= 0xDBFF));
358         // There should be room left, since one UChar hasn't been converted.
359         ASSERT((buffer + 3) <= (buffer + bufferVector.size()));
360         putUTF8Triple(buffer, *characters);
361     }
362 
363     return CString(bufferVector.data(), buffer - bufferVector.data());
364 }
365 
366 } // namespace JSC
367