1 /*
2 ******************************************************************************
3 * Copyright (C) 1999-2012, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ******************************************************************************
6 *
7 * File unistr.cpp
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 09/25/98 stephen Creation.
13 * 04/20/99 stephen Overhauled per 4/16 code review.
14 * 07/09/99 stephen Renamed {hi,lo},{byte,word} to icu_X for HP/UX
15 * 11/18/99 aliu Added handleReplaceBetween() to make inherit from
16 * Replaceable.
17 * 06/25/01 grhoten Removed the dependency on iostream
18 ******************************************************************************
19 */
20
21 #include "unicode/utypes.h"
22 #include "unicode/appendable.h"
23 #include "unicode/putil.h"
24 #include "cstring.h"
25 #include "cmemory.h"
26 #include "unicode/ustring.h"
27 #include "unicode/unistr.h"
28 #include "unicode/utf.h"
29 #include "unicode/utf16.h"
30 #include "uelement.h"
31 #include "ustr_imp.h"
32 #include "umutex.h"
33 #include "uassert.h"
34
35 #if 0
36
37 #include <iostream>
38 using namespace std;
39
40 //DEBUGGING
41 void
42 print(const UnicodeString& s,
43 const char *name)
44 {
45 UChar c;
46 cout << name << ":|";
47 for(int i = 0; i < s.length(); ++i) {
48 c = s[i];
49 if(c>= 0x007E || c < 0x0020)
50 cout << "[0x" << hex << s[i] << "]";
51 else
52 cout << (char) s[i];
53 }
54 cout << '|' << endl;
55 }
56
57 void
58 print(const UChar *s,
59 int32_t len,
60 const char *name)
61 {
62 UChar c;
63 cout << name << ":|";
64 for(int i = 0; i < len; ++i) {
65 c = s[i];
66 if(c>= 0x007E || c < 0x0020)
67 cout << "[0x" << hex << s[i] << "]";
68 else
69 cout << (char) s[i];
70 }
71 cout << '|' << endl;
72 }
73 // END DEBUGGING
74 #endif
75
76 // Local function definitions for now
77
78 // need to copy areas that may overlap
79 static
80 inline void
us_arrayCopy(const UChar * src,int32_t srcStart,UChar * dst,int32_t dstStart,int32_t count)81 us_arrayCopy(const UChar *src, int32_t srcStart,
82 UChar *dst, int32_t dstStart, int32_t count)
83 {
84 if(count>0) {
85 uprv_memmove(dst+dstStart, src+srcStart, (size_t)(count*sizeof(*src)));
86 }
87 }
88
89 // u_unescapeAt() callback to get a UChar from a UnicodeString
90 U_CDECL_BEGIN
91 static UChar U_CALLCONV
UnicodeString_charAt(int32_t offset,void * context)92 UnicodeString_charAt(int32_t offset, void *context) {
93 return ((icu::UnicodeString*) context)->charAt(offset);
94 }
95 U_CDECL_END
96
97 U_NAMESPACE_BEGIN
98
99 /* The Replaceable virtual destructor can't be defined in the header
100 due to how AIX works with multiple definitions of virtual functions.
101 */
~Replaceable()102 Replaceable::~Replaceable() {}
Replaceable()103 Replaceable::Replaceable() {}
104 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
105
106 UnicodeString U_EXPORT2
107 operator+ (const UnicodeString &s1, const UnicodeString &s2) {
108 return
109 UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0).
110 append(s1).
111 append(s2);
112 }
113
114 //========================================
115 // Reference Counting functions, put at top of file so that optimizing compilers
116 // have a chance to automatically inline.
117 //========================================
118
119 void
addRef()120 UnicodeString::addRef()
121 { umtx_atomic_inc((int32_t *)fUnion.fFields.fArray - 1);}
122
123 int32_t
removeRef()124 UnicodeString::removeRef()
125 { return umtx_atomic_dec((int32_t *)fUnion.fFields.fArray - 1);}
126
127 int32_t
refCount() const128 UnicodeString::refCount() const
129 {
130 umtx_lock(NULL);
131 // Note: without the lock to force a memory barrier, we might see a very
132 // stale value on some multi-processor systems.
133 int32_t count = *((int32_t *)fUnion.fFields.fArray - 1);
134 umtx_unlock(NULL);
135 return count;
136 }
137
138 void
releaseArray()139 UnicodeString::releaseArray() {
140 if((fFlags & kRefCounted) && removeRef() == 0) {
141 uprv_free((int32_t *)fUnion.fFields.fArray - 1);
142 }
143 }
144
145
146
147 //========================================
148 // Constructors
149 //========================================
UnicodeString()150 UnicodeString::UnicodeString()
151 : fShortLength(0),
152 fFlags(kShortString)
153 {}
154
UnicodeString(int32_t capacity,UChar32 c,int32_t count)155 UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count)
156 : fShortLength(0),
157 fFlags(0)
158 {
159 if(count <= 0 || (uint32_t)c > 0x10ffff) {
160 // just allocate and do not do anything else
161 allocate(capacity);
162 } else {
163 // count > 0, allocate and fill the new string with count c's
164 int32_t unitCount = U16_LENGTH(c), length = count * unitCount;
165 if(capacity < length) {
166 capacity = length;
167 }
168 if(allocate(capacity)) {
169 UChar *array = getArrayStart();
170 int32_t i = 0;
171
172 // fill the new string with c
173 if(unitCount == 1) {
174 // fill with length UChars
175 while(i < length) {
176 array[i++] = (UChar)c;
177 }
178 } else {
179 // get the code units for c
180 UChar units[U16_MAX_LENGTH];
181 U16_APPEND_UNSAFE(units, i, c);
182
183 // now it must be i==unitCount
184 i = 0;
185
186 // for Unicode, unitCount can only be 1, 2, 3, or 4
187 // 1 is handled above
188 while(i < length) {
189 int32_t unitIdx = 0;
190 while(unitIdx < unitCount) {
191 array[i++]=units[unitIdx++];
192 }
193 }
194 }
195 }
196 setLength(length);
197 }
198 }
199
UnicodeString(UChar ch)200 UnicodeString::UnicodeString(UChar ch)
201 : fShortLength(1),
202 fFlags(kShortString)
203 {
204 fUnion.fStackBuffer[0] = ch;
205 }
206
UnicodeString(UChar32 ch)207 UnicodeString::UnicodeString(UChar32 ch)
208 : fShortLength(0),
209 fFlags(kShortString)
210 {
211 int32_t i = 0;
212 UBool isError = FALSE;
213 U16_APPEND(fUnion.fStackBuffer, i, US_STACKBUF_SIZE, ch, isError);
214 // We test isError so that the compiler does not complain that we don't.
215 // If isError then i==0 which is what we want anyway.
216 if(!isError) {
217 fShortLength = (int8_t)i;
218 }
219 }
220
UnicodeString(const UChar * text)221 UnicodeString::UnicodeString(const UChar *text)
222 : fShortLength(0),
223 fFlags(kShortString)
224 {
225 doReplace(0, 0, text, 0, -1);
226 }
227
UnicodeString(const UChar * text,int32_t textLength)228 UnicodeString::UnicodeString(const UChar *text,
229 int32_t textLength)
230 : fShortLength(0),
231 fFlags(kShortString)
232 {
233 doReplace(0, 0, text, 0, textLength);
234 }
235
UnicodeString(UBool isTerminated,const UChar * text,int32_t textLength)236 UnicodeString::UnicodeString(UBool isTerminated,
237 const UChar *text,
238 int32_t textLength)
239 : fShortLength(0),
240 fFlags(kReadonlyAlias)
241 {
242 if(text == NULL) {
243 // treat as an empty string, do not alias
244 setToEmpty();
245 } else if(textLength < -1 ||
246 (textLength == -1 && !isTerminated) ||
247 (textLength >= 0 && isTerminated && text[textLength] != 0)
248 ) {
249 setToBogus();
250 } else {
251 if(textLength == -1) {
252 // text is terminated, or else it would have failed the above test
253 textLength = u_strlen(text);
254 }
255 setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
256 }
257 }
258
UnicodeString(UChar * buff,int32_t buffLength,int32_t buffCapacity)259 UnicodeString::UnicodeString(UChar *buff,
260 int32_t buffLength,
261 int32_t buffCapacity)
262 : fShortLength(0),
263 fFlags(kWritableAlias)
264 {
265 if(buff == NULL) {
266 // treat as an empty string, do not alias
267 setToEmpty();
268 } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
269 setToBogus();
270 } else {
271 if(buffLength == -1) {
272 // fLength = u_strlen(buff); but do not look beyond buffCapacity
273 const UChar *p = buff, *limit = buff + buffCapacity;
274 while(p != limit && *p != 0) {
275 ++p;
276 }
277 buffLength = (int32_t)(p - buff);
278 }
279 setArray(buff, buffLength, buffCapacity);
280 }
281 }
282
UnicodeString(const char * src,int32_t length,EInvariant)283 UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant)
284 : fShortLength(0),
285 fFlags(kShortString)
286 {
287 if(src==NULL) {
288 // treat as an empty string
289 } else {
290 if(length<0) {
291 length=(int32_t)uprv_strlen(src);
292 }
293 if(cloneArrayIfNeeded(length, length, FALSE)) {
294 u_charsToUChars(src, getArrayStart(), length);
295 setLength(length);
296 } else {
297 setToBogus();
298 }
299 }
300 }
301
302 #if U_CHARSET_IS_UTF8
303
UnicodeString(const char * codepageData)304 UnicodeString::UnicodeString(const char *codepageData)
305 : fShortLength(0),
306 fFlags(kShortString) {
307 if(codepageData != 0) {
308 setToUTF8(codepageData);
309 }
310 }
311
UnicodeString(const char * codepageData,int32_t dataLength)312 UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength)
313 : fShortLength(0),
314 fFlags(kShortString) {
315 // if there's nothing to convert, do nothing
316 if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
317 return;
318 }
319 if(dataLength == -1) {
320 dataLength = (int32_t)uprv_strlen(codepageData);
321 }
322 setToUTF8(StringPiece(codepageData, dataLength));
323 }
324
325 // else see unistr_cnv.cpp
326 #endif
327
// Copy constructor: starts as an empty stack-buffer string, then takes over
// the contents of that through copyFrom().
UnicodeString::UnicodeString(const UnicodeString& that)
  : Replaceable(),
    fShortLength(0),
    fFlags(kShortString)
{
  (void)copyFrom(that);
}
335
// Substring constructor: starts as an empty stack-buffer string, then calls
// setTo(that, srcStart).
UnicodeString::UnicodeString(const UnicodeString& that,
                             int32_t srcStart)
  : Replaceable(),
    fShortLength(0),
    fFlags(kShortString)
{
  (void)setTo(that, srcStart);
}
344
// Substring constructor: starts as an empty stack-buffer string, then calls
// setTo(that, srcStart, srcLength).
UnicodeString::UnicodeString(const UnicodeString& that,
                             int32_t srcStart,
                             int32_t srcLength)
  : Replaceable(),
    fShortLength(0),
    fFlags(kShortString)
{
  (void)setTo(that, srcStart, srcLength);
}
354
355 // Replaceable base class clone() default implementation, does not clone
356 Replaceable *
clone() const357 Replaceable::clone() const {
358 return NULL;
359 }
360
361 // UnicodeString overrides clone() with a real implementation
362 Replaceable *
clone() const363 UnicodeString::clone() const {
364 return new UnicodeString(*this);
365 }
366
367 //========================================
368 // array allocation
369 //========================================
370
371 UBool
allocate(int32_t capacity)372 UnicodeString::allocate(int32_t capacity) {
373 if(capacity <= US_STACKBUF_SIZE) {
374 fFlags = kShortString;
375 } else {
376 // count bytes for the refCounter and the string capacity, and
377 // round up to a multiple of 16; then divide by 4 and allocate int32_t's
378 // to be safely aligned for the refCount
379 // the +1 is for the NUL terminator, to avoid reallocation in getTerminatedBuffer()
380 int32_t words = (int32_t)(((sizeof(int32_t) + (capacity + 1) * U_SIZEOF_UCHAR + 15) & ~15) >> 2);
381 int32_t *array = (int32_t*) uprv_malloc( sizeof(int32_t) * words );
382 if(array != 0) {
383 // set initial refCount and point behind the refCount
384 *array++ = 1;
385
386 // have fArray point to the first UChar
387 fUnion.fFields.fArray = (UChar *)array;
388 fUnion.fFields.fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR));
389 fFlags = kLongString;
390 } else {
391 fShortLength = 0;
392 fUnion.fFields.fArray = 0;
393 fUnion.fFields.fCapacity = 0;
394 fFlags = kIsBogus;
395 return FALSE;
396 }
397 }
398 return TRUE;
399 }
400
401 //========================================
402 // Destructor
403 //========================================
~UnicodeString()404 UnicodeString::~UnicodeString()
405 {
406 releaseArray();
407 }
408
409 //========================================
410 // Factory methods
411 //========================================
412
fromUTF8(const StringPiece & utf8)413 UnicodeString UnicodeString::fromUTF8(const StringPiece &utf8) {
414 UnicodeString result;
415 result.setToUTF8(utf8);
416 return result;
417 }
418
fromUTF32(const UChar32 * utf32,int32_t length)419 UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {
420 UnicodeString result;
421 int32_t capacity;
422 // Most UTF-32 strings will be BMP-only and result in a same-length
423 // UTF-16 string. We overestimate the capacity just slightly,
424 // just in case there are a few supplementary characters.
425 if(length <= US_STACKBUF_SIZE) {
426 capacity = US_STACKBUF_SIZE;
427 } else {
428 capacity = length + (length >> 4) + 4;
429 }
430 do {
431 UChar *utf16 = result.getBuffer(capacity);
432 int32_t length16;
433 UErrorCode errorCode = U_ZERO_ERROR;
434 u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16,
435 utf32, length,
436 0xfffd, // Substitution character.
437 NULL, // Don't care about number of substitutions.
438 &errorCode);
439 result.releaseBuffer(length16);
440 if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
441 capacity = length16 + 1; // +1 for the terminating NUL.
442 continue;
443 } else if(U_FAILURE(errorCode)) {
444 result.setToBogus();
445 }
446 break;
447 } while(TRUE);
448 return result;
449 }
450
451 //========================================
452 // Assignment
453 //========================================
454
455 UnicodeString &
operator =(const UnicodeString & src)456 UnicodeString::operator=(const UnicodeString &src) {
457 return copyFrom(src);
458 }
459
460 UnicodeString &
fastCopyFrom(const UnicodeString & src)461 UnicodeString::fastCopyFrom(const UnicodeString &src) {
462 return copyFrom(src, TRUE);
463 }
464
465 UnicodeString &
copyFrom(const UnicodeString & src,UBool fastCopy)466 UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
467 // if assigning to ourselves, do nothing
468 if(this == 0 || this == &src) {
469 return *this;
470 }
471
472 // is the right side bogus?
473 if(&src == 0 || src.isBogus()) {
474 setToBogus();
475 return *this;
476 }
477
478 // delete the current contents
479 releaseArray();
480
481 if(src.isEmpty()) {
482 // empty string - use the stack buffer
483 setToEmpty();
484 return *this;
485 }
486
487 // we always copy the length
488 int32_t srcLength = src.length();
489 setLength(srcLength);
490
491 // fLength>0 and not an "open" src.getBuffer(minCapacity)
492 switch(src.fFlags) {
493 case kShortString:
494 // short string using the stack buffer, do the same
495 fFlags = kShortString;
496 uprv_memcpy(fUnion.fStackBuffer, src.fUnion.fStackBuffer, srcLength * U_SIZEOF_UCHAR);
497 break;
498 case kLongString:
499 // src uses a refCounted string buffer, use that buffer with refCount
500 // src is const, use a cast - we don't really change it
501 ((UnicodeString &)src).addRef();
502 // copy all fields, share the reference-counted buffer
503 fUnion.fFields.fArray = src.fUnion.fFields.fArray;
504 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
505 fFlags = src.fFlags;
506 break;
507 case kReadonlyAlias:
508 if(fastCopy) {
509 // src is a readonly alias, do the same
510 // -> maintain the readonly alias as such
511 fUnion.fFields.fArray = src.fUnion.fFields.fArray;
512 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
513 fFlags = src.fFlags;
514 break;
515 }
516 // else if(!fastCopy) fall through to case kWritableAlias
517 // -> allocate a new buffer and copy the contents
518 case kWritableAlias:
519 // src is a writable alias; we make a copy of that instead
520 if(allocate(srcLength)) {
521 uprv_memcpy(getArrayStart(), src.getArrayStart(), srcLength * U_SIZEOF_UCHAR);
522 break;
523 }
524 // if there is not enough memory, then fall through to setting to bogus
525 default:
526 // if src is bogus, set ourselves to bogus
527 // do not call setToBogus() here because fArray and fFlags are not consistent here
528 fShortLength = 0;
529 fUnion.fFields.fArray = 0;
530 fUnion.fFields.fCapacity = 0;
531 fFlags = kIsBogus;
532 break;
533 }
534
535 return *this;
536 }
537
538 //========================================
539 // Miscellaneous operations
540 //========================================
541
unescape() const542 UnicodeString UnicodeString::unescape() const {
543 UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity
544 const UChar *array = getBuffer();
545 int32_t len = length();
546 int32_t prev = 0;
547 for (int32_t i=0;;) {
548 if (i == len) {
549 result.append(array, prev, len - prev);
550 break;
551 }
552 if (array[i++] == 0x5C /*'\\'*/) {
553 result.append(array, prev, (i - 1) - prev);
554 UChar32 c = unescapeAt(i); // advances i
555 if (c < 0) {
556 result.remove(); // return empty string
557 break; // invalid escape sequence
558 }
559 result.append(c);
560 prev = i;
561 }
562 }
563 return result;
564 }
565
unescapeAt(int32_t & offset) const566 UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
567 return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this);
568 }
569
570 //========================================
571 // Read-only implementation
572 //========================================
573 UBool
doEquals(const UnicodeString & text,int32_t len) const574 UnicodeString::doEquals(const UnicodeString &text, int32_t len) const {
575 // Requires: this & text not bogus and have same lengths.
576 // Byte-wise comparison works for equality regardless of endianness.
577 return uprv_memcmp(getArrayStart(), text.getArrayStart(), len * U_SIZEOF_UCHAR) == 0;
578 }
579
580 int8_t
doCompare(int32_t start,int32_t length,const UChar * srcChars,int32_t srcStart,int32_t srcLength) const581 UnicodeString::doCompare( int32_t start,
582 int32_t length,
583 const UChar *srcChars,
584 int32_t srcStart,
585 int32_t srcLength) const
586 {
587 // compare illegal string values
588 if(isBogus()) {
589 return -1;
590 }
591
592 // pin indices to legal values
593 pinIndices(start, length);
594
595 if(srcChars == NULL) {
596 // treat const UChar *srcChars==NULL as an empty string
597 return length == 0 ? 0 : 1;
598 }
599
600 // get the correct pointer
601 const UChar *chars = getArrayStart();
602
603 chars += start;
604 srcChars += srcStart;
605
606 int32_t minLength;
607 int8_t lengthResult;
608
609 // get the srcLength if necessary
610 if(srcLength < 0) {
611 srcLength = u_strlen(srcChars + srcStart);
612 }
613
614 // are we comparing different lengths?
615 if(length != srcLength) {
616 if(length < srcLength) {
617 minLength = length;
618 lengthResult = -1;
619 } else {
620 minLength = srcLength;
621 lengthResult = 1;
622 }
623 } else {
624 minLength = length;
625 lengthResult = 0;
626 }
627
628 /*
629 * note that uprv_memcmp() returns an int but we return an int8_t;
630 * we need to take care not to truncate the result -
631 * one way to do this is to right-shift the value to
632 * move the sign bit into the lower 8 bits and making sure that this
633 * does not become 0 itself
634 */
635
636 if(minLength > 0 && chars != srcChars) {
637 int32_t result;
638
639 # if U_IS_BIG_ENDIAN
640 // big-endian: byte comparison works
641 result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar));
642 if(result != 0) {
643 return (int8_t)(result >> 15 | 1);
644 }
645 # else
646 // little-endian: compare UChar units
647 do {
648 result = ((int32_t)*(chars++) - (int32_t)*(srcChars++));
649 if(result != 0) {
650 return (int8_t)(result >> 15 | 1);
651 }
652 } while(--minLength > 0);
653 # endif
654 }
655 return lengthResult;
656 }
657
658 /* String compare in code point order - doCompare() compares in code unit order. */
659 int8_t
doCompareCodePointOrder(int32_t start,int32_t length,const UChar * srcChars,int32_t srcStart,int32_t srcLength) const660 UnicodeString::doCompareCodePointOrder(int32_t start,
661 int32_t length,
662 const UChar *srcChars,
663 int32_t srcStart,
664 int32_t srcLength) const
665 {
666 // compare illegal string values
667 // treat const UChar *srcChars==NULL as an empty string
668 if(isBogus()) {
669 return -1;
670 }
671
672 // pin indices to legal values
673 pinIndices(start, length);
674
675 if(srcChars == NULL) {
676 srcStart = srcLength = 0;
677 }
678
679 int32_t diff = uprv_strCompare(getArrayStart() + start, length, (srcChars!=NULL)?(srcChars + srcStart):NULL, srcLength, FALSE, TRUE);
680 /* translate the 32-bit result into an 8-bit one */
681 if(diff!=0) {
682 return (int8_t)(diff >> 15 | 1);
683 } else {
684 return 0;
685 }
686 }
687
688 int32_t
getLength() const689 UnicodeString::getLength() const {
690 return length();
691 }
692
693 UChar
getCharAt(int32_t offset) const694 UnicodeString::getCharAt(int32_t offset) const {
695 return charAt(offset);
696 }
697
698 UChar32
getChar32At(int32_t offset) const699 UnicodeString::getChar32At(int32_t offset) const {
700 return char32At(offset);
701 }
702
703 UChar32
char32At(int32_t offset) const704 UnicodeString::char32At(int32_t offset) const
705 {
706 int32_t len = length();
707 if((uint32_t)offset < (uint32_t)len) {
708 const UChar *array = getArrayStart();
709 UChar32 c;
710 U16_GET(array, 0, offset, len, c);
711 return c;
712 } else {
713 return kInvalidUChar;
714 }
715 }
716
717 int32_t
getChar32Start(int32_t offset) const718 UnicodeString::getChar32Start(int32_t offset) const {
719 if((uint32_t)offset < (uint32_t)length()) {
720 const UChar *array = getArrayStart();
721 U16_SET_CP_START(array, 0, offset);
722 return offset;
723 } else {
724 return 0;
725 }
726 }
727
728 int32_t
getChar32Limit(int32_t offset) const729 UnicodeString::getChar32Limit(int32_t offset) const {
730 int32_t len = length();
731 if((uint32_t)offset < (uint32_t)len) {
732 const UChar *array = getArrayStart();
733 U16_SET_CP_LIMIT(array, 0, offset, len);
734 return offset;
735 } else {
736 return len;
737 }
738 }
739
740 int32_t
countChar32(int32_t start,int32_t length) const741 UnicodeString::countChar32(int32_t start, int32_t length) const {
742 pinIndices(start, length);
743 // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
744 return u_countChar32(getArrayStart()+start, length);
745 }
746
747 UBool
hasMoreChar32Than(int32_t start,int32_t length,int32_t number) const748 UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
749 pinIndices(start, length);
750 // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
751 return u_strHasMoreChar32Than(getArrayStart()+start, length, number);
752 }
753
754 int32_t
moveIndex32(int32_t index,int32_t delta) const755 UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
756 // pin index
757 int32_t len = length();
758 if(index<0) {
759 index=0;
760 } else if(index>len) {
761 index=len;
762 }
763
764 const UChar *array = getArrayStart();
765 if(delta>0) {
766 U16_FWD_N(array, index, len, delta);
767 } else {
768 U16_BACK_N(array, 0, index, -delta);
769 }
770
771 return index;
772 }
773
774 void
doExtract(int32_t start,int32_t length,UChar * dst,int32_t dstStart) const775 UnicodeString::doExtract(int32_t start,
776 int32_t length,
777 UChar *dst,
778 int32_t dstStart) const
779 {
780 // pin indices to legal values
781 pinIndices(start, length);
782
783 // do not copy anything if we alias dst itself
784 const UChar *array = getArrayStart();
785 if(array + start != dst + dstStart) {
786 us_arrayCopy(array, start, dst, dstStart, length);
787 }
788 }
789
790 int32_t
extract(UChar * dest,int32_t destCapacity,UErrorCode & errorCode) const791 UnicodeString::extract(UChar *dest, int32_t destCapacity,
792 UErrorCode &errorCode) const {
793 int32_t len = length();
794 if(U_SUCCESS(errorCode)) {
795 if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
796 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
797 } else {
798 const UChar *array = getArrayStart();
799 if(len>0 && len<=destCapacity && array!=dest) {
800 uprv_memcpy(dest, array, len*U_SIZEOF_UCHAR);
801 }
802 return u_terminateUChars(dest, destCapacity, len, &errorCode);
803 }
804 }
805
806 return len;
807 }
808
809 int32_t
extract(int32_t start,int32_t length,char * target,int32_t targetCapacity,enum EInvariant) const810 UnicodeString::extract(int32_t start,
811 int32_t length,
812 char *target,
813 int32_t targetCapacity,
814 enum EInvariant) const
815 {
816 // if the arguments are illegal, then do nothing
817 if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) {
818 return 0;
819 }
820
821 // pin the indices to legal values
822 pinIndices(start, length);
823
824 if(length <= targetCapacity) {
825 u_UCharsToChars(getArrayStart() + start, target, length);
826 }
827 UErrorCode status = U_ZERO_ERROR;
828 return u_terminateChars(target, targetCapacity, length, &status);
829 }
830
831 UnicodeString
tempSubString(int32_t start,int32_t len) const832 UnicodeString::tempSubString(int32_t start, int32_t len) const {
833 pinIndices(start, len);
834 const UChar *array = getBuffer(); // not getArrayStart() to check kIsBogus & kOpenGetBuffer
835 if(array==NULL) {
836 array=fUnion.fStackBuffer; // anything not NULL because that would make an empty string
837 len=-2; // bogus result string
838 }
839 return UnicodeString(FALSE, array + start, len);
840 }
841
842 int32_t
toUTF8(int32_t start,int32_t len,char * target,int32_t capacity) const843 UnicodeString::toUTF8(int32_t start, int32_t len,
844 char *target, int32_t capacity) const {
845 pinIndices(start, len);
846 int32_t length8;
847 UErrorCode errorCode = U_ZERO_ERROR;
848 u_strToUTF8WithSub(target, capacity, &length8,
849 getBuffer() + start, len,
850 0xFFFD, // Standard substitution character.
851 NULL, // Don't care about number of substitutions.
852 &errorCode);
853 return length8;
854 }
855
856 #if U_CHARSET_IS_UTF8
857
858 int32_t
extract(int32_t start,int32_t len,char * target,uint32_t dstSize) const859 UnicodeString::extract(int32_t start, int32_t len,
860 char *target, uint32_t dstSize) const {
861 // if the arguments are illegal, then do nothing
862 if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {
863 return 0;
864 }
865 return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff);
866 }
867
868 // else see unistr_cnv.cpp
869 #endif
870
871 void
extractBetween(int32_t start,int32_t limit,UnicodeString & target) const872 UnicodeString::extractBetween(int32_t start,
873 int32_t limit,
874 UnicodeString& target) const {
875 pinIndex(start);
876 pinIndex(limit);
877 doExtract(start, limit - start, target);
878 }
879
880 // When converting from UTF-16 to UTF-8, the result will have at most 3 times
881 // as many bytes as the source has UChars.
882 // The "worst cases" are writing systems like Indic, Thai and CJK with
883 // 3:1 bytes:UChars.
884 void
toUTF8(ByteSink & sink) const885 UnicodeString::toUTF8(ByteSink &sink) const {
886 int32_t length16 = length();
887 if(length16 != 0) {
888 char stackBuffer[1024];
889 int32_t capacity = (int32_t)sizeof(stackBuffer);
890 UBool utf8IsOwned = FALSE;
891 char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,
892 3*length16,
893 stackBuffer, capacity,
894 &capacity);
895 int32_t length8 = 0;
896 UErrorCode errorCode = U_ZERO_ERROR;
897 u_strToUTF8WithSub(utf8, capacity, &length8,
898 getBuffer(), length16,
899 0xFFFD, // Standard substitution character.
900 NULL, // Don't care about number of substitutions.
901 &errorCode);
902 if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
903 utf8 = (char *)uprv_malloc(length8);
904 if(utf8 != NULL) {
905 utf8IsOwned = TRUE;
906 errorCode = U_ZERO_ERROR;
907 u_strToUTF8WithSub(utf8, length8, &length8,
908 getBuffer(), length16,
909 0xFFFD, // Standard substitution character.
910 NULL, // Don't care about number of substitutions.
911 &errorCode);
912 } else {
913 errorCode = U_MEMORY_ALLOCATION_ERROR;
914 }
915 }
916 if(U_SUCCESS(errorCode)) {
917 sink.Append(utf8, length8);
918 sink.Flush();
919 }
920 if(utf8IsOwned) {
921 uprv_free(utf8);
922 }
923 }
924 }
925
926 int32_t
toUTF32(UChar32 * utf32,int32_t capacity,UErrorCode & errorCode) const927 UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const {
928 int32_t length32=0;
929 if(U_SUCCESS(errorCode)) {
930 // getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
931 u_strToUTF32WithSub(utf32, capacity, &length32,
932 getBuffer(), length(),
933 0xfffd, // Substitution character.
934 NULL, // Don't care about number of substitutions.
935 &errorCode);
936 }
937 return length32;
938 }
939
940 int32_t
indexOf(const UChar * srcChars,int32_t srcStart,int32_t srcLength,int32_t start,int32_t length) const941 UnicodeString::indexOf(const UChar *srcChars,
942 int32_t srcStart,
943 int32_t srcLength,
944 int32_t start,
945 int32_t length) const
946 {
947 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
948 return -1;
949 }
950
951 // UnicodeString does not find empty substrings
952 if(srcLength < 0 && srcChars[srcStart] == 0) {
953 return -1;
954 }
955
956 // get the indices within bounds
957 pinIndices(start, length);
958
959 // find the first occurrence of the substring
960 const UChar *array = getArrayStart();
961 const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength);
962 if(match == NULL) {
963 return -1;
964 } else {
965 return (int32_t)(match - array);
966 }
967 }
968
969 int32_t
doIndexOf(UChar c,int32_t start,int32_t length) const970 UnicodeString::doIndexOf(UChar c,
971 int32_t start,
972 int32_t length) const
973 {
974 // pin indices
975 pinIndices(start, length);
976
977 // find the first occurrence of c
978 const UChar *array = getArrayStart();
979 const UChar *match = u_memchr(array + start, c, length);
980 if(match == NULL) {
981 return -1;
982 } else {
983 return (int32_t)(match - array);
984 }
985 }
986
987 int32_t
doIndexOf(UChar32 c,int32_t start,int32_t length) const988 UnicodeString::doIndexOf(UChar32 c,
989 int32_t start,
990 int32_t length) const {
991 // pin indices
992 pinIndices(start, length);
993
994 // find the first occurrence of c
995 const UChar *array = getArrayStart();
996 const UChar *match = u_memchr32(array + start, c, length);
997 if(match == NULL) {
998 return -1;
999 } else {
1000 return (int32_t)(match - array);
1001 }
1002 }
1003
1004 int32_t
lastIndexOf(const UChar * srcChars,int32_t srcStart,int32_t srcLength,int32_t start,int32_t length) const1005 UnicodeString::lastIndexOf(const UChar *srcChars,
1006 int32_t srcStart,
1007 int32_t srcLength,
1008 int32_t start,
1009 int32_t length) const
1010 {
1011 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
1012 return -1;
1013 }
1014
1015 // UnicodeString does not find empty substrings
1016 if(srcLength < 0 && srcChars[srcStart] == 0) {
1017 return -1;
1018 }
1019
1020 // get the indices within bounds
1021 pinIndices(start, length);
1022
1023 // find the last occurrence of the substring
1024 const UChar *array = getArrayStart();
1025 const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength);
1026 if(match == NULL) {
1027 return -1;
1028 } else {
1029 return (int32_t)(match - array);
1030 }
1031 }
1032
1033 int32_t
doLastIndexOf(UChar c,int32_t start,int32_t length) const1034 UnicodeString::doLastIndexOf(UChar c,
1035 int32_t start,
1036 int32_t length) const
1037 {
1038 if(isBogus()) {
1039 return -1;
1040 }
1041
1042 // pin indices
1043 pinIndices(start, length);
1044
1045 // find the last occurrence of c
1046 const UChar *array = getArrayStart();
1047 const UChar *match = u_memrchr(array + start, c, length);
1048 if(match == NULL) {
1049 return -1;
1050 } else {
1051 return (int32_t)(match - array);
1052 }
1053 }
1054
1055 int32_t
doLastIndexOf(UChar32 c,int32_t start,int32_t length) const1056 UnicodeString::doLastIndexOf(UChar32 c,
1057 int32_t start,
1058 int32_t length) const {
1059 // pin indices
1060 pinIndices(start, length);
1061
1062 // find the last occurrence of c
1063 const UChar *array = getArrayStart();
1064 const UChar *match = u_memrchr32(array + start, c, length);
1065 if(match == NULL) {
1066 return -1;
1067 } else {
1068 return (int32_t)(match - array);
1069 }
1070 }
1071
1072 //========================================
1073 // Write implementation
1074 //========================================
1075
1076 UnicodeString&
findAndReplace(int32_t start,int32_t length,const UnicodeString & oldText,int32_t oldStart,int32_t oldLength,const UnicodeString & newText,int32_t newStart,int32_t newLength)1077 UnicodeString::findAndReplace(int32_t start,
1078 int32_t length,
1079 const UnicodeString& oldText,
1080 int32_t oldStart,
1081 int32_t oldLength,
1082 const UnicodeString& newText,
1083 int32_t newStart,
1084 int32_t newLength)
1085 {
1086 if(isBogus() || oldText.isBogus() || newText.isBogus()) {
1087 return *this;
1088 }
1089
1090 pinIndices(start, length);
1091 oldText.pinIndices(oldStart, oldLength);
1092 newText.pinIndices(newStart, newLength);
1093
1094 if(oldLength == 0) {
1095 return *this;
1096 }
1097
1098 while(length > 0 && length >= oldLength) {
1099 int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);
1100 if(pos < 0) {
1101 // no more oldText's here: done
1102 break;
1103 } else {
1104 // we found oldText, replace it by newText and go beyond it
1105 replace(pos, oldLength, newText, newStart, newLength);
1106 length -= pos + oldLength - start;
1107 start = pos + newLength;
1108 }
1109 }
1110
1111 return *this;
1112 }
1113
1114
1115 void
setToBogus()1116 UnicodeString::setToBogus()
1117 {
1118 releaseArray();
1119
1120 fShortLength = 0;
1121 fUnion.fFields.fArray = 0;
1122 fUnion.fFields.fCapacity = 0;
1123 fFlags = kIsBogus;
1124 }
1125
1126 // turn a bogus string into an empty one
1127 void
unBogus()1128 UnicodeString::unBogus() {
1129 if(fFlags & kIsBogus) {
1130 setToEmpty();
1131 }
1132 }
1133
1134 // setTo() analogous to the readonly-aliasing constructor with the same signature
1135 UnicodeString &
setTo(UBool isTerminated,const UChar * text,int32_t textLength)1136 UnicodeString::setTo(UBool isTerminated,
1137 const UChar *text,
1138 int32_t textLength)
1139 {
1140 if(fFlags & kOpenGetBuffer) {
1141 // do not modify a string that has an "open" getBuffer(minCapacity)
1142 return *this;
1143 }
1144
1145 if(text == NULL) {
1146 // treat as an empty string, do not alias
1147 releaseArray();
1148 setToEmpty();
1149 return *this;
1150 }
1151
1152 if( textLength < -1 ||
1153 (textLength == -1 && !isTerminated) ||
1154 (textLength >= 0 && isTerminated && text[textLength] != 0)
1155 ) {
1156 setToBogus();
1157 return *this;
1158 }
1159
1160 releaseArray();
1161
1162 if(textLength == -1) {
1163 // text is terminated, or else it would have failed the above test
1164 textLength = u_strlen(text);
1165 }
1166 setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
1167
1168 fFlags = kReadonlyAlias;
1169 return *this;
1170 }
1171
1172 // setTo() analogous to the writable-aliasing constructor with the same signature
1173 UnicodeString &
setTo(UChar * buffer,int32_t buffLength,int32_t buffCapacity)1174 UnicodeString::setTo(UChar *buffer,
1175 int32_t buffLength,
1176 int32_t buffCapacity) {
1177 if(fFlags & kOpenGetBuffer) {
1178 // do not modify a string that has an "open" getBuffer(minCapacity)
1179 return *this;
1180 }
1181
1182 if(buffer == NULL) {
1183 // treat as an empty string, do not alias
1184 releaseArray();
1185 setToEmpty();
1186 return *this;
1187 }
1188
1189 if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
1190 setToBogus();
1191 return *this;
1192 } else if(buffLength == -1) {
1193 // buffLength = u_strlen(buff); but do not look beyond buffCapacity
1194 const UChar *p = buffer, *limit = buffer + buffCapacity;
1195 while(p != limit && *p != 0) {
1196 ++p;
1197 }
1198 buffLength = (int32_t)(p - buffer);
1199 }
1200
1201 releaseArray();
1202
1203 setArray(buffer, buffLength, buffCapacity);
1204 fFlags = kWritableAlias;
1205 return *this;
1206 }
1207
setToUTF8(const StringPiece & utf8)1208 UnicodeString &UnicodeString::setToUTF8(const StringPiece &utf8) {
1209 unBogus();
1210 int32_t length = utf8.length();
1211 int32_t capacity;
1212 // The UTF-16 string will be at most as long as the UTF-8 string.
1213 if(length <= US_STACKBUF_SIZE) {
1214 capacity = US_STACKBUF_SIZE;
1215 } else {
1216 capacity = length + 1; // +1 for the terminating NUL.
1217 }
1218 UChar *utf16 = getBuffer(capacity);
1219 int32_t length16;
1220 UErrorCode errorCode = U_ZERO_ERROR;
1221 u_strFromUTF8WithSub(utf16, getCapacity(), &length16,
1222 utf8.data(), length,
1223 0xfffd, // Substitution character.
1224 NULL, // Don't care about number of substitutions.
1225 &errorCode);
1226 releaseBuffer(length16);
1227 if(U_FAILURE(errorCode)) {
1228 setToBogus();
1229 }
1230 return *this;
1231 }
1232
1233 UnicodeString&
setCharAt(int32_t offset,UChar c)1234 UnicodeString::setCharAt(int32_t offset,
1235 UChar c)
1236 {
1237 int32_t len = length();
1238 if(cloneArrayIfNeeded() && len > 0) {
1239 if(offset < 0) {
1240 offset = 0;
1241 } else if(offset >= len) {
1242 offset = len - 1;
1243 }
1244
1245 getArrayStart()[offset] = c;
1246 }
1247 return *this;
1248 }
1249
1250 UnicodeString&
replace(int32_t start,int32_t _length,UChar32 srcChar)1251 UnicodeString::replace(int32_t start,
1252 int32_t _length,
1253 UChar32 srcChar) {
1254 UChar buffer[U16_MAX_LENGTH];
1255 int32_t count = 0;
1256 UBool isError = FALSE;
1257 U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
1258 // We test isError so that the compiler does not complain that we don't.
1259 // If isError (srcChar is not a valid code point) then count==0 which means
1260 // we remove the source segment rather than replacing it with srcChar.
1261 return doReplace(start, _length, buffer, 0, isError ? 0 : count);
1262 }
1263
1264 UnicodeString&
append(UChar32 srcChar)1265 UnicodeString::append(UChar32 srcChar) {
1266 UChar buffer[U16_MAX_LENGTH];
1267 int32_t _length = 0;
1268 UBool isError = FALSE;
1269 U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
1270 // We test isError so that the compiler does not complain that we don't.
1271 // If isError then _length==0 which turns the doReplace() into a no-op anyway.
1272 return isError ? *this : doReplace(length(), 0, buffer, 0, _length);
1273 }
1274
1275 UnicodeString&
doReplace(int32_t start,int32_t length,const UnicodeString & src,int32_t srcStart,int32_t srcLength)1276 UnicodeString::doReplace( int32_t start,
1277 int32_t length,
1278 const UnicodeString& src,
1279 int32_t srcStart,
1280 int32_t srcLength)
1281 {
1282 if(!src.isBogus()) {
1283 // pin the indices to legal values
1284 src.pinIndices(srcStart, srcLength);
1285
1286 // get the characters from src
1287 // and replace the range in ourselves with them
1288 return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);
1289 } else {
1290 // remove the range
1291 return doReplace(start, length, 0, 0, 0);
1292 }
1293 }
1294
1295 UnicodeString&
doReplace(int32_t start,int32_t length,const UChar * srcChars,int32_t srcStart,int32_t srcLength)1296 UnicodeString::doReplace(int32_t start,
1297 int32_t length,
1298 const UChar *srcChars,
1299 int32_t srcStart,
1300 int32_t srcLength)
1301 {
1302 if(!isWritable()) {
1303 return *this;
1304 }
1305
1306 int32_t oldLength = this->length();
1307
1308 // optimize (read-only alias).remove(0, start) and .remove(start, end)
1309 if((fFlags&kBufferIsReadonly) && srcLength == 0) {
1310 if(start == 0) {
1311 // remove prefix by adjusting the array pointer
1312 pinIndex(length);
1313 fUnion.fFields.fArray += length;
1314 fUnion.fFields.fCapacity -= length;
1315 setLength(oldLength - length);
1316 return *this;
1317 } else {
1318 pinIndex(start);
1319 if(length >= (oldLength - start)) {
1320 // remove suffix by reducing the length (like truncate())
1321 setLength(start);
1322 fUnion.fFields.fCapacity = start; // not NUL-terminated any more
1323 return *this;
1324 }
1325 }
1326 }
1327
1328 if(srcChars == 0) {
1329 srcStart = srcLength = 0;
1330 } else if(srcLength < 0) {
1331 // get the srcLength if necessary
1332 srcLength = u_strlen(srcChars + srcStart);
1333 }
1334
1335 // calculate the size of the string after the replace
1336 int32_t newLength;
1337
1338 // optimize append() onto a large-enough, owned string
1339 if(start >= oldLength) {
1340 if(srcLength == 0) {
1341 return *this;
1342 }
1343 newLength = oldLength + srcLength;
1344 if(newLength <= getCapacity() && isBufferWritable()) {
1345 UChar *oldArray = getArrayStart();
1346 // Do not copy characters when
1347 // UChar *buffer=str.getAppendBuffer(...);
1348 // is followed by
1349 // str.append(buffer, length);
1350 // or
1351 // str.appendString(buffer, length)
1352 // or similar.
1353 if(srcChars + srcStart != oldArray + start || start > oldLength) {
1354 us_arrayCopy(srcChars, srcStart, oldArray, oldLength, srcLength);
1355 }
1356 setLength(newLength);
1357 return *this;
1358 } else {
1359 // pin the indices to legal values
1360 start = oldLength;
1361 length = 0;
1362 }
1363 } else {
1364 // pin the indices to legal values
1365 pinIndices(start, length);
1366
1367 newLength = oldLength - length + srcLength;
1368 }
1369
1370 // the following may change fArray but will not copy the current contents;
1371 // therefore we need to keep the current fArray
1372 UChar oldStackBuffer[US_STACKBUF_SIZE];
1373 UChar *oldArray;
1374 if((fFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) {
1375 // copy the stack buffer contents because it will be overwritten with
1376 // fUnion.fFields values
1377 u_memcpy(oldStackBuffer, fUnion.fStackBuffer, oldLength);
1378 oldArray = oldStackBuffer;
1379 } else {
1380 oldArray = getArrayStart();
1381 }
1382
1383 // clone our array and allocate a bigger array if needed
1384 int32_t *bufferToDelete = 0;
1385 if(!cloneArrayIfNeeded(newLength, newLength + (newLength >> 2) + kGrowSize,
1386 FALSE, &bufferToDelete)
1387 ) {
1388 return *this;
1389 }
1390
1391 // now do the replace
1392
1393 UChar *newArray = getArrayStart();
1394 if(newArray != oldArray) {
1395 // if fArray changed, then we need to copy everything except what will change
1396 us_arrayCopy(oldArray, 0, newArray, 0, start);
1397 us_arrayCopy(oldArray, start + length,
1398 newArray, start + srcLength,
1399 oldLength - (start + length));
1400 } else if(length != srcLength) {
1401 // fArray did not change; copy only the portion that isn't changing, leaving a hole
1402 us_arrayCopy(oldArray, start + length,
1403 newArray, start + srcLength,
1404 oldLength - (start + length));
1405 }
1406
1407 // now fill in the hole with the new string
1408 us_arrayCopy(srcChars, srcStart, newArray, start, srcLength);
1409
1410 setLength(newLength);
1411
1412 // delayed delete in case srcChars == fArray when we started, and
1413 // to keep oldArray alive for the above operations
1414 if (bufferToDelete) {
1415 uprv_free(bufferToDelete);
1416 }
1417
1418 return *this;
1419 }
1420
1421 /**
1422 * Replaceable API
1423 */
1424 void
handleReplaceBetween(int32_t start,int32_t limit,const UnicodeString & text)1425 UnicodeString::handleReplaceBetween(int32_t start,
1426 int32_t limit,
1427 const UnicodeString& text) {
1428 replaceBetween(start, limit, text);
1429 }
1430
1431 /**
1432 * Replaceable API
1433 */
1434 void
copy(int32_t start,int32_t limit,int32_t dest)1435 UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
1436 if (limit <= start) {
1437 return; // Nothing to do; avoid bogus malloc call
1438 }
1439 UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) );
1440 // Check to make sure text is not null.
1441 if (text != NULL) {
1442 extractBetween(start, limit, text, 0);
1443 insert(dest, text, 0, limit - start);
1444 uprv_free(text);
1445 }
1446 }
1447
1448 /**
1449 * Replaceable API
1450 *
1451 * NOTE: This is for the Replaceable class. There is no rep.cpp,
1452 * so we implement this function here.
1453 */
hasMetaData() const1454 UBool Replaceable::hasMetaData() const {
1455 return TRUE;
1456 }
1457
1458 /**
1459 * Replaceable API
1460 */
hasMetaData() const1461 UBool UnicodeString::hasMetaData() const {
1462 return FALSE;
1463 }
1464
1465 UnicodeString&
doReverse(int32_t start,int32_t length)1466 UnicodeString::doReverse(int32_t start, int32_t length) {
1467 if(length <= 1 || !cloneArrayIfNeeded()) {
1468 return *this;
1469 }
1470
1471 // pin the indices to legal values
1472 pinIndices(start, length);
1473 if(length <= 1) { // pinIndices() might have shrunk the length
1474 return *this;
1475 }
1476
1477 UChar *left = getArrayStart() + start;
1478 UChar *right = left + length - 1; // -1 for inclusive boundary (length>=2)
1479 UChar swap;
1480 UBool hasSupplementary = FALSE;
1481
1482 // Before the loop we know left<right because length>=2.
1483 do {
1484 hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left);
1485 hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right);
1486 *right-- = swap;
1487 } while(left < right);
1488 // Make sure to test the middle code unit of an odd-length string.
1489 // Redundant if the length is even.
1490 hasSupplementary |= (UBool)U16_IS_LEAD(*left);
1491
1492 /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
1493 if(hasSupplementary) {
1494 UChar swap2;
1495
1496 left = getArrayStart() + start;
1497 right = left + length - 1; // -1 so that we can look at *(left+1) if left<right
1498 while(left < right) {
1499 if(U16_IS_TRAIL(swap = *left) && U16_IS_LEAD(swap2 = *(left + 1))) {
1500 *left++ = swap2;
1501 *left++ = swap;
1502 } else {
1503 ++left;
1504 }
1505 }
1506 }
1507
1508 return *this;
1509 }
1510
1511 UBool
padLeading(int32_t targetLength,UChar padChar)1512 UnicodeString::padLeading(int32_t targetLength,
1513 UChar padChar)
1514 {
1515 int32_t oldLength = length();
1516 if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1517 return FALSE;
1518 } else {
1519 // move contents up by padding width
1520 UChar *array = getArrayStart();
1521 int32_t start = targetLength - oldLength;
1522 us_arrayCopy(array, 0, array, start, oldLength);
1523
1524 // fill in padding character
1525 while(--start >= 0) {
1526 array[start] = padChar;
1527 }
1528 setLength(targetLength);
1529 return TRUE;
1530 }
1531 }
1532
1533 UBool
padTrailing(int32_t targetLength,UChar padChar)1534 UnicodeString::padTrailing(int32_t targetLength,
1535 UChar padChar)
1536 {
1537 int32_t oldLength = length();
1538 if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1539 return FALSE;
1540 } else {
1541 // fill in padding character
1542 UChar *array = getArrayStart();
1543 int32_t length = targetLength;
1544 while(--length >= oldLength) {
1545 array[length] = padChar;
1546 }
1547 setLength(targetLength);
1548 return TRUE;
1549 }
1550 }
1551
1552 //========================================
1553 // Hashing
1554 //========================================
1555 int32_t
doHashCode() const1556 UnicodeString::doHashCode() const
1557 {
1558 /* Delegate hash computation to uhash. This makes UnicodeString
1559 * hashing consistent with UChar* hashing. */
1560 int32_t hashCode = ustr_hashUCharsN(getArrayStart(), length());
1561 if (hashCode == kInvalidHashCode) {
1562 hashCode = kEmptyHashCode;
1563 }
1564 return hashCode;
1565 }
1566
1567 //========================================
1568 // External Buffer
1569 //========================================
1570
1571 UChar *
getBuffer(int32_t minCapacity)1572 UnicodeString::getBuffer(int32_t minCapacity) {
1573 if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
1574 fFlags|=kOpenGetBuffer;
1575 fShortLength=0;
1576 return getArrayStart();
1577 } else {
1578 return 0;
1579 }
1580 }
1581
1582 void
releaseBuffer(int32_t newLength)1583 UnicodeString::releaseBuffer(int32_t newLength) {
1584 if(fFlags&kOpenGetBuffer && newLength>=-1) {
1585 // set the new fLength
1586 int32_t capacity=getCapacity();
1587 if(newLength==-1) {
1588 // the new length is the string length, capped by fCapacity
1589 const UChar *array=getArrayStart(), *p=array, *limit=array+capacity;
1590 while(p<limit && *p!=0) {
1591 ++p;
1592 }
1593 newLength=(int32_t)(p-array);
1594 } else if(newLength>capacity) {
1595 newLength=capacity;
1596 }
1597 setLength(newLength);
1598 fFlags&=~kOpenGetBuffer;
1599 }
1600 }
1601
1602 //========================================
1603 // Miscellaneous
1604 //========================================
1605 UBool
cloneArrayIfNeeded(int32_t newCapacity,int32_t growCapacity,UBool doCopyArray,int32_t ** pBufferToDelete,UBool forceClone)1606 UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
1607 int32_t growCapacity,
1608 UBool doCopyArray,
1609 int32_t **pBufferToDelete,
1610 UBool forceClone) {
1611 // default parameters need to be static, therefore
1612 // the defaults are -1 to have convenience defaults
1613 if(newCapacity == -1) {
1614 newCapacity = getCapacity();
1615 }
1616
1617 // while a getBuffer(minCapacity) is "open",
1618 // prevent any modifications of the string by returning FALSE here
1619 // if the string is bogus, then only an assignment or similar can revive it
1620 if(!isWritable()) {
1621 return FALSE;
1622 }
1623
1624 /*
1625 * We need to make a copy of the array if
1626 * the buffer is read-only, or
1627 * the buffer is refCounted (shared), and refCount>1, or
1628 * the buffer is too small.
1629 * Return FALSE if memory could not be allocated.
1630 */
1631 if(forceClone ||
1632 fFlags & kBufferIsReadonly ||
1633 (fFlags & kRefCounted && refCount() > 1) ||
1634 newCapacity > getCapacity()
1635 ) {
1636 // check growCapacity for default value and use of the stack buffer
1637 if(growCapacity < 0) {
1638 growCapacity = newCapacity;
1639 } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
1640 growCapacity = US_STACKBUF_SIZE;
1641 }
1642
1643 // save old values
1644 UChar oldStackBuffer[US_STACKBUF_SIZE];
1645 UChar *oldArray;
1646 uint8_t flags = fFlags;
1647
1648 if(flags&kUsingStackBuffer) {
1649 U_ASSERT(!(flags&kRefCounted)); /* kRefCounted and kUsingStackBuffer are mutally exclusive */
1650 if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
1651 // copy the stack buffer contents because it will be overwritten with
1652 // fUnion.fFields values
1653 us_arrayCopy(fUnion.fStackBuffer, 0, oldStackBuffer, 0, fShortLength);
1654 oldArray = oldStackBuffer;
1655 } else {
1656 oldArray = 0; // no need to copy from stack buffer to itself
1657 }
1658 } else {
1659 oldArray = fUnion.fFields.fArray;
1660 U_ASSERT(oldArray!=NULL); /* when stack buffer is not used, oldArray must have a non-NULL reference */
1661 }
1662
1663 // allocate a new array
1664 if(allocate(growCapacity) ||
1665 (newCapacity < growCapacity && allocate(newCapacity))
1666 ) {
1667 if(doCopyArray && oldArray != 0) {
1668 // copy the contents
1669 // do not copy more than what fits - it may be smaller than before
1670 int32_t minLength = length();
1671 newCapacity = getCapacity();
1672 if(newCapacity < minLength) {
1673 minLength = newCapacity;
1674 setLength(minLength);
1675 }
1676 us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);
1677 } else {
1678 fShortLength = 0;
1679 }
1680
1681 // release the old array
1682 if(flags & kRefCounted) {
1683 // the array is refCounted; decrement and release if 0
1684 int32_t *pRefCount = ((int32_t *)oldArray - 1);
1685 if(umtx_atomic_dec(pRefCount) == 0) {
1686 if(pBufferToDelete == 0) {
1687 uprv_free(pRefCount);
1688 } else {
1689 // the caller requested to delete it himself
1690 *pBufferToDelete = pRefCount;
1691 }
1692 }
1693 }
1694 } else {
1695 // not enough memory for growCapacity and not even for the smaller newCapacity
1696 // reset the old values for setToBogus() to release the array
1697 if(!(flags&kUsingStackBuffer)) {
1698 fUnion.fFields.fArray = oldArray;
1699 }
1700 fFlags = flags;
1701 setToBogus();
1702 return FALSE;
1703 }
1704 }
1705 return TRUE;
1706 }
1707
1708 // UnicodeStringAppendable ------------------------------------------------- ***
1709
~UnicodeStringAppendable()1710 UnicodeStringAppendable::~UnicodeStringAppendable() {}
1711
1712 UBool
appendCodeUnit(UChar c)1713 UnicodeStringAppendable::appendCodeUnit(UChar c) {
1714 return str.doReplace(str.length(), 0, &c, 0, 1).isWritable();
1715 }
1716
1717 UBool
appendCodePoint(UChar32 c)1718 UnicodeStringAppendable::appendCodePoint(UChar32 c) {
1719 UChar buffer[U16_MAX_LENGTH];
1720 int32_t cLength = 0;
1721 UBool isError = FALSE;
1722 U16_APPEND(buffer, cLength, U16_MAX_LENGTH, c, isError);
1723 return !isError && str.doReplace(str.length(), 0, buffer, 0, cLength).isWritable();
1724 }
1725
1726 UBool
appendString(const UChar * s,int32_t length)1727 UnicodeStringAppendable::appendString(const UChar *s, int32_t length) {
1728 return str.doReplace(str.length(), 0, s, 0, length).isWritable();
1729 }
1730
1731 UBool
reserveAppendCapacity(int32_t appendCapacity)1732 UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity) {
1733 return str.cloneArrayIfNeeded(str.length() + appendCapacity);
1734 }
1735
1736 UChar *
getAppendBuffer(int32_t minCapacity,int32_t desiredCapacityHint,UChar * scratch,int32_t scratchCapacity,int32_t * resultCapacity)1737 UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity,
1738 int32_t desiredCapacityHint,
1739 UChar *scratch, int32_t scratchCapacity,
1740 int32_t *resultCapacity) {
1741 if(minCapacity < 1 || scratchCapacity < minCapacity) {
1742 *resultCapacity = 0;
1743 return NULL;
1744 }
1745 int32_t oldLength = str.length();
1746 if(str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) {
1747 *resultCapacity = str.getCapacity() - oldLength;
1748 return str.getArrayStart() + oldLength;
1749 }
1750 *resultCapacity = scratchCapacity;
1751 return scratch;
1752 }
1753
1754 U_NAMESPACE_END
1755
1756 U_NAMESPACE_USE
1757
1758 U_CAPI int32_t U_EXPORT2
uhash_hashUnicodeString(const UElement key)1759 uhash_hashUnicodeString(const UElement key) {
1760 const UnicodeString *str = (const UnicodeString*) key.pointer;
1761 return (str == NULL) ? 0 : str->hashCode();
1762 }
1763
1764 // Moved here from uhash_us.cpp so that using a UVector of UnicodeString*
1765 // does not depend on hashtable code.
1766 U_CAPI UBool U_EXPORT2
uhash_compareUnicodeString(const UElement key1,const UElement key2)1767 uhash_compareUnicodeString(const UElement key1, const UElement key2) {
1768 const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
1769 const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
1770 if (str1 == str2) {
1771 return TRUE;
1772 }
1773 if (str1 == NULL || str2 == NULL) {
1774 return FALSE;
1775 }
1776 return *str1 == *str2;
1777 }
1778
1779 #ifdef U_STATIC_IMPLEMENTATION
1780 /*
1781 This should never be called. It is defined here to make sure that the
1782 virtual vector deleting destructor is defined within unistr.cpp.
1783 The vector deleting destructor is already a part of UObject,
1784 but defining it here makes sure that it is included with this object file.
1785 This makes sure that static library dependencies are kept to a minimum.
1786 */
uprv_UnicodeStringDummy(void)1787 static void uprv_UnicodeStringDummy(void) {
1788 delete [] (new UnicodeString[2]);
1789 }
1790 #endif
1791