1 /*
2 ******************************************************************************
3 * Copyright (C) 1999-2011, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 ******************************************************************************
6 *
7 * File unistr.cpp
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 09/25/98 stephen Creation.
13 * 04/20/99 stephen Overhauled per 4/16 code review.
14 * 07/09/99 stephen Renamed {hi,lo},{byte,word} to icu_X for HP/UX
15 * 11/18/99 aliu Added handleReplaceBetween() to make inherit from
16 * Replaceable.
17 * 06/25/01 grhoten Removed the dependency on iostream
18 ******************************************************************************
19 */
20
21 #include "unicode/utypes.h"
22 #include "unicode/appendable.h"
23 #include "unicode/putil.h"
24 #include "cstring.h"
25 #include "cmemory.h"
26 #include "unicode/ustring.h"
27 #include "unicode/unistr.h"
28 #include "uhash.h"
29 #include "ustr_imp.h"
30 #include "umutex.h"
31
32 #if 0
33
34 #if U_IOSTREAM_SOURCE >= 199711
35 #include <iostream>
36 using namespace std;
37 #elif U_IOSTREAM_SOURCE >= 198506
38 #include <iostream.h>
39 #endif
40
41 //DEBUGGING
42 void
43 print(const UnicodeString& s,
44 const char *name)
45 {
46 UChar c;
47 cout << name << ":|";
48 for(int i = 0; i < s.length(); ++i) {
49 c = s[i];
50 if(c>= 0x007E || c < 0x0020)
51 cout << "[0x" << hex << s[i] << "]";
52 else
53 cout << (char) s[i];
54 }
55 cout << '|' << endl;
56 }
57
58 void
59 print(const UChar *s,
60 int32_t len,
61 const char *name)
62 {
63 UChar c;
64 cout << name << ":|";
65 for(int i = 0; i < len; ++i) {
66 c = s[i];
67 if(c>= 0x007E || c < 0x0020)
68 cout << "[0x" << hex << s[i] << "]";
69 else
70 cout << (char) s[i];
71 }
72 cout << '|' << endl;
73 }
74 // END DEBUGGING
75 #endif
76
77 // Local function definitions for now
78
79 // need to copy areas that may overlap
80 static
81 inline void
us_arrayCopy(const UChar * src,int32_t srcStart,UChar * dst,int32_t dstStart,int32_t count)82 us_arrayCopy(const UChar *src, int32_t srcStart,
83 UChar *dst, int32_t dstStart, int32_t count)
84 {
85 if(count>0) {
86 uprv_memmove(dst+dstStart, src+srcStart, (size_t)(count*sizeof(*src)));
87 }
88 }
89
90 // u_unescapeAt() callback to get a UChar from a UnicodeString
91 U_CDECL_BEGIN
92 static UChar U_CALLCONV
UnicodeString_charAt(int32_t offset,void * context)93 UnicodeString_charAt(int32_t offset, void *context) {
94 return ((U_NAMESPACE_QUALIFIER UnicodeString*) context)->charAt(offset);
95 }
96 U_CDECL_END
97
98 U_NAMESPACE_BEGIN
99
100 /* The Replaceable virtual destructor can't be defined in the header
101 due to how AIX works with multiple definitions of virtual functions.
102 */
~Replaceable()103 Replaceable::~Replaceable() {}
Replaceable()104 Replaceable::Replaceable() {}
105 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
106
107 UnicodeString U_EXPORT2
108 operator+ (const UnicodeString &s1, const UnicodeString &s2) {
109 return
110 UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0).
111 append(s1).
112 append(s2);
113 }
114
115 //========================================
116 // Reference Counting functions, put at top of file so that optimizing compilers
117 // have a chance to automatically inline.
118 //========================================
119
120 void
addRef()121 UnicodeString::addRef()
122 { umtx_atomic_inc((int32_t *)fUnion.fFields.fArray - 1);}
123
124 int32_t
removeRef()125 UnicodeString::removeRef()
126 { return umtx_atomic_dec((int32_t *)fUnion.fFields.fArray - 1);}
127
128 int32_t
refCount() const129 UnicodeString::refCount() const
130 {
131 umtx_lock(NULL);
132 // Note: without the lock to force a memory barrier, we might see a very
133 // stale value on some multi-processor systems.
134 int32_t count = *((int32_t *)fUnion.fFields.fArray - 1);
135 umtx_unlock(NULL);
136 return count;
137 }
138
139 void
releaseArray()140 UnicodeString::releaseArray() {
141 if((fFlags & kRefCounted) && removeRef() == 0) {
142 uprv_free((int32_t *)fUnion.fFields.fArray - 1);
143 }
144 }
145
146
147
148 //========================================
149 // Constructors
150 //========================================
UnicodeString()151 UnicodeString::UnicodeString()
152 : fShortLength(0),
153 fFlags(kShortString)
154 {}
155
UnicodeString(int32_t capacity,UChar32 c,int32_t count)156 UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count)
157 : fShortLength(0),
158 fFlags(0)
159 {
160 if(count <= 0 || (uint32_t)c > 0x10ffff) {
161 // just allocate and do not do anything else
162 allocate(capacity);
163 } else {
164 // count > 0, allocate and fill the new string with count c's
165 int32_t unitCount = UTF_CHAR_LENGTH(c), length = count * unitCount;
166 if(capacity < length) {
167 capacity = length;
168 }
169 if(allocate(capacity)) {
170 UChar *array = getArrayStart();
171 int32_t i = 0;
172
173 // fill the new string with c
174 if(unitCount == 1) {
175 // fill with length UChars
176 while(i < length) {
177 array[i++] = (UChar)c;
178 }
179 } else {
180 // get the code units for c
181 UChar units[UTF_MAX_CHAR_LENGTH];
182 UTF_APPEND_CHAR_UNSAFE(units, i, c);
183
184 // now it must be i==unitCount
185 i = 0;
186
187 // for Unicode, unitCount can only be 1, 2, 3, or 4
188 // 1 is handled above
189 while(i < length) {
190 int32_t unitIdx = 0;
191 while(unitIdx < unitCount) {
192 array[i++]=units[unitIdx++];
193 }
194 }
195 }
196 }
197 setLength(length);
198 }
199 }
200
UnicodeString(UChar ch)201 UnicodeString::UnicodeString(UChar ch)
202 : fShortLength(1),
203 fFlags(kShortString)
204 {
205 fUnion.fStackBuffer[0] = ch;
206 }
207
UnicodeString(UChar32 ch)208 UnicodeString::UnicodeString(UChar32 ch)
209 : fShortLength(0),
210 fFlags(kShortString)
211 {
212 int32_t i = 0;
213 UBool isError = FALSE;
214 U16_APPEND(fUnion.fStackBuffer, i, US_STACKBUF_SIZE, ch, isError);
215 fShortLength = (int8_t)i;
216 }
217
UnicodeString(const UChar * text)218 UnicodeString::UnicodeString(const UChar *text)
219 : fShortLength(0),
220 fFlags(kShortString)
221 {
222 doReplace(0, 0, text, 0, -1);
223 }
224
UnicodeString(const UChar * text,int32_t textLength)225 UnicodeString::UnicodeString(const UChar *text,
226 int32_t textLength)
227 : fShortLength(0),
228 fFlags(kShortString)
229 {
230 doReplace(0, 0, text, 0, textLength);
231 }
232
UnicodeString(UBool isTerminated,const UChar * text,int32_t textLength)233 UnicodeString::UnicodeString(UBool isTerminated,
234 const UChar *text,
235 int32_t textLength)
236 : fShortLength(0),
237 fFlags(kReadonlyAlias)
238 {
239 if(text == NULL) {
240 // treat as an empty string, do not alias
241 setToEmpty();
242 } else if(textLength < -1 ||
243 (textLength == -1 && !isTerminated) ||
244 (textLength >= 0 && isTerminated && text[textLength] != 0)
245 ) {
246 setToBogus();
247 } else {
248 if(textLength == -1) {
249 // text is terminated, or else it would have failed the above test
250 textLength = u_strlen(text);
251 }
252 setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
253 }
254 }
255
UnicodeString(UChar * buff,int32_t buffLength,int32_t buffCapacity)256 UnicodeString::UnicodeString(UChar *buff,
257 int32_t buffLength,
258 int32_t buffCapacity)
259 : fShortLength(0),
260 fFlags(kWritableAlias)
261 {
262 if(buff == NULL) {
263 // treat as an empty string, do not alias
264 setToEmpty();
265 } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
266 setToBogus();
267 } else {
268 if(buffLength == -1) {
269 // fLength = u_strlen(buff); but do not look beyond buffCapacity
270 const UChar *p = buff, *limit = buff + buffCapacity;
271 while(p != limit && *p != 0) {
272 ++p;
273 }
274 buffLength = (int32_t)(p - buff);
275 }
276 setArray(buff, buffLength, buffCapacity);
277 }
278 }
279
UnicodeString(const char * src,int32_t length,EInvariant)280 UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant)
281 : fShortLength(0),
282 fFlags(kShortString)
283 {
284 if(src==NULL) {
285 // treat as an empty string
286 } else {
287 if(length<0) {
288 length=(int32_t)uprv_strlen(src);
289 }
290 if(cloneArrayIfNeeded(length, length, FALSE)) {
291 u_charsToUChars(src, getArrayStart(), length);
292 setLength(length);
293 } else {
294 setToBogus();
295 }
296 }
297 }
298
299 #if U_CHARSET_IS_UTF8
300
UnicodeString(const char * codepageData)301 UnicodeString::UnicodeString(const char *codepageData)
302 : fShortLength(0),
303 fFlags(kShortString) {
304 if(codepageData != 0) {
305 setToUTF8(codepageData);
306 }
307 }
308
UnicodeString(const char * codepageData,int32_t dataLength)309 UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength)
310 : fShortLength(0),
311 fFlags(kShortString) {
312 // if there's nothing to convert, do nothing
313 if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
314 return;
315 }
316 if(dataLength == -1) {
317 dataLength = (int32_t)uprv_strlen(codepageData);
318 }
319 setToUTF8(StringPiece(codepageData, dataLength));
320 }
321
322 // else see unistr_cnv.cpp
323 #endif
324
// Copy constructor: starts as an empty stack-buffer string and then takes
// over the contents of that via copyFrom().
UnicodeString::UnicodeString(const UnicodeString& that)
  : Replaceable(), fShortLength(0), fFlags(kShortString)
{
  copyFrom(that);
}
332
// Substring constructor: starts empty and is then set from that, beginning
// at srcStart, via setTo(that, srcStart).
UnicodeString::UnicodeString(const UnicodeString& that,
                             int32_t srcStart)
  : Replaceable(), fShortLength(0), fFlags(kShortString)
{
  setTo(that, srcStart);
}
341
// Substring constructor: starts empty and is then set from the range
// srcStart/srcLength of that via setTo(that, srcStart, srcLength).
UnicodeString::UnicodeString(const UnicodeString& that,
                             int32_t srcStart,
                             int32_t srcLength)
  : Replaceable(), fShortLength(0), fFlags(kShortString)
{
  setTo(that, srcStart, srcLength);
}
351
352 // Replaceable base class clone() default implementation, does not clone
353 Replaceable *
clone() const354 Replaceable::clone() const {
355 return NULL;
356 }
357
358 // UnicodeString overrides clone() with a real implementation
359 Replaceable *
clone() const360 UnicodeString::clone() const {
361 return new UnicodeString(*this);
362 }
363
364 //========================================
365 // array allocation
366 //========================================
367
368 UBool
allocate(int32_t capacity)369 UnicodeString::allocate(int32_t capacity) {
370 if(capacity <= US_STACKBUF_SIZE) {
371 fFlags = kShortString;
372 } else {
373 // count bytes for the refCounter and the string capacity, and
374 // round up to a multiple of 16; then divide by 4 and allocate int32_t's
375 // to be safely aligned for the refCount
376 // the +1 is for the NUL terminator, to avoid reallocation in getTerminatedBuffer()
377 int32_t words = (int32_t)(((sizeof(int32_t) + (capacity + 1) * U_SIZEOF_UCHAR + 15) & ~15) >> 2);
378 int32_t *array = (int32_t*) uprv_malloc( sizeof(int32_t) * words );
379 if(array != 0) {
380 // set initial refCount and point behind the refCount
381 *array++ = 1;
382
383 // have fArray point to the first UChar
384 fUnion.fFields.fArray = (UChar *)array;
385 fUnion.fFields.fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR));
386 fFlags = kLongString;
387 } else {
388 fShortLength = 0;
389 fUnion.fFields.fArray = 0;
390 fUnion.fFields.fCapacity = 0;
391 fFlags = kIsBogus;
392 return FALSE;
393 }
394 }
395 return TRUE;
396 }
397
398 //========================================
399 // Destructor
400 //========================================
~UnicodeString()401 UnicodeString::~UnicodeString()
402 {
403 releaseArray();
404 }
405
406 //========================================
407 // Factory methods
408 //========================================
409
fromUTF8(const StringPiece & utf8)410 UnicodeString UnicodeString::fromUTF8(const StringPiece &utf8) {
411 UnicodeString result;
412 result.setToUTF8(utf8);
413 return result;
414 }
415
fromUTF32(const UChar32 * utf32,int32_t length)416 UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {
417 UnicodeString result;
418 int32_t capacity;
419 // Most UTF-32 strings will be BMP-only and result in a same-length
420 // UTF-16 string. We overestimate the capacity just slightly,
421 // just in case there are a few supplementary characters.
422 if(length <= US_STACKBUF_SIZE) {
423 capacity = US_STACKBUF_SIZE;
424 } else {
425 capacity = length + (length >> 4) + 4;
426 }
427 do {
428 UChar *utf16 = result.getBuffer(capacity);
429 int32_t length16;
430 UErrorCode errorCode = U_ZERO_ERROR;
431 u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16,
432 utf32, length,
433 0xfffd, // Substitution character.
434 NULL, // Don't care about number of substitutions.
435 &errorCode);
436 result.releaseBuffer(length16);
437 if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
438 capacity = length16 + 1; // +1 for the terminating NUL.
439 continue;
440 } else if(U_FAILURE(errorCode)) {
441 result.setToBogus();
442 }
443 break;
444 } while(TRUE);
445 return result;
446 }
447
448 //========================================
449 // Assignment
450 //========================================
451
452 UnicodeString &
operator =(const UnicodeString & src)453 UnicodeString::operator=(const UnicodeString &src) {
454 return copyFrom(src);
455 }
456
457 UnicodeString &
fastCopyFrom(const UnicodeString & src)458 UnicodeString::fastCopyFrom(const UnicodeString &src) {
459 return copyFrom(src, TRUE);
460 }
461
462 UnicodeString &
copyFrom(const UnicodeString & src,UBool fastCopy)463 UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
464 // if assigning to ourselves, do nothing
465 if(this == 0 || this == &src) {
466 return *this;
467 }
468
469 // is the right side bogus?
470 if(&src == 0 || src.isBogus()) {
471 setToBogus();
472 return *this;
473 }
474
475 // delete the current contents
476 releaseArray();
477
478 if(src.isEmpty()) {
479 // empty string - use the stack buffer
480 setToEmpty();
481 return *this;
482 }
483
484 // we always copy the length
485 int32_t srcLength = src.length();
486 setLength(srcLength);
487
488 // fLength>0 and not an "open" src.getBuffer(minCapacity)
489 switch(src.fFlags) {
490 case kShortString:
491 // short string using the stack buffer, do the same
492 fFlags = kShortString;
493 uprv_memcpy(fUnion.fStackBuffer, src.fUnion.fStackBuffer, srcLength * U_SIZEOF_UCHAR);
494 break;
495 case kLongString:
496 // src uses a refCounted string buffer, use that buffer with refCount
497 // src is const, use a cast - we don't really change it
498 ((UnicodeString &)src).addRef();
499 // copy all fields, share the reference-counted buffer
500 fUnion.fFields.fArray = src.fUnion.fFields.fArray;
501 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
502 fFlags = src.fFlags;
503 break;
504 case kReadonlyAlias:
505 if(fastCopy) {
506 // src is a readonly alias, do the same
507 // -> maintain the readonly alias as such
508 fUnion.fFields.fArray = src.fUnion.fFields.fArray;
509 fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
510 fFlags = src.fFlags;
511 break;
512 }
513 // else if(!fastCopy) fall through to case kWritableAlias
514 // -> allocate a new buffer and copy the contents
515 case kWritableAlias:
516 // src is a writable alias; we make a copy of that instead
517 if(allocate(srcLength)) {
518 uprv_memcpy(getArrayStart(), src.getArrayStart(), srcLength * U_SIZEOF_UCHAR);
519 break;
520 }
521 // if there is not enough memory, then fall through to setting to bogus
522 default:
523 // if src is bogus, set ourselves to bogus
524 // do not call setToBogus() here because fArray and fFlags are not consistent here
525 fShortLength = 0;
526 fUnion.fFields.fArray = 0;
527 fUnion.fFields.fCapacity = 0;
528 fFlags = kIsBogus;
529 break;
530 }
531
532 return *this;
533 }
534
535 //========================================
536 // Miscellaneous operations
537 //========================================
538
unescape() const539 UnicodeString UnicodeString::unescape() const {
540 UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity
541 const UChar *array = getBuffer();
542 int32_t len = length();
543 int32_t prev = 0;
544 for (int32_t i=0;;) {
545 if (i == len) {
546 result.append(array, prev, len - prev);
547 break;
548 }
549 if (array[i++] == 0x5C /*'\\'*/) {
550 result.append(array, prev, (i - 1) - prev);
551 UChar32 c = unescapeAt(i); // advances i
552 if (c < 0) {
553 result.remove(); // return empty string
554 break; // invalid escape sequence
555 }
556 result.append(c);
557 prev = i;
558 }
559 }
560 return result;
561 }
562
unescapeAt(int32_t & offset) const563 UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
564 return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this);
565 }
566
567 //========================================
568 // Read-only implementation
569 //========================================
570 int8_t
doCompare(int32_t start,int32_t length,const UChar * srcChars,int32_t srcStart,int32_t srcLength) const571 UnicodeString::doCompare( int32_t start,
572 int32_t length,
573 const UChar *srcChars,
574 int32_t srcStart,
575 int32_t srcLength) const
576 {
577 // compare illegal string values
578 // treat const UChar *srcChars==NULL as an empty string
579 if(isBogus()) {
580 return -1;
581 }
582
583 // pin indices to legal values
584 pinIndices(start, length);
585
586 if(srcChars == NULL) {
587 srcStart = srcLength = 0;
588 }
589
590 // get the correct pointer
591 const UChar *chars = getArrayStart();
592
593 chars += start;
594 srcChars += srcStart;
595
596 int32_t minLength;
597 int8_t lengthResult;
598
599 // get the srcLength if necessary
600 if(srcLength < 0) {
601 srcLength = u_strlen(srcChars + srcStart);
602 }
603
604 // are we comparing different lengths?
605 if(length != srcLength) {
606 if(length < srcLength) {
607 minLength = length;
608 lengthResult = -1;
609 } else {
610 minLength = srcLength;
611 lengthResult = 1;
612 }
613 } else {
614 minLength = length;
615 lengthResult = 0;
616 }
617
618 /*
619 * note that uprv_memcmp() returns an int but we return an int8_t;
620 * we need to take care not to truncate the result -
621 * one way to do this is to right-shift the value to
622 * move the sign bit into the lower 8 bits and making sure that this
623 * does not become 0 itself
624 */
625
626 if(minLength > 0 && chars != srcChars) {
627 int32_t result;
628
629 # if U_IS_BIG_ENDIAN
630 // big-endian: byte comparison works
631 result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar));
632 if(result != 0) {
633 return (int8_t)(result >> 15 | 1);
634 }
635 # else
636 // little-endian: compare UChar units
637 do {
638 result = ((int32_t)*(chars++) - (int32_t)*(srcChars++));
639 if(result != 0) {
640 return (int8_t)(result >> 15 | 1);
641 }
642 } while(--minLength > 0);
643 # endif
644 }
645 return lengthResult;
646 }
647
648 /* String compare in code point order - doCompare() compares in code unit order. */
649 int8_t
doCompareCodePointOrder(int32_t start,int32_t length,const UChar * srcChars,int32_t srcStart,int32_t srcLength) const650 UnicodeString::doCompareCodePointOrder(int32_t start,
651 int32_t length,
652 const UChar *srcChars,
653 int32_t srcStart,
654 int32_t srcLength) const
655 {
656 // compare illegal string values
657 // treat const UChar *srcChars==NULL as an empty string
658 if(isBogus()) {
659 return -1;
660 }
661
662 // pin indices to legal values
663 pinIndices(start, length);
664
665 if(srcChars == NULL) {
666 srcStart = srcLength = 0;
667 }
668
669 int32_t diff = uprv_strCompare(getArrayStart() + start, length, srcChars + srcStart, srcLength, FALSE, TRUE);
670 /* translate the 32-bit result into an 8-bit one */
671 if(diff!=0) {
672 return (int8_t)(diff >> 15 | 1);
673 } else {
674 return 0;
675 }
676 }
677
678 int32_t
getLength() const679 UnicodeString::getLength() const {
680 return length();
681 }
682
683 UChar
getCharAt(int32_t offset) const684 UnicodeString::getCharAt(int32_t offset) const {
685 return charAt(offset);
686 }
687
688 UChar32
getChar32At(int32_t offset) const689 UnicodeString::getChar32At(int32_t offset) const {
690 return char32At(offset);
691 }
692
693 int32_t
countChar32(int32_t start,int32_t length) const694 UnicodeString::countChar32(int32_t start, int32_t length) const {
695 pinIndices(start, length);
696 // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
697 return u_countChar32(getArrayStart()+start, length);
698 }
699
700 UBool
hasMoreChar32Than(int32_t start,int32_t length,int32_t number) const701 UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
702 pinIndices(start, length);
703 // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
704 return u_strHasMoreChar32Than(getArrayStart()+start, length, number);
705 }
706
707 int32_t
moveIndex32(int32_t index,int32_t delta) const708 UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
709 // pin index
710 int32_t len = length();
711 if(index<0) {
712 index=0;
713 } else if(index>len) {
714 index=len;
715 }
716
717 const UChar *array = getArrayStart();
718 if(delta>0) {
719 UTF_FWD_N(array, index, len, delta);
720 } else {
721 UTF_BACK_N(array, 0, index, -delta);
722 }
723
724 return index;
725 }
726
727 void
doExtract(int32_t start,int32_t length,UChar * dst,int32_t dstStart) const728 UnicodeString::doExtract(int32_t start,
729 int32_t length,
730 UChar *dst,
731 int32_t dstStart) const
732 {
733 // pin indices to legal values
734 pinIndices(start, length);
735
736 // do not copy anything if we alias dst itself
737 const UChar *array = getArrayStart();
738 if(array + start != dst + dstStart) {
739 us_arrayCopy(array, start, dst, dstStart, length);
740 }
741 }
742
743 int32_t
extract(UChar * dest,int32_t destCapacity,UErrorCode & errorCode) const744 UnicodeString::extract(UChar *dest, int32_t destCapacity,
745 UErrorCode &errorCode) const {
746 int32_t len = length();
747 if(U_SUCCESS(errorCode)) {
748 if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
749 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
750 } else {
751 const UChar *array = getArrayStart();
752 if(len>0 && len<=destCapacity && array!=dest) {
753 uprv_memcpy(dest, array, len*U_SIZEOF_UCHAR);
754 }
755 return u_terminateUChars(dest, destCapacity, len, &errorCode);
756 }
757 }
758
759 return len;
760 }
761
762 int32_t
extract(int32_t start,int32_t length,char * target,int32_t targetCapacity,enum EInvariant) const763 UnicodeString::extract(int32_t start,
764 int32_t length,
765 char *target,
766 int32_t targetCapacity,
767 enum EInvariant) const
768 {
769 // if the arguments are illegal, then do nothing
770 if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) {
771 return 0;
772 }
773
774 // pin the indices to legal values
775 pinIndices(start, length);
776
777 if(length <= targetCapacity) {
778 u_UCharsToChars(getArrayStart() + start, target, length);
779 }
780 UErrorCode status = U_ZERO_ERROR;
781 return u_terminateChars(target, targetCapacity, length, &status);
782 }
783
784 UnicodeString
tempSubString(int32_t start,int32_t len) const785 UnicodeString::tempSubString(int32_t start, int32_t len) const {
786 pinIndices(start, len);
787 const UChar *array = getBuffer(); // not getArrayStart() to check kIsBogus & kOpenGetBuffer
788 if(array==NULL) {
789 array=fUnion.fStackBuffer; // anything not NULL because that would make an empty string
790 len=-2; // bogus result string
791 }
792 return UnicodeString(FALSE, array + start, len);
793 }
794
795 int32_t
toUTF8(int32_t start,int32_t len,char * target,int32_t capacity) const796 UnicodeString::toUTF8(int32_t start, int32_t len,
797 char *target, int32_t capacity) const {
798 pinIndices(start, len);
799 int32_t length8;
800 UErrorCode errorCode = U_ZERO_ERROR;
801 u_strToUTF8WithSub(target, capacity, &length8,
802 getBuffer() + start, len,
803 0xFFFD, // Standard substitution character.
804 NULL, // Don't care about number of substitutions.
805 &errorCode);
806 return length8;
807 }
808
809 #if U_CHARSET_IS_UTF8
810
811 int32_t
extract(int32_t start,int32_t len,char * target,uint32_t dstSize) const812 UnicodeString::extract(int32_t start, int32_t len,
813 char *target, uint32_t dstSize) const {
814 // if the arguments are illegal, then do nothing
815 if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {
816 return 0;
817 }
818 return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff);
819 }
820
821 // else see unistr_cnv.cpp
822 #endif
823
824 void
extractBetween(int32_t start,int32_t limit,UnicodeString & target) const825 UnicodeString::extractBetween(int32_t start,
826 int32_t limit,
827 UnicodeString& target) const {
828 pinIndex(start);
829 pinIndex(limit);
830 doExtract(start, limit - start, target);
831 }
832
833 // When converting from UTF-16 to UTF-8, the result will have at most 3 times
834 // as many bytes as the source has UChars.
835 // The "worst cases" are writing systems like Indic, Thai and CJK with
836 // 3:1 bytes:UChars.
837 void
toUTF8(ByteSink & sink) const838 UnicodeString::toUTF8(ByteSink &sink) const {
839 int32_t length16 = length();
840 if(length16 != 0) {
841 char stackBuffer[1024];
842 int32_t capacity = (int32_t)sizeof(stackBuffer);
843 UBool utf8IsOwned = FALSE;
844 char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,
845 3*length16,
846 stackBuffer, capacity,
847 &capacity);
848 int32_t length8 = 0;
849 UErrorCode errorCode = U_ZERO_ERROR;
850 u_strToUTF8WithSub(utf8, capacity, &length8,
851 getBuffer(), length16,
852 0xFFFD, // Standard substitution character.
853 NULL, // Don't care about number of substitutions.
854 &errorCode);
855 if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
856 utf8 = (char *)uprv_malloc(length8);
857 if(utf8 != NULL) {
858 utf8IsOwned = TRUE;
859 errorCode = U_ZERO_ERROR;
860 u_strToUTF8WithSub(utf8, length8, &length8,
861 getBuffer(), length16,
862 0xFFFD, // Standard substitution character.
863 NULL, // Don't care about number of substitutions.
864 &errorCode);
865 } else {
866 errorCode = U_MEMORY_ALLOCATION_ERROR;
867 }
868 }
869 if(U_SUCCESS(errorCode)) {
870 sink.Append(utf8, length8);
871 sink.Flush();
872 }
873 if(utf8IsOwned) {
874 uprv_free(utf8);
875 }
876 }
877 }
878
879 int32_t
toUTF32(UChar32 * utf32,int32_t capacity,UErrorCode & errorCode) const880 UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const {
881 int32_t length32=0;
882 if(U_SUCCESS(errorCode)) {
883 // getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
884 u_strToUTF32WithSub(utf32, capacity, &length32,
885 getBuffer(), length(),
886 0xfffd, // Substitution character.
887 NULL, // Don't care about number of substitutions.
888 &errorCode);
889 }
890 return length32;
891 }
892
893 int32_t
indexOf(const UChar * srcChars,int32_t srcStart,int32_t srcLength,int32_t start,int32_t length) const894 UnicodeString::indexOf(const UChar *srcChars,
895 int32_t srcStart,
896 int32_t srcLength,
897 int32_t start,
898 int32_t length) const
899 {
900 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
901 return -1;
902 }
903
904 // UnicodeString does not find empty substrings
905 if(srcLength < 0 && srcChars[srcStart] == 0) {
906 return -1;
907 }
908
909 // get the indices within bounds
910 pinIndices(start, length);
911
912 // find the first occurrence of the substring
913 const UChar *array = getArrayStart();
914 const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength);
915 if(match == NULL) {
916 return -1;
917 } else {
918 return (int32_t)(match - array);
919 }
920 }
921
922 int32_t
doIndexOf(UChar c,int32_t start,int32_t length) const923 UnicodeString::doIndexOf(UChar c,
924 int32_t start,
925 int32_t length) const
926 {
927 // pin indices
928 pinIndices(start, length);
929
930 // find the first occurrence of c
931 const UChar *array = getArrayStart();
932 const UChar *match = u_memchr(array + start, c, length);
933 if(match == NULL) {
934 return -1;
935 } else {
936 return (int32_t)(match - array);
937 }
938 }
939
940 int32_t
doIndexOf(UChar32 c,int32_t start,int32_t length) const941 UnicodeString::doIndexOf(UChar32 c,
942 int32_t start,
943 int32_t length) const {
944 // pin indices
945 pinIndices(start, length);
946
947 // find the first occurrence of c
948 const UChar *array = getArrayStart();
949 const UChar *match = u_memchr32(array + start, c, length);
950 if(match == NULL) {
951 return -1;
952 } else {
953 return (int32_t)(match - array);
954 }
955 }
956
957 int32_t
lastIndexOf(const UChar * srcChars,int32_t srcStart,int32_t srcLength,int32_t start,int32_t length) const958 UnicodeString::lastIndexOf(const UChar *srcChars,
959 int32_t srcStart,
960 int32_t srcLength,
961 int32_t start,
962 int32_t length) const
963 {
964 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
965 return -1;
966 }
967
968 // UnicodeString does not find empty substrings
969 if(srcLength < 0 && srcChars[srcStart] == 0) {
970 return -1;
971 }
972
973 // get the indices within bounds
974 pinIndices(start, length);
975
976 // find the last occurrence of the substring
977 const UChar *array = getArrayStart();
978 const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength);
979 if(match == NULL) {
980 return -1;
981 } else {
982 return (int32_t)(match - array);
983 }
984 }
985
986 int32_t
doLastIndexOf(UChar c,int32_t start,int32_t length) const987 UnicodeString::doLastIndexOf(UChar c,
988 int32_t start,
989 int32_t length) const
990 {
991 if(isBogus()) {
992 return -1;
993 }
994
995 // pin indices
996 pinIndices(start, length);
997
998 // find the last occurrence of c
999 const UChar *array = getArrayStart();
1000 const UChar *match = u_memrchr(array + start, c, length);
1001 if(match == NULL) {
1002 return -1;
1003 } else {
1004 return (int32_t)(match - array);
1005 }
1006 }
1007
1008 int32_t
doLastIndexOf(UChar32 c,int32_t start,int32_t length) const1009 UnicodeString::doLastIndexOf(UChar32 c,
1010 int32_t start,
1011 int32_t length) const {
1012 // pin indices
1013 pinIndices(start, length);
1014
1015 // find the last occurrence of c
1016 const UChar *array = getArrayStart();
1017 const UChar *match = u_memrchr32(array + start, c, length);
1018 if(match == NULL) {
1019 return -1;
1020 } else {
1021 return (int32_t)(match - array);
1022 }
1023 }
1024
1025 //========================================
1026 // Write implementation
1027 //========================================
1028
1029 UnicodeString&
findAndReplace(int32_t start,int32_t length,const UnicodeString & oldText,int32_t oldStart,int32_t oldLength,const UnicodeString & newText,int32_t newStart,int32_t newLength)1030 UnicodeString::findAndReplace(int32_t start,
1031 int32_t length,
1032 const UnicodeString& oldText,
1033 int32_t oldStart,
1034 int32_t oldLength,
1035 const UnicodeString& newText,
1036 int32_t newStart,
1037 int32_t newLength)
1038 {
1039 if(isBogus() || oldText.isBogus() || newText.isBogus()) {
1040 return *this;
1041 }
1042
1043 pinIndices(start, length);
1044 oldText.pinIndices(oldStart, oldLength);
1045 newText.pinIndices(newStart, newLength);
1046
1047 if(oldLength == 0) {
1048 return *this;
1049 }
1050
1051 while(length > 0 && length >= oldLength) {
1052 int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);
1053 if(pos < 0) {
1054 // no more oldText's here: done
1055 break;
1056 } else {
1057 // we found oldText, replace it by newText and go beyond it
1058 replace(pos, oldLength, newText, newStart, newLength);
1059 length -= pos + oldLength - start;
1060 start = pos + newLength;
1061 }
1062 }
1063
1064 return *this;
1065 }
1066
1067
1068 void
setToBogus()1069 UnicodeString::setToBogus()
1070 {
1071 releaseArray();
1072
1073 fShortLength = 0;
1074 fUnion.fFields.fArray = 0;
1075 fUnion.fFields.fCapacity = 0;
1076 fFlags = kIsBogus;
1077 }
1078
1079 // turn a bogus string into an empty one
1080 void
unBogus()1081 UnicodeString::unBogus() {
1082 if(fFlags & kIsBogus) {
1083 setToEmpty();
1084 }
1085 }
1086
1087 // setTo() analogous to the readonly-aliasing constructor with the same signature
1088 UnicodeString &
setTo(UBool isTerminated,const UChar * text,int32_t textLength)1089 UnicodeString::setTo(UBool isTerminated,
1090 const UChar *text,
1091 int32_t textLength)
1092 {
1093 if(fFlags & kOpenGetBuffer) {
1094 // do not modify a string that has an "open" getBuffer(minCapacity)
1095 return *this;
1096 }
1097
1098 if(text == NULL) {
1099 // treat as an empty string, do not alias
1100 releaseArray();
1101 setToEmpty();
1102 return *this;
1103 }
1104
1105 if( textLength < -1 ||
1106 (textLength == -1 && !isTerminated) ||
1107 (textLength >= 0 && isTerminated && text[textLength] != 0)
1108 ) {
1109 setToBogus();
1110 return *this;
1111 }
1112
1113 releaseArray();
1114
1115 if(textLength == -1) {
1116 // text is terminated, or else it would have failed the above test
1117 textLength = u_strlen(text);
1118 }
1119 setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
1120
1121 fFlags = kReadonlyAlias;
1122 return *this;
1123 }
1124
1125 // setTo() analogous to the writable-aliasing constructor with the same signature
1126 UnicodeString &
setTo(UChar * buffer,int32_t buffLength,int32_t buffCapacity)1127 UnicodeString::setTo(UChar *buffer,
1128 int32_t buffLength,
1129 int32_t buffCapacity) {
1130 if(fFlags & kOpenGetBuffer) {
1131 // do not modify a string that has an "open" getBuffer(minCapacity)
1132 return *this;
1133 }
1134
1135 if(buffer == NULL) {
1136 // treat as an empty string, do not alias
1137 releaseArray();
1138 setToEmpty();
1139 return *this;
1140 }
1141
1142 if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
1143 setToBogus();
1144 return *this;
1145 } else if(buffLength == -1) {
1146 // buffLength = u_strlen(buff); but do not look beyond buffCapacity
1147 const UChar *p = buffer, *limit = buffer + buffCapacity;
1148 while(p != limit && *p != 0) {
1149 ++p;
1150 }
1151 buffLength = (int32_t)(p - buffer);
1152 }
1153
1154 releaseArray();
1155
1156 setArray(buffer, buffLength, buffCapacity);
1157 fFlags = kWritableAlias;
1158 return *this;
1159 }
1160
setToUTF8(const StringPiece & utf8)1161 UnicodeString &UnicodeString::setToUTF8(const StringPiece &utf8) {
1162 unBogus();
1163 int32_t length = utf8.length();
1164 int32_t capacity;
1165 // The UTF-16 string will be at most as long as the UTF-8 string.
1166 if(length <= US_STACKBUF_SIZE) {
1167 capacity = US_STACKBUF_SIZE;
1168 } else {
1169 capacity = length + 1; // +1 for the terminating NUL.
1170 }
1171 UChar *utf16 = getBuffer(capacity);
1172 int32_t length16;
1173 UErrorCode errorCode = U_ZERO_ERROR;
1174 u_strFromUTF8WithSub(utf16, getCapacity(), &length16,
1175 utf8.data(), length,
1176 0xfffd, // Substitution character.
1177 NULL, // Don't care about number of substitutions.
1178 &errorCode);
1179 releaseBuffer(length16);
1180 if(U_FAILURE(errorCode)) {
1181 setToBogus();
1182 }
1183 return *this;
1184 }
1185
1186 UnicodeString&
setCharAt(int32_t offset,UChar c)1187 UnicodeString::setCharAt(int32_t offset,
1188 UChar c)
1189 {
1190 int32_t len = length();
1191 if(cloneArrayIfNeeded() && len > 0) {
1192 if(offset < 0) {
1193 offset = 0;
1194 } else if(offset >= len) {
1195 offset = len - 1;
1196 }
1197
1198 getArrayStart()[offset] = c;
1199 }
1200 return *this;
1201 }
1202
1203 UnicodeString&
doReplace(int32_t start,int32_t length,const UnicodeString & src,int32_t srcStart,int32_t srcLength)1204 UnicodeString::doReplace( int32_t start,
1205 int32_t length,
1206 const UnicodeString& src,
1207 int32_t srcStart,
1208 int32_t srcLength)
1209 {
1210 if(!src.isBogus()) {
1211 // pin the indices to legal values
1212 src.pinIndices(srcStart, srcLength);
1213
1214 // get the characters from src
1215 // and replace the range in ourselves with them
1216 return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);
1217 } else {
1218 // remove the range
1219 return doReplace(start, length, 0, 0, 0);
1220 }
1221 }
1222
1223 UnicodeString&
doReplace(int32_t start,int32_t length,const UChar * srcChars,int32_t srcStart,int32_t srcLength)1224 UnicodeString::doReplace(int32_t start,
1225 int32_t length,
1226 const UChar *srcChars,
1227 int32_t srcStart,
1228 int32_t srcLength)
1229 {
1230 if(!isWritable()) {
1231 return *this;
1232 }
1233
1234 int32_t oldLength = this->length();
1235
1236 // optimize (read-only alias).remove(0, start) and .remove(start, end)
1237 if((fFlags&kBufferIsReadonly) && srcLength == 0) {
1238 if(start == 0) {
1239 // remove prefix by adjusting the array pointer
1240 pinIndex(length);
1241 fUnion.fFields.fArray += length;
1242 fUnion.fFields.fCapacity -= length;
1243 setLength(oldLength - length);
1244 return *this;
1245 } else {
1246 pinIndex(start);
1247 if(length >= (oldLength - start)) {
1248 // remove suffix by reducing the length (like truncate())
1249 setLength(start);
1250 fUnion.fFields.fCapacity = start; // not NUL-terminated any more
1251 return *this;
1252 }
1253 }
1254 }
1255
1256 if(srcChars == 0) {
1257 srcStart = srcLength = 0;
1258 } else if(srcLength < 0) {
1259 // get the srcLength if necessary
1260 srcLength = u_strlen(srcChars + srcStart);
1261 }
1262
1263 // calculate the size of the string after the replace
1264 int32_t newLength;
1265
1266 // optimize append() onto a large-enough, owned string
1267 if(start >= oldLength) {
1268 newLength = oldLength + srcLength;
1269 if(newLength <= getCapacity() && isBufferWritable()) {
1270 UChar *oldArray = getArrayStart();
1271 // Do not copy characters when
1272 // UChar *buffer=str.getAppendBuffer(...);
1273 // is followed by
1274 // str.append(buffer, length);
1275 // or
1276 // str.appendString(buffer, length)
1277 // or similar.
1278 if(srcChars + srcStart != oldArray + start || start > oldLength) {
1279 us_arrayCopy(srcChars, srcStart, oldArray, oldLength, srcLength);
1280 }
1281 setLength(newLength);
1282 return *this;
1283 } else {
1284 // pin the indices to legal values
1285 start = oldLength;
1286 length = 0;
1287 }
1288 } else {
1289 // pin the indices to legal values
1290 pinIndices(start, length);
1291
1292 newLength = oldLength - length + srcLength;
1293 }
1294
1295 // the following may change fArray but will not copy the current contents;
1296 // therefore we need to keep the current fArray
1297 UChar oldStackBuffer[US_STACKBUF_SIZE];
1298 UChar *oldArray;
1299 if((fFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) {
1300 // copy the stack buffer contents because it will be overwritten with
1301 // fUnion.fFields values
1302 u_memcpy(oldStackBuffer, fUnion.fStackBuffer, oldLength);
1303 oldArray = oldStackBuffer;
1304 } else {
1305 oldArray = getArrayStart();
1306 }
1307
1308 // clone our array and allocate a bigger array if needed
1309 int32_t *bufferToDelete = 0;
1310 if(!cloneArrayIfNeeded(newLength, newLength + (newLength >> 2) + kGrowSize,
1311 FALSE, &bufferToDelete)
1312 ) {
1313 return *this;
1314 }
1315
1316 // now do the replace
1317
1318 UChar *newArray = getArrayStart();
1319 if(newArray != oldArray) {
1320 // if fArray changed, then we need to copy everything except what will change
1321 us_arrayCopy(oldArray, 0, newArray, 0, start);
1322 us_arrayCopy(oldArray, start + length,
1323 newArray, start + srcLength,
1324 oldLength - (start + length));
1325 } else if(length != srcLength) {
1326 // fArray did not change; copy only the portion that isn't changing, leaving a hole
1327 us_arrayCopy(oldArray, start + length,
1328 newArray, start + srcLength,
1329 oldLength - (start + length));
1330 }
1331
1332 // now fill in the hole with the new string
1333 us_arrayCopy(srcChars, srcStart, newArray, start, srcLength);
1334
1335 setLength(newLength);
1336
1337 // delayed delete in case srcChars == fArray when we started, and
1338 // to keep oldArray alive for the above operations
1339 if (bufferToDelete) {
1340 uprv_free(bufferToDelete);
1341 }
1342
1343 return *this;
1344 }
1345
1346 /**
1347 * Replaceable API
1348 */
1349 void
handleReplaceBetween(int32_t start,int32_t limit,const UnicodeString & text)1350 UnicodeString::handleReplaceBetween(int32_t start,
1351 int32_t limit,
1352 const UnicodeString& text) {
1353 replaceBetween(start, limit, text);
1354 }
1355
1356 /**
1357 * Replaceable API
1358 */
1359 void
copy(int32_t start,int32_t limit,int32_t dest)1360 UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
1361 if (limit <= start) {
1362 return; // Nothing to do; avoid bogus malloc call
1363 }
1364 UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) );
1365 // Check to make sure text is not null.
1366 if (text != NULL) {
1367 extractBetween(start, limit, text, 0);
1368 insert(dest, text, 0, limit - start);
1369 uprv_free(text);
1370 }
1371 }
1372
1373 /**
1374 * Replaceable API
1375 *
1376 * NOTE: This is for the Replaceable class. There is no rep.cpp,
1377 * so we implement this function here.
1378 */
hasMetaData() const1379 UBool Replaceable::hasMetaData() const {
1380 return TRUE;
1381 }
1382
1383 /**
1384 * Replaceable API
1385 */
hasMetaData() const1386 UBool UnicodeString::hasMetaData() const {
1387 return FALSE;
1388 }
1389
1390 UnicodeString&
doReverse(int32_t start,int32_t length)1391 UnicodeString::doReverse(int32_t start, int32_t length) {
1392 if(length <= 1 || !cloneArrayIfNeeded()) {
1393 return *this;
1394 }
1395
1396 // pin the indices to legal values
1397 pinIndices(start, length);
1398 if(length <= 1) { // pinIndices() might have shrunk the length
1399 return *this;
1400 }
1401
1402 UChar *left = getArrayStart() + start;
1403 UChar *right = left + length - 1; // -1 for inclusive boundary (length>=2)
1404 UChar swap;
1405 UBool hasSupplementary = FALSE;
1406
1407 // Before the loop we know left<right because length>=2.
1408 do {
1409 hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left);
1410 hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right);
1411 *right-- = swap;
1412 } while(left < right);
1413 // Make sure to test the middle code unit of an odd-length string.
1414 // Redundant if the length is even.
1415 hasSupplementary |= (UBool)U16_IS_LEAD(*left);
1416
1417 /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
1418 if(hasSupplementary) {
1419 UChar swap2;
1420
1421 left = getArrayStart() + start;
1422 right = left + length - 1; // -1 so that we can look at *(left+1) if left<right
1423 while(left < right) {
1424 if(U16_IS_TRAIL(swap = *left) && U16_IS_LEAD(swap2 = *(left + 1))) {
1425 *left++ = swap2;
1426 *left++ = swap;
1427 } else {
1428 ++left;
1429 }
1430 }
1431 }
1432
1433 return *this;
1434 }
1435
1436 UBool
padLeading(int32_t targetLength,UChar padChar)1437 UnicodeString::padLeading(int32_t targetLength,
1438 UChar padChar)
1439 {
1440 int32_t oldLength = length();
1441 if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1442 return FALSE;
1443 } else {
1444 // move contents up by padding width
1445 UChar *array = getArrayStart();
1446 int32_t start = targetLength - oldLength;
1447 us_arrayCopy(array, 0, array, start, oldLength);
1448
1449 // fill in padding character
1450 while(--start >= 0) {
1451 array[start] = padChar;
1452 }
1453 setLength(targetLength);
1454 return TRUE;
1455 }
1456 }
1457
1458 UBool
padTrailing(int32_t targetLength,UChar padChar)1459 UnicodeString::padTrailing(int32_t targetLength,
1460 UChar padChar)
1461 {
1462 int32_t oldLength = length();
1463 if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1464 return FALSE;
1465 } else {
1466 // fill in padding character
1467 UChar *array = getArrayStart();
1468 int32_t length = targetLength;
1469 while(--length >= oldLength) {
1470 array[length] = padChar;
1471 }
1472 setLength(targetLength);
1473 return TRUE;
1474 }
1475 }
1476
1477 //========================================
1478 // Hashing
1479 //========================================
1480 int32_t
doHashCode() const1481 UnicodeString::doHashCode() const
1482 {
1483 /* Delegate hash computation to uhash. This makes UnicodeString
1484 * hashing consistent with UChar* hashing. */
1485 int32_t hashCode = uhash_hashUCharsN(getArrayStart(), length());
1486 if (hashCode == kInvalidHashCode) {
1487 hashCode = kEmptyHashCode;
1488 }
1489 return hashCode;
1490 }
1491
1492 //========================================
1493 // External Buffer
1494 //========================================
1495
1496 UChar *
getBuffer(int32_t minCapacity)1497 UnicodeString::getBuffer(int32_t minCapacity) {
1498 if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
1499 fFlags|=kOpenGetBuffer;
1500 fShortLength=0;
1501 return getArrayStart();
1502 } else {
1503 return 0;
1504 }
1505 }
1506
1507 void
releaseBuffer(int32_t newLength)1508 UnicodeString::releaseBuffer(int32_t newLength) {
1509 if(fFlags&kOpenGetBuffer && newLength>=-1) {
1510 // set the new fLength
1511 int32_t capacity=getCapacity();
1512 if(newLength==-1) {
1513 // the new length is the string length, capped by fCapacity
1514 const UChar *array=getArrayStart(), *p=array, *limit=array+capacity;
1515 while(p<limit && *p!=0) {
1516 ++p;
1517 }
1518 newLength=(int32_t)(p-array);
1519 } else if(newLength>capacity) {
1520 newLength=capacity;
1521 }
1522 setLength(newLength);
1523 fFlags&=~kOpenGetBuffer;
1524 }
1525 }
1526
1527 //========================================
1528 // Miscellaneous
1529 //========================================
1530 UBool
cloneArrayIfNeeded(int32_t newCapacity,int32_t growCapacity,UBool doCopyArray,int32_t ** pBufferToDelete,UBool forceClone)1531 UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
1532 int32_t growCapacity,
1533 UBool doCopyArray,
1534 int32_t **pBufferToDelete,
1535 UBool forceClone) {
1536 // default parameters need to be static, therefore
1537 // the defaults are -1 to have convenience defaults
1538 if(newCapacity == -1) {
1539 newCapacity = getCapacity();
1540 }
1541
1542 // while a getBuffer(minCapacity) is "open",
1543 // prevent any modifications of the string by returning FALSE here
1544 // if the string is bogus, then only an assignment or similar can revive it
1545 if(!isWritable()) {
1546 return FALSE;
1547 }
1548
1549 /*
1550 * We need to make a copy of the array if
1551 * the buffer is read-only, or
1552 * the buffer is refCounted (shared), and refCount>1, or
1553 * the buffer is too small.
1554 * Return FALSE if memory could not be allocated.
1555 */
1556 if(forceClone ||
1557 fFlags & kBufferIsReadonly ||
1558 (fFlags & kRefCounted && refCount() > 1) ||
1559 newCapacity > getCapacity()
1560 ) {
1561 // check growCapacity for default value and use of the stack buffer
1562 if(growCapacity == -1) {
1563 growCapacity = newCapacity;
1564 } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
1565 growCapacity = US_STACKBUF_SIZE;
1566 }
1567
1568 // save old values
1569 UChar oldStackBuffer[US_STACKBUF_SIZE];
1570 UChar *oldArray;
1571 uint8_t flags = fFlags;
1572
1573 if(flags&kUsingStackBuffer) {
1574 if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
1575 // copy the stack buffer contents because it will be overwritten with
1576 // fUnion.fFields values
1577 us_arrayCopy(fUnion.fStackBuffer, 0, oldStackBuffer, 0, fShortLength);
1578 oldArray = oldStackBuffer;
1579 } else {
1580 oldArray = 0; // no need to copy from stack buffer to itself
1581 }
1582 } else {
1583 oldArray = fUnion.fFields.fArray;
1584 }
1585
1586 // allocate a new array
1587 if(allocate(growCapacity) ||
1588 (newCapacity < growCapacity && allocate(newCapacity))
1589 ) {
1590 if(doCopyArray && oldArray != 0) {
1591 // copy the contents
1592 // do not copy more than what fits - it may be smaller than before
1593 int32_t minLength = length();
1594 newCapacity = getCapacity();
1595 if(newCapacity < minLength) {
1596 minLength = newCapacity;
1597 setLength(minLength);
1598 }
1599 us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);
1600 } else {
1601 fShortLength = 0;
1602 }
1603
1604 // release the old array
1605 if(flags & kRefCounted) {
1606 // the array is refCounted; decrement and release if 0
1607 int32_t *pRefCount = ((int32_t *)oldArray - 1);
1608 if(umtx_atomic_dec(pRefCount) == 0) {
1609 if(pBufferToDelete == 0) {
1610 uprv_free(pRefCount);
1611 } else {
1612 // the caller requested to delete it himself
1613 *pBufferToDelete = pRefCount;
1614 }
1615 }
1616 }
1617 } else {
1618 // not enough memory for growCapacity and not even for the smaller newCapacity
1619 // reset the old values for setToBogus() to release the array
1620 if(!(flags&kUsingStackBuffer)) {
1621 fUnion.fFields.fArray = oldArray;
1622 }
1623 fFlags = flags;
1624 setToBogus();
1625 return FALSE;
1626 }
1627 }
1628 return TRUE;
1629 }
1630
1631 // UnicodeStringAppendable ------------------------------------------------- ***
1632
1633 UBool
appendCodeUnit(UChar c)1634 UnicodeStringAppendable::appendCodeUnit(UChar c) {
1635 return str.doReplace(str.length(), 0, &c, 0, 1).isWritable();
1636 }
1637
1638 UBool
appendCodePoint(UChar32 c)1639 UnicodeStringAppendable::appendCodePoint(UChar32 c) {
1640 UChar buffer[U16_MAX_LENGTH];
1641 int32_t cLength = 0;
1642 UBool isError = FALSE;
1643 U16_APPEND(buffer, cLength, U16_MAX_LENGTH, c, isError);
1644 return !isError && str.doReplace(str.length(), 0, buffer, 0, cLength).isWritable();
1645 }
1646
1647 UBool
appendString(const UChar * s,int32_t length)1648 UnicodeStringAppendable::appendString(const UChar *s, int32_t length) {
1649 return str.doReplace(str.length(), 0, s, 0, length).isWritable();
1650 }
1651
1652 UBool
reserveAppendCapacity(int32_t appendCapacity)1653 UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity) {
1654 return str.cloneArrayIfNeeded(str.length() + appendCapacity);
1655 }
1656
1657 UChar *
getAppendBuffer(int32_t minCapacity,int32_t desiredCapacityHint,UChar * scratch,int32_t scratchCapacity,int32_t * resultCapacity)1658 UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity,
1659 int32_t desiredCapacityHint,
1660 UChar *scratch, int32_t scratchCapacity,
1661 int32_t *resultCapacity) {
1662 if(minCapacity < 1 || scratchCapacity < minCapacity) {
1663 *resultCapacity = 0;
1664 return NULL;
1665 }
1666 int32_t oldLength = str.length();
1667 if(str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) {
1668 *resultCapacity = str.getCapacity() - oldLength;
1669 return str.getArrayStart() + oldLength;
1670 }
1671 *resultCapacity = scratchCapacity;
1672 return scratch;
1673 }
1674
1675 U_NAMESPACE_END
1676
1677 #ifdef U_STATIC_IMPLEMENTATION
1678 /*
1679 This should never be called. It is defined here to make sure that the
1680 virtual vector deleting destructor is defined within unistr.cpp.
1681 The vector deleting destructor is already a part of UObject,
1682 but defining it here makes sure that it is included with this object file.
1683 This makes sure that static library dependencies are kept to a minimum.
1684 */
uprv_UnicodeStringDummy(void)1685 static void uprv_UnicodeStringDummy(void) {
1686 U_NAMESPACE_USE
1687 delete [] (new UnicodeString[2]);
1688 }
1689 #endif
1690