1 /*
2 ******************************************************************************
3 * Copyright (C) 1999-2010, International Business Machines Corporation and *
4 * others. All Rights Reserved. *
5 ******************************************************************************
6 *
7 * File unistr.cpp
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 09/25/98 stephen Creation.
13 * 04/20/99 stephen Overhauled per 4/16 code review.
14 * 07/09/99 stephen Renamed {hi,lo},{byte,word} to icu_X for HP/UX
15 * 11/18/99 aliu Added handleReplaceBetween() to make inherit from
16 * Replaceable.
17 * 06/25/01 grhoten Removed the dependency on iostream
18 ******************************************************************************
19 */
20
21 #include "unicode/utypes.h"
22 #include "unicode/putil.h"
23 #include "cstring.h"
24 #include "cmemory.h"
25 #include "unicode/ustring.h"
26 #include "unicode/unistr.h"
27 #include "uhash.h"
28 #include "ustr_imp.h"
29 #include "umutex.h"
30
31 #if 0
32
33 #if U_IOSTREAM_SOURCE >= 199711
34 #include <iostream>
35 using namespace std;
36 #elif U_IOSTREAM_SOURCE >= 198506
37 #include <iostream.h>
38 #endif
39
40 //DEBUGGING
41 void
42 print(const UnicodeString& s,
43 const char *name)
44 {
45 UChar c;
46 cout << name << ":|";
47 for(int i = 0; i < s.length(); ++i) {
48 c = s[i];
49 if(c>= 0x007E || c < 0x0020)
50 cout << "[0x" << hex << s[i] << "]";
51 else
52 cout << (char) s[i];
53 }
54 cout << '|' << endl;
55 }
56
57 void
58 print(const UChar *s,
59 int32_t len,
60 const char *name)
61 {
62 UChar c;
63 cout << name << ":|";
64 for(int i = 0; i < len; ++i) {
65 c = s[i];
66 if(c>= 0x007E || c < 0x0020)
67 cout << "[0x" << hex << s[i] << "]";
68 else
69 cout << (char) s[i];
70 }
71 cout << '|' << endl;
72 }
73 // END DEBUGGING
74 #endif
75
76 // Local function definitions for now
77
78 // need to copy areas that may overlap
79 static
80 inline void
us_arrayCopy(const UChar * src,int32_t srcStart,UChar * dst,int32_t dstStart,int32_t count)81 us_arrayCopy(const UChar *src, int32_t srcStart,
82 UChar *dst, int32_t dstStart, int32_t count)
83 {
84 if(count>0) {
85 uprv_memmove(dst+dstStart, src+srcStart, (size_t)(count*sizeof(*src)));
86 }
87 }
88
89 // u_unescapeAt() callback to get a UChar from a UnicodeString
90 U_CDECL_BEGIN
91 static UChar U_CALLCONV
UnicodeString_charAt(int32_t offset,void * context)92 UnicodeString_charAt(int32_t offset, void *context) {
93 return ((U_NAMESPACE_QUALIFIER UnicodeString*) context)->charAt(offset);
94 }
95 U_CDECL_END
96
97 U_NAMESPACE_BEGIN
98
99 /* The Replaceable virtual destructor can't be defined in the header
100 due to how AIX works with multiple definitions of virtual functions.
101 */
~Replaceable()102 Replaceable::~Replaceable() {}
Replaceable()103 Replaceable::Replaceable() {}
104 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
105
106 UnicodeString U_EXPORT2
107 operator+ (const UnicodeString &s1, const UnicodeString &s2) {
108 return
109 UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0).
110 append(s1).
111 append(s2);
112 }
113
114 //========================================
115 // Reference Counting functions, put at top of file so that optimizing compilers
116 // have a chance to automatically inline.
117 //========================================
118
119 void
addRef()120 UnicodeString::addRef()
121 { umtx_atomic_inc((int32_t *)fUnion.fFields.fArray - 1);}
122
123 int32_t
removeRef()124 UnicodeString::removeRef()
125 { return umtx_atomic_dec((int32_t *)fUnion.fFields.fArray - 1);}
126
127 int32_t
refCount() const128 UnicodeString::refCount() const
129 {
130 umtx_lock(NULL);
131 // Note: without the lock to force a memory barrier, we might see a very
132 // stale value on some multi-processor systems.
133 int32_t count = *((int32_t *)fUnion.fFields.fArray - 1);
134 umtx_unlock(NULL);
135 return count;
136 }
137
138 void
releaseArray()139 UnicodeString::releaseArray() {
140 if((fFlags & kRefCounted) && removeRef() == 0) {
141 uprv_free((int32_t *)fUnion.fFields.fArray - 1);
142 }
143 }
144
145
146
147 //========================================
148 // Constructors
149 //========================================
UnicodeString()150 UnicodeString::UnicodeString()
151 : fShortLength(0),
152 fFlags(kShortString)
153 {}
154
UnicodeString(int32_t capacity,UChar32 c,int32_t count)155 UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count)
156 : fShortLength(0),
157 fFlags(0)
158 {
159 if(count <= 0 || (uint32_t)c > 0x10ffff) {
160 // just allocate and do not do anything else
161 allocate(capacity);
162 } else {
163 // count > 0, allocate and fill the new string with count c's
164 int32_t unitCount = UTF_CHAR_LENGTH(c), length = count * unitCount;
165 if(capacity < length) {
166 capacity = length;
167 }
168 if(allocate(capacity)) {
169 UChar *array = getArrayStart();
170 int32_t i = 0;
171
172 // fill the new string with c
173 if(unitCount == 1) {
174 // fill with length UChars
175 while(i < length) {
176 array[i++] = (UChar)c;
177 }
178 } else {
179 // get the code units for c
180 UChar units[UTF_MAX_CHAR_LENGTH];
181 UTF_APPEND_CHAR_UNSAFE(units, i, c);
182
183 // now it must be i==unitCount
184 i = 0;
185
186 // for Unicode, unitCount can only be 1, 2, 3, or 4
187 // 1 is handled above
188 while(i < length) {
189 int32_t unitIdx = 0;
190 while(unitIdx < unitCount) {
191 array[i++]=units[unitIdx++];
192 }
193 }
194 }
195 }
196 setLength(length);
197 }
198 }
199
UnicodeString(UChar ch)200 UnicodeString::UnicodeString(UChar ch)
201 : fShortLength(1),
202 fFlags(kShortString)
203 {
204 fUnion.fStackBuffer[0] = ch;
205 }
206
UnicodeString(UChar32 ch)207 UnicodeString::UnicodeString(UChar32 ch)
208 : fShortLength(0),
209 fFlags(kShortString)
210 {
211 int32_t i = 0;
212 UBool isError = FALSE;
213 U16_APPEND(fUnion.fStackBuffer, i, US_STACKBUF_SIZE, ch, isError);
214 fShortLength = (int8_t)i;
215 }
216
UnicodeString(const UChar * text)217 UnicodeString::UnicodeString(const UChar *text)
218 : fShortLength(0),
219 fFlags(kShortString)
220 {
221 doReplace(0, 0, text, 0, -1);
222 }
223
UnicodeString(const UChar * text,int32_t textLength)224 UnicodeString::UnicodeString(const UChar *text,
225 int32_t textLength)
226 : fShortLength(0),
227 fFlags(kShortString)
228 {
229 doReplace(0, 0, text, 0, textLength);
230 }
231
UnicodeString(UBool isTerminated,const UChar * text,int32_t textLength)232 UnicodeString::UnicodeString(UBool isTerminated,
233 const UChar *text,
234 int32_t textLength)
235 : fShortLength(0),
236 fFlags(kReadonlyAlias)
237 {
238 if(text == NULL) {
239 // treat as an empty string, do not alias
240 setToEmpty();
241 } else if(textLength < -1 ||
242 (textLength == -1 && !isTerminated) ||
243 (textLength >= 0 && isTerminated && text[textLength] != 0)
244 ) {
245 setToBogus();
246 } else {
247 if(textLength == -1) {
248 // text is terminated, or else it would have failed the above test
249 textLength = u_strlen(text);
250 }
251 setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
252 }
253 }
254
UnicodeString(UChar * buff,int32_t buffLength,int32_t buffCapacity)255 UnicodeString::UnicodeString(UChar *buff,
256 int32_t buffLength,
257 int32_t buffCapacity)
258 : fShortLength(0),
259 fFlags(kWritableAlias)
260 {
261 if(buff == NULL) {
262 // treat as an empty string, do not alias
263 setToEmpty();
264 } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
265 setToBogus();
266 } else {
267 if(buffLength == -1) {
268 // fLength = u_strlen(buff); but do not look beyond buffCapacity
269 const UChar *p = buff, *limit = buff + buffCapacity;
270 while(p != limit && *p != 0) {
271 ++p;
272 }
273 buffLength = (int32_t)(p - buff);
274 }
275 setArray(buff, buffLength, buffCapacity);
276 }
277 }
278
UnicodeString(const char * src,int32_t length,EInvariant)279 UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant)
280 : fShortLength(0),
281 fFlags(kShortString)
282 {
283 if(src==NULL) {
284 // treat as an empty string
285 } else {
286 if(length<0) {
287 length=(int32_t)uprv_strlen(src);
288 }
289 if(cloneArrayIfNeeded(length, length, FALSE)) {
290 u_charsToUChars(src, getArrayStart(), length);
291 setLength(length);
292 } else {
293 setToBogus();
294 }
295 }
296 }
297
298 #if U_CHARSET_IS_UTF8
299
UnicodeString(const char * codepageData)300 UnicodeString::UnicodeString(const char *codepageData)
301 : fShortLength(0),
302 fFlags(kShortString) {
303 if(codepageData != 0) {
304 setToUTF8(codepageData);
305 }
306 }
307
UnicodeString(const char * codepageData,int32_t dataLength)308 UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength)
309 : fShortLength(0),
310 fFlags(kShortString) {
311 // if there's nothing to convert, do nothing
312 if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
313 return;
314 }
315 if(dataLength == -1) {
316 dataLength = (int32_t)uprv_strlen(codepageData);
317 }
318 setToUTF8(StringPiece(codepageData, dataLength));
319 }
320
321 // else see unistr_cnv.cpp
322 #endif
323
// Copy constructor: starts as an empty short string and then hands the
// actual copying to copyFrom().
UnicodeString::UnicodeString(const UnicodeString& that)
  : Replaceable(),
    fShortLength(0),
    fFlags(kShortString)
{
  copyFrom(that);
}
331
// Substring constructor: starts as an empty short string and then calls
// setTo(that, srcStart) to take the contents.
UnicodeString::UnicodeString(const UnicodeString& that,
                             int32_t srcStart)
  : Replaceable(),
    fShortLength(0),
    fFlags(kShortString)
{
  setTo(that, srcStart);
}
340
// Substring constructor: starts as an empty short string and then calls
// setTo(that, srcStart, srcLength) to take the contents.
UnicodeString::UnicodeString(const UnicodeString& that,
                             int32_t srcStart,
                             int32_t srcLength)
  : Replaceable(),
    fShortLength(0),
    fFlags(kShortString)
{
  setTo(that, srcStart, srcLength);
}
350
351 // Replaceable base class clone() default implementation, does not clone
352 Replaceable *
clone() const353 Replaceable::clone() const {
354 return NULL;
355 }
356
357 // UnicodeString overrides clone() with a real implementation
358 Replaceable *
clone() const359 UnicodeString::clone() const {
360 return new UnicodeString(*this);
361 }
362
363 //========================================
364 // array allocation
365 //========================================
366
367 UBool
allocate(int32_t capacity)368 UnicodeString::allocate(int32_t capacity) {
369 if(capacity <= US_STACKBUF_SIZE) {
370 fFlags = kShortString;
371 } else {
372 // count bytes for the refCounter and the string capacity, and
373 // round up to a multiple of 16; then divide by 4 and allocate int32_t's
374 // to be safely aligned for the refCount
375 // the +1 is for the NUL terminator, to avoid reallocation in getTerminatedBuffer()
376 int32_t words = (int32_t)(((sizeof(int32_t) + (capacity + 1) * U_SIZEOF_UCHAR + 15) & ~15) >> 2);
377 int32_t *array = (int32_t*) uprv_malloc( sizeof(int32_t) * words );
378 if(array != 0) {
379 // set initial refCount and point behind the refCount
380 *array++ = 1;
381
382 // have fArray point to the first UChar
383 fUnion.fFields.fArray = (UChar *)array;
384 fUnion.fFields.fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR));
385 fFlags = kLongString;
386 } else {
387 fShortLength = 0;
388 fUnion.fFields.fArray = 0;
389 fUnion.fFields.fCapacity = 0;
390 fFlags = kIsBogus;
391 return FALSE;
392 }
393 }
394 return TRUE;
395 }
396
397 //========================================
398 // Destructor
399 //========================================
~UnicodeString()400 UnicodeString::~UnicodeString()
401 {
402 releaseArray();
403 }
404
405 //========================================
406 // Factory methods
407 //========================================
408
fromUTF8(const StringPiece & utf8)409 UnicodeString UnicodeString::fromUTF8(const StringPiece &utf8) {
410 UnicodeString result;
411 result.setToUTF8(utf8);
412 return result;
413 }
414
fromUTF32(const UChar32 * utf32,int32_t length)415 UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {
416 UnicodeString result;
417 int32_t capacity;
418 // Most UTF-32 strings will be BMP-only and result in a same-length
419 // UTF-16 string. We overestimate the capacity just slightly,
420 // just in case there are a few supplementary characters.
421 if(length <= US_STACKBUF_SIZE) {
422 capacity = US_STACKBUF_SIZE;
423 } else {
424 capacity = length + (length >> 4) + 4;
425 }
426 do {
427 UChar *utf16 = result.getBuffer(capacity);
428 int32_t length16;
429 UErrorCode errorCode = U_ZERO_ERROR;
430 u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16,
431 utf32, length,
432 0xfffd, // Substitution character.
433 NULL, // Don't care about number of substitutions.
434 &errorCode);
435 result.releaseBuffer(length16);
436 if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
437 capacity = length16 + 1; // +1 for the terminating NUL.
438 continue;
439 } else if(U_FAILURE(errorCode)) {
440 result.setToBogus();
441 }
442 break;
443 } while(TRUE);
444 return result;
445 }
446
447 //========================================
448 // Assignment
449 //========================================
450
451 UnicodeString &
operator =(const UnicodeString & src)452 UnicodeString::operator=(const UnicodeString &src) {
453 return copyFrom(src);
454 }
455
456 UnicodeString &
fastCopyFrom(const UnicodeString & src)457 UnicodeString::fastCopyFrom(const UnicodeString &src) {
458 return copyFrom(src, TRUE);
459 }
460
// Makes this string a copy of src and returns *this.
//
//   src       the string to copy from
//   fastCopy  if TRUE, a read-only alias in src is taken over as a read-only
//             alias; otherwise its contents are copied into a new buffer
//
// Depending on src.fFlags the contents are copied into the stack buffer,
// shared through the reference-counted buffer, aliased, or copied into a
// newly allocated buffer. This string becomes bogus if src is bogus, if
// src.fFlags matches none of the handled cases, or if allocate() fails.
//
// NOTE(review): the "this == 0" and "&src == 0" tests compare a this-pointer
// and the address of a reference with null; a conforming caller can never
// produce either, so compilers may remove the tests - confirm whether any
// caller still depends on them.
UnicodeString &
UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
  // if assigning to ourselves, do nothing
  if(this == 0 || this == &src) {
    return *this;
  }

  // is the right side bogus?
  if(&src == 0 || src.isBogus()) {
    setToBogus();
    return *this;
  }

  // delete the current contents
  releaseArray();

  if(src.isEmpty()) {
    // empty string - use the stack buffer
    setToEmpty();
    return *this;
  }

  // we always copy the length
  int32_t srcLength = src.length();
  setLength(srcLength);

  // fLength>0 and not an "open" src.getBuffer(minCapacity)
  // The switch compares the complete flags value, so any combination that
  // is not listed below ends up in the default (bogus) case.
  switch(src.fFlags) {
  case kShortString:
    // short string using the stack buffer, do the same
    fFlags = kShortString;
    uprv_memcpy(fUnion.fStackBuffer, src.fUnion.fStackBuffer, srcLength * U_SIZEOF_UCHAR);
    break;
  case kLongString:
    // src uses a refCounted string buffer, use that buffer with refCount
    // src is const, use a cast - we don't really change it
    ((UnicodeString &)src).addRef();
    // copy all fields, share the reference-counted buffer
    fUnion.fFields.fArray = src.fUnion.fFields.fArray;
    fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
    fFlags = src.fFlags;
    break;
  case kReadonlyAlias:
    if(fastCopy) {
      // src is a readonly alias, do the same
      // -> maintain the readonly alias as such
      fUnion.fFields.fArray = src.fUnion.fFields.fArray;
      fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
      fFlags = src.fFlags;
      break;
    }
    // else if(!fastCopy) fall through to case kWritableAlias
    // -> allocate a new buffer and copy the contents
  case kWritableAlias:
    // src is a writable alias; we make a copy of that instead
    // (allocate() also sets fFlags for the new storage)
    if(allocate(srcLength)) {
      uprv_memcpy(getArrayStart(), src.getArrayStart(), srcLength * U_SIZEOF_UCHAR);
      break;
    }
    // if there is not enough memory, then fall through to setting to bogus
  default:
    // if src is bogus, set ourselves to bogus
    // do not call setToBogus() here because fArray and fFlags are not consistent here
    // (the length was already set above, so it is reset as well)
    fShortLength = 0;
    fUnion.fFields.fArray = 0;
    fUnion.fFields.fCapacity = 0;
    fFlags = kIsBogus;
    break;
  }

  return *this;
}
533
534 //========================================
535 // Miscellaneous operations
536 //========================================
537
unescape() const538 UnicodeString UnicodeString::unescape() const {
539 UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity
540 const UChar *array = getBuffer();
541 int32_t len = length();
542 int32_t prev = 0;
543 for (int32_t i=0;;) {
544 if (i == len) {
545 result.append(array, prev, len - prev);
546 break;
547 }
548 if (array[i++] == 0x5C /*'\\'*/) {
549 result.append(array, prev, (i - 1) - prev);
550 UChar32 c = unescapeAt(i); // advances i
551 if (c < 0) {
552 result.remove(); // return empty string
553 break; // invalid escape sequence
554 }
555 result.append(c);
556 prev = i;
557 }
558 }
559 return result;
560 }
561
unescapeAt(int32_t & offset) const562 UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
563 return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this);
564 }
565
566 //========================================
567 // Read-only implementation
568 //========================================
569 int8_t
doCompare(int32_t start,int32_t length,const UChar * srcChars,int32_t srcStart,int32_t srcLength) const570 UnicodeString::doCompare( int32_t start,
571 int32_t length,
572 const UChar *srcChars,
573 int32_t srcStart,
574 int32_t srcLength) const
575 {
576 // compare illegal string values
577 // treat const UChar *srcChars==NULL as an empty string
578 if(isBogus()) {
579 return -1;
580 }
581
582 // pin indices to legal values
583 pinIndices(start, length);
584
585 if(srcChars == NULL) {
586 srcStart = srcLength = 0;
587 }
588
589 // get the correct pointer
590 const UChar *chars = getArrayStart();
591
592 chars += start;
593 srcChars += srcStart;
594
595 int32_t minLength;
596 int8_t lengthResult;
597
598 // get the srcLength if necessary
599 if(srcLength < 0) {
600 srcLength = u_strlen(srcChars + srcStart);
601 }
602
603 // are we comparing different lengths?
604 if(length != srcLength) {
605 if(length < srcLength) {
606 minLength = length;
607 lengthResult = -1;
608 } else {
609 minLength = srcLength;
610 lengthResult = 1;
611 }
612 } else {
613 minLength = length;
614 lengthResult = 0;
615 }
616
617 /*
618 * note that uprv_memcmp() returns an int but we return an int8_t;
619 * we need to take care not to truncate the result -
620 * one way to do this is to right-shift the value to
621 * move the sign bit into the lower 8 bits and making sure that this
622 * does not become 0 itself
623 */
624
625 if(minLength > 0 && chars != srcChars) {
626 int32_t result;
627
628 # if U_IS_BIG_ENDIAN
629 // big-endian: byte comparison works
630 result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar));
631 if(result != 0) {
632 return (int8_t)(result >> 15 | 1);
633 }
634 # else
635 // little-endian: compare UChar units
636 do {
637 result = ((int32_t)*(chars++) - (int32_t)*(srcChars++));
638 if(result != 0) {
639 return (int8_t)(result >> 15 | 1);
640 }
641 } while(--minLength > 0);
642 # endif
643 }
644 return lengthResult;
645 }
646
647 /* String compare in code point order - doCompare() compares in code unit order. */
648 int8_t
doCompareCodePointOrder(int32_t start,int32_t length,const UChar * srcChars,int32_t srcStart,int32_t srcLength) const649 UnicodeString::doCompareCodePointOrder(int32_t start,
650 int32_t length,
651 const UChar *srcChars,
652 int32_t srcStart,
653 int32_t srcLength) const
654 {
655 // compare illegal string values
656 // treat const UChar *srcChars==NULL as an empty string
657 if(isBogus()) {
658 return -1;
659 }
660
661 // pin indices to legal values
662 pinIndices(start, length);
663
664 if(srcChars == NULL) {
665 srcStart = srcLength = 0;
666 }
667
668 int32_t diff = uprv_strCompare(getArrayStart() + start, length, srcChars + srcStart, srcLength, FALSE, TRUE);
669 /* translate the 32-bit result into an 8-bit one */
670 if(diff!=0) {
671 return (int8_t)(diff >> 15 | 1);
672 } else {
673 return 0;
674 }
675 }
676
677 int32_t
getLength() const678 UnicodeString::getLength() const {
679 return length();
680 }
681
682 UChar
getCharAt(int32_t offset) const683 UnicodeString::getCharAt(int32_t offset) const {
684 return charAt(offset);
685 }
686
687 UChar32
getChar32At(int32_t offset) const688 UnicodeString::getChar32At(int32_t offset) const {
689 return char32At(offset);
690 }
691
692 int32_t
countChar32(int32_t start,int32_t length) const693 UnicodeString::countChar32(int32_t start, int32_t length) const {
694 pinIndices(start, length);
695 // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
696 return u_countChar32(getArrayStart()+start, length);
697 }
698
699 UBool
hasMoreChar32Than(int32_t start,int32_t length,int32_t number) const700 UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
701 pinIndices(start, length);
702 // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
703 return u_strHasMoreChar32Than(getArrayStart()+start, length, number);
704 }
705
706 int32_t
moveIndex32(int32_t index,int32_t delta) const707 UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
708 // pin index
709 int32_t len = length();
710 if(index<0) {
711 index=0;
712 } else if(index>len) {
713 index=len;
714 }
715
716 const UChar *array = getArrayStart();
717 if(delta>0) {
718 UTF_FWD_N(array, index, len, delta);
719 } else {
720 UTF_BACK_N(array, 0, index, -delta);
721 }
722
723 return index;
724 }
725
726 void
doExtract(int32_t start,int32_t length,UChar * dst,int32_t dstStart) const727 UnicodeString::doExtract(int32_t start,
728 int32_t length,
729 UChar *dst,
730 int32_t dstStart) const
731 {
732 // pin indices to legal values
733 pinIndices(start, length);
734
735 // do not copy anything if we alias dst itself
736 const UChar *array = getArrayStart();
737 if(array + start != dst + dstStart) {
738 us_arrayCopy(array, start, dst, dstStart, length);
739 }
740 }
741
742 int32_t
extract(UChar * dest,int32_t destCapacity,UErrorCode & errorCode) const743 UnicodeString::extract(UChar *dest, int32_t destCapacity,
744 UErrorCode &errorCode) const {
745 int32_t len = length();
746 if(U_SUCCESS(errorCode)) {
747 if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
748 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
749 } else {
750 const UChar *array = getArrayStart();
751 if(len>0 && len<=destCapacity && array!=dest) {
752 uprv_memcpy(dest, array, len*U_SIZEOF_UCHAR);
753 }
754 return u_terminateUChars(dest, destCapacity, len, &errorCode);
755 }
756 }
757
758 return len;
759 }
760
761 int32_t
extract(int32_t start,int32_t length,char * target,int32_t targetCapacity,enum EInvariant) const762 UnicodeString::extract(int32_t start,
763 int32_t length,
764 char *target,
765 int32_t targetCapacity,
766 enum EInvariant) const
767 {
768 // if the arguments are illegal, then do nothing
769 if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) {
770 return 0;
771 }
772
773 // pin the indices to legal values
774 pinIndices(start, length);
775
776 if(length <= targetCapacity) {
777 u_UCharsToChars(getArrayStart() + start, target, length);
778 }
779 UErrorCode status = U_ZERO_ERROR;
780 return u_terminateChars(target, targetCapacity, length, &status);
781 }
782
783 UnicodeString
tempSubString(int32_t start,int32_t len) const784 UnicodeString::tempSubString(int32_t start, int32_t len) const {
785 pinIndices(start, len);
786 const UChar *array = getBuffer(); // not getArrayStart() to check kIsBogus & kOpenGetBuffer
787 if(array==NULL) {
788 array=fUnion.fStackBuffer; // anything not NULL because that would make an empty string
789 len=-2; // bogus result string
790 }
791 return UnicodeString(FALSE, array + start, len);
792 }
793
794 int32_t
toUTF8(int32_t start,int32_t len,char * target,int32_t capacity) const795 UnicodeString::toUTF8(int32_t start, int32_t len,
796 char *target, int32_t capacity) const {
797 pinIndices(start, len);
798 int32_t length8;
799 UErrorCode errorCode = U_ZERO_ERROR;
800 u_strToUTF8WithSub(target, capacity, &length8,
801 getBuffer() + start, len,
802 0xFFFD, // Standard substitution character.
803 NULL, // Don't care about number of substitutions.
804 &errorCode);
805 return length8;
806 }
807
808 #if U_CHARSET_IS_UTF8
809
810 int32_t
extract(int32_t start,int32_t len,char * target,uint32_t dstSize) const811 UnicodeString::extract(int32_t start, int32_t len,
812 char *target, uint32_t dstSize) const {
813 // if the arguments are illegal, then do nothing
814 if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {
815 return 0;
816 }
817 return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff);
818 }
819
820 // else see unistr_cnv.cpp
821 #endif
822
823 void
extractBetween(int32_t start,int32_t limit,UnicodeString & target) const824 UnicodeString::extractBetween(int32_t start,
825 int32_t limit,
826 UnicodeString& target) const {
827 pinIndex(start);
828 pinIndex(limit);
829 doExtract(start, limit - start, target);
830 }
831
832 // When converting from UTF-16 to UTF-8, the result will have at most 3 times
833 // as many bytes as the source has UChars.
834 // The "worst cases" are writing systems like Indic, Thai and CJK with
835 // 3:1 bytes:UChars.
836 void
toUTF8(ByteSink & sink) const837 UnicodeString::toUTF8(ByteSink &sink) const {
838 int32_t length16 = length();
839 if(length16 != 0) {
840 char stackBuffer[1024];
841 int32_t capacity = (int32_t)sizeof(stackBuffer);
842 UBool utf8IsOwned = FALSE;
843 char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,
844 3*length16,
845 stackBuffer, capacity,
846 &capacity);
847 int32_t length8 = 0;
848 UErrorCode errorCode = U_ZERO_ERROR;
849 u_strToUTF8WithSub(utf8, capacity, &length8,
850 getBuffer(), length16,
851 0xFFFD, // Standard substitution character.
852 NULL, // Don't care about number of substitutions.
853 &errorCode);
854 if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
855 utf8 = (char *)uprv_malloc(length8);
856 if(utf8 != NULL) {
857 utf8IsOwned = TRUE;
858 errorCode = U_ZERO_ERROR;
859 u_strToUTF8WithSub(utf8, length8, &length8,
860 getBuffer(), length16,
861 0xFFFD, // Standard substitution character.
862 NULL, // Don't care about number of substitutions.
863 &errorCode);
864 } else {
865 errorCode = U_MEMORY_ALLOCATION_ERROR;
866 }
867 }
868 if(U_SUCCESS(errorCode)) {
869 sink.Append(utf8, length8);
870 sink.Flush();
871 }
872 if(utf8IsOwned) {
873 uprv_free(utf8);
874 }
875 }
876 }
877
878 int32_t
toUTF32(UChar32 * utf32,int32_t capacity,UErrorCode & errorCode) const879 UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const {
880 int32_t length32=0;
881 if(U_SUCCESS(errorCode)) {
882 // getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
883 u_strToUTF32WithSub(utf32, capacity, &length32,
884 getBuffer(), length(),
885 0xfffd, // Substitution character.
886 NULL, // Don't care about number of substitutions.
887 &errorCode);
888 }
889 return length32;
890 }
891
892 int32_t
indexOf(const UChar * srcChars,int32_t srcStart,int32_t srcLength,int32_t start,int32_t length) const893 UnicodeString::indexOf(const UChar *srcChars,
894 int32_t srcStart,
895 int32_t srcLength,
896 int32_t start,
897 int32_t length) const
898 {
899 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
900 return -1;
901 }
902
903 // UnicodeString does not find empty substrings
904 if(srcLength < 0 && srcChars[srcStart] == 0) {
905 return -1;
906 }
907
908 // get the indices within bounds
909 pinIndices(start, length);
910
911 // find the first occurrence of the substring
912 const UChar *array = getArrayStart();
913 const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength);
914 if(match == NULL) {
915 return -1;
916 } else {
917 return (int32_t)(match - array);
918 }
919 }
920
921 int32_t
doIndexOf(UChar c,int32_t start,int32_t length) const922 UnicodeString::doIndexOf(UChar c,
923 int32_t start,
924 int32_t length) const
925 {
926 // pin indices
927 pinIndices(start, length);
928
929 // find the first occurrence of c
930 const UChar *array = getArrayStart();
931 const UChar *match = u_memchr(array + start, c, length);
932 if(match == NULL) {
933 return -1;
934 } else {
935 return (int32_t)(match - array);
936 }
937 }
938
939 int32_t
doIndexOf(UChar32 c,int32_t start,int32_t length) const940 UnicodeString::doIndexOf(UChar32 c,
941 int32_t start,
942 int32_t length) const {
943 // pin indices
944 pinIndices(start, length);
945
946 // find the first occurrence of c
947 const UChar *array = getArrayStart();
948 const UChar *match = u_memchr32(array + start, c, length);
949 if(match == NULL) {
950 return -1;
951 } else {
952 return (int32_t)(match - array);
953 }
954 }
955
956 int32_t
lastIndexOf(const UChar * srcChars,int32_t srcStart,int32_t srcLength,int32_t start,int32_t length) const957 UnicodeString::lastIndexOf(const UChar *srcChars,
958 int32_t srcStart,
959 int32_t srcLength,
960 int32_t start,
961 int32_t length) const
962 {
963 if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
964 return -1;
965 }
966
967 // UnicodeString does not find empty substrings
968 if(srcLength < 0 && srcChars[srcStart] == 0) {
969 return -1;
970 }
971
972 // get the indices within bounds
973 pinIndices(start, length);
974
975 // find the last occurrence of the substring
976 const UChar *array = getArrayStart();
977 const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength);
978 if(match == NULL) {
979 return -1;
980 } else {
981 return (int32_t)(match - array);
982 }
983 }
984
985 int32_t
doLastIndexOf(UChar c,int32_t start,int32_t length) const986 UnicodeString::doLastIndexOf(UChar c,
987 int32_t start,
988 int32_t length) const
989 {
990 if(isBogus()) {
991 return -1;
992 }
993
994 // pin indices
995 pinIndices(start, length);
996
997 // find the last occurrence of c
998 const UChar *array = getArrayStart();
999 const UChar *match = u_memrchr(array + start, c, length);
1000 if(match == NULL) {
1001 return -1;
1002 } else {
1003 return (int32_t)(match - array);
1004 }
1005 }
1006
1007 int32_t
doLastIndexOf(UChar32 c,int32_t start,int32_t length) const1008 UnicodeString::doLastIndexOf(UChar32 c,
1009 int32_t start,
1010 int32_t length) const {
1011 // pin indices
1012 pinIndices(start, length);
1013
1014 // find the last occurrence of c
1015 const UChar *array = getArrayStart();
1016 const UChar *match = u_memrchr32(array + start, c, length);
1017 if(match == NULL) {
1018 return -1;
1019 } else {
1020 return (int32_t)(match - array);
1021 }
1022 }
1023
1024 //========================================
1025 // Write implementation
1026 //========================================
1027
1028 UnicodeString&
findAndReplace(int32_t start,int32_t length,const UnicodeString & oldText,int32_t oldStart,int32_t oldLength,const UnicodeString & newText,int32_t newStart,int32_t newLength)1029 UnicodeString::findAndReplace(int32_t start,
1030 int32_t length,
1031 const UnicodeString& oldText,
1032 int32_t oldStart,
1033 int32_t oldLength,
1034 const UnicodeString& newText,
1035 int32_t newStart,
1036 int32_t newLength)
1037 {
1038 if(isBogus() || oldText.isBogus() || newText.isBogus()) {
1039 return *this;
1040 }
1041
1042 pinIndices(start, length);
1043 oldText.pinIndices(oldStart, oldLength);
1044 newText.pinIndices(newStart, newLength);
1045
1046 if(oldLength == 0) {
1047 return *this;
1048 }
1049
1050 while(length > 0 && length >= oldLength) {
1051 int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);
1052 if(pos < 0) {
1053 // no more oldText's here: done
1054 break;
1055 } else {
1056 // we found oldText, replace it by newText and go beyond it
1057 replace(pos, oldLength, newText, newStart, newLength);
1058 length -= pos + oldLength - start;
1059 start = pos + newLength;
1060 }
1061 }
1062
1063 return *this;
1064 }
1065
1066
1067 void
setToBogus()1068 UnicodeString::setToBogus()
1069 {
1070 releaseArray();
1071
1072 fShortLength = 0;
1073 fUnion.fFields.fArray = 0;
1074 fUnion.fFields.fCapacity = 0;
1075 fFlags = kIsBogus;
1076 }
1077
1078 // turn a bogus string into an empty one
1079 void
unBogus()1080 UnicodeString::unBogus() {
1081 if(fFlags & kIsBogus) {
1082 setToEmpty();
1083 }
1084 }
1085
1086 // setTo() analogous to the readonly-aliasing constructor with the same signature
1087 UnicodeString &
setTo(UBool isTerminated,const UChar * text,int32_t textLength)1088 UnicodeString::setTo(UBool isTerminated,
1089 const UChar *text,
1090 int32_t textLength)
1091 {
1092 if(fFlags & kOpenGetBuffer) {
1093 // do not modify a string that has an "open" getBuffer(minCapacity)
1094 return *this;
1095 }
1096
1097 if(text == NULL) {
1098 // treat as an empty string, do not alias
1099 releaseArray();
1100 setToEmpty();
1101 return *this;
1102 }
1103
1104 if( textLength < -1 ||
1105 (textLength == -1 && !isTerminated) ||
1106 (textLength >= 0 && isTerminated && text[textLength] != 0)
1107 ) {
1108 setToBogus();
1109 return *this;
1110 }
1111
1112 releaseArray();
1113
1114 if(textLength == -1) {
1115 // text is terminated, or else it would have failed the above test
1116 textLength = u_strlen(text);
1117 }
1118 setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
1119
1120 fFlags = kReadonlyAlias;
1121 return *this;
1122 }
1123
1124 // setTo() analogous to the writable-aliasing constructor with the same signature
1125 UnicodeString &
setTo(UChar * buffer,int32_t buffLength,int32_t buffCapacity)1126 UnicodeString::setTo(UChar *buffer,
1127 int32_t buffLength,
1128 int32_t buffCapacity) {
1129 if(fFlags & kOpenGetBuffer) {
1130 // do not modify a string that has an "open" getBuffer(minCapacity)
1131 return *this;
1132 }
1133
1134 if(buffer == NULL) {
1135 // treat as an empty string, do not alias
1136 releaseArray();
1137 setToEmpty();
1138 return *this;
1139 }
1140
1141 if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
1142 setToBogus();
1143 return *this;
1144 } else if(buffLength == -1) {
1145 // buffLength = u_strlen(buff); but do not look beyond buffCapacity
1146 const UChar *p = buffer, *limit = buffer + buffCapacity;
1147 while(p != limit && *p != 0) {
1148 ++p;
1149 }
1150 buffLength = (int32_t)(p - buffer);
1151 }
1152
1153 releaseArray();
1154
1155 setArray(buffer, buffLength, buffCapacity);
1156 fFlags = kWritableAlias;
1157 return *this;
1158 }
1159
setToUTF8(const StringPiece & utf8)1160 UnicodeString &UnicodeString::setToUTF8(const StringPiece &utf8) {
1161 unBogus();
1162 int32_t length = utf8.length();
1163 int32_t capacity;
1164 // The UTF-16 string will be at most as long as the UTF-8 string.
1165 if(length <= US_STACKBUF_SIZE) {
1166 capacity = US_STACKBUF_SIZE;
1167 } else {
1168 capacity = length + 1; // +1 for the terminating NUL.
1169 }
1170 UChar *utf16 = getBuffer(capacity);
1171 int32_t length16;
1172 UErrorCode errorCode = U_ZERO_ERROR;
1173 u_strFromUTF8WithSub(utf16, getCapacity(), &length16,
1174 utf8.data(), length,
1175 0xfffd, // Substitution character.
1176 NULL, // Don't care about number of substitutions.
1177 &errorCode);
1178 releaseBuffer(length16);
1179 if(U_FAILURE(errorCode)) {
1180 setToBogus();
1181 }
1182 return *this;
1183 }
1184
1185 UnicodeString&
setCharAt(int32_t offset,UChar c)1186 UnicodeString::setCharAt(int32_t offset,
1187 UChar c)
1188 {
1189 int32_t len = length();
1190 if(cloneArrayIfNeeded() && len > 0) {
1191 if(offset < 0) {
1192 offset = 0;
1193 } else if(offset >= len) {
1194 offset = len - 1;
1195 }
1196
1197 getArrayStart()[offset] = c;
1198 }
1199 return *this;
1200 }
1201
1202 UnicodeString&
doReplace(int32_t start,int32_t length,const UnicodeString & src,int32_t srcStart,int32_t srcLength)1203 UnicodeString::doReplace( int32_t start,
1204 int32_t length,
1205 const UnicodeString& src,
1206 int32_t srcStart,
1207 int32_t srcLength)
1208 {
1209 if(!src.isBogus()) {
1210 // pin the indices to legal values
1211 src.pinIndices(srcStart, srcLength);
1212
1213 // get the characters from src
1214 // and replace the range in ourselves with them
1215 return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);
1216 } else {
1217 // remove the range
1218 return doReplace(start, length, 0, 0, 0);
1219 }
1220 }
1221
1222 UnicodeString&
doReplace(int32_t start,int32_t length,const UChar * srcChars,int32_t srcStart,int32_t srcLength)1223 UnicodeString::doReplace(int32_t start,
1224 int32_t length,
1225 const UChar *srcChars,
1226 int32_t srcStart,
1227 int32_t srcLength)
1228 {
1229 if(!isWritable()) {
1230 return *this;
1231 }
1232
1233 int32_t oldLength = this->length();
1234
1235 // optimize (read-only alias).remove(0, start) and .remove(start, end)
1236 if((fFlags&kBufferIsReadonly) && srcLength == 0) {
1237 if(start == 0) {
1238 // remove prefix by adjusting the array pointer
1239 pinIndex(length);
1240 fUnion.fFields.fArray += length;
1241 fUnion.fFields.fCapacity -= length;
1242 setLength(oldLength - length);
1243 return *this;
1244 } else {
1245 pinIndex(start);
1246 if(length >= (oldLength - start)) {
1247 // remove suffix by reducing the length (like truncate())
1248 setLength(start);
1249 fUnion.fFields.fCapacity = start; // not NUL-terminated any more
1250 return *this;
1251 }
1252 }
1253 }
1254
1255 if(srcChars == 0) {
1256 srcStart = srcLength = 0;
1257 } else if(srcLength < 0) {
1258 // get the srcLength if necessary
1259 srcLength = u_strlen(srcChars + srcStart);
1260 }
1261
1262 // calculate the size of the string after the replace
1263 int32_t newSize;
1264
1265 // optimize append() onto a large-enough, owned string
1266 if(start >= oldLength) {
1267 newSize = oldLength + srcLength;
1268 if(newSize <= getCapacity() && isBufferWritable()) {
1269 us_arrayCopy(srcChars, srcStart, getArrayStart(), oldLength, srcLength);
1270 setLength(newSize);
1271 return *this;
1272 } else {
1273 // pin the indices to legal values
1274 start = oldLength;
1275 length = 0;
1276 }
1277 } else {
1278 // pin the indices to legal values
1279 pinIndices(start, length);
1280
1281 newSize = oldLength - length + srcLength;
1282 }
1283
1284 // the following may change fArray but will not copy the current contents;
1285 // therefore we need to keep the current fArray
1286 UChar oldStackBuffer[US_STACKBUF_SIZE];
1287 UChar *oldArray;
1288 if((fFlags&kUsingStackBuffer) && (newSize > US_STACKBUF_SIZE)) {
1289 // copy the stack buffer contents because it will be overwritten with
1290 // fUnion.fFields values
1291 u_memcpy(oldStackBuffer, fUnion.fStackBuffer, oldLength);
1292 oldArray = oldStackBuffer;
1293 } else {
1294 oldArray = getArrayStart();
1295 }
1296
1297 // clone our array and allocate a bigger array if needed
1298 int32_t *bufferToDelete = 0;
1299 if(!cloneArrayIfNeeded(newSize, newSize + (newSize >> 2) + kGrowSize,
1300 FALSE, &bufferToDelete)
1301 ) {
1302 return *this;
1303 }
1304
1305 // now do the replace
1306
1307 UChar *newArray = getArrayStart();
1308 if(newArray != oldArray) {
1309 // if fArray changed, then we need to copy everything except what will change
1310 us_arrayCopy(oldArray, 0, newArray, 0, start);
1311 us_arrayCopy(oldArray, start + length,
1312 newArray, start + srcLength,
1313 oldLength - (start + length));
1314 } else if(length != srcLength) {
1315 // fArray did not change; copy only the portion that isn't changing, leaving a hole
1316 us_arrayCopy(oldArray, start + length,
1317 newArray, start + srcLength,
1318 oldLength - (start + length));
1319 }
1320
1321 // now fill in the hole with the new string
1322 us_arrayCopy(srcChars, srcStart, newArray, start, srcLength);
1323
1324 setLength(newSize);
1325
1326 // delayed delete in case srcChars == fArray when we started, and
1327 // to keep oldArray alive for the above operations
1328 if (bufferToDelete) {
1329 uprv_free(bufferToDelete);
1330 }
1331
1332 return *this;
1333 }
1334
1335 /**
1336 * Replaceable API
1337 */
1338 void
handleReplaceBetween(int32_t start,int32_t limit,const UnicodeString & text)1339 UnicodeString::handleReplaceBetween(int32_t start,
1340 int32_t limit,
1341 const UnicodeString& text) {
1342 replaceBetween(start, limit, text);
1343 }
1344
1345 /**
1346 * Replaceable API
1347 */
1348 void
copy(int32_t start,int32_t limit,int32_t dest)1349 UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
1350 if (limit <= start) {
1351 return; // Nothing to do; avoid bogus malloc call
1352 }
1353 UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) );
1354 // Check to make sure text is not null.
1355 if (text != NULL) {
1356 extractBetween(start, limit, text, 0);
1357 insert(dest, text, 0, limit - start);
1358 uprv_free(text);
1359 }
1360 }
1361
1362 /**
1363 * Replaceable API
1364 *
1365 * NOTE: This is for the Replaceable class. There is no rep.cpp,
1366 * so we implement this function here.
1367 */
hasMetaData() const1368 UBool Replaceable::hasMetaData() const {
1369 return TRUE;
1370 }
1371
1372 /**
1373 * Replaceable API
1374 */
hasMetaData() const1375 UBool UnicodeString::hasMetaData() const {
1376 return FALSE;
1377 }
1378
1379 UnicodeString&
doReverse(int32_t start,int32_t length)1380 UnicodeString::doReverse(int32_t start, int32_t length) {
1381 if(length <= 1 || !cloneArrayIfNeeded()) {
1382 return *this;
1383 }
1384
1385 // pin the indices to legal values
1386 pinIndices(start, length);
1387 if(length <= 1) { // pinIndices() might have shrunk the length
1388 return *this;
1389 }
1390
1391 UChar *left = getArrayStart() + start;
1392 UChar *right = left + length - 1; // -1 for inclusive boundary (length>=2)
1393 UChar swap;
1394 UBool hasSupplementary = FALSE;
1395
1396 // Before the loop we know left<right because length>=2.
1397 do {
1398 hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left);
1399 hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right);
1400 *right-- = swap;
1401 } while(left < right);
1402 // Make sure to test the middle code unit of an odd-length string.
1403 // Redundant if the length is even.
1404 hasSupplementary |= (UBool)U16_IS_LEAD(*left);
1405
1406 /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
1407 if(hasSupplementary) {
1408 UChar swap2;
1409
1410 left = getArrayStart() + start;
1411 right = left + length - 1; // -1 so that we can look at *(left+1) if left<right
1412 while(left < right) {
1413 if(U16_IS_TRAIL(swap = *left) && U16_IS_LEAD(swap2 = *(left + 1))) {
1414 *left++ = swap2;
1415 *left++ = swap;
1416 } else {
1417 ++left;
1418 }
1419 }
1420 }
1421
1422 return *this;
1423 }
1424
1425 UBool
padLeading(int32_t targetLength,UChar padChar)1426 UnicodeString::padLeading(int32_t targetLength,
1427 UChar padChar)
1428 {
1429 int32_t oldLength = length();
1430 if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1431 return FALSE;
1432 } else {
1433 // move contents up by padding width
1434 UChar *array = getArrayStart();
1435 int32_t start = targetLength - oldLength;
1436 us_arrayCopy(array, 0, array, start, oldLength);
1437
1438 // fill in padding character
1439 while(--start >= 0) {
1440 array[start] = padChar;
1441 }
1442 setLength(targetLength);
1443 return TRUE;
1444 }
1445 }
1446
1447 UBool
padTrailing(int32_t targetLength,UChar padChar)1448 UnicodeString::padTrailing(int32_t targetLength,
1449 UChar padChar)
1450 {
1451 int32_t oldLength = length();
1452 if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1453 return FALSE;
1454 } else {
1455 // fill in padding character
1456 UChar *array = getArrayStart();
1457 int32_t length = targetLength;
1458 while(--length >= oldLength) {
1459 array[length] = padChar;
1460 }
1461 setLength(targetLength);
1462 return TRUE;
1463 }
1464 }
1465
1466 //========================================
1467 // Hashing
1468 //========================================
1469 int32_t
doHashCode() const1470 UnicodeString::doHashCode() const
1471 {
1472 /* Delegate hash computation to uhash. This makes UnicodeString
1473 * hashing consistent with UChar* hashing. */
1474 int32_t hashCode = uhash_hashUCharsN(getArrayStart(), length());
1475 if (hashCode == kInvalidHashCode) {
1476 hashCode = kEmptyHashCode;
1477 }
1478 return hashCode;
1479 }
1480
1481 //========================================
1482 // External Buffer
1483 //========================================
1484
1485 UChar *
getBuffer(int32_t minCapacity)1486 UnicodeString::getBuffer(int32_t minCapacity) {
1487 if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
1488 fFlags|=kOpenGetBuffer;
1489 fShortLength=0;
1490 return getArrayStart();
1491 } else {
1492 return 0;
1493 }
1494 }
1495
1496 void
releaseBuffer(int32_t newLength)1497 UnicodeString::releaseBuffer(int32_t newLength) {
1498 if(fFlags&kOpenGetBuffer && newLength>=-1) {
1499 // set the new fLength
1500 int32_t capacity=getCapacity();
1501 if(newLength==-1) {
1502 // the new length is the string length, capped by fCapacity
1503 const UChar *array=getArrayStart(), *p=array, *limit=array+capacity;
1504 while(p<limit && *p!=0) {
1505 ++p;
1506 }
1507 newLength=(int32_t)(p-array);
1508 } else if(newLength>capacity) {
1509 newLength=capacity;
1510 }
1511 setLength(newLength);
1512 fFlags&=~kOpenGetBuffer;
1513 }
1514 }
1515
1516 //========================================
1517 // Miscellaneous
1518 //========================================
1519 UBool
cloneArrayIfNeeded(int32_t newCapacity,int32_t growCapacity,UBool doCopyArray,int32_t ** pBufferToDelete,UBool forceClone)1520 UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
1521 int32_t growCapacity,
1522 UBool doCopyArray,
1523 int32_t **pBufferToDelete,
1524 UBool forceClone) {
1525 // default parameters need to be static, therefore
1526 // the defaults are -1 to have convenience defaults
1527 if(newCapacity == -1) {
1528 newCapacity = getCapacity();
1529 }
1530
1531 // while a getBuffer(minCapacity) is "open",
1532 // prevent any modifications of the string by returning FALSE here
1533 // if the string is bogus, then only an assignment or similar can revive it
1534 if(!isWritable()) {
1535 return FALSE;
1536 }
1537
1538 /*
1539 * We need to make a copy of the array if
1540 * the buffer is read-only, or
1541 * the buffer is refCounted (shared), and refCount>1, or
1542 * the buffer is too small.
1543 * Return FALSE if memory could not be allocated.
1544 */
1545 if(forceClone ||
1546 fFlags & kBufferIsReadonly ||
1547 (fFlags & kRefCounted && refCount() > 1) ||
1548 newCapacity > getCapacity()
1549 ) {
1550 // check growCapacity for default value and use of the stack buffer
1551 if(growCapacity == -1) {
1552 growCapacity = newCapacity;
1553 } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
1554 growCapacity = US_STACKBUF_SIZE;
1555 }
1556
1557 // save old values
1558 UChar oldStackBuffer[US_STACKBUF_SIZE];
1559 UChar *oldArray;
1560 uint8_t flags = fFlags;
1561
1562 if(flags&kUsingStackBuffer) {
1563 if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
1564 // copy the stack buffer contents because it will be overwritten with
1565 // fUnion.fFields values
1566 us_arrayCopy(fUnion.fStackBuffer, 0, oldStackBuffer, 0, fShortLength);
1567 oldArray = oldStackBuffer;
1568 } else {
1569 oldArray = 0; // no need to copy from stack buffer to itself
1570 }
1571 } else {
1572 oldArray = fUnion.fFields.fArray;
1573 }
1574
1575 // allocate a new array
1576 if(allocate(growCapacity) ||
1577 (newCapacity < growCapacity && allocate(newCapacity))
1578 ) {
1579 if(doCopyArray && oldArray != 0) {
1580 // copy the contents
1581 // do not copy more than what fits - it may be smaller than before
1582 int32_t minLength = length();
1583 newCapacity = getCapacity();
1584 if(newCapacity < minLength) {
1585 minLength = newCapacity;
1586 setLength(minLength);
1587 }
1588 us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);
1589 } else {
1590 fShortLength = 0;
1591 }
1592
1593 // release the old array
1594 if(flags & kRefCounted) {
1595 // the array is refCounted; decrement and release if 0
1596 int32_t *pRefCount = ((int32_t *)oldArray - 1);
1597 if(umtx_atomic_dec(pRefCount) == 0) {
1598 if(pBufferToDelete == 0) {
1599 uprv_free(pRefCount);
1600 } else {
1601 // the caller requested to delete it himself
1602 *pBufferToDelete = pRefCount;
1603 }
1604 }
1605 }
1606 } else {
1607 // not enough memory for growCapacity and not even for the smaller newCapacity
1608 // reset the old values for setToBogus() to release the array
1609 if(!(flags&kUsingStackBuffer)) {
1610 fUnion.fFields.fArray = oldArray;
1611 }
1612 fFlags = flags;
1613 setToBogus();
1614 return FALSE;
1615 }
1616 }
1617 return TRUE;
1618 }
1619 U_NAMESPACE_END
1620
1621 #ifdef U_STATIC_IMPLEMENTATION
1622 /*
1623 This should never be called. It is defined here to make sure that the
1624 virtual vector deleting destructor is defined within unistr.cpp.
1625 The vector deleting destructor is already a part of UObject,
1626 but defining it here makes sure that it is included with this object file.
1627 This makes sure that static library dependencies are kept to a minimum.
1628 */
uprv_UnicodeStringDummy(void)1629 static void uprv_UnicodeStringDummy(void) {
1630 U_NAMESPACE_USE
1631 delete [] (new UnicodeString[2]);
1632 }
1633 #endif
1634