• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 ******************************************************************************
3 * Copyright (C) 1999-2010, International Business Machines Corporation and   *
4 * others. All Rights Reserved.                                               *
5 ******************************************************************************
6 *
7 * File unistr.cpp
8 *
9 * Modification History:
10 *
11 *   Date        Name        Description
12 *   09/25/98    stephen     Creation.
13 *   04/20/99    stephen     Overhauled per 4/16 code review.
14 *   07/09/99    stephen     Renamed {hi,lo},{byte,word} to icu_X for HP/UX
15 *   11/18/99    aliu        Added handleReplaceBetween() to make inherit from
16 *                           Replaceable.
17 *   06/25/01    grhoten     Removed the dependency on iostream
18 ******************************************************************************
19 */
20 
21 #include "unicode/utypes.h"
22 #include "unicode/putil.h"
23 #include "cstring.h"
24 #include "cmemory.h"
25 #include "unicode/ustring.h"
26 #include "unicode/unistr.h"
27 #include "uhash.h"
28 #include "ustr_imp.h"
29 #include "umutex.h"
30 
31 #if 0
32 
33 #if U_IOSTREAM_SOURCE >= 199711
34 #include <iostream>
35 using namespace std;
36 #elif U_IOSTREAM_SOURCE >= 198506
37 #include <iostream.h>
38 #endif
39 
40 //DEBUGGING
41 void
42 print(const UnicodeString& s,
43       const char *name)
44 {
45   UChar c;
46   cout << name << ":|";
47   for(int i = 0; i < s.length(); ++i) {
48     c = s[i];
49     if(c>= 0x007E || c < 0x0020)
50       cout << "[0x" << hex << s[i] << "]";
51     else
52       cout << (char) s[i];
53   }
54   cout << '|' << endl;
55 }
56 
57 void
58 print(const UChar *s,
59       int32_t len,
60       const char *name)
61 {
62   UChar c;
63   cout << name << ":|";
64   for(int i = 0; i < len; ++i) {
65     c = s[i];
66     if(c>= 0x007E || c < 0x0020)
67       cout << "[0x" << hex << s[i] << "]";
68     else
69       cout << (char) s[i];
70   }
71   cout << '|' << endl;
72 }
73 // END DEBUGGING
74 #endif
75 
76 // Local function definitions for now
77 
78 // need to copy areas that may overlap
79 static
80 inline void
us_arrayCopy(const UChar * src,int32_t srcStart,UChar * dst,int32_t dstStart,int32_t count)81 us_arrayCopy(const UChar *src, int32_t srcStart,
82          UChar *dst, int32_t dstStart, int32_t count)
83 {
84   if(count>0) {
85     uprv_memmove(dst+dstStart, src+srcStart, (size_t)(count*sizeof(*src)));
86   }
87 }
88 
89 // u_unescapeAt() callback to get a UChar from a UnicodeString
90 U_CDECL_BEGIN
91 static UChar U_CALLCONV
UnicodeString_charAt(int32_t offset,void * context)92 UnicodeString_charAt(int32_t offset, void *context) {
93     return ((U_NAMESPACE_QUALIFIER UnicodeString*) context)->charAt(offset);
94 }
95 U_CDECL_END
96 
97 U_NAMESPACE_BEGIN
98 
99 /* The Replaceable virtual destructor can't be defined in the header
100    due to how AIX works with multiple definitions of virtual functions.
101 */
~Replaceable()102 Replaceable::~Replaceable() {}
Replaceable()103 Replaceable::Replaceable() {}
104 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
105 
106 UnicodeString U_EXPORT2
107 operator+ (const UnicodeString &s1, const UnicodeString &s2) {
108     return
109         UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0).
110             append(s1).
111                 append(s2);
112 }
113 
114 //========================================
115 // Reference Counting functions, put at top of file so that optimizing compilers
116 //                               have a chance to automatically inline.
117 //========================================
118 
119 void
addRef()120 UnicodeString::addRef()
121 {  umtx_atomic_inc((int32_t *)fUnion.fFields.fArray - 1);}
122 
123 int32_t
removeRef()124 UnicodeString::removeRef()
125 { return umtx_atomic_dec((int32_t *)fUnion.fFields.fArray - 1);}
126 
127 int32_t
refCount() const128 UnicodeString::refCount() const
129 {
130     umtx_lock(NULL);
131     // Note: without the lock to force a memory barrier, we might see a very
132     //       stale value on some multi-processor systems.
133     int32_t  count = *((int32_t *)fUnion.fFields.fArray - 1);
134     umtx_unlock(NULL);
135     return count;
136  }
137 
138 void
releaseArray()139 UnicodeString::releaseArray() {
140   if((fFlags & kRefCounted) && removeRef() == 0) {
141     uprv_free((int32_t *)fUnion.fFields.fArray - 1);
142   }
143 }
144 
145 
146 
147 //========================================
148 // Constructors
149 //========================================
UnicodeString()150 UnicodeString::UnicodeString()
151   : fShortLength(0),
152     fFlags(kShortString)
153 {}
154 
UnicodeString(int32_t capacity,UChar32 c,int32_t count)155 UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count)
156   : fShortLength(0),
157     fFlags(0)
158 {
159   if(count <= 0 || (uint32_t)c > 0x10ffff) {
160     // just allocate and do not do anything else
161     allocate(capacity);
162   } else {
163     // count > 0, allocate and fill the new string with count c's
164     int32_t unitCount = UTF_CHAR_LENGTH(c), length = count * unitCount;
165     if(capacity < length) {
166       capacity = length;
167     }
168     if(allocate(capacity)) {
169       UChar *array = getArrayStart();
170       int32_t i = 0;
171 
172       // fill the new string with c
173       if(unitCount == 1) {
174         // fill with length UChars
175         while(i < length) {
176           array[i++] = (UChar)c;
177         }
178       } else {
179         // get the code units for c
180         UChar units[UTF_MAX_CHAR_LENGTH];
181         UTF_APPEND_CHAR_UNSAFE(units, i, c);
182 
183         // now it must be i==unitCount
184         i = 0;
185 
186         // for Unicode, unitCount can only be 1, 2, 3, or 4
187         // 1 is handled above
188         while(i < length) {
189           int32_t unitIdx = 0;
190           while(unitIdx < unitCount) {
191             array[i++]=units[unitIdx++];
192           }
193         }
194       }
195     }
196     setLength(length);
197   }
198 }
199 
UnicodeString(UChar ch)200 UnicodeString::UnicodeString(UChar ch)
201   : fShortLength(1),
202     fFlags(kShortString)
203 {
204   fUnion.fStackBuffer[0] = ch;
205 }
206 
UnicodeString(UChar32 ch)207 UnicodeString::UnicodeString(UChar32 ch)
208   : fShortLength(0),
209     fFlags(kShortString)
210 {
211   int32_t i = 0;
212   UBool isError = FALSE;
213   U16_APPEND(fUnion.fStackBuffer, i, US_STACKBUF_SIZE, ch, isError);
214   fShortLength = (int8_t)i;
215 }
216 
UnicodeString(const UChar * text)217 UnicodeString::UnicodeString(const UChar *text)
218   : fShortLength(0),
219     fFlags(kShortString)
220 {
221   doReplace(0, 0, text, 0, -1);
222 }
223 
UnicodeString(const UChar * text,int32_t textLength)224 UnicodeString::UnicodeString(const UChar *text,
225                              int32_t textLength)
226   : fShortLength(0),
227     fFlags(kShortString)
228 {
229   doReplace(0, 0, text, 0, textLength);
230 }
231 
UnicodeString(UBool isTerminated,const UChar * text,int32_t textLength)232 UnicodeString::UnicodeString(UBool isTerminated,
233                              const UChar *text,
234                              int32_t textLength)
235   : fShortLength(0),
236     fFlags(kReadonlyAlias)
237 {
238   if(text == NULL) {
239     // treat as an empty string, do not alias
240     setToEmpty();
241   } else if(textLength < -1 ||
242             (textLength == -1 && !isTerminated) ||
243             (textLength >= 0 && isTerminated && text[textLength] != 0)
244   ) {
245     setToBogus();
246   } else {
247     if(textLength == -1) {
248       // text is terminated, or else it would have failed the above test
249       textLength = u_strlen(text);
250     }
251     setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
252   }
253 }
254 
UnicodeString(UChar * buff,int32_t buffLength,int32_t buffCapacity)255 UnicodeString::UnicodeString(UChar *buff,
256                              int32_t buffLength,
257                              int32_t buffCapacity)
258   : fShortLength(0),
259     fFlags(kWritableAlias)
260 {
261   if(buff == NULL) {
262     // treat as an empty string, do not alias
263     setToEmpty();
264   } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
265     setToBogus();
266   } else {
267     if(buffLength == -1) {
268       // fLength = u_strlen(buff); but do not look beyond buffCapacity
269       const UChar *p = buff, *limit = buff + buffCapacity;
270       while(p != limit && *p != 0) {
271         ++p;
272       }
273       buffLength = (int32_t)(p - buff);
274     }
275     setArray(buff, buffLength, buffCapacity);
276   }
277 }
278 
UnicodeString(const char * src,int32_t length,EInvariant)279 UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant)
280   : fShortLength(0),
281     fFlags(kShortString)
282 {
283   if(src==NULL) {
284     // treat as an empty string
285   } else {
286     if(length<0) {
287       length=(int32_t)uprv_strlen(src);
288     }
289     if(cloneArrayIfNeeded(length, length, FALSE)) {
290       u_charsToUChars(src, getArrayStart(), length);
291       setLength(length);
292     } else {
293       setToBogus();
294     }
295   }
296 }
297 
298 #if U_CHARSET_IS_UTF8
299 
UnicodeString(const char * codepageData)300 UnicodeString::UnicodeString(const char *codepageData)
301   : fShortLength(0),
302     fFlags(kShortString) {
303   if(codepageData != 0) {
304     setToUTF8(codepageData);
305   }
306 }
307 
UnicodeString(const char * codepageData,int32_t dataLength)308 UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength)
309   : fShortLength(0),
310     fFlags(kShortString) {
311   // if there's nothing to convert, do nothing
312   if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
313     return;
314   }
315   if(dataLength == -1) {
316     dataLength = (int32_t)uprv_strlen(codepageData);
317   }
318   setToUTF8(StringPiece(codepageData, dataLength));
319 }
320 
321 // else see unistr_cnv.cpp
322 #endif
323 
// Copy constructor: starts as an empty short string, then takes over the
// contents of that through copyFrom().
UnicodeString::UnicodeString(const UnicodeString& that)
  : Replaceable(),
    fShortLength(0),
    fFlags(kShortString)
{
  (void)copyFrom(that);
}
331 
// Constructs the string with setTo(that, srcStart), starting from an empty
// short string.
UnicodeString::UnicodeString(const UnicodeString& that,
                             int32_t srcStart)
  : Replaceable(),
    fShortLength(0),
    fFlags(kShortString)
{
  (void)setTo(that, srcStart);
}
340 
// Constructs the string with setTo(that, srcStart, srcLength), starting
// from an empty short string.
UnicodeString::UnicodeString(const UnicodeString& that,
                             int32_t srcStart,
                             int32_t srcLength)
  : Replaceable(),
    fShortLength(0),
    fFlags(kShortString)
{
  (void)setTo(that, srcStart, srcLength);
}
350 
351 // Replaceable base class clone() default implementation, does not clone
352 Replaceable *
clone() const353 Replaceable::clone() const {
354   return NULL;
355 }
356 
357 // UnicodeString overrides clone() with a real implementation
358 Replaceable *
clone() const359 UnicodeString::clone() const {
360   return new UnicodeString(*this);
361 }
362 
363 //========================================
364 // array allocation
365 //========================================
366 
367 UBool
allocate(int32_t capacity)368 UnicodeString::allocate(int32_t capacity) {
369   if(capacity <= US_STACKBUF_SIZE) {
370     fFlags = kShortString;
371   } else {
372     // count bytes for the refCounter and the string capacity, and
373     // round up to a multiple of 16; then divide by 4 and allocate int32_t's
374     // to be safely aligned for the refCount
375     // the +1 is for the NUL terminator, to avoid reallocation in getTerminatedBuffer()
376     int32_t words = (int32_t)(((sizeof(int32_t) + (capacity + 1) * U_SIZEOF_UCHAR + 15) & ~15) >> 2);
377     int32_t *array = (int32_t*) uprv_malloc( sizeof(int32_t) * words );
378     if(array != 0) {
379       // set initial refCount and point behind the refCount
380       *array++ = 1;
381 
382       // have fArray point to the first UChar
383       fUnion.fFields.fArray = (UChar *)array;
384       fUnion.fFields.fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR));
385       fFlags = kLongString;
386     } else {
387       fShortLength = 0;
388       fUnion.fFields.fArray = 0;
389       fUnion.fFields.fCapacity = 0;
390       fFlags = kIsBogus;
391       return FALSE;
392     }
393   }
394   return TRUE;
395 }
396 
397 //========================================
398 // Destructor
399 //========================================
~UnicodeString()400 UnicodeString::~UnicodeString()
401 {
402   releaseArray();
403 }
404 
405 //========================================
406 // Factory methods
407 //========================================
408 
fromUTF8(const StringPiece & utf8)409 UnicodeString UnicodeString::fromUTF8(const StringPiece &utf8) {
410   UnicodeString result;
411   result.setToUTF8(utf8);
412   return result;
413 }
414 
fromUTF32(const UChar32 * utf32,int32_t length)415 UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {
416   UnicodeString result;
417   int32_t capacity;
418   // Most UTF-32 strings will be BMP-only and result in a same-length
419   // UTF-16 string. We overestimate the capacity just slightly,
420   // just in case there are a few supplementary characters.
421   if(length <= US_STACKBUF_SIZE) {
422     capacity = US_STACKBUF_SIZE;
423   } else {
424     capacity = length + (length >> 4) + 4;
425   }
426   do {
427     UChar *utf16 = result.getBuffer(capacity);
428     int32_t length16;
429     UErrorCode errorCode = U_ZERO_ERROR;
430     u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16,
431         utf32, length,
432         0xfffd,  // Substitution character.
433         NULL,    // Don't care about number of substitutions.
434         &errorCode);
435     result.releaseBuffer(length16);
436     if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
437       capacity = length16 + 1;  // +1 for the terminating NUL.
438       continue;
439     } else if(U_FAILURE(errorCode)) {
440       result.setToBogus();
441     }
442     break;
443   } while(TRUE);
444   return result;
445 }
446 
447 //========================================
448 // Assignment
449 //========================================
450 
451 UnicodeString &
operator =(const UnicodeString & src)452 UnicodeString::operator=(const UnicodeString &src) {
453   return copyFrom(src);
454 }
455 
456 UnicodeString &
fastCopyFrom(const UnicodeString & src)457 UnicodeString::fastCopyFrom(const UnicodeString &src) {
458   return copyFrom(src, TRUE);
459 }
460 
461 UnicodeString &
copyFrom(const UnicodeString & src,UBool fastCopy)462 UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
463   // if assigning to ourselves, do nothing
464   if(this == 0 || this == &src) {
465     return *this;
466   }
467 
468   // is the right side bogus?
469   if(&src == 0 || src.isBogus()) {
470     setToBogus();
471     return *this;
472   }
473 
474   // delete the current contents
475   releaseArray();
476 
477   if(src.isEmpty()) {
478     // empty string - use the stack buffer
479     setToEmpty();
480     return *this;
481   }
482 
483   // we always copy the length
484   int32_t srcLength = src.length();
485   setLength(srcLength);
486 
487   // fLength>0 and not an "open" src.getBuffer(minCapacity)
488   switch(src.fFlags) {
489   case kShortString:
490     // short string using the stack buffer, do the same
491     fFlags = kShortString;
492     uprv_memcpy(fUnion.fStackBuffer, src.fUnion.fStackBuffer, srcLength * U_SIZEOF_UCHAR);
493     break;
494   case kLongString:
495     // src uses a refCounted string buffer, use that buffer with refCount
496     // src is const, use a cast - we don't really change it
497     ((UnicodeString &)src).addRef();
498     // copy all fields, share the reference-counted buffer
499     fUnion.fFields.fArray = src.fUnion.fFields.fArray;
500     fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
501     fFlags = src.fFlags;
502     break;
503   case kReadonlyAlias:
504     if(fastCopy) {
505       // src is a readonly alias, do the same
506       // -> maintain the readonly alias as such
507       fUnion.fFields.fArray = src.fUnion.fFields.fArray;
508       fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
509       fFlags = src.fFlags;
510       break;
511     }
512     // else if(!fastCopy) fall through to case kWritableAlias
513     // -> allocate a new buffer and copy the contents
514   case kWritableAlias:
515     // src is a writable alias; we make a copy of that instead
516     if(allocate(srcLength)) {
517       uprv_memcpy(getArrayStart(), src.getArrayStart(), srcLength * U_SIZEOF_UCHAR);
518       break;
519     }
520     // if there is not enough memory, then fall through to setting to bogus
521   default:
522     // if src is bogus, set ourselves to bogus
523     // do not call setToBogus() here because fArray and fFlags are not consistent here
524     fShortLength = 0;
525     fUnion.fFields.fArray = 0;
526     fUnion.fFields.fCapacity = 0;
527     fFlags = kIsBogus;
528     break;
529   }
530 
531   return *this;
532 }
533 
534 //========================================
535 // Miscellaneous operations
536 //========================================
537 
unescape() const538 UnicodeString UnicodeString::unescape() const {
539     UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity
540     const UChar *array = getBuffer();
541     int32_t len = length();
542     int32_t prev = 0;
543     for (int32_t i=0;;) {
544         if (i == len) {
545             result.append(array, prev, len - prev);
546             break;
547         }
548         if (array[i++] == 0x5C /*'\\'*/) {
549             result.append(array, prev, (i - 1) - prev);
550             UChar32 c = unescapeAt(i); // advances i
551             if (c < 0) {
552                 result.remove(); // return empty string
553                 break; // invalid escape sequence
554             }
555             result.append(c);
556             prev = i;
557         }
558     }
559     return result;
560 }
561 
unescapeAt(int32_t & offset) const562 UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
563     return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this);
564 }
565 
566 //========================================
567 // Read-only implementation
568 //========================================
569 int8_t
doCompare(int32_t start,int32_t length,const UChar * srcChars,int32_t srcStart,int32_t srcLength) const570 UnicodeString::doCompare( int32_t start,
571               int32_t length,
572               const UChar *srcChars,
573               int32_t srcStart,
574               int32_t srcLength) const
575 {
576   // compare illegal string values
577   // treat const UChar *srcChars==NULL as an empty string
578   if(isBogus()) {
579     return -1;
580   }
581 
582   // pin indices to legal values
583   pinIndices(start, length);
584 
585   if(srcChars == NULL) {
586     srcStart = srcLength = 0;
587   }
588 
589   // get the correct pointer
590   const UChar *chars = getArrayStart();
591 
592   chars += start;
593   srcChars += srcStart;
594 
595   int32_t minLength;
596   int8_t lengthResult;
597 
598   // get the srcLength if necessary
599   if(srcLength < 0) {
600     srcLength = u_strlen(srcChars + srcStart);
601   }
602 
603   // are we comparing different lengths?
604   if(length != srcLength) {
605     if(length < srcLength) {
606       minLength = length;
607       lengthResult = -1;
608     } else {
609       minLength = srcLength;
610       lengthResult = 1;
611     }
612   } else {
613     minLength = length;
614     lengthResult = 0;
615   }
616 
617   /*
618    * note that uprv_memcmp() returns an int but we return an int8_t;
619    * we need to take care not to truncate the result -
620    * one way to do this is to right-shift the value to
621    * move the sign bit into the lower 8 bits and making sure that this
622    * does not become 0 itself
623    */
624 
625   if(minLength > 0 && chars != srcChars) {
626     int32_t result;
627 
628 #   if U_IS_BIG_ENDIAN
629       // big-endian: byte comparison works
630       result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar));
631       if(result != 0) {
632         return (int8_t)(result >> 15 | 1);
633       }
634 #   else
635       // little-endian: compare UChar units
636       do {
637         result = ((int32_t)*(chars++) - (int32_t)*(srcChars++));
638         if(result != 0) {
639           return (int8_t)(result >> 15 | 1);
640         }
641       } while(--minLength > 0);
642 #   endif
643   }
644   return lengthResult;
645 }
646 
647 /* String compare in code point order - doCompare() compares in code unit order. */
648 int8_t
doCompareCodePointOrder(int32_t start,int32_t length,const UChar * srcChars,int32_t srcStart,int32_t srcLength) const649 UnicodeString::doCompareCodePointOrder(int32_t start,
650                                        int32_t length,
651                                        const UChar *srcChars,
652                                        int32_t srcStart,
653                                        int32_t srcLength) const
654 {
655   // compare illegal string values
656   // treat const UChar *srcChars==NULL as an empty string
657   if(isBogus()) {
658     return -1;
659   }
660 
661   // pin indices to legal values
662   pinIndices(start, length);
663 
664   if(srcChars == NULL) {
665     srcStart = srcLength = 0;
666   }
667 
668   int32_t diff = uprv_strCompare(getArrayStart() + start, length, srcChars + srcStart, srcLength, FALSE, TRUE);
669   /* translate the 32-bit result into an 8-bit one */
670   if(diff!=0) {
671     return (int8_t)(diff >> 15 | 1);
672   } else {
673     return 0;
674   }
675 }
676 
677 int32_t
getLength() const678 UnicodeString::getLength() const {
679     return length();
680 }
681 
682 UChar
getCharAt(int32_t offset) const683 UnicodeString::getCharAt(int32_t offset) const {
684   return charAt(offset);
685 }
686 
687 UChar32
getChar32At(int32_t offset) const688 UnicodeString::getChar32At(int32_t offset) const {
689   return char32At(offset);
690 }
691 
692 int32_t
countChar32(int32_t start,int32_t length) const693 UnicodeString::countChar32(int32_t start, int32_t length) const {
694   pinIndices(start, length);
695   // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
696   return u_countChar32(getArrayStart()+start, length);
697 }
698 
699 UBool
hasMoreChar32Than(int32_t start,int32_t length,int32_t number) const700 UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
701   pinIndices(start, length);
702   // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
703   return u_strHasMoreChar32Than(getArrayStart()+start, length, number);
704 }
705 
706 int32_t
moveIndex32(int32_t index,int32_t delta) const707 UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
708   // pin index
709   int32_t len = length();
710   if(index<0) {
711     index=0;
712   } else if(index>len) {
713     index=len;
714   }
715 
716   const UChar *array = getArrayStart();
717   if(delta>0) {
718     UTF_FWD_N(array, index, len, delta);
719   } else {
720     UTF_BACK_N(array, 0, index, -delta);
721   }
722 
723   return index;
724 }
725 
726 void
doExtract(int32_t start,int32_t length,UChar * dst,int32_t dstStart) const727 UnicodeString::doExtract(int32_t start,
728              int32_t length,
729              UChar *dst,
730              int32_t dstStart) const
731 {
732   // pin indices to legal values
733   pinIndices(start, length);
734 
735   // do not copy anything if we alias dst itself
736   const UChar *array = getArrayStart();
737   if(array + start != dst + dstStart) {
738     us_arrayCopy(array, start, dst, dstStart, length);
739   }
740 }
741 
742 int32_t
extract(UChar * dest,int32_t destCapacity,UErrorCode & errorCode) const743 UnicodeString::extract(UChar *dest, int32_t destCapacity,
744                        UErrorCode &errorCode) const {
745   int32_t len = length();
746   if(U_SUCCESS(errorCode)) {
747     if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
748       errorCode=U_ILLEGAL_ARGUMENT_ERROR;
749     } else {
750       const UChar *array = getArrayStart();
751       if(len>0 && len<=destCapacity && array!=dest) {
752         uprv_memcpy(dest, array, len*U_SIZEOF_UCHAR);
753       }
754       return u_terminateUChars(dest, destCapacity, len, &errorCode);
755     }
756   }
757 
758   return len;
759 }
760 
761 int32_t
extract(int32_t start,int32_t length,char * target,int32_t targetCapacity,enum EInvariant) const762 UnicodeString::extract(int32_t start,
763                        int32_t length,
764                        char *target,
765                        int32_t targetCapacity,
766                        enum EInvariant) const
767 {
768   // if the arguments are illegal, then do nothing
769   if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) {
770     return 0;
771   }
772 
773   // pin the indices to legal values
774   pinIndices(start, length);
775 
776   if(length <= targetCapacity) {
777     u_UCharsToChars(getArrayStart() + start, target, length);
778   }
779   UErrorCode status = U_ZERO_ERROR;
780   return u_terminateChars(target, targetCapacity, length, &status);
781 }
782 
783 UnicodeString
tempSubString(int32_t start,int32_t len) const784 UnicodeString::tempSubString(int32_t start, int32_t len) const {
785   pinIndices(start, len);
786   const UChar *array = getBuffer();  // not getArrayStart() to check kIsBogus & kOpenGetBuffer
787   if(array==NULL) {
788     array=fUnion.fStackBuffer;  // anything not NULL because that would make an empty string
789     len=-2;  // bogus result string
790   }
791   return UnicodeString(FALSE, array + start, len);
792 }
793 
794 int32_t
toUTF8(int32_t start,int32_t len,char * target,int32_t capacity) const795 UnicodeString::toUTF8(int32_t start, int32_t len,
796                       char *target, int32_t capacity) const {
797   pinIndices(start, len);
798   int32_t length8;
799   UErrorCode errorCode = U_ZERO_ERROR;
800   u_strToUTF8WithSub(target, capacity, &length8,
801                      getBuffer() + start, len,
802                      0xFFFD,  // Standard substitution character.
803                      NULL,    // Don't care about number of substitutions.
804                      &errorCode);
805   return length8;
806 }
807 
808 #if U_CHARSET_IS_UTF8
809 
810 int32_t
extract(int32_t start,int32_t len,char * target,uint32_t dstSize) const811 UnicodeString::extract(int32_t start, int32_t len,
812                        char *target, uint32_t dstSize) const {
813   // if the arguments are illegal, then do nothing
814   if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {
815     return 0;
816   }
817   return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff);
818 }
819 
820 // else see unistr_cnv.cpp
821 #endif
822 
823 void
extractBetween(int32_t start,int32_t limit,UnicodeString & target) const824 UnicodeString::extractBetween(int32_t start,
825                   int32_t limit,
826                   UnicodeString& target) const {
827   pinIndex(start);
828   pinIndex(limit);
829   doExtract(start, limit - start, target);
830 }
831 
832 // When converting from UTF-16 to UTF-8, the result will have at most 3 times
833 // as many bytes as the source has UChars.
834 // The "worst cases" are writing systems like Indic, Thai and CJK with
835 // 3:1 bytes:UChars.
836 void
toUTF8(ByteSink & sink) const837 UnicodeString::toUTF8(ByteSink &sink) const {
838   int32_t length16 = length();
839   if(length16 != 0) {
840     char stackBuffer[1024];
841     int32_t capacity = (int32_t)sizeof(stackBuffer);
842     UBool utf8IsOwned = FALSE;
843     char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,
844                                       3*length16,
845                                       stackBuffer, capacity,
846                                       &capacity);
847     int32_t length8 = 0;
848     UErrorCode errorCode = U_ZERO_ERROR;
849     u_strToUTF8WithSub(utf8, capacity, &length8,
850                        getBuffer(), length16,
851                        0xFFFD,  // Standard substitution character.
852                        NULL,    // Don't care about number of substitutions.
853                        &errorCode);
854     if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
855       utf8 = (char *)uprv_malloc(length8);
856       if(utf8 != NULL) {
857         utf8IsOwned = TRUE;
858         errorCode = U_ZERO_ERROR;
859         u_strToUTF8WithSub(utf8, length8, &length8,
860                            getBuffer(), length16,
861                            0xFFFD,  // Standard substitution character.
862                            NULL,    // Don't care about number of substitutions.
863                            &errorCode);
864       } else {
865         errorCode = U_MEMORY_ALLOCATION_ERROR;
866       }
867     }
868     if(U_SUCCESS(errorCode)) {
869       sink.Append(utf8, length8);
870       sink.Flush();
871     }
872     if(utf8IsOwned) {
873       uprv_free(utf8);
874     }
875   }
876 }
877 
878 int32_t
toUTF32(UChar32 * utf32,int32_t capacity,UErrorCode & errorCode) const879 UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const {
880   int32_t length32=0;
881   if(U_SUCCESS(errorCode)) {
882     // getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
883     u_strToUTF32WithSub(utf32, capacity, &length32,
884         getBuffer(), length(),
885         0xfffd,  // Substitution character.
886         NULL,    // Don't care about number of substitutions.
887         &errorCode);
888   }
889   return length32;
890 }
891 
892 int32_t
indexOf(const UChar * srcChars,int32_t srcStart,int32_t srcLength,int32_t start,int32_t length) const893 UnicodeString::indexOf(const UChar *srcChars,
894                int32_t srcStart,
895                int32_t srcLength,
896                int32_t start,
897                int32_t length) const
898 {
899   if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
900     return -1;
901   }
902 
903   // UnicodeString does not find empty substrings
904   if(srcLength < 0 && srcChars[srcStart] == 0) {
905     return -1;
906   }
907 
908   // get the indices within bounds
909   pinIndices(start, length);
910 
911   // find the first occurrence of the substring
912   const UChar *array = getArrayStart();
913   const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength);
914   if(match == NULL) {
915     return -1;
916   } else {
917     return (int32_t)(match - array);
918   }
919 }
920 
921 int32_t
doIndexOf(UChar c,int32_t start,int32_t length) const922 UnicodeString::doIndexOf(UChar c,
923              int32_t start,
924              int32_t length) const
925 {
926   // pin indices
927   pinIndices(start, length);
928 
929   // find the first occurrence of c
930   const UChar *array = getArrayStart();
931   const UChar *match = u_memchr(array + start, c, length);
932   if(match == NULL) {
933     return -1;
934   } else {
935     return (int32_t)(match - array);
936   }
937 }
938 
939 int32_t
doIndexOf(UChar32 c,int32_t start,int32_t length) const940 UnicodeString::doIndexOf(UChar32 c,
941                          int32_t start,
942                          int32_t length) const {
943   // pin indices
944   pinIndices(start, length);
945 
946   // find the first occurrence of c
947   const UChar *array = getArrayStart();
948   const UChar *match = u_memchr32(array + start, c, length);
949   if(match == NULL) {
950     return -1;
951   } else {
952     return (int32_t)(match - array);
953   }
954 }
955 
956 int32_t
lastIndexOf(const UChar * srcChars,int32_t srcStart,int32_t srcLength,int32_t start,int32_t length) const957 UnicodeString::lastIndexOf(const UChar *srcChars,
958                int32_t srcStart,
959                int32_t srcLength,
960                int32_t start,
961                int32_t length) const
962 {
963   if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
964     return -1;
965   }
966 
967   // UnicodeString does not find empty substrings
968   if(srcLength < 0 && srcChars[srcStart] == 0) {
969     return -1;
970   }
971 
972   // get the indices within bounds
973   pinIndices(start, length);
974 
975   // find the last occurrence of the substring
976   const UChar *array = getArrayStart();
977   const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength);
978   if(match == NULL) {
979     return -1;
980   } else {
981     return (int32_t)(match - array);
982   }
983 }
984 
985 int32_t
doLastIndexOf(UChar c,int32_t start,int32_t length) const986 UnicodeString::doLastIndexOf(UChar c,
987                  int32_t start,
988                  int32_t length) const
989 {
990   if(isBogus()) {
991     return -1;
992   }
993 
994   // pin indices
995   pinIndices(start, length);
996 
997   // find the last occurrence of c
998   const UChar *array = getArrayStart();
999   const UChar *match = u_memrchr(array + start, c, length);
1000   if(match == NULL) {
1001     return -1;
1002   } else {
1003     return (int32_t)(match - array);
1004   }
1005 }
1006 
1007 int32_t
doLastIndexOf(UChar32 c,int32_t start,int32_t length) const1008 UnicodeString::doLastIndexOf(UChar32 c,
1009                              int32_t start,
1010                              int32_t length) const {
1011   // pin indices
1012   pinIndices(start, length);
1013 
1014   // find the last occurrence of c
1015   const UChar *array = getArrayStart();
1016   const UChar *match = u_memrchr32(array + start, c, length);
1017   if(match == NULL) {
1018     return -1;
1019   } else {
1020     return (int32_t)(match - array);
1021   }
1022 }
1023 
1024 //========================================
1025 // Write implementation
1026 //========================================
1027 
1028 UnicodeString&
findAndReplace(int32_t start,int32_t length,const UnicodeString & oldText,int32_t oldStart,int32_t oldLength,const UnicodeString & newText,int32_t newStart,int32_t newLength)1029 UnicodeString::findAndReplace(int32_t start,
1030                   int32_t length,
1031                   const UnicodeString& oldText,
1032                   int32_t oldStart,
1033                   int32_t oldLength,
1034                   const UnicodeString& newText,
1035                   int32_t newStart,
1036                   int32_t newLength)
1037 {
1038   if(isBogus() || oldText.isBogus() || newText.isBogus()) {
1039     return *this;
1040   }
1041 
1042   pinIndices(start, length);
1043   oldText.pinIndices(oldStart, oldLength);
1044   newText.pinIndices(newStart, newLength);
1045 
1046   if(oldLength == 0) {
1047     return *this;
1048   }
1049 
1050   while(length > 0 && length >= oldLength) {
1051     int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);
1052     if(pos < 0) {
1053       // no more oldText's here: done
1054       break;
1055     } else {
1056       // we found oldText, replace it by newText and go beyond it
1057       replace(pos, oldLength, newText, newStart, newLength);
1058       length -= pos + oldLength - start;
1059       start = pos + newLength;
1060     }
1061   }
1062 
1063   return *this;
1064 }
1065 
1066 
1067 void
setToBogus()1068 UnicodeString::setToBogus()
1069 {
1070   releaseArray();
1071 
1072   fShortLength = 0;
1073   fUnion.fFields.fArray = 0;
1074   fUnion.fFields.fCapacity = 0;
1075   fFlags = kIsBogus;
1076 }
1077 
1078 // turn a bogus string into an empty one
1079 void
unBogus()1080 UnicodeString::unBogus() {
1081   if(fFlags & kIsBogus) {
1082     setToEmpty();
1083   }
1084 }
1085 
1086 // setTo() analogous to the readonly-aliasing constructor with the same signature
1087 UnicodeString &
setTo(UBool isTerminated,const UChar * text,int32_t textLength)1088 UnicodeString::setTo(UBool isTerminated,
1089                      const UChar *text,
1090                      int32_t textLength)
1091 {
1092   if(fFlags & kOpenGetBuffer) {
1093     // do not modify a string that has an "open" getBuffer(minCapacity)
1094     return *this;
1095   }
1096 
1097   if(text == NULL) {
1098     // treat as an empty string, do not alias
1099     releaseArray();
1100     setToEmpty();
1101     return *this;
1102   }
1103 
1104   if( textLength < -1 ||
1105       (textLength == -1 && !isTerminated) ||
1106       (textLength >= 0 && isTerminated && text[textLength] != 0)
1107   ) {
1108     setToBogus();
1109     return *this;
1110   }
1111 
1112   releaseArray();
1113 
1114   if(textLength == -1) {
1115     // text is terminated, or else it would have failed the above test
1116     textLength = u_strlen(text);
1117   }
1118   setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
1119 
1120   fFlags = kReadonlyAlias;
1121   return *this;
1122 }
1123 
1124 // setTo() analogous to the writable-aliasing constructor with the same signature
1125 UnicodeString &
setTo(UChar * buffer,int32_t buffLength,int32_t buffCapacity)1126 UnicodeString::setTo(UChar *buffer,
1127                      int32_t buffLength,
1128                      int32_t buffCapacity) {
1129   if(fFlags & kOpenGetBuffer) {
1130     // do not modify a string that has an "open" getBuffer(minCapacity)
1131     return *this;
1132   }
1133 
1134   if(buffer == NULL) {
1135     // treat as an empty string, do not alias
1136     releaseArray();
1137     setToEmpty();
1138     return *this;
1139   }
1140 
1141   if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
1142     setToBogus();
1143     return *this;
1144   } else if(buffLength == -1) {
1145     // buffLength = u_strlen(buff); but do not look beyond buffCapacity
1146     const UChar *p = buffer, *limit = buffer + buffCapacity;
1147     while(p != limit && *p != 0) {
1148       ++p;
1149     }
1150     buffLength = (int32_t)(p - buffer);
1151   }
1152 
1153   releaseArray();
1154 
1155   setArray(buffer, buffLength, buffCapacity);
1156   fFlags = kWritableAlias;
1157   return *this;
1158 }
1159 
setToUTF8(const StringPiece & utf8)1160 UnicodeString &UnicodeString::setToUTF8(const StringPiece &utf8) {
1161   unBogus();
1162   int32_t length = utf8.length();
1163   int32_t capacity;
1164   // The UTF-16 string will be at most as long as the UTF-8 string.
1165   if(length <= US_STACKBUF_SIZE) {
1166     capacity = US_STACKBUF_SIZE;
1167   } else {
1168     capacity = length + 1;  // +1 for the terminating NUL.
1169   }
1170   UChar *utf16 = getBuffer(capacity);
1171   int32_t length16;
1172   UErrorCode errorCode = U_ZERO_ERROR;
1173   u_strFromUTF8WithSub(utf16, getCapacity(), &length16,
1174       utf8.data(), length,
1175       0xfffd,  // Substitution character.
1176       NULL,    // Don't care about number of substitutions.
1177       &errorCode);
1178   releaseBuffer(length16);
1179   if(U_FAILURE(errorCode)) {
1180     setToBogus();
1181   }
1182   return *this;
1183 }
1184 
1185 UnicodeString&
setCharAt(int32_t offset,UChar c)1186 UnicodeString::setCharAt(int32_t offset,
1187              UChar c)
1188 {
1189   int32_t len = length();
1190   if(cloneArrayIfNeeded() && len > 0) {
1191     if(offset < 0) {
1192       offset = 0;
1193     } else if(offset >= len) {
1194       offset = len - 1;
1195     }
1196 
1197     getArrayStart()[offset] = c;
1198   }
1199   return *this;
1200 }
1201 
1202 UnicodeString&
doReplace(int32_t start,int32_t length,const UnicodeString & src,int32_t srcStart,int32_t srcLength)1203 UnicodeString::doReplace( int32_t start,
1204               int32_t length,
1205               const UnicodeString& src,
1206               int32_t srcStart,
1207               int32_t srcLength)
1208 {
1209   if(!src.isBogus()) {
1210     // pin the indices to legal values
1211     src.pinIndices(srcStart, srcLength);
1212 
1213     // get the characters from src
1214     // and replace the range in ourselves with them
1215     return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);
1216   } else {
1217     // remove the range
1218     return doReplace(start, length, 0, 0, 0);
1219   }
1220 }
1221 
1222 UnicodeString&
doReplace(int32_t start,int32_t length,const UChar * srcChars,int32_t srcStart,int32_t srcLength)1223 UnicodeString::doReplace(int32_t start,
1224              int32_t length,
1225              const UChar *srcChars,
1226              int32_t srcStart,
1227              int32_t srcLength)
1228 {
1229   if(!isWritable()) {
1230     return *this;
1231   }
1232 
1233   int32_t oldLength = this->length();
1234 
1235   // optimize (read-only alias).remove(0, start) and .remove(start, end)
1236   if((fFlags&kBufferIsReadonly) && srcLength == 0) {
1237     if(start == 0) {
1238       // remove prefix by adjusting the array pointer
1239       pinIndex(length);
1240       fUnion.fFields.fArray += length;
1241       fUnion.fFields.fCapacity -= length;
1242       setLength(oldLength - length);
1243       return *this;
1244     } else {
1245       pinIndex(start);
1246       if(length >= (oldLength - start)) {
1247         // remove suffix by reducing the length (like truncate())
1248         setLength(start);
1249         fUnion.fFields.fCapacity = start;  // not NUL-terminated any more
1250         return *this;
1251       }
1252     }
1253   }
1254 
1255   if(srcChars == 0) {
1256     srcStart = srcLength = 0;
1257   } else if(srcLength < 0) {
1258     // get the srcLength if necessary
1259     srcLength = u_strlen(srcChars + srcStart);
1260   }
1261 
1262   // calculate the size of the string after the replace
1263   int32_t newSize;
1264 
1265   // optimize append() onto a large-enough, owned string
1266   if(start >= oldLength) {
1267     newSize = oldLength + srcLength;
1268     if(newSize <= getCapacity() && isBufferWritable()) {
1269       us_arrayCopy(srcChars, srcStart, getArrayStart(), oldLength, srcLength);
1270       setLength(newSize);
1271       return *this;
1272     } else {
1273       // pin the indices to legal values
1274       start = oldLength;
1275       length = 0;
1276     }
1277   } else {
1278     // pin the indices to legal values
1279     pinIndices(start, length);
1280 
1281     newSize = oldLength - length + srcLength;
1282   }
1283 
1284   // the following may change fArray but will not copy the current contents;
1285   // therefore we need to keep the current fArray
1286   UChar oldStackBuffer[US_STACKBUF_SIZE];
1287   UChar *oldArray;
1288   if((fFlags&kUsingStackBuffer) && (newSize > US_STACKBUF_SIZE)) {
1289     // copy the stack buffer contents because it will be overwritten with
1290     // fUnion.fFields values
1291     u_memcpy(oldStackBuffer, fUnion.fStackBuffer, oldLength);
1292     oldArray = oldStackBuffer;
1293   } else {
1294     oldArray = getArrayStart();
1295   }
1296 
1297   // clone our array and allocate a bigger array if needed
1298   int32_t *bufferToDelete = 0;
1299   if(!cloneArrayIfNeeded(newSize, newSize + (newSize >> 2) + kGrowSize,
1300                          FALSE, &bufferToDelete)
1301   ) {
1302     return *this;
1303   }
1304 
1305   // now do the replace
1306 
1307   UChar *newArray = getArrayStart();
1308   if(newArray != oldArray) {
1309     // if fArray changed, then we need to copy everything except what will change
1310     us_arrayCopy(oldArray, 0, newArray, 0, start);
1311     us_arrayCopy(oldArray, start + length,
1312                  newArray, start + srcLength,
1313                  oldLength - (start + length));
1314   } else if(length != srcLength) {
1315     // fArray did not change; copy only the portion that isn't changing, leaving a hole
1316     us_arrayCopy(oldArray, start + length,
1317                  newArray, start + srcLength,
1318                  oldLength - (start + length));
1319   }
1320 
1321   // now fill in the hole with the new string
1322   us_arrayCopy(srcChars, srcStart, newArray, start, srcLength);
1323 
1324   setLength(newSize);
1325 
1326   // delayed delete in case srcChars == fArray when we started, and
1327   // to keep oldArray alive for the above operations
1328   if (bufferToDelete) {
1329     uprv_free(bufferToDelete);
1330   }
1331 
1332   return *this;
1333 }
1334 
1335 /**
1336  * Replaceable API
1337  */
1338 void
handleReplaceBetween(int32_t start,int32_t limit,const UnicodeString & text)1339 UnicodeString::handleReplaceBetween(int32_t start,
1340                                     int32_t limit,
1341                                     const UnicodeString& text) {
1342     replaceBetween(start, limit, text);
1343 }
1344 
1345 /**
1346  * Replaceable API
1347  */
1348 void
copy(int32_t start,int32_t limit,int32_t dest)1349 UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
1350     if (limit <= start) {
1351         return; // Nothing to do; avoid bogus malloc call
1352     }
1353     UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) );
1354     // Check to make sure text is not null.
1355     if (text != NULL) {
1356 	    extractBetween(start, limit, text, 0);
1357 	    insert(dest, text, 0, limit - start);
1358 	    uprv_free(text);
1359     }
1360 }
1361 
1362 /**
1363  * Replaceable API
1364  *
1365  * NOTE: This is for the Replaceable class.  There is no rep.cpp,
1366  * so we implement this function here.
1367  */
hasMetaData() const1368 UBool Replaceable::hasMetaData() const {
1369     return TRUE;
1370 }
1371 
1372 /**
1373  * Replaceable API
1374  */
hasMetaData() const1375 UBool UnicodeString::hasMetaData() const {
1376     return FALSE;
1377 }
1378 
1379 UnicodeString&
doReverse(int32_t start,int32_t length)1380 UnicodeString::doReverse(int32_t start, int32_t length) {
1381   if(length <= 1 || !cloneArrayIfNeeded()) {
1382     return *this;
1383   }
1384 
1385   // pin the indices to legal values
1386   pinIndices(start, length);
1387   if(length <= 1) {  // pinIndices() might have shrunk the length
1388     return *this;
1389   }
1390 
1391   UChar *left = getArrayStart() + start;
1392   UChar *right = left + length - 1;  // -1 for inclusive boundary (length>=2)
1393   UChar swap;
1394   UBool hasSupplementary = FALSE;
1395 
1396   // Before the loop we know left<right because length>=2.
1397   do {
1398     hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left);
1399     hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right);
1400     *right-- = swap;
1401   } while(left < right);
1402   // Make sure to test the middle code unit of an odd-length string.
1403   // Redundant if the length is even.
1404   hasSupplementary |= (UBool)U16_IS_LEAD(*left);
1405 
1406   /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
1407   if(hasSupplementary) {
1408     UChar swap2;
1409 
1410     left = getArrayStart() + start;
1411     right = left + length - 1; // -1 so that we can look at *(left+1) if left<right
1412     while(left < right) {
1413       if(U16_IS_TRAIL(swap = *left) && U16_IS_LEAD(swap2 = *(left + 1))) {
1414         *left++ = swap2;
1415         *left++ = swap;
1416       } else {
1417         ++left;
1418       }
1419     }
1420   }
1421 
1422   return *this;
1423 }
1424 
1425 UBool
padLeading(int32_t targetLength,UChar padChar)1426 UnicodeString::padLeading(int32_t targetLength,
1427                           UChar padChar)
1428 {
1429   int32_t oldLength = length();
1430   if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1431     return FALSE;
1432   } else {
1433     // move contents up by padding width
1434     UChar *array = getArrayStart();
1435     int32_t start = targetLength - oldLength;
1436     us_arrayCopy(array, 0, array, start, oldLength);
1437 
1438     // fill in padding character
1439     while(--start >= 0) {
1440       array[start] = padChar;
1441     }
1442     setLength(targetLength);
1443     return TRUE;
1444   }
1445 }
1446 
1447 UBool
padTrailing(int32_t targetLength,UChar padChar)1448 UnicodeString::padTrailing(int32_t targetLength,
1449                            UChar padChar)
1450 {
1451   int32_t oldLength = length();
1452   if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1453     return FALSE;
1454   } else {
1455     // fill in padding character
1456     UChar *array = getArrayStart();
1457     int32_t length = targetLength;
1458     while(--length >= oldLength) {
1459       array[length] = padChar;
1460     }
1461     setLength(targetLength);
1462     return TRUE;
1463   }
1464 }
1465 
1466 //========================================
1467 // Hashing
1468 //========================================
1469 int32_t
doHashCode() const1470 UnicodeString::doHashCode() const
1471 {
1472     /* Delegate hash computation to uhash.  This makes UnicodeString
1473      * hashing consistent with UChar* hashing.  */
1474     int32_t hashCode = uhash_hashUCharsN(getArrayStart(), length());
1475     if (hashCode == kInvalidHashCode) {
1476         hashCode = kEmptyHashCode;
1477     }
1478     return hashCode;
1479 }
1480 
1481 //========================================
1482 // External Buffer
1483 //========================================
1484 
1485 UChar *
getBuffer(int32_t minCapacity)1486 UnicodeString::getBuffer(int32_t minCapacity) {
1487   if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
1488     fFlags|=kOpenGetBuffer;
1489     fShortLength=0;
1490     return getArrayStart();
1491   } else {
1492     return 0;
1493   }
1494 }
1495 
1496 void
releaseBuffer(int32_t newLength)1497 UnicodeString::releaseBuffer(int32_t newLength) {
1498   if(fFlags&kOpenGetBuffer && newLength>=-1) {
1499     // set the new fLength
1500     int32_t capacity=getCapacity();
1501     if(newLength==-1) {
1502       // the new length is the string length, capped by fCapacity
1503       const UChar *array=getArrayStart(), *p=array, *limit=array+capacity;
1504       while(p<limit && *p!=0) {
1505         ++p;
1506       }
1507       newLength=(int32_t)(p-array);
1508     } else if(newLength>capacity) {
1509       newLength=capacity;
1510     }
1511     setLength(newLength);
1512     fFlags&=~kOpenGetBuffer;
1513   }
1514 }
1515 
1516 //========================================
1517 // Miscellaneous
1518 //========================================
1519 UBool
cloneArrayIfNeeded(int32_t newCapacity,int32_t growCapacity,UBool doCopyArray,int32_t ** pBufferToDelete,UBool forceClone)1520 UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
1521                                   int32_t growCapacity,
1522                                   UBool doCopyArray,
1523                                   int32_t **pBufferToDelete,
1524                                   UBool forceClone) {
1525   // default parameters need to be static, therefore
1526   // the defaults are -1 to have convenience defaults
1527   if(newCapacity == -1) {
1528     newCapacity = getCapacity();
1529   }
1530 
1531   // while a getBuffer(minCapacity) is "open",
1532   // prevent any modifications of the string by returning FALSE here
1533   // if the string is bogus, then only an assignment or similar can revive it
1534   if(!isWritable()) {
1535     return FALSE;
1536   }
1537 
1538   /*
1539    * We need to make a copy of the array if
1540    * the buffer is read-only, or
1541    * the buffer is refCounted (shared), and refCount>1, or
1542    * the buffer is too small.
1543    * Return FALSE if memory could not be allocated.
1544    */
1545   if(forceClone ||
1546      fFlags & kBufferIsReadonly ||
1547      (fFlags & kRefCounted && refCount() > 1) ||
1548      newCapacity > getCapacity()
1549   ) {
1550     // check growCapacity for default value and use of the stack buffer
1551     if(growCapacity == -1) {
1552       growCapacity = newCapacity;
1553     } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
1554       growCapacity = US_STACKBUF_SIZE;
1555     }
1556 
1557     // save old values
1558     UChar oldStackBuffer[US_STACKBUF_SIZE];
1559     UChar *oldArray;
1560     uint8_t flags = fFlags;
1561 
1562     if(flags&kUsingStackBuffer) {
1563       if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
1564         // copy the stack buffer contents because it will be overwritten with
1565         // fUnion.fFields values
1566         us_arrayCopy(fUnion.fStackBuffer, 0, oldStackBuffer, 0, fShortLength);
1567         oldArray = oldStackBuffer;
1568       } else {
1569         oldArray = 0; // no need to copy from stack buffer to itself
1570       }
1571     } else {
1572       oldArray = fUnion.fFields.fArray;
1573     }
1574 
1575     // allocate a new array
1576     if(allocate(growCapacity) ||
1577        (newCapacity < growCapacity && allocate(newCapacity))
1578     ) {
1579       if(doCopyArray && oldArray != 0) {
1580         // copy the contents
1581         // do not copy more than what fits - it may be smaller than before
1582         int32_t minLength = length();
1583         newCapacity = getCapacity();
1584         if(newCapacity < minLength) {
1585           minLength = newCapacity;
1586           setLength(minLength);
1587         }
1588         us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);
1589       } else {
1590         fShortLength = 0;
1591       }
1592 
1593       // release the old array
1594       if(flags & kRefCounted) {
1595         // the array is refCounted; decrement and release if 0
1596         int32_t *pRefCount = ((int32_t *)oldArray - 1);
1597         if(umtx_atomic_dec(pRefCount) == 0) {
1598           if(pBufferToDelete == 0) {
1599             uprv_free(pRefCount);
1600           } else {
1601             // the caller requested to delete it himself
1602             *pBufferToDelete = pRefCount;
1603           }
1604         }
1605       }
1606     } else {
1607       // not enough memory for growCapacity and not even for the smaller newCapacity
1608       // reset the old values for setToBogus() to release the array
1609       if(!(flags&kUsingStackBuffer)) {
1610         fUnion.fFields.fArray = oldArray;
1611       }
1612       fFlags = flags;
1613       setToBogus();
1614       return FALSE;
1615     }
1616   }
1617   return TRUE;
1618 }
1619 U_NAMESPACE_END
1620 
#ifdef U_STATIC_IMPLEMENTATION
/*
This function is never meant to be called. Its only purpose is to force the
virtual vector deleting destructor to be defined inside unistr.cpp.
UObject already provides the vector deleting destructor, but defining it
here guarantees that it ends up in this object file, which keeps static
library dependencies to a minimum.
*/
static void uprv_UnicodeStringDummy(void) {
    U_NAMESPACE_USE
    UnicodeString *dummy = new UnicodeString[2];
    delete [] dummy;
}
#endif
1634