• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 ******************************************************************************
3 * Copyright (C) 1999-2011, International Business Machines Corporation and   *
4 * others. All Rights Reserved.                                               *
5 ******************************************************************************
6 *
7 * File unistr.cpp
8 *
9 * Modification History:
10 *
11 *   Date        Name        Description
12 *   09/25/98    stephen     Creation.
13 *   04/20/99    stephen     Overhauled per 4/16 code review.
14 *   07/09/99    stephen     Renamed {hi,lo},{byte,word} to icu_X for HP/UX
15 *   11/18/99    aliu        Added handleReplaceBetween() to make inherit from
16 *                           Replaceable.
17 *   06/25/01    grhoten     Removed the dependency on iostream
18 ******************************************************************************
19 */
20 
21 #include "unicode/utypes.h"
22 #include "unicode/appendable.h"
23 #include "unicode/putil.h"
24 #include "cstring.h"
25 #include "cmemory.h"
26 #include "unicode/ustring.h"
27 #include "unicode/unistr.h"
28 #include "uhash.h"
29 #include "ustr_imp.h"
30 #include "umutex.h"
31 
32 #if 0
33 
34 #if U_IOSTREAM_SOURCE >= 199711
35 #include <iostream>
36 using namespace std;
37 #elif U_IOSTREAM_SOURCE >= 198506
38 #include <iostream.h>
39 #endif
40 
41 //DEBUGGING
42 void
43 print(const UnicodeString& s,
44       const char *name)
45 {
46   UChar c;
47   cout << name << ":|";
48   for(int i = 0; i < s.length(); ++i) {
49     c = s[i];
50     if(c>= 0x007E || c < 0x0020)
51       cout << "[0x" << hex << s[i] << "]";
52     else
53       cout << (char) s[i];
54   }
55   cout << '|' << endl;
56 }
57 
58 void
59 print(const UChar *s,
60       int32_t len,
61       const char *name)
62 {
63   UChar c;
64   cout << name << ":|";
65   for(int i = 0; i < len; ++i) {
66     c = s[i];
67     if(c>= 0x007E || c < 0x0020)
68       cout << "[0x" << hex << s[i] << "]";
69     else
70       cout << (char) s[i];
71   }
72   cout << '|' << endl;
73 }
74 // END DEBUGGING
75 #endif
76 
77 // Local function definitions for now
78 
79 // need to copy areas that may overlap
80 static
81 inline void
us_arrayCopy(const UChar * src,int32_t srcStart,UChar * dst,int32_t dstStart,int32_t count)82 us_arrayCopy(const UChar *src, int32_t srcStart,
83          UChar *dst, int32_t dstStart, int32_t count)
84 {
85   if(count>0) {
86     uprv_memmove(dst+dstStart, src+srcStart, (size_t)(count*sizeof(*src)));
87   }
88 }
89 
90 // u_unescapeAt() callback to get a UChar from a UnicodeString
91 U_CDECL_BEGIN
92 static UChar U_CALLCONV
UnicodeString_charAt(int32_t offset,void * context)93 UnicodeString_charAt(int32_t offset, void *context) {
94     return ((U_NAMESPACE_QUALIFIER UnicodeString*) context)->charAt(offset);
95 }
96 U_CDECL_END
97 
98 U_NAMESPACE_BEGIN
99 
100 /* The Replaceable virtual destructor can't be defined in the header
101    due to how AIX works with multiple definitions of virtual functions.
102 */
~Replaceable()103 Replaceable::~Replaceable() {}
Replaceable()104 Replaceable::Replaceable() {}
105 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
106 
107 UnicodeString U_EXPORT2
108 operator+ (const UnicodeString &s1, const UnicodeString &s2) {
109     return
110         UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0).
111             append(s1).
112                 append(s2);
113 }
114 
115 //========================================
116 // Reference Counting functions, put at top of file so that optimizing compilers
117 //                               have a chance to automatically inline.
118 //========================================
119 
120 void
addRef()121 UnicodeString::addRef()
122 {  umtx_atomic_inc((int32_t *)fUnion.fFields.fArray - 1);}
123 
124 int32_t
removeRef()125 UnicodeString::removeRef()
126 { return umtx_atomic_dec((int32_t *)fUnion.fFields.fArray - 1);}
127 
128 int32_t
refCount() const129 UnicodeString::refCount() const
130 {
131     umtx_lock(NULL);
132     // Note: without the lock to force a memory barrier, we might see a very
133     //       stale value on some multi-processor systems.
134     int32_t  count = *((int32_t *)fUnion.fFields.fArray - 1);
135     umtx_unlock(NULL);
136     return count;
137  }
138 
139 void
releaseArray()140 UnicodeString::releaseArray() {
141   if((fFlags & kRefCounted) && removeRef() == 0) {
142     uprv_free((int32_t *)fUnion.fFields.fArray - 1);
143   }
144 }
145 
146 
147 
148 //========================================
149 // Constructors
150 //========================================
UnicodeString()151 UnicodeString::UnicodeString()
152   : fShortLength(0),
153     fFlags(kShortString)
154 {}
155 
UnicodeString(int32_t capacity,UChar32 c,int32_t count)156 UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count)
157   : fShortLength(0),
158     fFlags(0)
159 {
160   if(count <= 0 || (uint32_t)c > 0x10ffff) {
161     // just allocate and do not do anything else
162     allocate(capacity);
163   } else {
164     // count > 0, allocate and fill the new string with count c's
165     int32_t unitCount = UTF_CHAR_LENGTH(c), length = count * unitCount;
166     if(capacity < length) {
167       capacity = length;
168     }
169     if(allocate(capacity)) {
170       UChar *array = getArrayStart();
171       int32_t i = 0;
172 
173       // fill the new string with c
174       if(unitCount == 1) {
175         // fill with length UChars
176         while(i < length) {
177           array[i++] = (UChar)c;
178         }
179       } else {
180         // get the code units for c
181         UChar units[UTF_MAX_CHAR_LENGTH];
182         UTF_APPEND_CHAR_UNSAFE(units, i, c);
183 
184         // now it must be i==unitCount
185         i = 0;
186 
187         // for Unicode, unitCount can only be 1, 2, 3, or 4
188         // 1 is handled above
189         while(i < length) {
190           int32_t unitIdx = 0;
191           while(unitIdx < unitCount) {
192             array[i++]=units[unitIdx++];
193           }
194         }
195       }
196     }
197     setLength(length);
198   }
199 }
200 
UnicodeString(UChar ch)201 UnicodeString::UnicodeString(UChar ch)
202   : fShortLength(1),
203     fFlags(kShortString)
204 {
205   fUnion.fStackBuffer[0] = ch;
206 }
207 
UnicodeString(UChar32 ch)208 UnicodeString::UnicodeString(UChar32 ch)
209   : fShortLength(0),
210     fFlags(kShortString)
211 {
212   int32_t i = 0;
213   UBool isError = FALSE;
214   U16_APPEND(fUnion.fStackBuffer, i, US_STACKBUF_SIZE, ch, isError);
215   fShortLength = (int8_t)i;
216 }
217 
UnicodeString(const UChar * text)218 UnicodeString::UnicodeString(const UChar *text)
219   : fShortLength(0),
220     fFlags(kShortString)
221 {
222   doReplace(0, 0, text, 0, -1);
223 }
224 
UnicodeString(const UChar * text,int32_t textLength)225 UnicodeString::UnicodeString(const UChar *text,
226                              int32_t textLength)
227   : fShortLength(0),
228     fFlags(kShortString)
229 {
230   doReplace(0, 0, text, 0, textLength);
231 }
232 
UnicodeString(UBool isTerminated,const UChar * text,int32_t textLength)233 UnicodeString::UnicodeString(UBool isTerminated,
234                              const UChar *text,
235                              int32_t textLength)
236   : fShortLength(0),
237     fFlags(kReadonlyAlias)
238 {
239   if(text == NULL) {
240     // treat as an empty string, do not alias
241     setToEmpty();
242   } else if(textLength < -1 ||
243             (textLength == -1 && !isTerminated) ||
244             (textLength >= 0 && isTerminated && text[textLength] != 0)
245   ) {
246     setToBogus();
247   } else {
248     if(textLength == -1) {
249       // text is terminated, or else it would have failed the above test
250       textLength = u_strlen(text);
251     }
252     setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
253   }
254 }
255 
UnicodeString(UChar * buff,int32_t buffLength,int32_t buffCapacity)256 UnicodeString::UnicodeString(UChar *buff,
257                              int32_t buffLength,
258                              int32_t buffCapacity)
259   : fShortLength(0),
260     fFlags(kWritableAlias)
261 {
262   if(buff == NULL) {
263     // treat as an empty string, do not alias
264     setToEmpty();
265   } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
266     setToBogus();
267   } else {
268     if(buffLength == -1) {
269       // fLength = u_strlen(buff); but do not look beyond buffCapacity
270       const UChar *p = buff, *limit = buff + buffCapacity;
271       while(p != limit && *p != 0) {
272         ++p;
273       }
274       buffLength = (int32_t)(p - buff);
275     }
276     setArray(buff, buffLength, buffCapacity);
277   }
278 }
279 
UnicodeString(const char * src,int32_t length,EInvariant)280 UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant)
281   : fShortLength(0),
282     fFlags(kShortString)
283 {
284   if(src==NULL) {
285     // treat as an empty string
286   } else {
287     if(length<0) {
288       length=(int32_t)uprv_strlen(src);
289     }
290     if(cloneArrayIfNeeded(length, length, FALSE)) {
291       u_charsToUChars(src, getArrayStart(), length);
292       setLength(length);
293     } else {
294       setToBogus();
295     }
296   }
297 }
298 
299 #if U_CHARSET_IS_UTF8
300 
UnicodeString(const char * codepageData)301 UnicodeString::UnicodeString(const char *codepageData)
302   : fShortLength(0),
303     fFlags(kShortString) {
304   if(codepageData != 0) {
305     setToUTF8(codepageData);
306   }
307 }
308 
UnicodeString(const char * codepageData,int32_t dataLength)309 UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength)
310   : fShortLength(0),
311     fFlags(kShortString) {
312   // if there's nothing to convert, do nothing
313   if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
314     return;
315   }
316   if(dataLength == -1) {
317     dataLength = (int32_t)uprv_strlen(codepageData);
318   }
319   setToUTF8(StringPiece(codepageData, dataLength));
320 }
321 
322 // else see unistr_cnv.cpp
323 #endif
324 
// Copy constructor: starts out as an empty short string and then takes
// over the contents of that through copyFrom().
UnicodeString::UnicodeString(const UnicodeString& that)
  : Replaceable(), fShortLength(0), fFlags(kShortString)
{
  copyFrom(that);
}
332 
// Constructs a string from that, starting at srcStart: starts out as an
// empty short string and delegates to setTo(that, srcStart).
UnicodeString::UnicodeString(const UnicodeString& that,
                             int32_t srcStart)
  : Replaceable(), fShortLength(0), fFlags(kShortString)
{
  setTo(that, srcStart);
}
341 
// Constructs a string from the srcStart/srcLength range of that: starts
// out as an empty short string and delegates to
// setTo(that, srcStart, srcLength).
UnicodeString::UnicodeString(const UnicodeString& that,
                             int32_t srcStart,
                             int32_t srcLength)
  : Replaceable(), fShortLength(0), fFlags(kShortString)
{
  setTo(that, srcStart, srcLength);
}
351 
352 // Replaceable base class clone() default implementation, does not clone
353 Replaceable *
clone() const354 Replaceable::clone() const {
355   return NULL;
356 }
357 
358 // UnicodeString overrides clone() with a real implementation
359 Replaceable *
clone() const360 UnicodeString::clone() const {
361   return new UnicodeString(*this);
362 }
363 
364 //========================================
365 // array allocation
366 //========================================
367 
368 UBool
allocate(int32_t capacity)369 UnicodeString::allocate(int32_t capacity) {
370   if(capacity <= US_STACKBUF_SIZE) {
371     fFlags = kShortString;
372   } else {
373     // count bytes for the refCounter and the string capacity, and
374     // round up to a multiple of 16; then divide by 4 and allocate int32_t's
375     // to be safely aligned for the refCount
376     // the +1 is for the NUL terminator, to avoid reallocation in getTerminatedBuffer()
377     int32_t words = (int32_t)(((sizeof(int32_t) + (capacity + 1) * U_SIZEOF_UCHAR + 15) & ~15) >> 2);
378     int32_t *array = (int32_t*) uprv_malloc( sizeof(int32_t) * words );
379     if(array != 0) {
380       // set initial refCount and point behind the refCount
381       *array++ = 1;
382 
383       // have fArray point to the first UChar
384       fUnion.fFields.fArray = (UChar *)array;
385       fUnion.fFields.fCapacity = (int32_t)((words - 1) * (sizeof(int32_t) / U_SIZEOF_UCHAR));
386       fFlags = kLongString;
387     } else {
388       fShortLength = 0;
389       fUnion.fFields.fArray = 0;
390       fUnion.fFields.fCapacity = 0;
391       fFlags = kIsBogus;
392       return FALSE;
393     }
394   }
395   return TRUE;
396 }
397 
398 //========================================
399 // Destructor
400 //========================================
~UnicodeString()401 UnicodeString::~UnicodeString()
402 {
403   releaseArray();
404 }
405 
406 //========================================
407 // Factory methods
408 //========================================
409 
fromUTF8(const StringPiece & utf8)410 UnicodeString UnicodeString::fromUTF8(const StringPiece &utf8) {
411   UnicodeString result;
412   result.setToUTF8(utf8);
413   return result;
414 }
415 
fromUTF32(const UChar32 * utf32,int32_t length)416 UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {
417   UnicodeString result;
418   int32_t capacity;
419   // Most UTF-32 strings will be BMP-only and result in a same-length
420   // UTF-16 string. We overestimate the capacity just slightly,
421   // just in case there are a few supplementary characters.
422   if(length <= US_STACKBUF_SIZE) {
423     capacity = US_STACKBUF_SIZE;
424   } else {
425     capacity = length + (length >> 4) + 4;
426   }
427   do {
428     UChar *utf16 = result.getBuffer(capacity);
429     int32_t length16;
430     UErrorCode errorCode = U_ZERO_ERROR;
431     u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16,
432         utf32, length,
433         0xfffd,  // Substitution character.
434         NULL,    // Don't care about number of substitutions.
435         &errorCode);
436     result.releaseBuffer(length16);
437     if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
438       capacity = length16 + 1;  // +1 for the terminating NUL.
439       continue;
440     } else if(U_FAILURE(errorCode)) {
441       result.setToBogus();
442     }
443     break;
444   } while(TRUE);
445   return result;
446 }
447 
448 //========================================
449 // Assignment
450 //========================================
451 
452 UnicodeString &
operator =(const UnicodeString & src)453 UnicodeString::operator=(const UnicodeString &src) {
454   return copyFrom(src);
455 }
456 
457 UnicodeString &
fastCopyFrom(const UnicodeString & src)458 UnicodeString::fastCopyFrom(const UnicodeString &src) {
459   return copyFrom(src, TRUE);
460 }
461 
462 UnicodeString &
copyFrom(const UnicodeString & src,UBool fastCopy)463 UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
464   // if assigning to ourselves, do nothing
465   if(this == 0 || this == &src) {
466     return *this;
467   }
468 
469   // is the right side bogus?
470   if(&src == 0 || src.isBogus()) {
471     setToBogus();
472     return *this;
473   }
474 
475   // delete the current contents
476   releaseArray();
477 
478   if(src.isEmpty()) {
479     // empty string - use the stack buffer
480     setToEmpty();
481     return *this;
482   }
483 
484   // we always copy the length
485   int32_t srcLength = src.length();
486   setLength(srcLength);
487 
488   // fLength>0 and not an "open" src.getBuffer(minCapacity)
489   switch(src.fFlags) {
490   case kShortString:
491     // short string using the stack buffer, do the same
492     fFlags = kShortString;
493     uprv_memcpy(fUnion.fStackBuffer, src.fUnion.fStackBuffer, srcLength * U_SIZEOF_UCHAR);
494     break;
495   case kLongString:
496     // src uses a refCounted string buffer, use that buffer with refCount
497     // src is const, use a cast - we don't really change it
498     ((UnicodeString &)src).addRef();
499     // copy all fields, share the reference-counted buffer
500     fUnion.fFields.fArray = src.fUnion.fFields.fArray;
501     fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
502     fFlags = src.fFlags;
503     break;
504   case kReadonlyAlias:
505     if(fastCopy) {
506       // src is a readonly alias, do the same
507       // -> maintain the readonly alias as such
508       fUnion.fFields.fArray = src.fUnion.fFields.fArray;
509       fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
510       fFlags = src.fFlags;
511       break;
512     }
513     // else if(!fastCopy) fall through to case kWritableAlias
514     // -> allocate a new buffer and copy the contents
515   case kWritableAlias:
516     // src is a writable alias; we make a copy of that instead
517     if(allocate(srcLength)) {
518       uprv_memcpy(getArrayStart(), src.getArrayStart(), srcLength * U_SIZEOF_UCHAR);
519       break;
520     }
521     // if there is not enough memory, then fall through to setting to bogus
522   default:
523     // if src is bogus, set ourselves to bogus
524     // do not call setToBogus() here because fArray and fFlags are not consistent here
525     fShortLength = 0;
526     fUnion.fFields.fArray = 0;
527     fUnion.fFields.fCapacity = 0;
528     fFlags = kIsBogus;
529     break;
530   }
531 
532   return *this;
533 }
534 
535 //========================================
536 // Miscellaneous operations
537 //========================================
538 
unescape() const539 UnicodeString UnicodeString::unescape() const {
540     UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity
541     const UChar *array = getBuffer();
542     int32_t len = length();
543     int32_t prev = 0;
544     for (int32_t i=0;;) {
545         if (i == len) {
546             result.append(array, prev, len - prev);
547             break;
548         }
549         if (array[i++] == 0x5C /*'\\'*/) {
550             result.append(array, prev, (i - 1) - prev);
551             UChar32 c = unescapeAt(i); // advances i
552             if (c < 0) {
553                 result.remove(); // return empty string
554                 break; // invalid escape sequence
555             }
556             result.append(c);
557             prev = i;
558         }
559     }
560     return result;
561 }
562 
unescapeAt(int32_t & offset) const563 UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
564     return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this);
565 }
566 
567 //========================================
568 // Read-only implementation
569 //========================================
570 int8_t
doCompare(int32_t start,int32_t length,const UChar * srcChars,int32_t srcStart,int32_t srcLength) const571 UnicodeString::doCompare( int32_t start,
572               int32_t length,
573               const UChar *srcChars,
574               int32_t srcStart,
575               int32_t srcLength) const
576 {
577   // compare illegal string values
578   // treat const UChar *srcChars==NULL as an empty string
579   if(isBogus()) {
580     return -1;
581   }
582 
583   // pin indices to legal values
584   pinIndices(start, length);
585 
586   if(srcChars == NULL) {
587     srcStart = srcLength = 0;
588   }
589 
590   // get the correct pointer
591   const UChar *chars = getArrayStart();
592 
593   chars += start;
594   srcChars += srcStart;
595 
596   int32_t minLength;
597   int8_t lengthResult;
598 
599   // get the srcLength if necessary
600   if(srcLength < 0) {
601     srcLength = u_strlen(srcChars + srcStart);
602   }
603 
604   // are we comparing different lengths?
605   if(length != srcLength) {
606     if(length < srcLength) {
607       minLength = length;
608       lengthResult = -1;
609     } else {
610       minLength = srcLength;
611       lengthResult = 1;
612     }
613   } else {
614     minLength = length;
615     lengthResult = 0;
616   }
617 
618   /*
619    * note that uprv_memcmp() returns an int but we return an int8_t;
620    * we need to take care not to truncate the result -
621    * one way to do this is to right-shift the value to
622    * move the sign bit into the lower 8 bits and making sure that this
623    * does not become 0 itself
624    */
625 
626   if(minLength > 0 && chars != srcChars) {
627     int32_t result;
628 
629 #   if U_IS_BIG_ENDIAN
630       // big-endian: byte comparison works
631       result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar));
632       if(result != 0) {
633         return (int8_t)(result >> 15 | 1);
634       }
635 #   else
636       // little-endian: compare UChar units
637       do {
638         result = ((int32_t)*(chars++) - (int32_t)*(srcChars++));
639         if(result != 0) {
640           return (int8_t)(result >> 15 | 1);
641         }
642       } while(--minLength > 0);
643 #   endif
644   }
645   return lengthResult;
646 }
647 
648 /* String compare in code point order - doCompare() compares in code unit order. */
649 int8_t
doCompareCodePointOrder(int32_t start,int32_t length,const UChar * srcChars,int32_t srcStart,int32_t srcLength) const650 UnicodeString::doCompareCodePointOrder(int32_t start,
651                                        int32_t length,
652                                        const UChar *srcChars,
653                                        int32_t srcStart,
654                                        int32_t srcLength) const
655 {
656   // compare illegal string values
657   // treat const UChar *srcChars==NULL as an empty string
658   if(isBogus()) {
659     return -1;
660   }
661 
662   // pin indices to legal values
663   pinIndices(start, length);
664 
665   if(srcChars == NULL) {
666     srcStart = srcLength = 0;
667   }
668 
669   int32_t diff = uprv_strCompare(getArrayStart() + start, length, srcChars + srcStart, srcLength, FALSE, TRUE);
670   /* translate the 32-bit result into an 8-bit one */
671   if(diff!=0) {
672     return (int8_t)(diff >> 15 | 1);
673   } else {
674     return 0;
675   }
676 }
677 
678 int32_t
getLength() const679 UnicodeString::getLength() const {
680     return length();
681 }
682 
683 UChar
getCharAt(int32_t offset) const684 UnicodeString::getCharAt(int32_t offset) const {
685   return charAt(offset);
686 }
687 
688 UChar32
getChar32At(int32_t offset) const689 UnicodeString::getChar32At(int32_t offset) const {
690   return char32At(offset);
691 }
692 
693 int32_t
countChar32(int32_t start,int32_t length) const694 UnicodeString::countChar32(int32_t start, int32_t length) const {
695   pinIndices(start, length);
696   // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
697   return u_countChar32(getArrayStart()+start, length);
698 }
699 
700 UBool
hasMoreChar32Than(int32_t start,int32_t length,int32_t number) const701 UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
702   pinIndices(start, length);
703   // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
704   return u_strHasMoreChar32Than(getArrayStart()+start, length, number);
705 }
706 
707 int32_t
moveIndex32(int32_t index,int32_t delta) const708 UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
709   // pin index
710   int32_t len = length();
711   if(index<0) {
712     index=0;
713   } else if(index>len) {
714     index=len;
715   }
716 
717   const UChar *array = getArrayStart();
718   if(delta>0) {
719     UTF_FWD_N(array, index, len, delta);
720   } else {
721     UTF_BACK_N(array, 0, index, -delta);
722   }
723 
724   return index;
725 }
726 
727 void
doExtract(int32_t start,int32_t length,UChar * dst,int32_t dstStart) const728 UnicodeString::doExtract(int32_t start,
729              int32_t length,
730              UChar *dst,
731              int32_t dstStart) const
732 {
733   // pin indices to legal values
734   pinIndices(start, length);
735 
736   // do not copy anything if we alias dst itself
737   const UChar *array = getArrayStart();
738   if(array + start != dst + dstStart) {
739     us_arrayCopy(array, start, dst, dstStart, length);
740   }
741 }
742 
743 int32_t
extract(UChar * dest,int32_t destCapacity,UErrorCode & errorCode) const744 UnicodeString::extract(UChar *dest, int32_t destCapacity,
745                        UErrorCode &errorCode) const {
746   int32_t len = length();
747   if(U_SUCCESS(errorCode)) {
748     if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
749       errorCode=U_ILLEGAL_ARGUMENT_ERROR;
750     } else {
751       const UChar *array = getArrayStart();
752       if(len>0 && len<=destCapacity && array!=dest) {
753         uprv_memcpy(dest, array, len*U_SIZEOF_UCHAR);
754       }
755       return u_terminateUChars(dest, destCapacity, len, &errorCode);
756     }
757   }
758 
759   return len;
760 }
761 
762 int32_t
extract(int32_t start,int32_t length,char * target,int32_t targetCapacity,enum EInvariant) const763 UnicodeString::extract(int32_t start,
764                        int32_t length,
765                        char *target,
766                        int32_t targetCapacity,
767                        enum EInvariant) const
768 {
769   // if the arguments are illegal, then do nothing
770   if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) {
771     return 0;
772   }
773 
774   // pin the indices to legal values
775   pinIndices(start, length);
776 
777   if(length <= targetCapacity) {
778     u_UCharsToChars(getArrayStart() + start, target, length);
779   }
780   UErrorCode status = U_ZERO_ERROR;
781   return u_terminateChars(target, targetCapacity, length, &status);
782 }
783 
784 UnicodeString
tempSubString(int32_t start,int32_t len) const785 UnicodeString::tempSubString(int32_t start, int32_t len) const {
786   pinIndices(start, len);
787   const UChar *array = getBuffer();  // not getArrayStart() to check kIsBogus & kOpenGetBuffer
788   if(array==NULL) {
789     array=fUnion.fStackBuffer;  // anything not NULL because that would make an empty string
790     len=-2;  // bogus result string
791   }
792   return UnicodeString(FALSE, array + start, len);
793 }
794 
795 int32_t
toUTF8(int32_t start,int32_t len,char * target,int32_t capacity) const796 UnicodeString::toUTF8(int32_t start, int32_t len,
797                       char *target, int32_t capacity) const {
798   pinIndices(start, len);
799   int32_t length8;
800   UErrorCode errorCode = U_ZERO_ERROR;
801   u_strToUTF8WithSub(target, capacity, &length8,
802                      getBuffer() + start, len,
803                      0xFFFD,  // Standard substitution character.
804                      NULL,    // Don't care about number of substitutions.
805                      &errorCode);
806   return length8;
807 }
808 
809 #if U_CHARSET_IS_UTF8
810 
811 int32_t
extract(int32_t start,int32_t len,char * target,uint32_t dstSize) const812 UnicodeString::extract(int32_t start, int32_t len,
813                        char *target, uint32_t dstSize) const {
814   // if the arguments are illegal, then do nothing
815   if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {
816     return 0;
817   }
818   return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff);
819 }
820 
821 // else see unistr_cnv.cpp
822 #endif
823 
824 void
extractBetween(int32_t start,int32_t limit,UnicodeString & target) const825 UnicodeString::extractBetween(int32_t start,
826                   int32_t limit,
827                   UnicodeString& target) const {
828   pinIndex(start);
829   pinIndex(limit);
830   doExtract(start, limit - start, target);
831 }
832 
833 // When converting from UTF-16 to UTF-8, the result will have at most 3 times
834 // as many bytes as the source has UChars.
835 // The "worst cases" are writing systems like Indic, Thai and CJK with
836 // 3:1 bytes:UChars.
837 void
toUTF8(ByteSink & sink) const838 UnicodeString::toUTF8(ByteSink &sink) const {
839   int32_t length16 = length();
840   if(length16 != 0) {
841     char stackBuffer[1024];
842     int32_t capacity = (int32_t)sizeof(stackBuffer);
843     UBool utf8IsOwned = FALSE;
844     char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,
845                                       3*length16,
846                                       stackBuffer, capacity,
847                                       &capacity);
848     int32_t length8 = 0;
849     UErrorCode errorCode = U_ZERO_ERROR;
850     u_strToUTF8WithSub(utf8, capacity, &length8,
851                        getBuffer(), length16,
852                        0xFFFD,  // Standard substitution character.
853                        NULL,    // Don't care about number of substitutions.
854                        &errorCode);
855     if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
856       utf8 = (char *)uprv_malloc(length8);
857       if(utf8 != NULL) {
858         utf8IsOwned = TRUE;
859         errorCode = U_ZERO_ERROR;
860         u_strToUTF8WithSub(utf8, length8, &length8,
861                            getBuffer(), length16,
862                            0xFFFD,  // Standard substitution character.
863                            NULL,    // Don't care about number of substitutions.
864                            &errorCode);
865       } else {
866         errorCode = U_MEMORY_ALLOCATION_ERROR;
867       }
868     }
869     if(U_SUCCESS(errorCode)) {
870       sink.Append(utf8, length8);
871       sink.Flush();
872     }
873     if(utf8IsOwned) {
874       uprv_free(utf8);
875     }
876   }
877 }
878 
879 int32_t
toUTF32(UChar32 * utf32,int32_t capacity,UErrorCode & errorCode) const880 UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const {
881   int32_t length32=0;
882   if(U_SUCCESS(errorCode)) {
883     // getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
884     u_strToUTF32WithSub(utf32, capacity, &length32,
885         getBuffer(), length(),
886         0xfffd,  // Substitution character.
887         NULL,    // Don't care about number of substitutions.
888         &errorCode);
889   }
890   return length32;
891 }
892 
893 int32_t
indexOf(const UChar * srcChars,int32_t srcStart,int32_t srcLength,int32_t start,int32_t length) const894 UnicodeString::indexOf(const UChar *srcChars,
895                int32_t srcStart,
896                int32_t srcLength,
897                int32_t start,
898                int32_t length) const
899 {
900   if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
901     return -1;
902   }
903 
904   // UnicodeString does not find empty substrings
905   if(srcLength < 0 && srcChars[srcStart] == 0) {
906     return -1;
907   }
908 
909   // get the indices within bounds
910   pinIndices(start, length);
911 
912   // find the first occurrence of the substring
913   const UChar *array = getArrayStart();
914   const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength);
915   if(match == NULL) {
916     return -1;
917   } else {
918     return (int32_t)(match - array);
919   }
920 }
921 
922 int32_t
doIndexOf(UChar c,int32_t start,int32_t length) const923 UnicodeString::doIndexOf(UChar c,
924              int32_t start,
925              int32_t length) const
926 {
927   // pin indices
928   pinIndices(start, length);
929 
930   // find the first occurrence of c
931   const UChar *array = getArrayStart();
932   const UChar *match = u_memchr(array + start, c, length);
933   if(match == NULL) {
934     return -1;
935   } else {
936     return (int32_t)(match - array);
937   }
938 }
939 
940 int32_t
doIndexOf(UChar32 c,int32_t start,int32_t length) const941 UnicodeString::doIndexOf(UChar32 c,
942                          int32_t start,
943                          int32_t length) const {
944   // pin indices
945   pinIndices(start, length);
946 
947   // find the first occurrence of c
948   const UChar *array = getArrayStart();
949   const UChar *match = u_memchr32(array + start, c, length);
950   if(match == NULL) {
951     return -1;
952   } else {
953     return (int32_t)(match - array);
954   }
955 }
956 
957 int32_t
lastIndexOf(const UChar * srcChars,int32_t srcStart,int32_t srcLength,int32_t start,int32_t length) const958 UnicodeString::lastIndexOf(const UChar *srcChars,
959                int32_t srcStart,
960                int32_t srcLength,
961                int32_t start,
962                int32_t length) const
963 {
964   if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
965     return -1;
966   }
967 
968   // UnicodeString does not find empty substrings
969   if(srcLength < 0 && srcChars[srcStart] == 0) {
970     return -1;
971   }
972 
973   // get the indices within bounds
974   pinIndices(start, length);
975 
976   // find the last occurrence of the substring
977   const UChar *array = getArrayStart();
978   const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength);
979   if(match == NULL) {
980     return -1;
981   } else {
982     return (int32_t)(match - array);
983   }
984 }
985 
986 int32_t
doLastIndexOf(UChar c,int32_t start,int32_t length) const987 UnicodeString::doLastIndexOf(UChar c,
988                  int32_t start,
989                  int32_t length) const
990 {
991   if(isBogus()) {
992     return -1;
993   }
994 
995   // pin indices
996   pinIndices(start, length);
997 
998   // find the last occurrence of c
999   const UChar *array = getArrayStart();
1000   const UChar *match = u_memrchr(array + start, c, length);
1001   if(match == NULL) {
1002     return -1;
1003   } else {
1004     return (int32_t)(match - array);
1005   }
1006 }
1007 
1008 int32_t
doLastIndexOf(UChar32 c,int32_t start,int32_t length) const1009 UnicodeString::doLastIndexOf(UChar32 c,
1010                              int32_t start,
1011                              int32_t length) const {
1012   // pin indices
1013   pinIndices(start, length);
1014 
1015   // find the last occurrence of c
1016   const UChar *array = getArrayStart();
1017   const UChar *match = u_memrchr32(array + start, c, length);
1018   if(match == NULL) {
1019     return -1;
1020   } else {
1021     return (int32_t)(match - array);
1022   }
1023 }
1024 
1025 //========================================
1026 // Write implementation
1027 //========================================
1028 
1029 UnicodeString&
findAndReplace(int32_t start,int32_t length,const UnicodeString & oldText,int32_t oldStart,int32_t oldLength,const UnicodeString & newText,int32_t newStart,int32_t newLength)1030 UnicodeString::findAndReplace(int32_t start,
1031                   int32_t length,
1032                   const UnicodeString& oldText,
1033                   int32_t oldStart,
1034                   int32_t oldLength,
1035                   const UnicodeString& newText,
1036                   int32_t newStart,
1037                   int32_t newLength)
1038 {
1039   if(isBogus() || oldText.isBogus() || newText.isBogus()) {
1040     return *this;
1041   }
1042 
1043   pinIndices(start, length);
1044   oldText.pinIndices(oldStart, oldLength);
1045   newText.pinIndices(newStart, newLength);
1046 
1047   if(oldLength == 0) {
1048     return *this;
1049   }
1050 
1051   while(length > 0 && length >= oldLength) {
1052     int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);
1053     if(pos < 0) {
1054       // no more oldText's here: done
1055       break;
1056     } else {
1057       // we found oldText, replace it by newText and go beyond it
1058       replace(pos, oldLength, newText, newStart, newLength);
1059       length -= pos + oldLength - start;
1060       start = pos + newLength;
1061     }
1062   }
1063 
1064   return *this;
1065 }
1066 
1067 
1068 void
setToBogus()1069 UnicodeString::setToBogus()
1070 {
1071   releaseArray();
1072 
1073   fShortLength = 0;
1074   fUnion.fFields.fArray = 0;
1075   fUnion.fFields.fCapacity = 0;
1076   fFlags = kIsBogus;
1077 }
1078 
1079 // turn a bogus string into an empty one
1080 void
unBogus()1081 UnicodeString::unBogus() {
1082   if(fFlags & kIsBogus) {
1083     setToEmpty();
1084   }
1085 }
1086 
1087 // setTo() analogous to the readonly-aliasing constructor with the same signature
1088 UnicodeString &
setTo(UBool isTerminated,const UChar * text,int32_t textLength)1089 UnicodeString::setTo(UBool isTerminated,
1090                      const UChar *text,
1091                      int32_t textLength)
1092 {
1093   if(fFlags & kOpenGetBuffer) {
1094     // do not modify a string that has an "open" getBuffer(minCapacity)
1095     return *this;
1096   }
1097 
1098   if(text == NULL) {
1099     // treat as an empty string, do not alias
1100     releaseArray();
1101     setToEmpty();
1102     return *this;
1103   }
1104 
1105   if( textLength < -1 ||
1106       (textLength == -1 && !isTerminated) ||
1107       (textLength >= 0 && isTerminated && text[textLength] != 0)
1108   ) {
1109     setToBogus();
1110     return *this;
1111   }
1112 
1113   releaseArray();
1114 
1115   if(textLength == -1) {
1116     // text is terminated, or else it would have failed the above test
1117     textLength = u_strlen(text);
1118   }
1119   setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
1120 
1121   fFlags = kReadonlyAlias;
1122   return *this;
1123 }
1124 
1125 // setTo() analogous to the writable-aliasing constructor with the same signature
1126 UnicodeString &
setTo(UChar * buffer,int32_t buffLength,int32_t buffCapacity)1127 UnicodeString::setTo(UChar *buffer,
1128                      int32_t buffLength,
1129                      int32_t buffCapacity) {
1130   if(fFlags & kOpenGetBuffer) {
1131     // do not modify a string that has an "open" getBuffer(minCapacity)
1132     return *this;
1133   }
1134 
1135   if(buffer == NULL) {
1136     // treat as an empty string, do not alias
1137     releaseArray();
1138     setToEmpty();
1139     return *this;
1140   }
1141 
1142   if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
1143     setToBogus();
1144     return *this;
1145   } else if(buffLength == -1) {
1146     // buffLength = u_strlen(buff); but do not look beyond buffCapacity
1147     const UChar *p = buffer, *limit = buffer + buffCapacity;
1148     while(p != limit && *p != 0) {
1149       ++p;
1150     }
1151     buffLength = (int32_t)(p - buffer);
1152   }
1153 
1154   releaseArray();
1155 
1156   setArray(buffer, buffLength, buffCapacity);
1157   fFlags = kWritableAlias;
1158   return *this;
1159 }
1160 
setToUTF8(const StringPiece & utf8)1161 UnicodeString &UnicodeString::setToUTF8(const StringPiece &utf8) {
1162   unBogus();
1163   int32_t length = utf8.length();
1164   int32_t capacity;
1165   // The UTF-16 string will be at most as long as the UTF-8 string.
1166   if(length <= US_STACKBUF_SIZE) {
1167     capacity = US_STACKBUF_SIZE;
1168   } else {
1169     capacity = length + 1;  // +1 for the terminating NUL.
1170   }
1171   UChar *utf16 = getBuffer(capacity);
1172   int32_t length16;
1173   UErrorCode errorCode = U_ZERO_ERROR;
1174   u_strFromUTF8WithSub(utf16, getCapacity(), &length16,
1175       utf8.data(), length,
1176       0xfffd,  // Substitution character.
1177       NULL,    // Don't care about number of substitutions.
1178       &errorCode);
1179   releaseBuffer(length16);
1180   if(U_FAILURE(errorCode)) {
1181     setToBogus();
1182   }
1183   return *this;
1184 }
1185 
1186 UnicodeString&
setCharAt(int32_t offset,UChar c)1187 UnicodeString::setCharAt(int32_t offset,
1188              UChar c)
1189 {
1190   int32_t len = length();
1191   if(cloneArrayIfNeeded() && len > 0) {
1192     if(offset < 0) {
1193       offset = 0;
1194     } else if(offset >= len) {
1195       offset = len - 1;
1196     }
1197 
1198     getArrayStart()[offset] = c;
1199   }
1200   return *this;
1201 }
1202 
1203 UnicodeString&
doReplace(int32_t start,int32_t length,const UnicodeString & src,int32_t srcStart,int32_t srcLength)1204 UnicodeString::doReplace( int32_t start,
1205               int32_t length,
1206               const UnicodeString& src,
1207               int32_t srcStart,
1208               int32_t srcLength)
1209 {
1210   if(!src.isBogus()) {
1211     // pin the indices to legal values
1212     src.pinIndices(srcStart, srcLength);
1213 
1214     // get the characters from src
1215     // and replace the range in ourselves with them
1216     return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);
1217   } else {
1218     // remove the range
1219     return doReplace(start, length, 0, 0, 0);
1220   }
1221 }
1222 
1223 UnicodeString&
doReplace(int32_t start,int32_t length,const UChar * srcChars,int32_t srcStart,int32_t srcLength)1224 UnicodeString::doReplace(int32_t start,
1225              int32_t length,
1226              const UChar *srcChars,
1227              int32_t srcStart,
1228              int32_t srcLength)
1229 {
1230   if(!isWritable()) {
1231     return *this;
1232   }
1233 
1234   int32_t oldLength = this->length();
1235 
1236   // optimize (read-only alias).remove(0, start) and .remove(start, end)
1237   if((fFlags&kBufferIsReadonly) && srcLength == 0) {
1238     if(start == 0) {
1239       // remove prefix by adjusting the array pointer
1240       pinIndex(length);
1241       fUnion.fFields.fArray += length;
1242       fUnion.fFields.fCapacity -= length;
1243       setLength(oldLength - length);
1244       return *this;
1245     } else {
1246       pinIndex(start);
1247       if(length >= (oldLength - start)) {
1248         // remove suffix by reducing the length (like truncate())
1249         setLength(start);
1250         fUnion.fFields.fCapacity = start;  // not NUL-terminated any more
1251         return *this;
1252       }
1253     }
1254   }
1255 
1256   if(srcChars == 0) {
1257     srcStart = srcLength = 0;
1258   } else if(srcLength < 0) {
1259     // get the srcLength if necessary
1260     srcLength = u_strlen(srcChars + srcStart);
1261   }
1262 
1263   // calculate the size of the string after the replace
1264   int32_t newLength;
1265 
1266   // optimize append() onto a large-enough, owned string
1267   if(start >= oldLength) {
1268     newLength = oldLength + srcLength;
1269     if(newLength <= getCapacity() && isBufferWritable()) {
1270       UChar *oldArray = getArrayStart();
1271       // Do not copy characters when
1272       //   UChar *buffer=str.getAppendBuffer(...);
1273       // is followed by
1274       //   str.append(buffer, length);
1275       // or
1276       //   str.appendString(buffer, length)
1277       // or similar.
1278       if(srcChars + srcStart != oldArray + start || start > oldLength) {
1279         us_arrayCopy(srcChars, srcStart, oldArray, oldLength, srcLength);
1280       }
1281       setLength(newLength);
1282       return *this;
1283     } else {
1284       // pin the indices to legal values
1285       start = oldLength;
1286       length = 0;
1287     }
1288   } else {
1289     // pin the indices to legal values
1290     pinIndices(start, length);
1291 
1292     newLength = oldLength - length + srcLength;
1293   }
1294 
1295   // the following may change fArray but will not copy the current contents;
1296   // therefore we need to keep the current fArray
1297   UChar oldStackBuffer[US_STACKBUF_SIZE];
1298   UChar *oldArray;
1299   if((fFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) {
1300     // copy the stack buffer contents because it will be overwritten with
1301     // fUnion.fFields values
1302     u_memcpy(oldStackBuffer, fUnion.fStackBuffer, oldLength);
1303     oldArray = oldStackBuffer;
1304   } else {
1305     oldArray = getArrayStart();
1306   }
1307 
1308   // clone our array and allocate a bigger array if needed
1309   int32_t *bufferToDelete = 0;
1310   if(!cloneArrayIfNeeded(newLength, newLength + (newLength >> 2) + kGrowSize,
1311                          FALSE, &bufferToDelete)
1312   ) {
1313     return *this;
1314   }
1315 
1316   // now do the replace
1317 
1318   UChar *newArray = getArrayStart();
1319   if(newArray != oldArray) {
1320     // if fArray changed, then we need to copy everything except what will change
1321     us_arrayCopy(oldArray, 0, newArray, 0, start);
1322     us_arrayCopy(oldArray, start + length,
1323                  newArray, start + srcLength,
1324                  oldLength - (start + length));
1325   } else if(length != srcLength) {
1326     // fArray did not change; copy only the portion that isn't changing, leaving a hole
1327     us_arrayCopy(oldArray, start + length,
1328                  newArray, start + srcLength,
1329                  oldLength - (start + length));
1330   }
1331 
1332   // now fill in the hole with the new string
1333   us_arrayCopy(srcChars, srcStart, newArray, start, srcLength);
1334 
1335   setLength(newLength);
1336 
1337   // delayed delete in case srcChars == fArray when we started, and
1338   // to keep oldArray alive for the above operations
1339   if (bufferToDelete) {
1340     uprv_free(bufferToDelete);
1341   }
1342 
1343   return *this;
1344 }
1345 
1346 /**
1347  * Replaceable API
1348  */
1349 void
handleReplaceBetween(int32_t start,int32_t limit,const UnicodeString & text)1350 UnicodeString::handleReplaceBetween(int32_t start,
1351                                     int32_t limit,
1352                                     const UnicodeString& text) {
1353     replaceBetween(start, limit, text);
1354 }
1355 
1356 /**
1357  * Replaceable API
1358  */
1359 void
copy(int32_t start,int32_t limit,int32_t dest)1360 UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
1361     if (limit <= start) {
1362         return; // Nothing to do; avoid bogus malloc call
1363     }
1364     UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) );
1365     // Check to make sure text is not null.
1366     if (text != NULL) {
1367 	    extractBetween(start, limit, text, 0);
1368 	    insert(dest, text, 0, limit - start);
1369 	    uprv_free(text);
1370     }
1371 }
1372 
1373 /**
1374  * Replaceable API
1375  *
1376  * NOTE: This is for the Replaceable class.  There is no rep.cpp,
1377  * so we implement this function here.
1378  */
hasMetaData() const1379 UBool Replaceable::hasMetaData() const {
1380     return TRUE;
1381 }
1382 
1383 /**
1384  * Replaceable API
1385  */
hasMetaData() const1386 UBool UnicodeString::hasMetaData() const {
1387     return FALSE;
1388 }
1389 
1390 UnicodeString&
doReverse(int32_t start,int32_t length)1391 UnicodeString::doReverse(int32_t start, int32_t length) {
1392   if(length <= 1 || !cloneArrayIfNeeded()) {
1393     return *this;
1394   }
1395 
1396   // pin the indices to legal values
1397   pinIndices(start, length);
1398   if(length <= 1) {  // pinIndices() might have shrunk the length
1399     return *this;
1400   }
1401 
1402   UChar *left = getArrayStart() + start;
1403   UChar *right = left + length - 1;  // -1 for inclusive boundary (length>=2)
1404   UChar swap;
1405   UBool hasSupplementary = FALSE;
1406 
1407   // Before the loop we know left<right because length>=2.
1408   do {
1409     hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left);
1410     hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right);
1411     *right-- = swap;
1412   } while(left < right);
1413   // Make sure to test the middle code unit of an odd-length string.
1414   // Redundant if the length is even.
1415   hasSupplementary |= (UBool)U16_IS_LEAD(*left);
1416 
1417   /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
1418   if(hasSupplementary) {
1419     UChar swap2;
1420 
1421     left = getArrayStart() + start;
1422     right = left + length - 1; // -1 so that we can look at *(left+1) if left<right
1423     while(left < right) {
1424       if(U16_IS_TRAIL(swap = *left) && U16_IS_LEAD(swap2 = *(left + 1))) {
1425         *left++ = swap2;
1426         *left++ = swap;
1427       } else {
1428         ++left;
1429       }
1430     }
1431   }
1432 
1433   return *this;
1434 }
1435 
1436 UBool
padLeading(int32_t targetLength,UChar padChar)1437 UnicodeString::padLeading(int32_t targetLength,
1438                           UChar padChar)
1439 {
1440   int32_t oldLength = length();
1441   if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1442     return FALSE;
1443   } else {
1444     // move contents up by padding width
1445     UChar *array = getArrayStart();
1446     int32_t start = targetLength - oldLength;
1447     us_arrayCopy(array, 0, array, start, oldLength);
1448 
1449     // fill in padding character
1450     while(--start >= 0) {
1451       array[start] = padChar;
1452     }
1453     setLength(targetLength);
1454     return TRUE;
1455   }
1456 }
1457 
1458 UBool
padTrailing(int32_t targetLength,UChar padChar)1459 UnicodeString::padTrailing(int32_t targetLength,
1460                            UChar padChar)
1461 {
1462   int32_t oldLength = length();
1463   if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1464     return FALSE;
1465   } else {
1466     // fill in padding character
1467     UChar *array = getArrayStart();
1468     int32_t length = targetLength;
1469     while(--length >= oldLength) {
1470       array[length] = padChar;
1471     }
1472     setLength(targetLength);
1473     return TRUE;
1474   }
1475 }
1476 
1477 //========================================
1478 // Hashing
1479 //========================================
1480 int32_t
doHashCode() const1481 UnicodeString::doHashCode() const
1482 {
1483     /* Delegate hash computation to uhash.  This makes UnicodeString
1484      * hashing consistent with UChar* hashing.  */
1485     int32_t hashCode = uhash_hashUCharsN(getArrayStart(), length());
1486     if (hashCode == kInvalidHashCode) {
1487         hashCode = kEmptyHashCode;
1488     }
1489     return hashCode;
1490 }
1491 
1492 //========================================
1493 // External Buffer
1494 //========================================
1495 
1496 UChar *
getBuffer(int32_t minCapacity)1497 UnicodeString::getBuffer(int32_t minCapacity) {
1498   if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
1499     fFlags|=kOpenGetBuffer;
1500     fShortLength=0;
1501     return getArrayStart();
1502   } else {
1503     return 0;
1504   }
1505 }
1506 
1507 void
releaseBuffer(int32_t newLength)1508 UnicodeString::releaseBuffer(int32_t newLength) {
1509   if(fFlags&kOpenGetBuffer && newLength>=-1) {
1510     // set the new fLength
1511     int32_t capacity=getCapacity();
1512     if(newLength==-1) {
1513       // the new length is the string length, capped by fCapacity
1514       const UChar *array=getArrayStart(), *p=array, *limit=array+capacity;
1515       while(p<limit && *p!=0) {
1516         ++p;
1517       }
1518       newLength=(int32_t)(p-array);
1519     } else if(newLength>capacity) {
1520       newLength=capacity;
1521     }
1522     setLength(newLength);
1523     fFlags&=~kOpenGetBuffer;
1524   }
1525 }
1526 
1527 //========================================
1528 // Miscellaneous
1529 //========================================
1530 UBool
cloneArrayIfNeeded(int32_t newCapacity,int32_t growCapacity,UBool doCopyArray,int32_t ** pBufferToDelete,UBool forceClone)1531 UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
1532                                   int32_t growCapacity,
1533                                   UBool doCopyArray,
1534                                   int32_t **pBufferToDelete,
1535                                   UBool forceClone) {
1536   // default parameters need to be static, therefore
1537   // the defaults are -1 to have convenience defaults
1538   if(newCapacity == -1) {
1539     newCapacity = getCapacity();
1540   }
1541 
1542   // while a getBuffer(minCapacity) is "open",
1543   // prevent any modifications of the string by returning FALSE here
1544   // if the string is bogus, then only an assignment or similar can revive it
1545   if(!isWritable()) {
1546     return FALSE;
1547   }
1548 
1549   /*
1550    * We need to make a copy of the array if
1551    * the buffer is read-only, or
1552    * the buffer is refCounted (shared), and refCount>1, or
1553    * the buffer is too small.
1554    * Return FALSE if memory could not be allocated.
1555    */
1556   if(forceClone ||
1557      fFlags & kBufferIsReadonly ||
1558      (fFlags & kRefCounted && refCount() > 1) ||
1559      newCapacity > getCapacity()
1560   ) {
1561     // check growCapacity for default value and use of the stack buffer
1562     if(growCapacity == -1) {
1563       growCapacity = newCapacity;
1564     } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
1565       growCapacity = US_STACKBUF_SIZE;
1566     }
1567 
1568     // save old values
1569     UChar oldStackBuffer[US_STACKBUF_SIZE];
1570     UChar *oldArray;
1571     uint8_t flags = fFlags;
1572 
1573     if(flags&kUsingStackBuffer) {
1574       if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
1575         // copy the stack buffer contents because it will be overwritten with
1576         // fUnion.fFields values
1577         us_arrayCopy(fUnion.fStackBuffer, 0, oldStackBuffer, 0, fShortLength);
1578         oldArray = oldStackBuffer;
1579       } else {
1580         oldArray = 0; // no need to copy from stack buffer to itself
1581       }
1582     } else {
1583       oldArray = fUnion.fFields.fArray;
1584     }
1585 
1586     // allocate a new array
1587     if(allocate(growCapacity) ||
1588        (newCapacity < growCapacity && allocate(newCapacity))
1589     ) {
1590       if(doCopyArray && oldArray != 0) {
1591         // copy the contents
1592         // do not copy more than what fits - it may be smaller than before
1593         int32_t minLength = length();
1594         newCapacity = getCapacity();
1595         if(newCapacity < minLength) {
1596           minLength = newCapacity;
1597           setLength(minLength);
1598         }
1599         us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);
1600       } else {
1601         fShortLength = 0;
1602       }
1603 
1604       // release the old array
1605       if(flags & kRefCounted) {
1606         // the array is refCounted; decrement and release if 0
1607         int32_t *pRefCount = ((int32_t *)oldArray - 1);
1608         if(umtx_atomic_dec(pRefCount) == 0) {
1609           if(pBufferToDelete == 0) {
1610             uprv_free(pRefCount);
1611           } else {
1612             // the caller requested to delete it himself
1613             *pBufferToDelete = pRefCount;
1614           }
1615         }
1616       }
1617     } else {
1618       // not enough memory for growCapacity and not even for the smaller newCapacity
1619       // reset the old values for setToBogus() to release the array
1620       if(!(flags&kUsingStackBuffer)) {
1621         fUnion.fFields.fArray = oldArray;
1622       }
1623       fFlags = flags;
1624       setToBogus();
1625       return FALSE;
1626     }
1627   }
1628   return TRUE;
1629 }
1630 
1631 // UnicodeStringAppendable ------------------------------------------------- ***
1632 
1633 UBool
appendCodeUnit(UChar c)1634 UnicodeStringAppendable::appendCodeUnit(UChar c) {
1635   return str.doReplace(str.length(), 0, &c, 0, 1).isWritable();
1636 }
1637 
1638 UBool
appendCodePoint(UChar32 c)1639 UnicodeStringAppendable::appendCodePoint(UChar32 c) {
1640   UChar buffer[U16_MAX_LENGTH];
1641   int32_t cLength = 0;
1642   UBool isError = FALSE;
1643   U16_APPEND(buffer, cLength, U16_MAX_LENGTH, c, isError);
1644   return !isError && str.doReplace(str.length(), 0, buffer, 0, cLength).isWritable();
1645 }
1646 
1647 UBool
appendString(const UChar * s,int32_t length)1648 UnicodeStringAppendable::appendString(const UChar *s, int32_t length) {
1649   return str.doReplace(str.length(), 0, s, 0, length).isWritable();
1650 }
1651 
1652 UBool
reserveAppendCapacity(int32_t appendCapacity)1653 UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity) {
1654   return str.cloneArrayIfNeeded(str.length() + appendCapacity);
1655 }
1656 
1657 UChar *
getAppendBuffer(int32_t minCapacity,int32_t desiredCapacityHint,UChar * scratch,int32_t scratchCapacity,int32_t * resultCapacity)1658 UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity,
1659                                          int32_t desiredCapacityHint,
1660                                          UChar *scratch, int32_t scratchCapacity,
1661                                          int32_t *resultCapacity) {
1662   if(minCapacity < 1 || scratchCapacity < minCapacity) {
1663     *resultCapacity = 0;
1664     return NULL;
1665   }
1666   int32_t oldLength = str.length();
1667   if(str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) {
1668     *resultCapacity = str.getCapacity() - oldLength;
1669     return str.getArrayStart() + oldLength;
1670   }
1671   *resultCapacity = scratchCapacity;
1672   return scratch;
1673 }
1674 
1675 U_NAMESPACE_END
1676 
1677 #ifdef U_STATIC_IMPLEMENTATION
1678 /*
1679 This should never be called. It is defined here to make sure that the
1680 virtual vector deleting destructor is defined within unistr.cpp.
1681 The vector deleting destructor is already a part of UObject,
1682 but defining it here makes sure that it is included with this object file.
1683 This makes sure that static library dependencies are kept to a minimum.
1684 */
uprv_UnicodeStringDummy(void)1685 static void uprv_UnicodeStringDummy(void) {
1686     U_NAMESPACE_USE
1687     delete [] (new UnicodeString[2]);
1688 }
1689 #endif
1690