• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 * Copyright (C) 1999-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ******************************************************************************
8 *
9 * File unistr.cpp
10 *
11 * Modification History:
12 *
13 *   Date        Name        Description
14 *   09/25/98    stephen     Creation.
15 *   04/20/99    stephen     Overhauled per 4/16 code review.
16 *   07/09/99    stephen     Renamed {hi,lo},{byte,word} to icu_X for HP/UX
17 *   11/18/99    aliu        Added handleReplaceBetween() to make inherit from
18 *                           Replaceable.
19 *   06/25/01    grhoten     Removed the dependency on iostream
20 ******************************************************************************
21 */
22 
23 #include "unicode/utypes.h"
24 #include "unicode/appendable.h"
25 #include "unicode/putil.h"
26 #include "cstring.h"
27 #include "cmemory.h"
28 #include "unicode/ustring.h"
29 #include "unicode/unistr.h"
30 #include "unicode/utf.h"
31 #include "unicode/utf16.h"
32 #include "uelement.h"
33 #include "ustr_imp.h"
34 #include "umutex.h"
35 #include "uassert.h"
36 
37 #if 0
38 
39 #include <iostream>
40 using namespace std;
41 
42 //DEBUGGING
43 void
44 print(const UnicodeString& s,
45       const char *name)
46 {
47   UChar c;
48   cout << name << ":|";
49   for(int i = 0; i < s.length(); ++i) {
50     c = s[i];
51     if(c>= 0x007E || c < 0x0020)
52       cout << "[0x" << hex << s[i] << "]";
53     else
54       cout << (char) s[i];
55   }
56   cout << '|' << endl;
57 }
58 
59 void
60 print(const UChar *s,
61       int32_t len,
62       const char *name)
63 {
64   UChar c;
65   cout << name << ":|";
66   for(int i = 0; i < len; ++i) {
67     c = s[i];
68     if(c>= 0x007E || c < 0x0020)
69       cout << "[0x" << hex << s[i] << "]";
70     else
71       cout << (char) s[i];
72   }
73   cout << '|' << endl;
74 }
75 // END DEBUGGING
76 #endif
77 
78 // Local function definitions for now
79 
80 // need to copy areas that may overlap
81 static
82 inline void
us_arrayCopy(const UChar * src,int32_t srcStart,UChar * dst,int32_t dstStart,int32_t count)83 us_arrayCopy(const UChar *src, int32_t srcStart,
84          UChar *dst, int32_t dstStart, int32_t count)
85 {
86   if(count>0) {
87     uprv_memmove(dst+dstStart, src+srcStart, (size_t)count*sizeof(*src));
88   }
89 }
90 
91 // u_unescapeAt() callback to get a UChar from a UnicodeString
92 U_CDECL_BEGIN
93 static UChar U_CALLCONV
UnicodeString_charAt(int32_t offset,void * context)94 UnicodeString_charAt(int32_t offset, void *context) {
95     return ((icu::UnicodeString*) context)->charAt(offset);
96 }
97 U_CDECL_END
98 
99 U_NAMESPACE_BEGIN
100 
101 /* The Replaceable virtual destructor can't be defined in the header
102    due to how AIX works with multiple definitions of virtual functions.
103 */
~Replaceable()104 Replaceable::~Replaceable() {}
105 
106 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(UnicodeString)
107 
108 UnicodeString U_EXPORT2
109 operator+ (const UnicodeString &s1, const UnicodeString &s2) {
110     return
111         UnicodeString(s1.length()+s2.length()+1, (UChar32)0, 0).
112             append(s1).
113                 append(s2);
114 }
115 
116 //========================================
117 // Reference Counting functions, put at top of file so that optimizing compilers
118 //                               have a chance to automatically inline.
119 //========================================
120 
121 void
addRef()122 UnicodeString::addRef() {
123   umtx_atomic_inc((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
124 }
125 
126 int32_t
removeRef()127 UnicodeString::removeRef() {
128   return umtx_atomic_dec((u_atomic_int32_t *)fUnion.fFields.fArray - 1);
129 }
130 
131 int32_t
refCount() const132 UnicodeString::refCount() const {
133   return umtx_loadAcquire(*((u_atomic_int32_t *)fUnion.fFields.fArray - 1));
134 }
135 
136 void
releaseArray()137 UnicodeString::releaseArray() {
138   if((fUnion.fFields.fLengthAndFlags & kRefCounted) && removeRef() == 0) {
139     uprv_free((int32_t *)fUnion.fFields.fArray - 1);
140   }
141 }
142 
143 
144 
145 //========================================
146 // Constructors
147 //========================================
148 
149 // The default constructor is inline in unistr.h.
150 
UnicodeString(int32_t capacity,UChar32 c,int32_t count)151 UnicodeString::UnicodeString(int32_t capacity, UChar32 c, int32_t count) {
152   fUnion.fFields.fLengthAndFlags = 0;
153   if(count <= 0 || (uint32_t)c > 0x10ffff) {
154     // just allocate and do not do anything else
155     allocate(capacity);
156   } else if(c <= 0xffff) {
157     int32_t length = count;
158     if(capacity < length) {
159       capacity = length;
160     }
161     if(allocate(capacity)) {
162       UChar *array = getArrayStart();
163       UChar unit = (UChar)c;
164       for(int32_t i = 0; i < length; ++i) {
165         array[i] = unit;
166       }
167       setLength(length);
168     }
169   } else {  // supplementary code point, write surrogate pairs
170     if(count > (INT32_MAX / 2)) {
171       // We would get more than 2G UChars.
172       allocate(capacity);
173       return;
174     }
175     int32_t length = count * 2;
176     if(capacity < length) {
177       capacity = length;
178     }
179     if(allocate(capacity)) {
180       UChar *array = getArrayStart();
181       UChar lead = U16_LEAD(c);
182       UChar trail = U16_TRAIL(c);
183       for(int32_t i = 0; i < length; i += 2) {
184         array[i] = lead;
185         array[i + 1] = trail;
186       }
187       setLength(length);
188     }
189   }
190 }
191 
UnicodeString(UChar ch)192 UnicodeString::UnicodeString(UChar ch) {
193   fUnion.fFields.fLengthAndFlags = kLength1 | kShortString;
194   fUnion.fStackFields.fBuffer[0] = ch;
195 }
196 
UnicodeString(UChar32 ch)197 UnicodeString::UnicodeString(UChar32 ch) {
198   fUnion.fFields.fLengthAndFlags = kShortString;
199   int32_t i = 0;
200   UBool isError = FALSE;
201   U16_APPEND(fUnion.fStackFields.fBuffer, i, US_STACKBUF_SIZE, ch, isError);
202   // We test isError so that the compiler does not complain that we don't.
203   // If isError then i==0 which is what we want anyway.
204   if(!isError) {
205     setShortLength(i);
206   }
207 }
208 
UnicodeString(const UChar * text)209 UnicodeString::UnicodeString(const UChar *text) {
210   fUnion.fFields.fLengthAndFlags = kShortString;
211   doAppend(text, 0, -1);
212 }
213 
UnicodeString(const UChar * text,int32_t textLength)214 UnicodeString::UnicodeString(const UChar *text,
215                              int32_t textLength) {
216   fUnion.fFields.fLengthAndFlags = kShortString;
217   doAppend(text, 0, textLength);
218 }
219 
UnicodeString(UBool isTerminated,ConstChar16Ptr textPtr,int32_t textLength)220 UnicodeString::UnicodeString(UBool isTerminated,
221                              ConstChar16Ptr textPtr,
222                              int32_t textLength) {
223   fUnion.fFields.fLengthAndFlags = kReadonlyAlias;
224   const UChar *text = textPtr;
225   if(text == NULL) {
226     // treat as an empty string, do not alias
227     setToEmpty();
228   } else if(textLength < -1 ||
229             (textLength == -1 && !isTerminated) ||
230             (textLength >= 0 && isTerminated && text[textLength] != 0)
231   ) {
232     setToBogus();
233   } else {
234     if(textLength == -1) {
235       // text is terminated, or else it would have failed the above test
236       textLength = u_strlen(text);
237     }
238     setArray(const_cast<UChar *>(text), textLength,
239              isTerminated ? textLength + 1 : textLength);
240   }
241 }
242 
UnicodeString(UChar * buff,int32_t buffLength,int32_t buffCapacity)243 UnicodeString::UnicodeString(UChar *buff,
244                              int32_t buffLength,
245                              int32_t buffCapacity) {
246   fUnion.fFields.fLengthAndFlags = kWritableAlias;
247   if(buff == NULL) {
248     // treat as an empty string, do not alias
249     setToEmpty();
250   } else if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
251     setToBogus();
252   } else {
253     if(buffLength == -1) {
254       // fLength = u_strlen(buff); but do not look beyond buffCapacity
255       const UChar *p = buff, *limit = buff + buffCapacity;
256       while(p != limit && *p != 0) {
257         ++p;
258       }
259       buffLength = (int32_t)(p - buff);
260     }
261     setArray(buff, buffLength, buffCapacity);
262   }
263 }
264 
UnicodeString(const char * src,int32_t length,EInvariant)265 UnicodeString::UnicodeString(const char *src, int32_t length, EInvariant) {
266   fUnion.fFields.fLengthAndFlags = kShortString;
267   if(src==NULL) {
268     // treat as an empty string
269   } else {
270     if(length<0) {
271       length=(int32_t)uprv_strlen(src);
272     }
273     if(cloneArrayIfNeeded(length, length, FALSE)) {
274       u_charsToUChars(src, getArrayStart(), length);
275       setLength(length);
276     } else {
277       setToBogus();
278     }
279   }
280 }
281 
282 #if U_CHARSET_IS_UTF8
283 
UnicodeString(const char * codepageData)284 UnicodeString::UnicodeString(const char *codepageData) {
285   fUnion.fFields.fLengthAndFlags = kShortString;
286   if(codepageData != 0) {
287     setToUTF8(codepageData);
288   }
289 }
290 
UnicodeString(const char * codepageData,int32_t dataLength)291 UnicodeString::UnicodeString(const char *codepageData, int32_t dataLength) {
292   fUnion.fFields.fLengthAndFlags = kShortString;
293   // if there's nothing to convert, do nothing
294   if(codepageData == 0 || dataLength == 0 || dataLength < -1) {
295     return;
296   }
297   if(dataLength == -1) {
298     dataLength = (int32_t)uprv_strlen(codepageData);
299   }
300   setToUTF8(StringPiece(codepageData, dataLength));
301 }
302 
303 // else see unistr_cnv.cpp
304 #endif
305 
// Copy constructor: starts as an empty short string and delegates to copyFrom().
UnicodeString::UnicodeString(const UnicodeString& that) {
  fUnion.fFields.fLengthAndFlags = kShortString;
  copyFrom(that);
}
310 
UnicodeString(UnicodeString && src)311 UnicodeString::UnicodeString(UnicodeString &&src) U_NOEXCEPT {
312   copyFieldsFrom(src, TRUE);
313 }
314 
// Constructs a string via setTo(that, srcStart).
UnicodeString::UnicodeString(const UnicodeString& that,
                             int32_t srcStart) {
  // Start as an empty short string before assigning.
  fUnion.fFields.fLengthAndFlags = kShortString;
  setTo(that, srcStart);
}
320 
// Constructs a string via setTo(that, srcStart, srcLength).
UnicodeString::UnicodeString(const UnicodeString& that,
                             int32_t srcStart,
                             int32_t srcLength) {
  // Start as an empty short string before assigning.
  fUnion.fFields.fLengthAndFlags = kShortString;
  setTo(that, srcStart, srcLength);
}
327 
328 // Replaceable base class clone() default implementation, does not clone
329 Replaceable *
clone() const330 Replaceable::clone() const {
331   return NULL;
332 }
333 
334 // UnicodeString overrides clone() with a real implementation
335 UnicodeString *
clone() const336 UnicodeString::clone() const {
337   return new UnicodeString(*this);
338 }
339 
340 //========================================
341 // array allocation
342 //========================================
343 
namespace {

// Fixed part of the growth increment used by getGrowCapacity().
const int32_t kGrowSize = 128;

// The number of bytes for one int32_t reference counter and capacity UChars
// must fit into a 32-bit size_t (at least when on a 32-bit platform).
// One more UChar is added for the NUL terminator, to avoid reallocation in
// getTerminatedBuffer(), and the total is rounded up to a multiple of 16 bytes.
// This means that capacity must be at most (0xfffffff0 - 4) / 2 - 1 = 0x7ffffff5.
// (With more complicated checks we could go up to 0x7ffffffd without rounding up,
// but that does not seem worth it.)
const int32_t kMaxCapacity = 0x7ffffff5;

// Returns newLength plus a quarter of newLength plus kGrowSize,
// capped at kMaxCapacity.
int32_t getGrowCapacity(int32_t newLength) {
  const int32_t increment = (newLength >> 2) + kGrowSize;
  // Compare against the remaining headroom so the addition cannot overflow.
  const int32_t headroom = kMaxCapacity - newLength;
  return (increment > headroom) ? kMaxCapacity : newLength + increment;
}

}  // namespace
367 
368 UBool
allocate(int32_t capacity)369 UnicodeString::allocate(int32_t capacity) {
370   if(capacity <= US_STACKBUF_SIZE) {
371     fUnion.fFields.fLengthAndFlags = kShortString;
372     return TRUE;
373   }
374   if(capacity <= kMaxCapacity) {
375     ++capacity;  // for the NUL
376     // Switch to size_t which is unsigned so that we can allocate up to 4GB.
377     // Reference counter + UChars.
378     size_t numBytes = sizeof(int32_t) + (size_t)capacity * U_SIZEOF_UCHAR;
379     // Round up to a multiple of 16.
380     numBytes = (numBytes + 15) & ~15;
381     int32_t *array = (int32_t *) uprv_malloc(numBytes);
382     if(array != NULL) {
383       // set initial refCount and point behind the refCount
384       *array++ = 1;
385       numBytes -= sizeof(int32_t);
386 
387       // have fArray point to the first UChar
388       fUnion.fFields.fArray = (UChar *)array;
389       fUnion.fFields.fCapacity = (int32_t)(numBytes / U_SIZEOF_UCHAR);
390       fUnion.fFields.fLengthAndFlags = kLongString;
391       return TRUE;
392     }
393   }
394   fUnion.fFields.fLengthAndFlags = kIsBogus;
395   fUnion.fFields.fArray = 0;
396   fUnion.fFields.fCapacity = 0;
397   return FALSE;
398 }
399 
400 //========================================
401 // Destructor
402 //========================================
403 
404 #ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
405 static u_atomic_int32_t finalLengthCounts[0x400];  // UnicodeString::kMaxShortLength+1
406 static u_atomic_int32_t beyondCount(0);
407 
unistr_printLengths()408 U_CAPI void unistr_printLengths() {
409   int32_t i;
410   for(i = 0; i <= 59; ++i) {
411     printf("%2d,  %9d\n", i, (int32_t)finalLengthCounts[i]);
412   }
413   int32_t beyond = beyondCount;
414   for(; i < UPRV_LENGTHOF(finalLengthCounts); ++i) {
415     beyond += finalLengthCounts[i];
416   }
417   printf(">59, %9d\n", beyond);
418 }
419 #endif
420 
~UnicodeString()421 UnicodeString::~UnicodeString()
422 {
423 #ifdef UNISTR_COUNT_FINAL_STRING_LENGTHS
424   // Count lengths of strings at the end of their lifetime.
425   // Useful for discussion of a desirable stack buffer size.
426   // Count the contents length, not the optional NUL terminator nor further capacity.
427   // Ignore open-buffer strings and strings which alias external storage.
428   if((fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kReadonlyAlias|kWritableAlias)) == 0) {
429     if(hasShortLength()) {
430       umtx_atomic_inc(finalLengthCounts + getShortLength());
431     } else {
432       umtx_atomic_inc(&beyondCount);
433     }
434   }
435 #endif
436 
437   releaseArray();
438 }
439 
440 //========================================
441 // Factory methods
442 //========================================
443 
fromUTF8(StringPiece utf8)444 UnicodeString UnicodeString::fromUTF8(StringPiece utf8) {
445   UnicodeString result;
446   result.setToUTF8(utf8);
447   return result;
448 }
449 
fromUTF32(const UChar32 * utf32,int32_t length)450 UnicodeString UnicodeString::fromUTF32(const UChar32 *utf32, int32_t length) {
451   UnicodeString result;
452   int32_t capacity;
453   // Most UTF-32 strings will be BMP-only and result in a same-length
454   // UTF-16 string. We overestimate the capacity just slightly,
455   // just in case there are a few supplementary characters.
456   if(length <= US_STACKBUF_SIZE) {
457     capacity = US_STACKBUF_SIZE;
458   } else {
459     capacity = length + (length >> 4) + 4;
460   }
461   do {
462     UChar *utf16 = result.getBuffer(capacity);
463     int32_t length16;
464     UErrorCode errorCode = U_ZERO_ERROR;
465     u_strFromUTF32WithSub(utf16, result.getCapacity(), &length16,
466         utf32, length,
467         0xfffd,  // Substitution character.
468         NULL,    // Don't care about number of substitutions.
469         &errorCode);
470     result.releaseBuffer(length16);
471     if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
472       capacity = length16 + 1;  // +1 for the terminating NUL.
473       continue;
474     } else if(U_FAILURE(errorCode)) {
475       result.setToBogus();
476     }
477     break;
478   } while(TRUE);
479   return result;
480 }
481 
482 //========================================
483 // Assignment
484 //========================================
485 
486 UnicodeString &
operator =(const UnicodeString & src)487 UnicodeString::operator=(const UnicodeString &src) {
488   return copyFrom(src);
489 }
490 
491 UnicodeString &
fastCopyFrom(const UnicodeString & src)492 UnicodeString::fastCopyFrom(const UnicodeString &src) {
493   return copyFrom(src, TRUE);
494 }
495 
496 UnicodeString &
copyFrom(const UnicodeString & src,UBool fastCopy)497 UnicodeString::copyFrom(const UnicodeString &src, UBool fastCopy) {
498   // if assigning to ourselves, do nothing
499   if(this == &src) {
500     return *this;
501   }
502 
503   // is the right side bogus?
504   if(src.isBogus()) {
505     setToBogus();
506     return *this;
507   }
508 
509   // delete the current contents
510   releaseArray();
511 
512   if(src.isEmpty()) {
513     // empty string - use the stack buffer
514     setToEmpty();
515     return *this;
516   }
517 
518   // fLength>0 and not an "open" src.getBuffer(minCapacity)
519   fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags;
520   switch(src.fUnion.fFields.fLengthAndFlags & kAllStorageFlags) {
521   case kShortString:
522     // short string using the stack buffer, do the same
523     uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer,
524                 getShortLength() * U_SIZEOF_UCHAR);
525     break;
526   case kLongString:
527     // src uses a refCounted string buffer, use that buffer with refCount
528     // src is const, use a cast - we don't actually change it
529     ((UnicodeString &)src).addRef();
530     // copy all fields, share the reference-counted buffer
531     fUnion.fFields.fArray = src.fUnion.fFields.fArray;
532     fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
533     if(!hasShortLength()) {
534       fUnion.fFields.fLength = src.fUnion.fFields.fLength;
535     }
536     break;
537   case kReadonlyAlias:
538     if(fastCopy) {
539       // src is a readonly alias, do the same
540       // -> maintain the readonly alias as such
541       fUnion.fFields.fArray = src.fUnion.fFields.fArray;
542       fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
543       if(!hasShortLength()) {
544         fUnion.fFields.fLength = src.fUnion.fFields.fLength;
545       }
546       break;
547     }
548     // else if(!fastCopy) fall through to case kWritableAlias
549     // -> allocate a new buffer and copy the contents
550     U_FALLTHROUGH;
551   case kWritableAlias: {
552     // src is a writable alias; we make a copy of that instead
553     int32_t srcLength = src.length();
554     if(allocate(srcLength)) {
555       u_memcpy(getArrayStart(), src.getArrayStart(), srcLength);
556       setLength(srcLength);
557       break;
558     }
559     // if there is not enough memory, then fall through to setting to bogus
560     U_FALLTHROUGH;
561   }
562   default:
563     // if src is bogus, set ourselves to bogus
564     // do not call setToBogus() here because fArray and flags are not consistent here
565     fUnion.fFields.fLengthAndFlags = kIsBogus;
566     fUnion.fFields.fArray = 0;
567     fUnion.fFields.fCapacity = 0;
568     break;
569   }
570 
571   return *this;
572 }
573 
operator =(UnicodeString && src)574 UnicodeString &UnicodeString::operator=(UnicodeString &&src) U_NOEXCEPT {
575   // No explicit check for self move assignment, consistent with standard library.
576   // Self move assignment causes no crash nor leak but might make the object bogus.
577   releaseArray();
578   copyFieldsFrom(src, TRUE);
579   return *this;
580 }
581 
582 // Same as move assignment except without memory management.
copyFieldsFrom(UnicodeString & src,UBool setSrcToBogus)583 void UnicodeString::copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) U_NOEXCEPT {
584   int16_t lengthAndFlags = fUnion.fFields.fLengthAndFlags = src.fUnion.fFields.fLengthAndFlags;
585   if(lengthAndFlags & kUsingStackBuffer) {
586     // Short string using the stack buffer, copy the contents.
587     // Check for self assignment to prevent "overlap in memcpy" warnings,
588     // although it should be harmless to copy a buffer to itself exactly.
589     if(this != &src) {
590       uprv_memcpy(fUnion.fStackFields.fBuffer, src.fUnion.fStackFields.fBuffer,
591                   getShortLength() * U_SIZEOF_UCHAR);
592     }
593   } else {
594     // In all other cases, copy all fields.
595     fUnion.fFields.fArray = src.fUnion.fFields.fArray;
596     fUnion.fFields.fCapacity = src.fUnion.fFields.fCapacity;
597     if(!hasShortLength()) {
598       fUnion.fFields.fLength = src.fUnion.fFields.fLength;
599     }
600     if(setSrcToBogus) {
601       // Set src to bogus without releasing any memory.
602       src.fUnion.fFields.fLengthAndFlags = kIsBogus;
603       src.fUnion.fFields.fArray = NULL;
604       src.fUnion.fFields.fCapacity = 0;
605     }
606   }
607 }
608 
swap(UnicodeString & other)609 void UnicodeString::swap(UnicodeString &other) U_NOEXCEPT {
610   UnicodeString temp;  // Empty short string: Known not to need releaseArray().
611   // Copy fields without resetting source values in between.
612   temp.copyFieldsFrom(*this, FALSE);
613   this->copyFieldsFrom(other, FALSE);
614   other.copyFieldsFrom(temp, FALSE);
615   // Set temp to an empty string so that other's memory is not released twice.
616   temp.fUnion.fFields.fLengthAndFlags = kShortString;
617 }
618 
619 //========================================
620 // Miscellaneous operations
621 //========================================
622 
unescape() const623 UnicodeString UnicodeString::unescape() const {
624     UnicodeString result(length(), (UChar32)0, (int32_t)0); // construct with capacity
625     if (result.isBogus()) {
626         return result;
627     }
628     const UChar *array = getBuffer();
629     int32_t len = length();
630     int32_t prev = 0;
631     for (int32_t i=0;;) {
632         if (i == len) {
633             result.append(array, prev, len - prev);
634             break;
635         }
636         if (array[i++] == 0x5C /*'\\'*/) {
637             result.append(array, prev, (i - 1) - prev);
638             UChar32 c = unescapeAt(i); // advances i
639             if (c < 0) {
640                 result.remove(); // return empty string
641                 break; // invalid escape sequence
642             }
643             result.append(c);
644             prev = i;
645         }
646     }
647     return result;
648 }
649 
unescapeAt(int32_t & offset) const650 UChar32 UnicodeString::unescapeAt(int32_t &offset) const {
651     return u_unescapeAt(UnicodeString_charAt, &offset, length(), (void*)this);
652 }
653 
654 //========================================
655 // Read-only implementation
656 //========================================
657 UBool
doEquals(const UnicodeString & text,int32_t len) const658 UnicodeString::doEquals(const UnicodeString &text, int32_t len) const {
659   // Requires: this & text not bogus and have same lengths.
660   // Byte-wise comparison works for equality regardless of endianness.
661   return uprv_memcmp(getArrayStart(), text.getArrayStart(), len * U_SIZEOF_UCHAR) == 0;
662 }
663 
664 int8_t
doCompare(int32_t start,int32_t length,const UChar * srcChars,int32_t srcStart,int32_t srcLength) const665 UnicodeString::doCompare( int32_t start,
666               int32_t length,
667               const UChar *srcChars,
668               int32_t srcStart,
669               int32_t srcLength) const
670 {
671   // compare illegal string values
672   if(isBogus()) {
673     return -1;
674   }
675 
676   // pin indices to legal values
677   pinIndices(start, length);
678 
679   if(srcChars == NULL) {
680     // treat const UChar *srcChars==NULL as an empty string
681     return length == 0 ? 0 : 1;
682   }
683 
684   // get the correct pointer
685   const UChar *chars = getArrayStart();
686 
687   chars += start;
688   srcChars += srcStart;
689 
690   int32_t minLength;
691   int8_t lengthResult;
692 
693   // get the srcLength if necessary
694   if(srcLength < 0) {
695     srcLength = u_strlen(srcChars + srcStart);
696   }
697 
698   // are we comparing different lengths?
699   if(length != srcLength) {
700     if(length < srcLength) {
701       minLength = length;
702       lengthResult = -1;
703     } else {
704       minLength = srcLength;
705       lengthResult = 1;
706     }
707   } else {
708     minLength = length;
709     lengthResult = 0;
710   }
711 
712   /*
713    * note that uprv_memcmp() returns an int but we return an int8_t;
714    * we need to take care not to truncate the result -
715    * one way to do this is to right-shift the value to
716    * move the sign bit into the lower 8 bits and making sure that this
717    * does not become 0 itself
718    */
719 
720   if(minLength > 0 && chars != srcChars) {
721     int32_t result;
722 
723 #   if U_IS_BIG_ENDIAN
724       // big-endian: byte comparison works
725       result = uprv_memcmp(chars, srcChars, minLength * sizeof(UChar));
726       if(result != 0) {
727         return (int8_t)(result >> 15 | 1);
728       }
729 #   else
730       // little-endian: compare UChar units
731       do {
732         result = ((int32_t)*(chars++) - (int32_t)*(srcChars++));
733         if(result != 0) {
734           return (int8_t)(result >> 15 | 1);
735         }
736       } while(--minLength > 0);
737 #   endif
738   }
739   return lengthResult;
740 }
741 
742 /* String compare in code point order - doCompare() compares in code unit order. */
743 int8_t
doCompareCodePointOrder(int32_t start,int32_t length,const UChar * srcChars,int32_t srcStart,int32_t srcLength) const744 UnicodeString::doCompareCodePointOrder(int32_t start,
745                                        int32_t length,
746                                        const UChar *srcChars,
747                                        int32_t srcStart,
748                                        int32_t srcLength) const
749 {
750   // compare illegal string values
751   // treat const UChar *srcChars==NULL as an empty string
752   if(isBogus()) {
753     return -1;
754   }
755 
756   // pin indices to legal values
757   pinIndices(start, length);
758 
759   if(srcChars == NULL) {
760     srcStart = srcLength = 0;
761   }
762 
763   int32_t diff = uprv_strCompare(getArrayStart() + start, length, (srcChars!=NULL)?(srcChars + srcStart):NULL, srcLength, FALSE, TRUE);
764   /* translate the 32-bit result into an 8-bit one */
765   if(diff!=0) {
766     return (int8_t)(diff >> 15 | 1);
767   } else {
768     return 0;
769   }
770 }
771 
772 int32_t
getLength() const773 UnicodeString::getLength() const {
774     return length();
775 }
776 
777 UChar
getCharAt(int32_t offset) const778 UnicodeString::getCharAt(int32_t offset) const {
779   return charAt(offset);
780 }
781 
782 UChar32
getChar32At(int32_t offset) const783 UnicodeString::getChar32At(int32_t offset) const {
784   return char32At(offset);
785 }
786 
787 UChar32
char32At(int32_t offset) const788 UnicodeString::char32At(int32_t offset) const
789 {
790   int32_t len = length();
791   if((uint32_t)offset < (uint32_t)len) {
792     const UChar *array = getArrayStart();
793     UChar32 c;
794     U16_GET(array, 0, offset, len, c);
795     return c;
796   } else {
797     return kInvalidUChar;
798   }
799 }
800 
801 int32_t
getChar32Start(int32_t offset) const802 UnicodeString::getChar32Start(int32_t offset) const {
803   if((uint32_t)offset < (uint32_t)length()) {
804     const UChar *array = getArrayStart();
805     U16_SET_CP_START(array, 0, offset);
806     return offset;
807   } else {
808     return 0;
809   }
810 }
811 
812 int32_t
getChar32Limit(int32_t offset) const813 UnicodeString::getChar32Limit(int32_t offset) const {
814   int32_t len = length();
815   if((uint32_t)offset < (uint32_t)len) {
816     const UChar *array = getArrayStart();
817     U16_SET_CP_LIMIT(array, 0, offset, len);
818     return offset;
819   } else {
820     return len;
821   }
822 }
823 
824 int32_t
countChar32(int32_t start,int32_t length) const825 UnicodeString::countChar32(int32_t start, int32_t length) const {
826   pinIndices(start, length);
827   // if(isBogus()) then fArray==0 and start==0 - u_countChar32() checks for NULL
828   return u_countChar32(getArrayStart()+start, length);
829 }
830 
831 UBool
hasMoreChar32Than(int32_t start,int32_t length,int32_t number) const832 UnicodeString::hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const {
833   pinIndices(start, length);
834   // if(isBogus()) then fArray==0 and start==0 - u_strHasMoreChar32Than() checks for NULL
835   return u_strHasMoreChar32Than(getArrayStart()+start, length, number);
836 }
837 
838 int32_t
moveIndex32(int32_t index,int32_t delta) const839 UnicodeString::moveIndex32(int32_t index, int32_t delta) const {
840   // pin index
841   int32_t len = length();
842   if(index<0) {
843     index=0;
844   } else if(index>len) {
845     index=len;
846   }
847 
848   const UChar *array = getArrayStart();
849   if(delta>0) {
850     U16_FWD_N(array, index, len, delta);
851   } else {
852     U16_BACK_N(array, 0, index, -delta);
853   }
854 
855   return index;
856 }
857 
858 void
doExtract(int32_t start,int32_t length,UChar * dst,int32_t dstStart) const859 UnicodeString::doExtract(int32_t start,
860              int32_t length,
861              UChar *dst,
862              int32_t dstStart) const
863 {
864   // pin indices to legal values
865   pinIndices(start, length);
866 
867   // do not copy anything if we alias dst itself
868   const UChar *array = getArrayStart();
869   if(array + start != dst + dstStart) {
870     us_arrayCopy(array, start, dst, dstStart, length);
871   }
872 }
873 
874 int32_t
extract(Char16Ptr dest,int32_t destCapacity,UErrorCode & errorCode) const875 UnicodeString::extract(Char16Ptr dest, int32_t destCapacity,
876                        UErrorCode &errorCode) const {
877   int32_t len = length();
878   if(U_SUCCESS(errorCode)) {
879     if(isBogus() || destCapacity<0 || (destCapacity>0 && dest==0)) {
880       errorCode=U_ILLEGAL_ARGUMENT_ERROR;
881     } else {
882       const UChar *array = getArrayStart();
883       if(len>0 && len<=destCapacity && array!=dest) {
884         u_memcpy(dest, array, len);
885       }
886       return u_terminateUChars(dest, destCapacity, len, &errorCode);
887     }
888   }
889 
890   return len;
891 }
892 
893 int32_t
extract(int32_t start,int32_t length,char * target,int32_t targetCapacity,enum EInvariant) const894 UnicodeString::extract(int32_t start,
895                        int32_t length,
896                        char *target,
897                        int32_t targetCapacity,
898                        enum EInvariant) const
899 {
900   // if the arguments are illegal, then do nothing
901   if(targetCapacity < 0 || (targetCapacity > 0 && target == NULL)) {
902     return 0;
903   }
904 
905   // pin the indices to legal values
906   pinIndices(start, length);
907 
908   if(length <= targetCapacity) {
909     u_UCharsToChars(getArrayStart() + start, target, length);
910   }
911   UErrorCode status = U_ZERO_ERROR;
912   return u_terminateChars(target, targetCapacity, length, &status);
913 }
914 
915 UnicodeString
tempSubString(int32_t start,int32_t len) const916 UnicodeString::tempSubString(int32_t start, int32_t len) const {
917   pinIndices(start, len);
918   const UChar *array = getBuffer();  // not getArrayStart() to check kIsBogus & kOpenGetBuffer
919   if(array==NULL) {
920     array=fUnion.fStackFields.fBuffer;  // anything not NULL because that would make an empty string
921     len=-2;  // bogus result string
922   }
923   return UnicodeString(FALSE, array + start, len);
924 }
925 
926 int32_t
toUTF8(int32_t start,int32_t len,char * target,int32_t capacity) const927 UnicodeString::toUTF8(int32_t start, int32_t len,
928                       char *target, int32_t capacity) const {
929   pinIndices(start, len);
930   int32_t length8;
931   UErrorCode errorCode = U_ZERO_ERROR;
932   u_strToUTF8WithSub(target, capacity, &length8,
933                      getBuffer() + start, len,
934                      0xFFFD,  // Standard substitution character.
935                      NULL,    // Don't care about number of substitutions.
936                      &errorCode);
937   return length8;
938 }
939 
940 #if U_CHARSET_IS_UTF8
941 
942 int32_t
extract(int32_t start,int32_t len,char * target,uint32_t dstSize) const943 UnicodeString::extract(int32_t start, int32_t len,
944                        char *target, uint32_t dstSize) const {
945   // if the arguments are illegal, then do nothing
946   if(/*dstSize < 0 || */(dstSize > 0 && target == 0)) {
947     return 0;
948   }
949   return toUTF8(start, len, target, dstSize <= 0x7fffffff ? (int32_t)dstSize : 0x7fffffff);
950 }
951 
952 // else see unistr_cnv.cpp
953 #endif
954 
955 void
extractBetween(int32_t start,int32_t limit,UnicodeString & target) const956 UnicodeString::extractBetween(int32_t start,
957                   int32_t limit,
958                   UnicodeString& target) const {
959   pinIndex(start);
960   pinIndex(limit);
961   doExtract(start, limit - start, target);
962 }
963 
964 // When converting from UTF-16 to UTF-8, the result will have at most 3 times
965 // as many bytes as the source has UChars.
966 // The "worst cases" are writing systems like Indic, Thai and CJK with
967 // 3:1 bytes:UChars.
968 void
toUTF8(ByteSink & sink) const969 UnicodeString::toUTF8(ByteSink &sink) const {
970   int32_t length16 = length();
971   if(length16 != 0) {
972     char stackBuffer[1024];
973     int32_t capacity = (int32_t)sizeof(stackBuffer);
974     UBool utf8IsOwned = FALSE;
975     char *utf8 = sink.GetAppendBuffer(length16 < capacity ? length16 : capacity,
976                                       3*length16,
977                                       stackBuffer, capacity,
978                                       &capacity);
979     int32_t length8 = 0;
980     UErrorCode errorCode = U_ZERO_ERROR;
981     u_strToUTF8WithSub(utf8, capacity, &length8,
982                        getBuffer(), length16,
983                        0xFFFD,  // Standard substitution character.
984                        NULL,    // Don't care about number of substitutions.
985                        &errorCode);
986     if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
987       utf8 = (char *)uprv_malloc(length8);
988       if(utf8 != NULL) {
989         utf8IsOwned = TRUE;
990         errorCode = U_ZERO_ERROR;
991         u_strToUTF8WithSub(utf8, length8, &length8,
992                            getBuffer(), length16,
993                            0xFFFD,  // Standard substitution character.
994                            NULL,    // Don't care about number of substitutions.
995                            &errorCode);
996       } else {
997         errorCode = U_MEMORY_ALLOCATION_ERROR;
998       }
999     }
1000     if(U_SUCCESS(errorCode)) {
1001       sink.Append(utf8, length8);
1002       sink.Flush();
1003     }
1004     if(utf8IsOwned) {
1005       uprv_free(utf8);
1006     }
1007   }
1008 }
1009 
1010 int32_t
toUTF32(UChar32 * utf32,int32_t capacity,UErrorCode & errorCode) const1011 UnicodeString::toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const {
1012   int32_t length32=0;
1013   if(U_SUCCESS(errorCode)) {
1014     // getBuffer() and u_strToUTF32WithSub() check for illegal arguments.
1015     u_strToUTF32WithSub(utf32, capacity, &length32,
1016         getBuffer(), length(),
1017         0xfffd,  // Substitution character.
1018         NULL,    // Don't care about number of substitutions.
1019         &errorCode);
1020   }
1021   return length32;
1022 }
1023 
1024 int32_t
indexOf(const UChar * srcChars,int32_t srcStart,int32_t srcLength,int32_t start,int32_t length) const1025 UnicodeString::indexOf(const UChar *srcChars,
1026                int32_t srcStart,
1027                int32_t srcLength,
1028                int32_t start,
1029                int32_t length) const
1030 {
1031   if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
1032     return -1;
1033   }
1034 
1035   // UnicodeString does not find empty substrings
1036   if(srcLength < 0 && srcChars[srcStart] == 0) {
1037     return -1;
1038   }
1039 
1040   // get the indices within bounds
1041   pinIndices(start, length);
1042 
1043   // find the first occurrence of the substring
1044   const UChar *array = getArrayStart();
1045   const UChar *match = u_strFindFirst(array + start, length, srcChars + srcStart, srcLength);
1046   if(match == NULL) {
1047     return -1;
1048   } else {
1049     return (int32_t)(match - array);
1050   }
1051 }
1052 
1053 int32_t
doIndexOf(UChar c,int32_t start,int32_t length) const1054 UnicodeString::doIndexOf(UChar c,
1055              int32_t start,
1056              int32_t length) const
1057 {
1058   // pin indices
1059   pinIndices(start, length);
1060 
1061   // find the first occurrence of c
1062   const UChar *array = getArrayStart();
1063   const UChar *match = u_memchr(array + start, c, length);
1064   if(match == NULL) {
1065     return -1;
1066   } else {
1067     return (int32_t)(match - array);
1068   }
1069 }
1070 
1071 int32_t
doIndexOf(UChar32 c,int32_t start,int32_t length) const1072 UnicodeString::doIndexOf(UChar32 c,
1073                          int32_t start,
1074                          int32_t length) const {
1075   // pin indices
1076   pinIndices(start, length);
1077 
1078   // find the first occurrence of c
1079   const UChar *array = getArrayStart();
1080   const UChar *match = u_memchr32(array + start, c, length);
1081   if(match == NULL) {
1082     return -1;
1083   } else {
1084     return (int32_t)(match - array);
1085   }
1086 }
1087 
1088 int32_t
lastIndexOf(const UChar * srcChars,int32_t srcStart,int32_t srcLength,int32_t start,int32_t length) const1089 UnicodeString::lastIndexOf(const UChar *srcChars,
1090                int32_t srcStart,
1091                int32_t srcLength,
1092                int32_t start,
1093                int32_t length) const
1094 {
1095   if(isBogus() || srcChars == 0 || srcStart < 0 || srcLength == 0) {
1096     return -1;
1097   }
1098 
1099   // UnicodeString does not find empty substrings
1100   if(srcLength < 0 && srcChars[srcStart] == 0) {
1101     return -1;
1102   }
1103 
1104   // get the indices within bounds
1105   pinIndices(start, length);
1106 
1107   // find the last occurrence of the substring
1108   const UChar *array = getArrayStart();
1109   const UChar *match = u_strFindLast(array + start, length, srcChars + srcStart, srcLength);
1110   if(match == NULL) {
1111     return -1;
1112   } else {
1113     return (int32_t)(match - array);
1114   }
1115 }
1116 
1117 int32_t
doLastIndexOf(UChar c,int32_t start,int32_t length) const1118 UnicodeString::doLastIndexOf(UChar c,
1119                  int32_t start,
1120                  int32_t length) const
1121 {
1122   if(isBogus()) {
1123     return -1;
1124   }
1125 
1126   // pin indices
1127   pinIndices(start, length);
1128 
1129   // find the last occurrence of c
1130   const UChar *array = getArrayStart();
1131   const UChar *match = u_memrchr(array + start, c, length);
1132   if(match == NULL) {
1133     return -1;
1134   } else {
1135     return (int32_t)(match - array);
1136   }
1137 }
1138 
1139 int32_t
doLastIndexOf(UChar32 c,int32_t start,int32_t length) const1140 UnicodeString::doLastIndexOf(UChar32 c,
1141                              int32_t start,
1142                              int32_t length) const {
1143   // pin indices
1144   pinIndices(start, length);
1145 
1146   // find the last occurrence of c
1147   const UChar *array = getArrayStart();
1148   const UChar *match = u_memrchr32(array + start, c, length);
1149   if(match == NULL) {
1150     return -1;
1151   } else {
1152     return (int32_t)(match - array);
1153   }
1154 }
1155 
1156 //========================================
1157 // Write implementation
1158 //========================================
1159 
1160 UnicodeString&
findAndReplace(int32_t start,int32_t length,const UnicodeString & oldText,int32_t oldStart,int32_t oldLength,const UnicodeString & newText,int32_t newStart,int32_t newLength)1161 UnicodeString::findAndReplace(int32_t start,
1162                   int32_t length,
1163                   const UnicodeString& oldText,
1164                   int32_t oldStart,
1165                   int32_t oldLength,
1166                   const UnicodeString& newText,
1167                   int32_t newStart,
1168                   int32_t newLength)
1169 {
1170   if(isBogus() || oldText.isBogus() || newText.isBogus()) {
1171     return *this;
1172   }
1173 
1174   pinIndices(start, length);
1175   oldText.pinIndices(oldStart, oldLength);
1176   newText.pinIndices(newStart, newLength);
1177 
1178   if(oldLength == 0) {
1179     return *this;
1180   }
1181 
1182   while(length > 0 && length >= oldLength) {
1183     int32_t pos = indexOf(oldText, oldStart, oldLength, start, length);
1184     if(pos < 0) {
1185       // no more oldText's here: done
1186       break;
1187     } else {
1188       // we found oldText, replace it by newText and go beyond it
1189       replace(pos, oldLength, newText, newStart, newLength);
1190       length -= pos + oldLength - start;
1191       start = pos + newLength;
1192     }
1193   }
1194 
1195   return *this;
1196 }
1197 
1198 
1199 void
setToBogus()1200 UnicodeString::setToBogus()
1201 {
1202   releaseArray();
1203 
1204   fUnion.fFields.fLengthAndFlags = kIsBogus;
1205   fUnion.fFields.fArray = 0;
1206   fUnion.fFields.fCapacity = 0;
1207 }
1208 
1209 // turn a bogus string into an empty one
1210 void
unBogus()1211 UnicodeString::unBogus() {
1212   if(fUnion.fFields.fLengthAndFlags & kIsBogus) {
1213     setToEmpty();
1214   }
1215 }
1216 
1217 const char16_t *
getTerminatedBuffer()1218 UnicodeString::getTerminatedBuffer() {
1219   if(!isWritable()) {
1220     return nullptr;
1221   }
1222   UChar *array = getArrayStart();
1223   int32_t len = length();
1224   if(len < getCapacity()) {
1225     if(fUnion.fFields.fLengthAndFlags & kBufferIsReadonly) {
1226       // If len<capacity on a read-only alias, then array[len] is
1227       // either the original NUL (if constructed with (TRUE, s, length))
1228       // or one of the original string contents characters (if later truncated),
1229       // therefore we can assume that array[len] is initialized memory.
1230       if(array[len] == 0) {
1231         return array;
1232       }
1233     } else if(((fUnion.fFields.fLengthAndFlags & kRefCounted) == 0 || refCount() == 1)) {
1234       // kRefCounted: Do not write the NUL if the buffer is shared.
1235       // That is mostly safe, except when the length of one copy was modified
1236       // without copy-on-write, e.g., via truncate(newLength) or remove(void).
1237       // Then the NUL would be written into the middle of another copy's string.
1238 
1239       // Otherwise, the buffer is fully writable and it is anyway safe to write the NUL.
1240       // Do not test if there is a NUL already because it might be uninitialized memory.
1241       // (That would be safe, but tools like valgrind & Purify would complain.)
1242       array[len] = 0;
1243       return array;
1244     }
1245   }
1246   if(len<INT32_MAX && cloneArrayIfNeeded(len+1)) {
1247     array = getArrayStart();
1248     array[len] = 0;
1249     return array;
1250   } else {
1251     return nullptr;
1252   }
1253 }
1254 
1255 // setTo() analogous to the readonly-aliasing constructor with the same signature
1256 UnicodeString &
setTo(UBool isTerminated,ConstChar16Ptr textPtr,int32_t textLength)1257 UnicodeString::setTo(UBool isTerminated,
1258                      ConstChar16Ptr textPtr,
1259                      int32_t textLength)
1260 {
1261   if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) {
1262     // do not modify a string that has an "open" getBuffer(minCapacity)
1263     return *this;
1264   }
1265 
1266   const UChar *text = textPtr;
1267   if(text == NULL) {
1268     // treat as an empty string, do not alias
1269     releaseArray();
1270     setToEmpty();
1271     return *this;
1272   }
1273 
1274   if( textLength < -1 ||
1275       (textLength == -1 && !isTerminated) ||
1276       (textLength >= 0 && isTerminated && text[textLength] != 0)
1277   ) {
1278     setToBogus();
1279     return *this;
1280   }
1281 
1282   releaseArray();
1283 
1284   if(textLength == -1) {
1285     // text is terminated, or else it would have failed the above test
1286     textLength = u_strlen(text);
1287   }
1288   fUnion.fFields.fLengthAndFlags = kReadonlyAlias;
1289   setArray((UChar *)text, textLength, isTerminated ? textLength + 1 : textLength);
1290   return *this;
1291 }
1292 
1293 // setTo() analogous to the writable-aliasing constructor with the same signature
1294 UnicodeString &
setTo(UChar * buffer,int32_t buffLength,int32_t buffCapacity)1295 UnicodeString::setTo(UChar *buffer,
1296                      int32_t buffLength,
1297                      int32_t buffCapacity) {
1298   if(fUnion.fFields.fLengthAndFlags & kOpenGetBuffer) {
1299     // do not modify a string that has an "open" getBuffer(minCapacity)
1300     return *this;
1301   }
1302 
1303   if(buffer == NULL) {
1304     // treat as an empty string, do not alias
1305     releaseArray();
1306     setToEmpty();
1307     return *this;
1308   }
1309 
1310   if(buffLength < -1 || buffCapacity < 0 || buffLength > buffCapacity) {
1311     setToBogus();
1312     return *this;
1313   } else if(buffLength == -1) {
1314     // buffLength = u_strlen(buff); but do not look beyond buffCapacity
1315     const UChar *p = buffer, *limit = buffer + buffCapacity;
1316     while(p != limit && *p != 0) {
1317       ++p;
1318     }
1319     buffLength = (int32_t)(p - buffer);
1320   }
1321 
1322   releaseArray();
1323 
1324   fUnion.fFields.fLengthAndFlags = kWritableAlias;
1325   setArray(buffer, buffLength, buffCapacity);
1326   return *this;
1327 }
1328 
setToUTF8(StringPiece utf8)1329 UnicodeString &UnicodeString::setToUTF8(StringPiece utf8) {
1330   unBogus();
1331   int32_t length = utf8.length();
1332   int32_t capacity;
1333   // The UTF-16 string will be at most as long as the UTF-8 string.
1334   if(length <= US_STACKBUF_SIZE) {
1335     capacity = US_STACKBUF_SIZE;
1336   } else {
1337     capacity = length + 1;  // +1 for the terminating NUL.
1338   }
1339   UChar *utf16 = getBuffer(capacity);
1340   int32_t length16;
1341   UErrorCode errorCode = U_ZERO_ERROR;
1342   u_strFromUTF8WithSub(utf16, getCapacity(), &length16,
1343       utf8.data(), length,
1344       0xfffd,  // Substitution character.
1345       NULL,    // Don't care about number of substitutions.
1346       &errorCode);
1347   releaseBuffer(length16);
1348   if(U_FAILURE(errorCode)) {
1349     setToBogus();
1350   }
1351   return *this;
1352 }
1353 
1354 UnicodeString&
setCharAt(int32_t offset,UChar c)1355 UnicodeString::setCharAt(int32_t offset,
1356              UChar c)
1357 {
1358   int32_t len = length();
1359   if(cloneArrayIfNeeded() && len > 0) {
1360     if(offset < 0) {
1361       offset = 0;
1362     } else if(offset >= len) {
1363       offset = len - 1;
1364     }
1365 
1366     getArrayStart()[offset] = c;
1367   }
1368   return *this;
1369 }
1370 
1371 UnicodeString&
replace(int32_t start,int32_t _length,UChar32 srcChar)1372 UnicodeString::replace(int32_t start,
1373                int32_t _length,
1374                UChar32 srcChar) {
1375   UChar buffer[U16_MAX_LENGTH];
1376   int32_t count = 0;
1377   UBool isError = FALSE;
1378   U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
1379   // We test isError so that the compiler does not complain that we don't.
1380   // If isError (srcChar is not a valid code point) then count==0 which means
1381   // we remove the source segment rather than replacing it with srcChar.
1382   return doReplace(start, _length, buffer, 0, isError ? 0 : count);
1383 }
1384 
1385 UnicodeString&
append(UChar32 srcChar)1386 UnicodeString::append(UChar32 srcChar) {
1387   UChar buffer[U16_MAX_LENGTH];
1388   int32_t _length = 0;
1389   UBool isError = FALSE;
1390   U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
1391   // We test isError so that the compiler does not complain that we don't.
1392   // If isError then _length==0 which turns the doAppend() into a no-op anyway.
1393   return isError ? *this : doAppend(buffer, 0, _length);
1394 }
1395 
1396 UnicodeString&
doReplace(int32_t start,int32_t length,const UnicodeString & src,int32_t srcStart,int32_t srcLength)1397 UnicodeString::doReplace( int32_t start,
1398               int32_t length,
1399               const UnicodeString& src,
1400               int32_t srcStart,
1401               int32_t srcLength)
1402 {
1403   // pin the indices to legal values
1404   src.pinIndices(srcStart, srcLength);
1405 
1406   // get the characters from src
1407   // and replace the range in ourselves with them
1408   return doReplace(start, length, src.getArrayStart(), srcStart, srcLength);
1409 }
1410 
1411 UnicodeString&
doReplace(int32_t start,int32_t length,const UChar * srcChars,int32_t srcStart,int32_t srcLength)1412 UnicodeString::doReplace(int32_t start,
1413              int32_t length,
1414              const UChar *srcChars,
1415              int32_t srcStart,
1416              int32_t srcLength)
1417 {
1418   if(!isWritable()) {
1419     return *this;
1420   }
1421 
1422   int32_t oldLength = this->length();
1423 
1424   // optimize (read-only alias).remove(0, start) and .remove(start, end)
1425   if((fUnion.fFields.fLengthAndFlags&kBufferIsReadonly) && srcLength == 0) {
1426     if(start == 0) {
1427       // remove prefix by adjusting the array pointer
1428       pinIndex(length);
1429       fUnion.fFields.fArray += length;
1430       fUnion.fFields.fCapacity -= length;
1431       setLength(oldLength - length);
1432       return *this;
1433     } else {
1434       pinIndex(start);
1435       if(length >= (oldLength - start)) {
1436         // remove suffix by reducing the length (like truncate())
1437         setLength(start);
1438         fUnion.fFields.fCapacity = start;  // not NUL-terminated any more
1439         return *this;
1440       }
1441     }
1442   }
1443 
1444   if(start == oldLength) {
1445     return doAppend(srcChars, srcStart, srcLength);
1446   }
1447 
1448   if(srcChars == 0) {
1449     srcLength = 0;
1450   } else {
1451     // Perform all remaining operations relative to srcChars + srcStart.
1452     // From this point forward, do not use srcStart.
1453     srcChars += srcStart;
1454     if (srcLength < 0) {
1455       // get the srcLength if necessary
1456       srcLength = u_strlen(srcChars);
1457     }
1458   }
1459 
1460   // pin the indices to legal values
1461   pinIndices(start, length);
1462 
1463   // Calculate the size of the string after the replace.
1464   // Avoid int32_t overflow.
1465   int32_t newLength = oldLength - length;
1466   if(srcLength > (INT32_MAX - newLength)) {
1467     setToBogus();
1468     return *this;
1469   }
1470   newLength += srcLength;
1471 
1472   // Check for insertion into ourself
1473   const UChar *oldArray = getArrayStart();
1474   if (isBufferWritable() &&
1475       oldArray < srcChars + srcLength &&
1476       srcChars < oldArray + oldLength) {
1477     // Copy into a new UnicodeString and start over
1478     UnicodeString copy(srcChars, srcLength);
1479     if (copy.isBogus()) {
1480       setToBogus();
1481       return *this;
1482     }
1483     return doReplace(start, length, copy.getArrayStart(), 0, srcLength);
1484   }
1485 
1486   // cloneArrayIfNeeded(doCopyArray=FALSE) may change fArray but will not copy the current contents;
1487   // therefore we need to keep the current fArray
1488   UChar oldStackBuffer[US_STACKBUF_SIZE];
1489   if((fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) && (newLength > US_STACKBUF_SIZE)) {
1490     // copy the stack buffer contents because it will be overwritten with
1491     // fUnion.fFields values
1492     u_memcpy(oldStackBuffer, oldArray, oldLength);
1493     oldArray = oldStackBuffer;
1494   }
1495 
1496   // clone our array and allocate a bigger array if needed
1497   int32_t *bufferToDelete = 0;
1498   if(!cloneArrayIfNeeded(newLength, getGrowCapacity(newLength),
1499                          FALSE, &bufferToDelete)
1500   ) {
1501     return *this;
1502   }
1503 
1504   // now do the replace
1505 
1506   UChar *newArray = getArrayStart();
1507   if(newArray != oldArray) {
1508     // if fArray changed, then we need to copy everything except what will change
1509     us_arrayCopy(oldArray, 0, newArray, 0, start);
1510     us_arrayCopy(oldArray, start + length,
1511                  newArray, start + srcLength,
1512                  oldLength - (start + length));
1513   } else if(length != srcLength) {
1514     // fArray did not change; copy only the portion that isn't changing, leaving a hole
1515     us_arrayCopy(oldArray, start + length,
1516                  newArray, start + srcLength,
1517                  oldLength - (start + length));
1518   }
1519 
1520   // now fill in the hole with the new string
1521   us_arrayCopy(srcChars, 0, newArray, start, srcLength);
1522 
1523   setLength(newLength);
1524 
1525   // delayed delete in case srcChars == fArray when we started, and
1526   // to keep oldArray alive for the above operations
1527   if (bufferToDelete) {
1528     uprv_free(bufferToDelete);
1529   }
1530 
1531   return *this;
1532 }
1533 
1534 // Versions of doReplace() only for append() variants.
1535 // doReplace() and doAppend() optimize for different cases.
1536 
1537 UnicodeString&
doAppend(const UnicodeString & src,int32_t srcStart,int32_t srcLength)1538 UnicodeString::doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength) {
1539   if(srcLength == 0) {
1540     return *this;
1541   }
1542 
1543   // pin the indices to legal values
1544   src.pinIndices(srcStart, srcLength);
1545   return doAppend(src.getArrayStart(), srcStart, srcLength);
1546 }
1547 
1548 UnicodeString&
doAppend(const UChar * srcChars,int32_t srcStart,int32_t srcLength)1549 UnicodeString::doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLength) {
1550   if(!isWritable() || srcLength == 0 || srcChars == NULL) {
1551     return *this;
1552   }
1553 
1554   // Perform all remaining operations relative to srcChars + srcStart.
1555   // From this point forward, do not use srcStart.
1556   srcChars += srcStart;
1557 
1558   if(srcLength < 0) {
1559     // get the srcLength if necessary
1560     if((srcLength = u_strlen(srcChars)) == 0) {
1561       return *this;
1562     }
1563   }
1564 
1565   int32_t oldLength = length();
1566   int32_t newLength;
1567   if (uprv_add32_overflow(oldLength, srcLength, &newLength)) {
1568     setToBogus();
1569     return *this;
1570   }
1571 
1572   // Check for append onto ourself
1573   const UChar* oldArray = getArrayStart();
1574   if (isBufferWritable() &&
1575       oldArray < srcChars + srcLength &&
1576       srcChars < oldArray + oldLength) {
1577     // Copy into a new UnicodeString and start over
1578     UnicodeString copy(srcChars, srcLength);
1579     if (copy.isBogus()) {
1580       setToBogus();
1581       return *this;
1582     }
1583     return doAppend(copy.getArrayStart(), 0, srcLength);
1584   }
1585 
1586   // optimize append() onto a large-enough, owned string
1587   if((newLength <= getCapacity() && isBufferWritable()) ||
1588       cloneArrayIfNeeded(newLength, getGrowCapacity(newLength))) {
1589     UChar *newArray = getArrayStart();
1590     // Do not copy characters when
1591     //   UChar *buffer=str.getAppendBuffer(...);
1592     // is followed by
1593     //   str.append(buffer, length);
1594     // or
1595     //   str.appendString(buffer, length)
1596     // or similar.
1597     if(srcChars != newArray + oldLength) {
1598       us_arrayCopy(srcChars, 0, newArray, oldLength, srcLength);
1599     }
1600     setLength(newLength);
1601   }
1602   return *this;
1603 }
1604 
1605 /**
1606  * Replaceable API
1607  */
1608 void
handleReplaceBetween(int32_t start,int32_t limit,const UnicodeString & text)1609 UnicodeString::handleReplaceBetween(int32_t start,
1610                                     int32_t limit,
1611                                     const UnicodeString& text) {
1612     replaceBetween(start, limit, text);
1613 }
1614 
1615 /**
1616  * Replaceable API
1617  */
1618 void
copy(int32_t start,int32_t limit,int32_t dest)1619 UnicodeString::copy(int32_t start, int32_t limit, int32_t dest) {
1620     if (limit <= start) {
1621         return; // Nothing to do; avoid bogus malloc call
1622     }
1623     UChar* text = (UChar*) uprv_malloc( sizeof(UChar) * (limit - start) );
1624     // Check to make sure text is not null.
1625     if (text != NULL) {
1626 	    extractBetween(start, limit, text, 0);
1627 	    insert(dest, text, 0, limit - start);
1628 	    uprv_free(text);
1629     }
1630 }
1631 
1632 /**
1633  * Replaceable API
1634  *
1635  * NOTE: This is for the Replaceable class.  There is no rep.cpp,
1636  * so we implement this function here.
1637  */
hasMetaData() const1638 UBool Replaceable::hasMetaData() const {
1639     return TRUE;
1640 }
1641 
1642 /**
1643  * Replaceable API
1644  */
hasMetaData() const1645 UBool UnicodeString::hasMetaData() const {
1646     return FALSE;
1647 }
1648 
1649 UnicodeString&
doReverse(int32_t start,int32_t length)1650 UnicodeString::doReverse(int32_t start, int32_t length) {
1651   if(length <= 1 || !cloneArrayIfNeeded()) {
1652     return *this;
1653   }
1654 
1655   // pin the indices to legal values
1656   pinIndices(start, length);
1657   if(length <= 1) {  // pinIndices() might have shrunk the length
1658     return *this;
1659   }
1660 
1661   UChar *left = getArrayStart() + start;
1662   UChar *right = left + length - 1;  // -1 for inclusive boundary (length>=2)
1663   UChar swap;
1664   UBool hasSupplementary = FALSE;
1665 
1666   // Before the loop we know left<right because length>=2.
1667   do {
1668     hasSupplementary |= (UBool)U16_IS_LEAD(swap = *left);
1669     hasSupplementary |= (UBool)U16_IS_LEAD(*left++ = *right);
1670     *right-- = swap;
1671   } while(left < right);
1672   // Make sure to test the middle code unit of an odd-length string.
1673   // Redundant if the length is even.
1674   hasSupplementary |= (UBool)U16_IS_LEAD(*left);
1675 
1676   /* if there are supplementary code points in the reversed range, then re-swap their surrogates */
1677   if(hasSupplementary) {
1678     UChar swap2;
1679 
1680     left = getArrayStart() + start;
1681     right = left + length - 1; // -1 so that we can look at *(left+1) if left<right
1682     while(left < right) {
1683       if(U16_IS_TRAIL(swap = *left) && U16_IS_LEAD(swap2 = *(left + 1))) {
1684         *left++ = swap2;
1685         *left++ = swap;
1686       } else {
1687         ++left;
1688       }
1689     }
1690   }
1691 
1692   return *this;
1693 }
1694 
1695 UBool
padLeading(int32_t targetLength,UChar padChar)1696 UnicodeString::padLeading(int32_t targetLength,
1697                           UChar padChar)
1698 {
1699   int32_t oldLength = length();
1700   if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1701     return FALSE;
1702   } else {
1703     // move contents up by padding width
1704     UChar *array = getArrayStart();
1705     int32_t start = targetLength - oldLength;
1706     us_arrayCopy(array, 0, array, start, oldLength);
1707 
1708     // fill in padding character
1709     while(--start >= 0) {
1710       array[start] = padChar;
1711     }
1712     setLength(targetLength);
1713     return TRUE;
1714   }
1715 }
1716 
1717 UBool
padTrailing(int32_t targetLength,UChar padChar)1718 UnicodeString::padTrailing(int32_t targetLength,
1719                            UChar padChar)
1720 {
1721   int32_t oldLength = length();
1722   if(oldLength >= targetLength || !cloneArrayIfNeeded(targetLength)) {
1723     return FALSE;
1724   } else {
1725     // fill in padding character
1726     UChar *array = getArrayStart();
1727     int32_t length = targetLength;
1728     while(--length >= oldLength) {
1729       array[length] = padChar;
1730     }
1731     setLength(targetLength);
1732     return TRUE;
1733   }
1734 }
1735 
1736 //========================================
1737 // Hashing
1738 //========================================
1739 int32_t
doHashCode() const1740 UnicodeString::doHashCode() const
1741 {
1742     /* Delegate hash computation to uhash.  This makes UnicodeString
1743      * hashing consistent with UChar* hashing.  */
1744     int32_t hashCode = ustr_hashUCharsN(getArrayStart(), length());
1745     if (hashCode == kInvalidHashCode) {
1746         hashCode = kEmptyHashCode;
1747     }
1748     return hashCode;
1749 }
1750 
1751 //========================================
1752 // External Buffer
1753 //========================================
1754 
1755 char16_t *
getBuffer(int32_t minCapacity)1756 UnicodeString::getBuffer(int32_t minCapacity) {
1757   if(minCapacity>=-1 && cloneArrayIfNeeded(minCapacity)) {
1758     fUnion.fFields.fLengthAndFlags|=kOpenGetBuffer;
1759     setZeroLength();
1760     return getArrayStart();
1761   } else {
1762     return nullptr;
1763   }
1764 }
1765 
1766 void
releaseBuffer(int32_t newLength)1767 UnicodeString::releaseBuffer(int32_t newLength) {
1768   if(fUnion.fFields.fLengthAndFlags&kOpenGetBuffer && newLength>=-1) {
1769     // set the new fLength
1770     int32_t capacity=getCapacity();
1771     if(newLength==-1) {
1772       // the new length is the string length, capped by fCapacity
1773       const UChar *array=getArrayStart(), *p=array, *limit=array+capacity;
1774       while(p<limit && *p!=0) {
1775         ++p;
1776       }
1777       newLength=(int32_t)(p-array);
1778     } else if(newLength>capacity) {
1779       newLength=capacity;
1780     }
1781     setLength(newLength);
1782     fUnion.fFields.fLengthAndFlags&=~kOpenGetBuffer;
1783   }
1784 }
1785 
1786 //========================================
1787 // Miscellaneous
1788 //========================================
1789 UBool
cloneArrayIfNeeded(int32_t newCapacity,int32_t growCapacity,UBool doCopyArray,int32_t ** pBufferToDelete,UBool forceClone)1790 UnicodeString::cloneArrayIfNeeded(int32_t newCapacity,
1791                                   int32_t growCapacity,
1792                                   UBool doCopyArray,
1793                                   int32_t **pBufferToDelete,
1794                                   UBool forceClone) {
1795   // default parameters need to be static, therefore
1796   // the defaults are -1 to have convenience defaults
1797   if(newCapacity == -1) {
1798     newCapacity = getCapacity();
1799   }
1800 
1801   // while a getBuffer(minCapacity) is "open",
1802   // prevent any modifications of the string by returning FALSE here
1803   // if the string is bogus, then only an assignment or similar can revive it
1804   if(!isWritable()) {
1805     return FALSE;
1806   }
1807 
1808   /*
1809    * We need to make a copy of the array if
1810    * the buffer is read-only, or
1811    * the buffer is refCounted (shared), and refCount>1, or
1812    * the buffer is too small.
1813    * Return FALSE if memory could not be allocated.
1814    */
1815   if(forceClone ||
1816      fUnion.fFields.fLengthAndFlags & kBufferIsReadonly ||
1817      (fUnion.fFields.fLengthAndFlags & kRefCounted && refCount() > 1) ||
1818      newCapacity > getCapacity()
1819   ) {
1820     // check growCapacity for default value and use of the stack buffer
1821     if(growCapacity < 0) {
1822       growCapacity = newCapacity;
1823     } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) {
1824       growCapacity = US_STACKBUF_SIZE;
1825     }
1826 
1827     // save old values
1828     UChar oldStackBuffer[US_STACKBUF_SIZE];
1829     UChar *oldArray;
1830     int32_t oldLength = length();
1831     int16_t flags = fUnion.fFields.fLengthAndFlags;
1832 
1833     if(flags&kUsingStackBuffer) {
1834       U_ASSERT(!(flags&kRefCounted)); /* kRefCounted and kUsingStackBuffer are mutally exclusive */
1835       if(doCopyArray && growCapacity > US_STACKBUF_SIZE) {
1836         // copy the stack buffer contents because it will be overwritten with
1837         // fUnion.fFields values
1838         us_arrayCopy(fUnion.fStackFields.fBuffer, 0, oldStackBuffer, 0, oldLength);
1839         oldArray = oldStackBuffer;
1840       } else {
1841         oldArray = NULL; // no need to copy from the stack buffer to itself
1842       }
1843     } else {
1844       oldArray = fUnion.fFields.fArray;
1845       U_ASSERT(oldArray!=NULL); /* when stack buffer is not used, oldArray must have a non-NULL reference */
1846     }
1847 
1848     // allocate a new array
1849     if(allocate(growCapacity) ||
1850        (newCapacity < growCapacity && allocate(newCapacity))
1851     ) {
1852       if(doCopyArray) {
1853         // copy the contents
1854         // do not copy more than what fits - it may be smaller than before
1855         int32_t minLength = oldLength;
1856         newCapacity = getCapacity();
1857         if(newCapacity < minLength) {
1858           minLength = newCapacity;
1859         }
1860         if(oldArray != NULL) {
1861           us_arrayCopy(oldArray, 0, getArrayStart(), 0, minLength);
1862         }
1863         setLength(minLength);
1864       } else {
1865         setZeroLength();
1866       }
1867 
1868       // release the old array
1869       if(flags & kRefCounted) {
1870         // the array is refCounted; decrement and release if 0
1871         u_atomic_int32_t *pRefCount = ((u_atomic_int32_t *)oldArray - 1);
1872         if(umtx_atomic_dec(pRefCount) == 0) {
1873           if(pBufferToDelete == 0) {
1874               // Note: cast to (void *) is needed with MSVC, where u_atomic_int32_t
1875               // is defined as volatile. (Volatile has useful non-standard behavior
1876               //   with this compiler.)
1877             uprv_free((void *)pRefCount);
1878           } else {
1879             // the caller requested to delete it himself
1880             *pBufferToDelete = (int32_t *)pRefCount;
1881           }
1882         }
1883       }
1884     } else {
1885       // not enough memory for growCapacity and not even for the smaller newCapacity
1886       // reset the old values for setToBogus() to release the array
1887       if(!(flags&kUsingStackBuffer)) {
1888         fUnion.fFields.fArray = oldArray;
1889       }
1890       fUnion.fFields.fLengthAndFlags = flags;
1891       setToBogus();
1892       return FALSE;
1893     }
1894   }
1895   return TRUE;
1896 }
1897 
1898 // UnicodeStringAppendable ------------------------------------------------- ***
1899 
~UnicodeStringAppendable()1900 UnicodeStringAppendable::~UnicodeStringAppendable() {}
1901 
1902 UBool
appendCodeUnit(UChar c)1903 UnicodeStringAppendable::appendCodeUnit(UChar c) {
1904   return str.doAppend(&c, 0, 1).isWritable();
1905 }
1906 
1907 UBool
appendCodePoint(UChar32 c)1908 UnicodeStringAppendable::appendCodePoint(UChar32 c) {
1909   UChar buffer[U16_MAX_LENGTH];
1910   int32_t cLength = 0;
1911   UBool isError = FALSE;
1912   U16_APPEND(buffer, cLength, U16_MAX_LENGTH, c, isError);
1913   return !isError && str.doAppend(buffer, 0, cLength).isWritable();
1914 }
1915 
1916 UBool
appendString(const UChar * s,int32_t length)1917 UnicodeStringAppendable::appendString(const UChar *s, int32_t length) {
1918   return str.doAppend(s, 0, length).isWritable();
1919 }
1920 
1921 UBool
reserveAppendCapacity(int32_t appendCapacity)1922 UnicodeStringAppendable::reserveAppendCapacity(int32_t appendCapacity) {
1923   return str.cloneArrayIfNeeded(str.length() + appendCapacity);
1924 }
1925 
1926 UChar *
getAppendBuffer(int32_t minCapacity,int32_t desiredCapacityHint,UChar * scratch,int32_t scratchCapacity,int32_t * resultCapacity)1927 UnicodeStringAppendable::getAppendBuffer(int32_t minCapacity,
1928                                          int32_t desiredCapacityHint,
1929                                          UChar *scratch, int32_t scratchCapacity,
1930                                          int32_t *resultCapacity) {
1931   if(minCapacity < 1 || scratchCapacity < minCapacity) {
1932     *resultCapacity = 0;
1933     return NULL;
1934   }
1935   int32_t oldLength = str.length();
1936   if(minCapacity <= (kMaxCapacity - oldLength) &&
1937       desiredCapacityHint <= (kMaxCapacity - oldLength) &&
1938       str.cloneArrayIfNeeded(oldLength + minCapacity, oldLength + desiredCapacityHint)) {
1939     *resultCapacity = str.getCapacity() - oldLength;
1940     return str.getArrayStart() + oldLength;
1941   }
1942   *resultCapacity = scratchCapacity;
1943   return scratch;
1944 }
1945 
1946 U_NAMESPACE_END
1947 
1948 U_NAMESPACE_USE
1949 
1950 U_CAPI int32_t U_EXPORT2
uhash_hashUnicodeString(const UElement key)1951 uhash_hashUnicodeString(const UElement key) {
1952     const UnicodeString *str = (const UnicodeString*) key.pointer;
1953     return (str == NULL) ? 0 : str->hashCode();
1954 }
1955 
1956 // Moved here from uhash_us.cpp so that using a UVector of UnicodeString*
1957 // does not depend on hashtable code.
1958 U_CAPI UBool U_EXPORT2
uhash_compareUnicodeString(const UElement key1,const UElement key2)1959 uhash_compareUnicodeString(const UElement key1, const UElement key2) {
1960     const UnicodeString *str1 = (const UnicodeString*) key1.pointer;
1961     const UnicodeString *str2 = (const UnicodeString*) key2.pointer;
1962     if (str1 == str2) {
1963         return TRUE;
1964     }
1965     if (str1 == NULL || str2 == NULL) {
1966         return FALSE;
1967     }
1968     return *str1 == *str2;
1969 }
1970 
1971 #ifdef U_STATIC_IMPLEMENTATION
1972 /*
1973 This should never be called. It is defined here to make sure that the
1974 virtual vector deleting destructor is defined within unistr.cpp.
1975 The vector deleting destructor is already a part of UObject,
1976 but defining it here makes sure that it is included with this object file.
1977 This makes sure that static library dependencies are kept to a minimum.
1978 */
uprv_UnicodeStringDummy(void)1979 static void uprv_UnicodeStringDummy(void) {
1980     delete [] (new UnicodeString[2]);
1981 }
1982 #endif
1983