• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 
9 #include <utility>
10 
11 #include "ustrtest.h"
12 #include "unicode/appendable.h"
13 #include "unicode/std_string.h"
14 #include "unicode/unistr.h"
15 #include "unicode/uchar.h"
16 #include "unicode/ustring.h"
17 #include "unicode/locid.h"
18 #include "unicode/strenum.h"
19 #include "unicode/ucnv.h"
20 #include "unicode/uenum.h"
21 #include "unicode/utf16.h"
22 #include "cmemory.h"
23 #include "charstr.h"
24 
25 #if 0
26 #include "unicode/ustream.h"
27 
28 #include <iostream>
29 using namespace std;
30 
31 #endif
32 
~UnicodeStringTest()33 UnicodeStringTest::~UnicodeStringTest() {}
34 
35 extern IntlTest *createStringCaseTest();
36 
runIndexedTest(int32_t index,UBool exec,const char * & name,char * par)37 void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char *par)
38 {
39     if (exec) logln("TestSuite UnicodeStringTest: ");
40     TESTCASE_AUTO_BEGIN;
41     TESTCASE_AUTO_CREATE_CLASS(StringCaseTest);
42     TESTCASE_AUTO(TestBasicManipulation);
43     TESTCASE_AUTO(TestCompare);
44     TESTCASE_AUTO(TestExtract);
45     TESTCASE_AUTO(TestRemoveReplace);
46     TESTCASE_AUTO(TestSearching);
47     TESTCASE_AUTO(TestSpacePadding);
48     TESTCASE_AUTO(TestPrefixAndSuffix);
49     TESTCASE_AUTO(TestFindAndReplace);
50     TESTCASE_AUTO(TestBogus);
51     TESTCASE_AUTO(TestReverse);
52     TESTCASE_AUTO(TestMiscellaneous);
53     TESTCASE_AUTO(TestStackAllocation);
54     TESTCASE_AUTO(TestUnescape);
55     TESTCASE_AUTO(TestCountChar32);
56     TESTCASE_AUTO(TestStringEnumeration);
57     TESTCASE_AUTO(TestNameSpace);
58     TESTCASE_AUTO(TestUTF32);
59     TESTCASE_AUTO(TestUTF8);
60     TESTCASE_AUTO(TestReadOnlyAlias);
61     TESTCASE_AUTO(TestAppendable);
62     TESTCASE_AUTO(TestUnicodeStringImplementsAppendable);
63     TESTCASE_AUTO(TestSizeofUnicodeString);
64     TESTCASE_AUTO(TestStartsWithAndEndsWithNulTerminated);
65     TESTCASE_AUTO(TestMoveSwap);
66     TESTCASE_AUTO(TestUInt16Pointers);
67     TESTCASE_AUTO(TestWCharPointers);
68     TESTCASE_AUTO(TestNullPointers);
69     TESTCASE_AUTO(TestUnicodeStringInsertAppendToSelf);
70     TESTCASE_AUTO(TestLargeAppend);
71     /* <issue: https://github.com/unicode-org/icu/pull/3416> 20250417 begin */
72     TESTCASE_AUTO(TestLargeMemory);
73     /* <issue: https://github.com/unicode-org/icu/pull/3416> 20250417 end */
74     TESTCASE_AUTO_END;
75 }
76 
77 void
TestBasicManipulation()78 UnicodeStringTest::TestBasicManipulation()
79 {
80     UnicodeString   test1("Now is the time for all men to come swiftly to the aid of the party.\n");
81     UnicodeString   expectedValue;
82     UnicodeString   *c;
83 
84     c=test1.clone();
85     test1.insert(24, "good ");
86     expectedValue = "Now is the time for all good men to come swiftly to the aid of the party.\n";
87     if (test1 != expectedValue)
88         errln("insert() failed:  expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
89 
90     c->insert(24, "good ");
91     if(*c != expectedValue) {
92         errln("clone()->insert() failed:  expected \"" + expectedValue + "\"\n,got \"" + *c + "\"");
93     }
94     delete c;
95 
96     test1.remove(41, 8);
97     expectedValue = "Now is the time for all good men to come to the aid of the party.\n";
98     if (test1 != expectedValue)
99         errln("remove() failed:  expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
100 
101     test1.replace(58, 6, "ir country");
102     expectedValue = "Now is the time for all good men to come to the aid of their country.\n";
103     if (test1 != expectedValue)
104         errln("replace() failed:  expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
105 
106     UChar     temp[80];
107     test1.extract(0, 15, temp);
108 
109     UnicodeString       test2(temp, 15);
110 
111     expectedValue = "Now is the time";
112     if (test2 != expectedValue)
113         errln("extract() failed:  expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
114 
115     test2 += " for me to go!\n";
116     expectedValue = "Now is the time for me to go!\n";
117     if (test2 != expectedValue)
118         errln("operator+=() failed:  expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
119 
120     if (test1.length() != 70)
121         errln(UnicodeString("length() failed: expected 70, got ") + test1.length());
122     if (test2.length() != 30)
123         errln(UnicodeString("length() failed: expected 30, got ") + test2.length());
124 
125     UnicodeString test3;
126     test3.append((UChar32)0x20402);
127     if(test3 != CharsToUnicodeString("\\uD841\\uDC02")){
128         errln((UnicodeString)"append failed for UChar32, expected \"\\\\ud841\\\\udc02\", got " + prettify(test3));
129     }
130     if(test3.length() != 2){
131         errln(UnicodeString("append or length failed for UChar32, expected 2, got ") + test3.length());
132     }
133     test3.append((UChar32)0x0074);
134     if(test3 != CharsToUnicodeString("\\uD841\\uDC02t")){
135         errln((UnicodeString)"append failed for UChar32, expected \"\\\\uD841\\\\uDC02t\", got " + prettify(test3));
136     }
137     if(test3.length() != 3){
138         errln((UnicodeString)"append or length failed for UChar32, expected 2, got " + test3.length());
139     }
140 
141     // test some UChar32 overloads
142     if( test3.setTo((UChar32)0x10330).length() != 2 ||
143         test3.insert(0, (UChar32)0x20100).length() != 4 ||
144         test3.replace(2, 2, (UChar32)0xe0061).length() != 4 ||
145         (test3 = (UChar32)0x14001).length() != 2
146     ) {
147         errln((UnicodeString)"simple UChar32 overloads for replace, insert, setTo or = failed");
148     }
149 
150     {
151         // test moveIndex32()
152         UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
153 
154         if(
155             s.moveIndex32(2, -1)!=0 ||
156             s.moveIndex32(2, 1)!=4 ||
157             s.moveIndex32(2, 2)!=5 ||
158             s.moveIndex32(5, -2)!=2 ||
159             s.moveIndex32(0, -1)!=0 ||
160             s.moveIndex32(6, 1)!=6
161         ) {
162             errln("UnicodeString::moveIndex32() failed");
163         }
164 
165         if(s.getChar32Start(1)!=0 || s.getChar32Start(2)!=2) {
166             errln("UnicodeString::getChar32Start() failed");
167         }
168 
169         if(s.getChar32Limit(1)!=2 || s.getChar32Limit(2)!=2) {
170             errln("UnicodeString::getChar32Limit() failed");
171         }
172     }
173 
174     {
175         // test new 2.2 constructors and setTo function that parallel Java's substring function.
176         UnicodeString src("Hello folks how are you?");
177         UnicodeString target1("how are you?");
178         if (target1 != UnicodeString(src, 12)) {
179             errln("UnicodeString(const UnicodeString&, int32_t) failed");
180         }
181         UnicodeString target2("folks");
182         if (target2 != UnicodeString(src, 6, 5)) {
183             errln("UnicodeString(const UnicodeString&, int32_t, int32_t) failed");
184         }
185         if (target1 != target2.setTo(src, 12)) {
186             errln("UnicodeString::setTo(const UnicodeString&, int32_t) failed");
187         }
188     }
189 
190     {
191         // op+ is new in ICU 2.8
192         UnicodeString s=UnicodeString("abc", "")+UnicodeString("def", "")+UnicodeString("ghi", "");
193         if(s!=UnicodeString("abcdefghi", "")) {
194             errln("operator+(UniStr, UniStr) failed");
195         }
196     }
197 
198     {
199         // tests for Jitterbug 2360
200         // verify that APIs with source pointer + length accept length == -1
201         // mostly test only where modified, only few functions did not already do this
202         if(UnicodeString("abc", -1, "")!=UnicodeString("abc", "")) {
203             errln("UnicodeString(codepageData, dataLength, codepage) does not work with dataLength==-1");
204         }
205 
206         UChar buffer[10]={ 0x61, 0x62, 0x20ac, 0xd900, 0xdc05, 0,   0x62, 0xffff, 0xdbff, 0xdfff };
207         UnicodeString s, t(buffer, -1, UPRV_LENGTHOF(buffer));
208 
209         if(s.setTo(buffer, -1, UPRV_LENGTHOF(buffer)).length()!=u_strlen(buffer)) {
210             errln("UnicodeString.setTo(buffer, length, capacity) does not work with length==-1");
211         }
212         if(t.length()!=u_strlen(buffer)) {
213             errln("UnicodeString(buffer, length, capacity) does not work with length==-1");
214         }
215 
216         if(0!=s.caseCompare(buffer, -1, U_FOLD_CASE_DEFAULT)) {
217             errln("UnicodeString.caseCompare(const UChar *, length, options) does not work with length==-1");
218         }
219         if(0!=s.caseCompare(0, s.length(), buffer, U_FOLD_CASE_DEFAULT)) {
220             errln("UnicodeString.caseCompare(start, _length, const UChar *, options) does not work");
221         }
222 
223         buffer[u_strlen(buffer)]=0xe4;
224         UnicodeString u(buffer, -1, UPRV_LENGTHOF(buffer));
225         if(s.setTo(buffer, -1, UPRV_LENGTHOF(buffer)).length()!=UPRV_LENGTHOF(buffer)) {
226             errln("UnicodeString.setTo(buffer without NUL, length, capacity) does not work with length==-1");
227         }
228         if(u.length()!=UPRV_LENGTHOF(buffer)) {
229             errln("UnicodeString(buffer without NUL, length, capacity) does not work with length==-1");
230         }
231 
232         static const char cs[]={ 0x61, (char)0xe4, (char)0x85, 0 };
233         UConverter *cnv;
234         UErrorCode errorCode=U_ZERO_ERROR;
235 
236         cnv=ucnv_open("ISO-8859-1", &errorCode);
237         UnicodeString v(cs, -1, cnv, errorCode);
238         ucnv_close(cnv);
239         if(v!=CharsToUnicodeString("a\\xe4\\x85")) {
240             errln("UnicodeString(const char *, length, cnv, errorCode) does not work with length==-1");
241         }
242     }
243 
244 #if U_CHARSET_IS_UTF8
245     {
246         // Test the hardcoded-UTF-8 UnicodeString optimizations.
247         static const uint8_t utf8[]={ 0x61, 0xC3, 0xA4, 0xC3, 0x9F, 0xE4, 0xB8, 0x80, 0 };
248         static const UChar utf16[]={ 0x61, 0xE4, 0xDF, 0x4E00 };
249         UnicodeString from8a = UnicodeString((const char *)utf8);
250         UnicodeString from8b = UnicodeString((const char *)utf8, (int32_t)sizeof(utf8)-1);
251         UnicodeString from16(false, utf16, UPRV_LENGTHOF(utf16));
252         if(from8a != from16 || from8b != from16) {
253             errln("UnicodeString(const char * U_CHARSET_IS_UTF8) failed");
254         }
255         char buffer[16];
256         int32_t length8=from16.extract(0, 0x7fffffff, buffer, (uint32_t)sizeof(buffer));
257         if(length8!=((int32_t)sizeof(utf8)-1) || 0!=uprv_memcmp(buffer, utf8, sizeof(utf8))) {
258             errln("UnicodeString::extract(char * U_CHARSET_IS_UTF8) failed");
259         }
260         length8=from16.extract(1, 2, buffer, (uint32_t)sizeof(buffer));
261         if(length8!=4 || buffer[length8]!=0 || 0!=uprv_memcmp(buffer, utf8+1, length8)) {
262             errln("UnicodeString::extract(substring to char * U_CHARSET_IS_UTF8) failed");
263         }
264     }
265 #endif
266 }
267 
268 void
TestCompare()269 UnicodeStringTest::TestCompare()
270 {
271     UnicodeString   test1("this is a test");
272     UnicodeString   test2("this is a test");
273     UnicodeString   test3("this is a test of the emergency broadcast system");
274     UnicodeString   test4("never say, \"this is a test\"!!");
275 
276     UnicodeString   test5((UChar)0x5000);
277     UnicodeString   test6((UChar)0x5100);
278 
279     UChar         uniChars[] = { 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73,
280                  0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0 };
281     char            chars[] = "this is a test";
282 
283     // test operator== and operator!=
284     if (test1 != test2 || test1 == test3 || test1 == test4)
285         errln("operator== or operator!= failed");
286 
287     // test operator> and operator<
288     if (test1 > test2 || test1 < test2 || !(test1 < test3) || !(test1 > test4) ||
289         !(test5 < test6)
290     ) {
291         errln("operator> or operator< failed");
292     }
293 
294     // test operator>= and operator<=
295     if (!(test1 >= test2) || !(test1 <= test2) || !(test1 <= test3) || !(test1 >= test4))
296         errln("operator>= or operator<= failed");
297 
298     // test compare(UnicodeString)
299     if (test1.compare(test2) != 0 || test1.compare(test3) >= 0 || test1.compare(test4) <= 0)
300         errln("compare(UnicodeString) failed");
301 
302     //test compare(offset, length, UnicodeString)
303     if(test1.compare(0, 14, test2) != 0 ||
304         test3.compare(0, 14, test2) != 0 ||
305         test4.compare(12, 14, test2) != 0 ||
306         test3.compare(0, 18, test1) <=0  )
307         errln("compare(offset, length, UnicodeString) fails");
308 
309     // test compare(UChar*)
310     if (test2.compare(uniChars) != 0 || test3.compare(uniChars) <= 0 || test4.compare(uniChars) >= 0)
311         errln("compare(UChar*) failed");
312 
313     // test compare(char*)
314     if (test2.compare(chars) != 0 || test3.compare(chars) <= 0 || test4.compare(chars) >= 0)
315         errln("compare(char*) failed");
316 
317     // test compare(UChar*, length)
318     if (test1.compare(uniChars, 4) <= 0 || test1.compare(uniChars, 4) <= 0)
319         errln("compare(UChar*, length) failed");
320 
321     // test compare(thisOffset, thisLength, that, thatOffset, thatLength)
322     if (test1.compare(0, 14, test2, 0, 14) != 0
323     || test1.compare(0, 14, test3, 0, 14) != 0
324     || test1.compare(0, 14, test4, 12, 14) != 0)
325         errln("1. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
326 
327     if (test1.compare(10, 4, test2, 0, 4) >= 0
328     || test1.compare(10, 4, test3, 22, 9) <= 0
329     || test1.compare(10, 4, test4, 22, 4) != 0)
330         errln("2. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
331 
332     // test compareBetween
333     if (test1.compareBetween(0, 14, test2, 0, 14) != 0 || test1.compareBetween(0, 14, test3, 0, 14) != 0
334                     || test1.compareBetween(0, 14, test4, 12, 26) != 0)
335         errln("compareBetween failed");
336 
337     if (test1.compareBetween(10, 14, test2, 0, 4) >= 0 || test1.compareBetween(10, 14, test3, 22, 31) <= 0
338                     || test1.compareBetween(10, 14, test4, 22, 26) != 0)
339         errln("compareBetween failed");
340 
341     // test compare() etc. with strings that share a buffer but are not equal
342     test2=test1; // share the buffer, length() too large for the stackBuffer
343     test2.truncate(1); // change only the length, not the buffer
344     if( test1==test2 || test1<=test2 ||
345         test1.compare(test2)<=0 ||
346         test1.compareCodePointOrder(test2)<=0 ||
347         test1.compareCodePointOrder(0, INT32_MAX, test2)<=0 ||
348         test1.compareCodePointOrder(0, INT32_MAX, test2, 0, INT32_MAX)<=0 ||
349         test1.compareCodePointOrderBetween(0, INT32_MAX, test2, 0, INT32_MAX)<=0 ||
350         test1.caseCompare(test2, U_FOLD_CASE_DEFAULT)<=0
351     ) {
352         errln("UnicodeStrings that share a buffer but have different lengths compare as equal");
353     }
354 
355     /* test compareCodePointOrder() */
356     {
357         /* these strings are in ascending order */
358         static const UChar strings[][4]={
359             { 0x61, 0 },                    /* U+0061 */
360             { 0x20ac, 0xd801, 0 },          /* U+20ac U+d801 */
361             { 0x20ac, 0xd800, 0xdc00, 0 },  /* U+20ac U+10000 */
362             { 0xd800, 0 },                  /* U+d800 */
363             { 0xd800, 0xff61, 0 },          /* U+d800 U+ff61 */
364             { 0xdfff, 0 },                  /* U+dfff */
365             { 0xff61, 0xdfff, 0 },          /* U+ff61 U+dfff */
366             { 0xff61, 0xd800, 0xdc02, 0 },  /* U+ff61 U+10002 */
367             { 0xd800, 0xdc02, 0 },          /* U+10002 */
368             { 0xd84d, 0xdc56, 0 }           /* U+23456 */
369         };
370         UnicodeString u[20]; // must be at least as long as strings[]
371         int32_t i;
372 
373         for(i=0; i<UPRV_LENGTHOF(strings); ++i) {
374             u[i]=UnicodeString(true, strings[i], -1);
375         }
376 
377         for(i=0; i<UPRV_LENGTHOF(strings)-1; ++i) {
378             if(u[i].compareCodePointOrder(u[i+1])>=0 || u[i].compareCodePointOrder(0, INT32_MAX, u[i+1].getBuffer())>=0) {
379                 errln("error: UnicodeString::compareCodePointOrder() fails for string %d and the following one\n", i);
380             }
381         }
382     }
383 
384     /* test caseCompare() */
385     {
386         static const UChar
387         _mixed[]=               { 0x61, 0x42, 0x131, 0x3a3, 0xdf,       0x130,       0x49,  0xfb03,           0xd93f, 0xdfff, 0 },
388         _otherDefault[]=        { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x69, 0x307, 0x69,  0x46, 0x66, 0x49, 0xd93f, 0xdfff, 0 },
389         _otherExcludeSpecialI[]={ 0x41, 0x62, 0x131, 0x3c3, 0x53, 0x73, 0x69,        0x131, 0x66, 0x46, 0x69, 0xd93f, 0xdfff, 0 },
390         _different[]=           { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x130,       0x49,  0x46, 0x66, 0x49, 0xd93f, 0xdffd, 0 };
391 
392         UnicodeString
393             mixed(true, _mixed, -1),
394             otherDefault(true, _otherDefault, -1),
395             otherExcludeSpecialI(true, _otherExcludeSpecialI, -1),
396             different(true, _different, -1);
397 
398         int8_t result;
399 
400         /* test caseCompare() */
401         result=mixed.caseCompare(otherDefault, U_FOLD_CASE_DEFAULT);
402         if(result!=0 || 0!=mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_DEFAULT)) {
403             errln("error: mixed.caseCompare(other, default)=%ld instead of 0\n", result);
404         }
405         result=mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
406         if(result!=0) {
407             errln("error: mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=%ld instead of 0\n", result);
408         }
409         result=mixed.caseCompare(otherDefault, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
410         if(result==0 || 0==mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_EXCLUDE_SPECIAL_I)) {
411             errln("error: mixed.caseCompare(other, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=0 instead of !=0\n");
412         }
413 
414         /* test caseCompare() */
415         result=mixed.caseCompare(different, U_FOLD_CASE_DEFAULT);
416         if(result<=0) {
417             errln("error: mixed.caseCompare(different, default)=%ld instead of positive\n", result);
418         }
419 
420         /* test caseCompare() - include the folded sharp s (U+00df) with different lengths */
421         result=mixed.caseCompare(1, 4, different, 1, 5, U_FOLD_CASE_DEFAULT);
422         if(result!=0 || 0!=mixed.caseCompareBetween(1, 5, different, 1, 6, U_FOLD_CASE_DEFAULT)) {
423             errln("error: mixed.caseCompare(mixed, 1, 4, different, 1, 5, default)=%ld instead of 0\n", result);
424         }
425 
426         /* test caseCompare() - stop in the middle of the sharp s (U+00df) */
427         result=mixed.caseCompare(1, 4, different, 1, 4, U_FOLD_CASE_DEFAULT);
428         if(result<=0) {
429             errln("error: mixed.caseCompare(1, 4, different, 1, 4, default)=%ld instead of positive\n", result);
430         }
431     }
432 
433     // test that srcLength=-1 is handled in functions that
434     // take input const UChar */int32_t srcLength (j785)
435     {
436         static const UChar u[]={ 0x61, 0x308, 0x62, 0 };
437         UnicodeString s=UNICODE_STRING("a\\u0308b", 8).unescape();
438 
439         if(s.compare(u, -1)!=0 || s.compare(0, 999, u, 0, -1)!=0) {
440             errln("error UnicodeString::compare(..., const UChar *, srcLength=-1) does not work");
441         }
442 
443         if(s.compareCodePointOrder(u, -1)!=0 || s.compareCodePointOrder(0, 999, u, 0, -1)!=0) {
444             errln("error UnicodeString::compareCodePointOrder(..., const UChar *, srcLength=-1, ...) does not work");
445         }
446 
447         if(s.caseCompare(u, -1, U_FOLD_CASE_DEFAULT)!=0 || s.caseCompare(0, 999, u, 0, -1, U_FOLD_CASE_DEFAULT)!=0) {
448             errln("error UnicodeString::caseCompare(..., const UChar *, srcLength=-1, ...) does not work");
449         }
450 
451         if(s.indexOf(u, 1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0)!=1) {
452             errln("error UnicodeString::indexOf(const UChar *, srcLength=-1, ...) does not work");
453         }
454 
455         if(s.lastIndexOf(u, 1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0)!=1) {
456             errln("error UnicodeString::lastIndexOf(const UChar *, srcLength=-1, ...) does not work");
457         }
458 
459         UnicodeString s2, s3;
460         s2.replace(0, 0, u+1, -1);
461         s3.replace(0, 0, u, 1, -1);
462         if(s.compare(1, 999, s2)!=0 || s2!=s3) {
463             errln("error UnicodeString::replace(..., const UChar *, srcLength=-1, ...) does not work");
464         }
465     }
466 }
467 
468 void
TestExtract()469 UnicodeStringTest::TestExtract()
470 {
471     UnicodeString  test1("Now is the time for all good men to come to the aid of their country.", "");
472     UnicodeString  test2;
473     UChar          test3[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
474     char           test4[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
475     UnicodeString  test5;
476     char           test6[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
477 
478     test1.extract(11, 12, test2);
479     test1.extract(11, 12, test3);
480     if (test1.extract(11, 12, test4) != 12 || test4[12] != 0) {
481         errln("UnicodeString.extract(char *) failed to return the correct size of destination buffer.");
482     }
483 
484     // test proper pinning in extractBetween()
485     test1.extractBetween(-3, 7, test5);
486     if(test5!=UNICODE_STRING("Now is ", 7)) {
487         errln("UnicodeString.extractBetween(-3, 7) did not pin properly.");
488     }
489 
490     test1.extractBetween(11, 23, test5);
491     if (test1.extract(60, 71, test6) != 9) {
492         errln("UnicodeString.extract() failed to return the correct size of destination buffer for end of buffer.");
493     }
494     if (test1.extract(11, 12, test6) != 12) {
495         errln("UnicodeString.extract() failed to return the correct size of destination buffer.");
496     }
497 
498     // convert test4 back to Unicode for comparison
499     UnicodeString test4b(test4, 12);
500 
501     if (test1.extract(11, 12, (char *)NULL) != 12) {
502         errln("UnicodeString.extract(NULL) failed to return the correct size of destination buffer.");
503     }
504     if (test1.extract(11, -1, test6) != 0) {
505         errln("UnicodeString.extract(-1) failed to stop reading the string.");
506     }
507 
508     for (int32_t i = 0; i < 12; i++) {
509         if (test1.charAt((int32_t)(11 + i)) != test2.charAt(i)) {
510             errln(UnicodeString("extracting into a UnicodeString failed at position ") + i);
511             break;
512         }
513         if (test1.charAt((int32_t)(11 + i)) != test3[i]) {
514             errln(UnicodeString("extracting into an array of UChar failed at position ") + i);
515             break;
516         }
517         if (((char)test1.charAt((int32_t)(11 + i))) != test4b.charAt(i)) {
518             errln(UnicodeString("extracting into an array of char failed at position ") + i);
519             break;
520         }
521         if (test1.charAt((int32_t)(11 + i)) != test5.charAt(i)) {
522             errln(UnicodeString("extracting with extractBetween failed at position ") + i);
523             break;
524         }
525     }
526 
527     // test preflighting and overflows with invariant conversion
528     if (test1.extract(0, 10, (char *)NULL, "") != 10) {
529         errln("UnicodeString.extract(0, 10, (char *)NULL, \"\") != 10");
530     }
531 
532     test4[2] = (char)0xff;
533     if (test1.extract(0, 10, test4, 2, "") != 10) {
534         errln("UnicodeString.extract(0, 10, test4, 2, \"\") != 10");
535     }
536     if (test4[2] != (char)0xff) {
537         errln("UnicodeString.extract(0, 10, test4, 2, \"\") overwrote test4[2]");
538     }
539 
540     {
541         // test new, NUL-terminating extract() function
542         UnicodeString s("terminate", "");
543         UChar dest[20]={
544             0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5,
545             0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5
546         };
547         UErrorCode errorCode;
548         int32_t length;
549 
550         errorCode=U_ZERO_ERROR;
551         length=s.extract((UChar *)NULL, 0, errorCode);
552         if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) {
553             errln("UnicodeString.extract(NULL, 0)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)", length, s.length(), u_errorName(errorCode));
554         }
555 
556         errorCode=U_ZERO_ERROR;
557         length=s.extract(dest, s.length()-1, errorCode);
558         if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) {
559             errln("UnicodeString.extract(dest too short)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)",
560                 length, u_errorName(errorCode), s.length());
561         }
562 
563         errorCode=U_ZERO_ERROR;
564         length=s.extract(dest, s.length(), errorCode);
565         if(errorCode!=U_STRING_NOT_TERMINATED_WARNING || length!=s.length()) {
566             errln("UnicodeString.extract(dest just right without NUL)==%d (%s) expected %d (U_STRING_NOT_TERMINATED_WARNING)",
567                 length, u_errorName(errorCode), s.length());
568         }
569         if(dest[length-1]!=s[length-1] || dest[length]!=0xa5) {
570             errln("UnicodeString.extract(dest just right without NUL) did not extract the string correctly");
571         }
572 
573         errorCode=U_ZERO_ERROR;
574         length=s.extract(dest, s.length()+1, errorCode);
575         if(errorCode!=U_ZERO_ERROR || length!=s.length()) {
576             errln("UnicodeString.extract(dest large enough)==%d (%s) expected %d (U_ZERO_ERROR)",
577                 length, u_errorName(errorCode), s.length());
578         }
579         if(dest[length-1]!=s[length-1] || dest[length]!=0 || dest[length+1]!=0xa5) {
580             errln("UnicodeString.extract(dest large enough) did not extract the string correctly");
581         }
582     }
583 
584     {
585         // test new UConverter extract() and constructor
586         UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
587         char buffer[32];
588         static const char expect[]={
589             (char)0xf0, (char)0xaf, (char)0xa6, (char)0x99,
590             (char)0xf0, (char)0x9d, (char)0x85, (char)0x9f,
591             (char)0xc3, (char)0x84,
592             (char)0xe1, (char)0xbb, (char)0x90
593         };
594         UErrorCode errorCode=U_ZERO_ERROR;
595         UConverter *cnv=ucnv_open("UTF-8", &errorCode);
596         int32_t length;
597 
598         if(U_SUCCESS(errorCode)) {
599             // test preflighting
600             if( (length=s.extract(NULL, 0, cnv, errorCode))!=13 ||
601                 errorCode!=U_BUFFER_OVERFLOW_ERROR
602             ) {
603                 errln("UnicodeString::extract(NULL, UConverter) preflighting failed (length=%ld, %s)",
604                       length, u_errorName(errorCode));
605             }
606             errorCode=U_ZERO_ERROR;
607             if( (length=s.extract(buffer, 2, cnv, errorCode))!=13 ||
608                 errorCode!=U_BUFFER_OVERFLOW_ERROR
609             ) {
610                 errln("UnicodeString::extract(too small, UConverter) preflighting failed (length=%ld, %s)",
611                       length, u_errorName(errorCode));
612             }
613 
614             // try error cases
615             errorCode=U_ZERO_ERROR;
616             if( s.extract(NULL, 2, cnv, errorCode)==13 || U_SUCCESS(errorCode)) {
617                 errln("UnicodeString::extract(UConverter) succeeded with an illegal destination");
618             }
619             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
620             if( s.extract(NULL, 0, cnv, errorCode)==13 || U_SUCCESS(errorCode)) {
621                 errln("UnicodeString::extract(UConverter) succeeded with a previous error code");
622             }
623             errorCode=U_ZERO_ERROR;
624 
625             // extract for real
626             if( (length=s.extract(buffer, sizeof(buffer), cnv, errorCode))!=13 ||
627                 uprv_memcmp(buffer, expect, 13)!=0 ||
628                 buffer[13]!=0 ||
629                 U_FAILURE(errorCode)
630             ) {
631                 errln("UnicodeString::extract(UConverter) conversion failed (length=%ld, %s)",
632                       length, u_errorName(errorCode));
633             }
634             // Test again with just the converter name.
635             if( (length=s.extract(0, s.length(), buffer, sizeof(buffer), "UTF-8"))!=13 ||
636                 uprv_memcmp(buffer, expect, 13)!=0 ||
637                 buffer[13]!=0 ||
638                 U_FAILURE(errorCode)
639             ) {
640                 errln("UnicodeString::extract(\"UTF-8\") conversion failed (length=%ld, %s)",
641                       length, u_errorName(errorCode));
642             }
643 
644             // try the constructor
645             UnicodeString t(expect, sizeof(expect), cnv, errorCode);
646             if(U_FAILURE(errorCode) || s!=t) {
647                 errln("UnicodeString(UConverter) conversion failed (%s)",
648                       u_errorName(errorCode));
649             }
650 
651             ucnv_close(cnv);
652         }
653     }
654 }
655 
656 void
TestRemoveReplace()657 UnicodeStringTest::TestRemoveReplace()
658 {
659     UnicodeString   test1("The rain in Spain stays mainly on the plain");
660     UnicodeString   test2("eat SPAMburgers!");
661     UChar         test3[] = { 0x53, 0x50, 0x41, 0x4d, 0x4d, 0 };
662     char            test4[] = "SPAM";
663     UnicodeString&  test5 = test1;
664 
665     test1.replace(4, 4, test2, 4, 4);
666     test1.replace(12, 5, test3, 4);
667     test3[4] = 0;
668     test1.replace(17, 4, test3);
669     test1.replace(23, 4, test4);
670     test1.replaceBetween(37, 42, test2, 4, 8);
671 
672     if (test1 != "The SPAM in SPAM SPAMs SPAMly on the SPAM")
673         errln("One of the replace methods failed:\n"
674               "  expected \"The SPAM in SPAM SPAMs SPAMly on the SPAM\",\n"
675               "  got \"" + test1 + "\"");
676 
677     test1.remove(21, 1);
678     test1.removeBetween(26, 28);
679 
680     if (test1 != "The SPAM in SPAM SPAM SPAM on the SPAM")
681         errln("One of the remove methods failed:\n"
682               "  expected \"The SPAM in SPAM SPAM SPAM on the SPAM\",\n"
683               "  got \"" + test1 + "\"");
684 
685     for (int32_t i = 0; i < test1.length(); i++) {
686         if (test5[i] != 0x53 && test5[i] != 0x50 && test5[i] != 0x41 && test5[i] != 0x4d && test5[i] != 0x20) {
687             test1.setCharAt(i, 0x78);
688         }
689     }
690 
691     if (test1 != "xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM")
692         errln("One of the remove methods failed:\n"
693               "  expected \"xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM\",\n"
694               "  got \"" + test1 + "\"");
695 
696     test1.remove();
697     if (test1.length() != 0)
698         errln("Remove() failed: expected empty string, got \"" + test1 + "\"");
699 }
700 
701 void
TestSearching()702 UnicodeStringTest::TestSearching()
703 {
704     UnicodeString test1("test test ttest tetest testesteststt");
705     UnicodeString test2("test");
706     UChar testChar = 0x74;
707 
708     UChar32 testChar32 = 0x20402;
709     UChar testData[]={
710         //   0       1       2       3       4       5       6       7
711         0xd841, 0xdc02, 0x0071, 0xdc02, 0xd841, 0x0071, 0xd841, 0xdc02,
712 
713         //   8       9      10      11      12      13      14      15
714         0x0071, 0x0072, 0xd841, 0xdc02, 0x0071, 0xd841, 0xdc02, 0x0071,
715 
716         //  16      17      18      19
717         0xdc02, 0xd841, 0x0073, 0x0000
718     };
719     UnicodeString test3(testData);
720     UnicodeString test4(testChar32);
721 
722     uint16_t occurrences = 0;
723     int32_t startPos = 0;
724     for ( ;
725           startPos != -1 && startPos < test1.length();
726           (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
727         ;
728     if (occurrences != 6)
729         errln(UnicodeString("indexOf failed: expected to find 6 occurrences, found ") + occurrences);
730 
731     for ( occurrences = 0, startPos = 10;
732           startPos != -1 && startPos < test1.length();
733           (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
734         ;
735     if (occurrences != 4)
736         errln(UnicodeString("indexOf with starting offset failed: "
737                             "expected to find 4 occurrences, found ") + occurrences);
738 
739     int32_t endPos = 28;
740     for ( occurrences = 0, startPos = 5;
741           startPos != -1 && startPos < test1.length();
742           (startPos = test1.indexOf(test2, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
743         ;
744     if (occurrences != 4)
745         errln(UnicodeString("indexOf with starting and ending offsets failed: "
746                             "expected to find 4 occurrences, found ") + occurrences);
747 
748     //using UChar32 string
749     for ( startPos=0, occurrences=0;
750           startPos != -1 && startPos < test3.length();
751           (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
752         ;
753     if (occurrences != 4)
754         errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences);
755 
756     for ( startPos=10, occurrences=0;
757           startPos != -1 && startPos < test3.length();
758           (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
759         ;
760     if (occurrences != 2)
761         errln(UnicodeString("indexOf failed: expected to find 2 occurrences, found ") + occurrences);
762     //---
763 
764     for ( occurrences = 0, startPos = 0;
765           startPos != -1 && startPos < test1.length();
766           (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
767         ;
768     if (occurrences != 16)
769         errln(UnicodeString("indexOf with character failed: "
770                             "expected to find 16 occurrences, found ") + occurrences);
771 
772     for ( occurrences = 0, startPos = 10;
773           startPos != -1 && startPos < test1.length();
774           (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
775         ;
776     if (occurrences != 12)
777         errln(UnicodeString("indexOf with character & start offset failed: "
778                             "expected to find 12 occurrences, found ") + occurrences);
779 
780     for ( occurrences = 0, startPos = 5, endPos = 28;
781           startPos != -1 && startPos < test1.length();
782           (startPos = test1.indexOf(testChar, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
783         ;
784     if (occurrences != 10)
785         errln(UnicodeString("indexOf with character & start & end offsets failed: "
786                             "expected to find 10 occurrences, found ") + occurrences);
787 
788     //testing for UChar32
789     UnicodeString subString;
790     for( occurrences =0, startPos=0; startPos < test3.length(); startPos +=1){
791         subString.append(test3, startPos, test3.length());
792         if(subString.indexOf(testChar32) != -1 ){
793              ++occurrences;
794         }
795         subString.remove();
796     }
797     if (occurrences != 14)
798         errln((UnicodeString)"indexOf failed: expected to find 14 occurrences, found " + occurrences);
799 
800     for ( occurrences = 0, startPos = 0;
801           startPos != -1 && startPos < test3.length();
802           (startPos = test3.indexOf(testChar32, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
803         ;
804     if (occurrences != 4)
805         errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences);
806 
807     endPos=test3.length();
808     for ( occurrences = 0, startPos = 5;
809           startPos != -1 && startPos < test3.length();
810           (startPos = test3.indexOf(testChar32, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
811         ;
812     if (occurrences != 3)
813         errln((UnicodeString)"indexOf with character & start & end offsets failed: expected to find 2 occurrences, found " + occurrences);
814     //---
815 
816     if(test1.lastIndexOf(test2)!=29) {
817         errln("test1.lastIndexOf(test2)!=29");
818     }
819 
820     if(test1.lastIndexOf(test2, 15)!=29 || test1.lastIndexOf(test2, 29)!=29 || test1.lastIndexOf(test2, 30)!=-1) {
821         errln("test1.lastIndexOf(test2, start) failed");
822     }
823 
824     for ( occurrences = 0, startPos = 32;
825           startPos != -1;
826           (startPos = test1.lastIndexOf(test2, 5, startPos - 5)) != -1 ? ++occurrences : 0)
827         ;
828     if (occurrences != 4)
829         errln(UnicodeString("lastIndexOf with starting and ending offsets failed: "
830                             "expected to find 4 occurrences, found ") + occurrences);
831 
832     for ( occurrences = 0, startPos = 32;
833           startPos != -1;
834           (startPos = test1.lastIndexOf(testChar, 5, startPos - 5)) != -1 ? ++occurrences : 0)
835         ;
836     if (occurrences != 11)
837         errln(UnicodeString("lastIndexOf with character & start & end offsets failed: "
838                             "expected to find 11 occurrences, found ") + occurrences);
839 
840     //testing UChar32
841     startPos=test3.length();
842     for ( occurrences = 0;
843           startPos != -1;
844           (startPos = test3.lastIndexOf(testChar32, 5, startPos - 5)) != -1 ? ++occurrences : 0)
845         ;
846     if (occurrences != 3)
847         errln((UnicodeString)"lastIndexOf with character & start & end offsets failed: expected to find 3 occurrences, found " + occurrences);
848 
849 
850     for ( occurrences = 0, endPos = test3.length();  endPos > 0; endPos -= 1){
851         subString.remove();
852         subString.append(test3, 0, endPos);
853         if(subString.lastIndexOf(testChar32) != -1 ){
854             ++occurrences;
855         }
856     }
857     if (occurrences != 18)
858         errln((UnicodeString)"indexOf failed: expected to find 18 occurrences, found " + occurrences);
859     //---
860 
861     // test that indexOf(UChar32) and lastIndexOf(UChar32)
862     // do not find surrogate code points when they are part of matched pairs
863     // (= part of supplementary code points)
864     // Jitterbug 1542
865     if(test3.indexOf((UChar32)0xd841) != 4 || test3.indexOf((UChar32)0xdc02) != 3) {
866         errln("error: UnicodeString::indexOf(UChar32 surrogate) finds a partial supplementary code point");
867     }
868     if( UnicodeString(test3, 0, 17).lastIndexOf((UChar)0xd841, 0) != 4 ||
869         UnicodeString(test3, 0, 17).lastIndexOf((UChar32)0xd841, 2) != 4 ||
870         test3.lastIndexOf((UChar32)0xd841, 0, 17) != 4 || test3.lastIndexOf((UChar32)0xdc02, 0, 17) != 16
871     ) {
872         errln("error: UnicodeString::lastIndexOf(UChar32 surrogate) finds a partial supplementary code point");
873     }
874 }
875 
876 void
TestSpacePadding()877 UnicodeStringTest::TestSpacePadding()
878 {
879     UnicodeString test1("hello");
880     UnicodeString test2("   there");
881     UnicodeString test3("Hi!  How ya doin'?  Beautiful day, isn't it?");
882     UnicodeString test4;
883     UBool returnVal;
884     UnicodeString expectedValue;
885 
886     returnVal = test1.padLeading(15);
887     expectedValue = "          hello";
888     if (returnVal == false || test1 != expectedValue)
889         errln("padLeading() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
890 
891     returnVal = test2.padTrailing(15);
892     expectedValue = "   there       ";
893     if (returnVal == false || test2 != expectedValue)
894         errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
895 
896     expectedValue = test3;
897     returnVal = test3.padTrailing(15);
898     if (returnVal == true || test3 != expectedValue)
899         errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
900 
901     expectedValue = "hello";
902     test4.setTo(test1).trim();
903 
904     if (test4 != expectedValue || test1 == expectedValue || test4 != expectedValue)
905         errln("trim(UnicodeString&) failed");
906 
907     test1.trim();
908     if (test1 != expectedValue)
909         errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
910 
911     test2.trim();
912     expectedValue = "there";
913     if (test2 != expectedValue)
914         errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
915 
916     test3.trim();
917     expectedValue = "Hi!  How ya doin'?  Beautiful day, isn't it?";
918     if (test3 != expectedValue)
919         errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
920 
921     returnVal = test1.truncate(15);
922     expectedValue = "hello";
923     if (returnVal == true || test1 != expectedValue)
924         errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
925 
926     returnVal = test2.truncate(15);
927     expectedValue = "there";
928     if (returnVal == true || test2 != expectedValue)
929         errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
930 
931     returnVal = test3.truncate(15);
932     expectedValue = "Hi!  How ya doi";
933     if (returnVal == false || test3 != expectedValue)
934         errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
935 }
936 
937 void
TestPrefixAndSuffix()938 UnicodeStringTest::TestPrefixAndSuffix()
939 {
940     UnicodeString test1("Now is the time for all good men to come to the aid of their country.");
941     UnicodeString test2("Now");
942     UnicodeString test3("country.");
943     UnicodeString test4("count");
944 
945     if (!test1.startsWith(test2) || !test1.startsWith(test2, 0, test2.length())) {
946         errln("startsWith() failed: \"" + test2 + "\" should be a prefix of \"" + test1 + "\".");
947     }
948 
949     if (test1.startsWith(test3) ||
950         test1.startsWith(test3.getBuffer(), test3.length()) ||
951         test1.startsWith(test3.getTerminatedBuffer(), 0, -1)
952     ) {
953         errln("startsWith() failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test1 + "\".");
954     }
955 
956     if (test1.endsWith(test2)) {
957         errln("endsWith() failed: \"" + test2 + "\" shouldn't be a suffix of \"" + test1 + "\".");
958     }
959 
960     if (!test1.endsWith(test3)) {
961         errln("endsWith(test3) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
962     }
963     if (!test1.endsWith(test3, 0, INT32_MAX)) {
964         errln("endsWith(test3, 0, INT32_MAX) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
965     }
966 
967     if(!test1.endsWith(test3.getBuffer(), test3.length())) {
968         errln("endsWith(test3.getBuffer(), test3.length()) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
969     }
970     if(!test1.endsWith(test3.getTerminatedBuffer(), 0, -1)) {
971         errln("endsWith(test3.getTerminatedBuffer(), 0, -1) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
972     }
973 
974     if (!test3.startsWith(test4)) {
975         errln("endsWith(test4) failed: \"" + test4 + "\" should be a prefix of \"" + test3 + "\".");
976     }
977 
978     if (test4.startsWith(test3)) {
979         errln("startsWith(test3) failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test4 + "\".");
980     }
981 }
982 
983 void
TestStartsWithAndEndsWithNulTerminated()984 UnicodeStringTest::TestStartsWithAndEndsWithNulTerminated() {
985     UnicodeString test("abcde");
986     const UChar ab[] = { 0x61, 0x62, 0 };
987     const UChar de[] = { 0x64, 0x65, 0 };
988     assertTrue("abcde.startsWith(ab, -1)", test.startsWith(ab, -1));
989     assertTrue("abcde.startsWith(ab, 0, -1)", test.startsWith(ab, 0, -1));
990     assertTrue("abcde.endsWith(de, -1)", test.endsWith(de, -1));
991     assertTrue("abcde.endsWith(de, 0, -1)", test.endsWith(de, 0, -1));
992 }
993 
994 void
TestFindAndReplace()995 UnicodeStringTest::TestFindAndReplace()
996 {
997     UnicodeString test1("One potato, two potato, three potato, four\n");
998     UnicodeString test2("potato");
999     UnicodeString test3("MISSISSIPPI");
1000 
1001     UnicodeString expectedValue;
1002 
1003     test1.findAndReplace(test2, test3);
1004     expectedValue = "One MISSISSIPPI, two MISSISSIPPI, three MISSISSIPPI, four\n";
1005     if (test1 != expectedValue)
1006         errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
1007     test1.findAndReplace(2, 32, test3, test2);
1008     expectedValue = "One potato, two potato, three MISSISSIPPI, four\n";
1009     if (test1 != expectedValue)
1010         errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
1011 }
1012 
1013 void
TestReverse()1014 UnicodeStringTest::TestReverse()
1015 {
1016     UnicodeString test("backwards words say to used I");
1017 
1018     test.reverse();
1019     test.reverse(2, 4);
1020     test.reverse(7, 2);
1021     test.reverse(10, 3);
1022     test.reverse(14, 5);
1023     test.reverse(20, 9);
1024 
1025     if (test != "I used to say words backwards")
1026         errln("reverse() failed:  Expected \"I used to say words backwards\",\n got \""
1027             + test + "\"");
1028 
1029     test=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
1030     test.reverse();
1031     if(test.char32At(0)!=0x1ed0 || test.char32At(1)!=0xc4 || test.char32At(2)!=0x1d15f || test.char32At(4)!=0x2f999) {
1032         errln("reverse() failed with supplementary characters");
1033     }
1034 
1035     // Test case for ticket #8091:
1036     // UnicodeString::reverse() failed to see a lead surrogate in the middle of
1037     // an odd-length string that contains no other lead surrogates.
1038     test=UNICODE_STRING_SIMPLE("ab\\U0001F4A9e").unescape();
1039     UnicodeString expected=UNICODE_STRING_SIMPLE("e\\U0001F4A9ba").unescape();
1040     test.reverse();
1041     if(test!=expected) {
1042         errln("reverse() failed with only lead surrogate in the middle");
1043     }
1044 }
1045 
1046 void
TestMiscellaneous()1047 UnicodeStringTest::TestMiscellaneous()
1048 {
1049     UnicodeString   test1("This is a test");
1050     UnicodeString   test2("This is a test");
1051     UnicodeString   test3("Me too!");
1052 
1053     // test getBuffer(minCapacity) and releaseBuffer()
1054     test1=UnicodeString(); // make sure that it starts with its stackBuffer
1055     UChar *p=test1.getBuffer(20);
1056     if(test1.getCapacity()<20) {
1057         errln("UnicodeString::getBuffer(20).getCapacity()<20");
1058     }
1059 
1060     test1.append((UChar)7); // must not be able to modify the string here
1061     test1.setCharAt(3, 7);
1062     test1.reverse();
1063     if( test1.length()!=0 ||
1064         test1.charAt(0)!=0xffff || test1.charAt(3)!=0xffff ||
1065         test1.getBuffer(10)!=0 || test1.getBuffer()!=0
1066     ) {
1067         errln("UnicodeString::getBuffer(minCapacity) allows read or write access to the UnicodeString");
1068     }
1069 
1070     p[0]=1;
1071     p[1]=2;
1072     p[2]=3;
1073     test1.releaseBuffer(3);
1074     test1.append((UChar)4);
1075 
1076     if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) {
1077         errln("UnicodeString::releaseBuffer(newLength) does not properly reallow access to the UnicodeString");
1078     }
1079 
1080     // test releaseBuffer() without getBuffer(minCapacity) - must not have any effect
1081     test1.releaseBuffer(1);
1082     if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) {
1083         errln("UnicodeString::releaseBuffer(newLength) without getBuffer(minCapacity) changed the UnicodeString");
1084     }
1085 
1086     // test getBuffer(const)
1087     const UChar *q=test1.getBuffer(), *r=test1.getBuffer();
1088     if( test1.length()!=4 ||
1089         q[0]!=1 || q[1]!=2 || q[2]!=3 || q[3]!=4 ||
1090         r[0]!=1 || r[1]!=2 || r[2]!=3 || r[3]!=4
1091     ) {
1092         errln("UnicodeString::getBuffer(const) does not return a usable buffer pointer");
1093     }
1094 
1095     // test releaseBuffer() with a NUL-terminated buffer
1096     test1.getBuffer(20)[2]=0;
1097     test1.releaseBuffer(); // implicit -1
1098     if(test1.length()!=2 || test1.charAt(0)!=1 || test1.charAt(1) !=2) {
1099         errln("UnicodeString::releaseBuffer(-1) does not properly set the length of the UnicodeString");
1100     }
1101 
1102     // test releaseBuffer() with a non-NUL-terminated buffer
1103     p=test1.getBuffer(256);
1104     for(int32_t i=0; i<test1.getCapacity(); ++i) {
1105         p[i]=(UChar)1;      // fill the buffer with all non-NUL code units
1106     }
1107     test1.releaseBuffer();  // implicit -1
1108     if(test1.length()!=test1.getCapacity() || test1.charAt(1)!=1 || test1.charAt(100)!=1 || test1.charAt(test1.getCapacity()-1)!=1) {
1109         errln("UnicodeString::releaseBuffer(-1 but no NUL) does not properly set the length of the UnicodeString");
1110     }
1111 
1112     // test getTerminatedBuffer()
1113     test1=UnicodeString("This is another test.", "");
1114     test2=UnicodeString("This is another test.", "");
1115     q=test1.getTerminatedBuffer();
1116     if(q[test1.length()]!=0 || test1!=test2 || test2.compare(q, -1)!=0) {
1117         errln("getTerminatedBuffer()[length]!=0");
1118     }
1119 
1120     const UChar u[]={ 5, 6, 7, 8, 0 };
1121     test1.setTo(false, u, 3);
1122     q=test1.getTerminatedBuffer();
1123     if(q==u || q[0]!=5 || q[1]!=6 || q[2]!=7 || q[3]!=0) {
1124         errln("UnicodeString(u[3]).getTerminatedBuffer() returns a bad buffer");
1125     }
1126 
1127     test1.setTo(true, u, -1);
1128     q=test1.getTerminatedBuffer();
1129     if(q!=u || test1.length()!=4 || q[3]!=8 || q[4]!=0) {
1130         errln("UnicodeString(u[-1]).getTerminatedBuffer() returns a bad buffer");
1131     }
1132 
1133     // NOTE: Some compilers will optimize u"la" to point to the same static memory
1134     // as u" lila", offset by 3 code units
1135     test1=UnicodeString(true, u"la", 2);
1136     test1.append(UnicodeString(true, u" lila", 5).getTerminatedBuffer(), 0, -1);
1137     assertEquals("UnicodeString::append(const UChar *, start, length) failed",
1138         u"la lila", test1);
1139 
1140     test1.insert(3, UnicodeString(true, u"dudum ", 6), 0, INT32_MAX);
1141     assertEquals("UnicodeString::insert(start, const UniStr &, start, length) failed",
1142         u"la dudum lila", test1);
1143 
1144     static const UChar ucs[]={ 0x68, 0x6d, 0x20, 0 };
1145     test1.insert(9, ucs, -1);
1146     assertEquals("UnicodeString::insert(start, const UChar *, length) failed",
1147         u"la dudum hm lila", test1);
1148 
1149     test1.replace(9, 2, (UChar)0x2b);
1150     assertEquals("UnicodeString::replace(start, length, UChar) failed",
1151         u"la dudum + lila", test1);
1152 
1153     if(test1.hasMetaData() || UnicodeString().hasMetaData()) {
1154         errln("UnicodeString::hasMetaData() returns true");
1155     }
1156 
1157     // test getTerminatedBuffer() on a truncated, shared, heap-allocated string
1158     test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
1159     test1.truncate(36);  // ensure length()<getCapacity()
1160     test2=test1;  // share the buffer
1161     test1.truncate(5);
1162     if(test1.length()!=5 || test1.getTerminatedBuffer()[5]!=0) {
1163         errln("UnicodeString(shared buffer).truncate() failed");
1164     }
1165     if(test2.length()!=36 || test2[5]!=0x66 || u_strlen(test2.getTerminatedBuffer())!=36) {
1166         errln("UnicodeString(shared buffer).truncate().getTerminatedBuffer() "
1167               "modified another copy of the string!");
1168     }
1169     test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
1170     test1.truncate(36);  // ensure length()<getCapacity()
1171     test2=test1;  // share the buffer
1172     test1.remove();
1173     if(test1.length()!=0 || test1.getTerminatedBuffer()[0]!=0) {
1174         errln("UnicodeString(shared buffer).remove() failed");
1175     }
1176     if(test2.length()!=36 || test2[0]!=0x61 || u_strlen(test2.getTerminatedBuffer())!=36) {
1177         errln("UnicodeString(shared buffer).remove().getTerminatedBuffer() "
1178               "modified another copy of the string!");
1179     }
1180 
1181     // ticket #9740
1182     test1.setTo(true, ucs, 3);
1183     assertEquals("length of read-only alias", 3, test1.length());
1184     test1.trim();
1185     assertEquals("length of read-only alias after trim()", 2, test1.length());
1186     assertEquals("length of terminated buffer of read-only alias + trim()",
1187                  2, u_strlen(test1.getTerminatedBuffer()));
1188 }
1189 
1190 void
TestStackAllocation()1191 UnicodeStringTest::TestStackAllocation()
1192 {
1193     UChar           testString[] ={
1194         0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x63, 0x72, 0x61, 0x7a, 0x79, 0x20, 0x74, 0x65, 0x73, 0x74, 0x2e, 0 };
1195     UChar           guardWord = 0x4DED;
1196     UnicodeString*  test = 0;
1197 
1198     test = new  UnicodeString(testString);
1199     if (*test != "This is a crazy test.")
1200         errln("Test string failed to initialize properly.");
1201     if (guardWord != 0x04DED)
1202         errln("Test string initialization overwrote guard word!");
1203 
1204     test->insert(8, "only ");
1205     test->remove(15, 6);
1206     if (*test != "This is only a test.")
1207         errln("Manipulation of test string failed to work right.");
1208     if (guardWord != 0x4DED)
1209         errln("Manipulation of test string overwrote guard word!");
1210 
1211     // we have to deinitialize and release the backing store by calling the destructor
1212     // explicitly, since we can't overload operator delete
1213     delete test;
1214 
1215     UChar workingBuffer[] = {
1216         0x4e, 0x6f, 0x77, 0x20, 0x69, 0x73, 0x20, 0x74, 0x68, 0x65, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x20,
1217         0x66, 0x6f, 0x72, 0x20, 0x61, 0x6c, 0x6c, 0x20, 0x6d, 0x65, 0x6e, 0x20, 0x74, 0x6f, 0x20,
1218         0x63, 0x6f, 0x6d, 0x65, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1219         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1220         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1221     UChar guardWord2 = 0x4DED;
1222 
1223     test = new UnicodeString(workingBuffer, 35, 100);
1224     if (*test != "Now is the time for all men to come")
1225         errln("Stack-allocated backing store failed to initialize correctly.");
1226     if (guardWord2 != 0x4DED)
1227         errln("Stack-allocated backing store overwrote guard word!");
1228 
1229     test->insert(24, "good ");
1230     if (*test != "Now is the time for all good men to come")
1231         errln("insert() on stack-allocated UnicodeString didn't work right");
1232     if (guardWord2 != 0x4DED)
1233         errln("insert() on stack-allocated UnicodeString overwrote guard word!");
1234 
1235     if (workingBuffer[24] != 0x67)
1236         errln("insert() on stack-allocated UnicodeString didn't affect backing store");
1237 
1238     *test += " to the aid of their country.";
1239     if (*test != "Now is the time for all good men to come to the aid of their country.")
1240         errln("Stack-allocated UnicodeString overflow didn't work");
1241     if (guardWord2 != 0x4DED)
1242         errln("Stack-allocated UnicodeString overflow overwrote guard word!");
1243 
1244     *test = "ha!";
1245     if (*test != "ha!")
1246         errln("Assignment to stack-allocated UnicodeString didn't work");
1247     if (workingBuffer[0] != 0x4e)
1248         errln("Change to UnicodeString after overflow are still affecting original buffer");
1249     if (guardWord2 != 0x4DED)
1250         errln("Change to UnicodeString after overflow overwrote guard word!");
1251 
1252     // test read-only aliasing with setTo()
1253     workingBuffer[0] = 0x20ac;
1254     workingBuffer[1] = 0x125;
1255     workingBuffer[2] = 0;
1256     test->setTo(true, workingBuffer, 2);
1257     if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x125) {
1258         errln("UnicodeString.setTo(readonly alias) does not alias correctly");
1259     }
1260 
1261     UnicodeString *c=test->clone();
1262 
1263     workingBuffer[1] = 0x109;
1264     if(test->charAt(1) != 0x109) {
1265         errln("UnicodeString.setTo(readonly alias) made a copy: did not see change in buffer");
1266     }
1267 
1268     if(c->length() != 2 || c->charAt(1) != 0x125) {
1269         errln("clone(alias) did not copy the buffer");
1270     }
1271     delete c;
1272 
1273     test->setTo(true, workingBuffer, -1);
1274     if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x109) {
1275         errln("UnicodeString.setTo(readonly alias, length -1) does not alias correctly");
1276     }
1277 
1278     test->setTo(false, workingBuffer, -1);
1279     if(!test->isBogus()) {
1280         errln("UnicodeString.setTo(unterminated readonly alias, length -1) does not result in isBogus()");
1281     }
1282 
1283     delete test;
1284 
1285     test=new UnicodeString();
1286     UChar buffer[]={0x0061, 0x0062, 0x20ac, 0x0043, 0x0042, 0x0000};
1287     test->setTo(buffer, 4, 10);
1288     if(test->length() !=4 || test->charAt(0) != 0x0061 || test->charAt(1) != 0x0062 ||
1289         test->charAt(2) != 0x20ac || test->charAt(3) != 0x0043){
1290         errln((UnicodeString)"UnicodeString.setTo(UChar*, length, capacity) does not work correctly\n" + prettify(*test));
1291     }
1292     delete test;
1293 
1294 
1295     // test the UChar32 constructor
1296     UnicodeString c32Test((UChar32)0x10ff2a);
1297     if( c32Test.length() != U16_LENGTH(0x10ff2a) ||
1298         c32Test.char32At(c32Test.length() - 1) != 0x10ff2a
1299     ) {
1300         errln("The UnicodeString(UChar32) constructor does not work with a 0x10ff2a filler");
1301     }
1302 
1303     // test the (new) capacity constructor
1304     UnicodeString capTest(5, (UChar32)0x2a, 5);
1305     if( capTest.length() != 5 * U16_LENGTH(0x2a) ||
1306         capTest.char32At(0) != 0x2a ||
1307         capTest.char32At(4) != 0x2a
1308     ) {
1309         errln("The UnicodeString capacity constructor does not work with an ASCII filler");
1310     }
1311 
1312     capTest = UnicodeString(5, (UChar32)0x10ff2a, 5);
1313     if( capTest.length() != 5 * U16_LENGTH(0x10ff2a) ||
1314         capTest.char32At(0) != 0x10ff2a ||
1315         capTest.char32At(4) != 0x10ff2a
1316     ) {
1317         errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
1318     }
1319 
1320     capTest = UnicodeString(5, (UChar32)0, 0);
1321     if(capTest.length() != 0) {
1322         errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
1323     }
1324 }
1325 
1326 /**
1327  * Test the unescape() function.
1328  */
TestUnescape(void)1329 void UnicodeStringTest::TestUnescape(void) {
1330     UnicodeString IN("abc\\u4567 \\n\\r \\U00101234xyz\\x1\\x{5289}\\x1b", -1, US_INV);
1331     UnicodeString OUT("abc");
1332     OUT.append((UChar)0x4567);
1333     OUT.append(" ");
1334     OUT.append((UChar)0xA);
1335     OUT.append((UChar)0xD);
1336     OUT.append(" ");
1337     OUT.append((UChar32)0x00101234);
1338     OUT.append("xyz");
1339     OUT.append((UChar32)1).append((UChar32)0x5289).append((UChar)0x1b);
1340     UnicodeString result = IN.unescape();
1341     if (result != OUT) {
1342         errln("FAIL: " + prettify(IN) + ".unescape() -> " +
1343               prettify(result) + ", expected " +
1344               prettify(OUT));
1345     }
1346 
1347     // test that an empty string is returned in case of an error
1348     if (!UNICODE_STRING("wrong \\u sequence", 17).unescape().isEmpty()) {
1349         errln("FAIL: unescaping of a string with an illegal escape sequence did not return an empty string");
1350     }
1351 
1352     // ICU-21648 limit backslash-uhhhh escapes to ASCII hex digits
1353     UnicodeString euro = UnicodeString(u"\\u20aC").unescape();
1354     assertEquals("ASCII Euro", u"€", euro);
1355     UnicodeString nonASCIIEuro = UnicodeString(u"\\u୨෦aC").unescape();
1356     assertTrue("unescape() accepted non-ASCII digits", nonASCIIEuro.isEmpty());
1357 }
1358 
1359 /* test code point counting functions --------------------------------------- */
1360 
1361 /* reference implementation of UnicodeString::hasMoreChar32Than() */
1362 static int32_t
_refUnicodeStringHasMoreChar32Than(const UnicodeString & s,int32_t start,int32_t length,int32_t number)1363 _refUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) {
1364     int32_t count=s.countChar32(start, length);
1365     return count>number;
1366 }
1367 
1368 /* compare the real function against the reference */
1369 void
_testUnicodeStringHasMoreChar32Than(const UnicodeString & s,int32_t start,int32_t length,int32_t number)1370 UnicodeStringTest::_testUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) {
1371     if(s.hasMoreChar32Than(start, length, number)!=_refUnicodeStringHasMoreChar32Than(s, start, length, number)) {
1372         errln("hasMoreChar32Than(%d, %d, %d)=%hd is wrong\n",
1373                 start, length, number, s.hasMoreChar32Than(start, length, number));
1374     }
1375 }
1376 
1377 void
TestCountChar32(void)1378 UnicodeStringTest::TestCountChar32(void) {
1379     {
1380         UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
1381 
1382         // test countChar32()
1383         // note that this also calls and tests u_countChar32(length>=0)
1384         if(
1385             s.countChar32()!=4 ||
1386             s.countChar32(1)!=4 ||
1387             s.countChar32(2)!=3 ||
1388             s.countChar32(2, 3)!=2 ||
1389             s.countChar32(2, 0)!=0
1390         ) {
1391             errln("UnicodeString::countChar32() failed");
1392         }
1393 
1394         // NUL-terminate the string buffer and test u_countChar32(length=-1)
1395         const UChar *buffer=s.getTerminatedBuffer();
1396         if(
1397             u_countChar32(buffer, -1)!=4 ||
1398             u_countChar32(buffer+1, -1)!=4 ||
1399             u_countChar32(buffer+2, -1)!=3 ||
1400             u_countChar32(buffer+3, -1)!=3 ||
1401             u_countChar32(buffer+4, -1)!=2 ||
1402             u_countChar32(buffer+5, -1)!=1 ||
1403             u_countChar32(buffer+6, -1)!=0
1404         ) {
1405             errln("u_countChar32(length=-1) failed");
1406         }
1407 
1408         // test u_countChar32() with bad input
1409         if(u_countChar32(NULL, 5)!=0 || u_countChar32(buffer, -2)!=0) {
1410             errln("u_countChar32(bad input) failed (returned non-zero counts)");
1411         }
1412     }
1413 
1414     /* test data and variables for hasMoreChar32Than() */
1415     static const UChar str[]={
1416         0x61, 0x62, 0xd800, 0xdc00,
1417         0xd801, 0xdc01, 0x63, 0xd802,
1418         0x64, 0xdc03, 0x65, 0x66,
1419         0xd804, 0xdc04, 0xd805, 0xdc05,
1420         0x67
1421     };
1422     UnicodeString string(str, UPRV_LENGTHOF(str));
1423     int32_t start, length, number;
1424 
1425     /* test hasMoreChar32Than() */
1426     for(length=string.length(); length>=0; --length) {
1427         for(start=0; start<=length; ++start) {
1428             for(number=-1; number<=((length-start)+2); ++number) {
1429                 _testUnicodeStringHasMoreChar32Than(string, start, length-start, number);
1430             }
1431         }
1432     }
1433 
1434     /* test hasMoreChar32Than() with pinning */
1435     for(start=-1; start<=string.length()+1; ++start) {
1436         for(number=-1; number<=((string.length()-start)+2); ++number) {
1437             _testUnicodeStringHasMoreChar32Than(string, start, 0x7fffffff, number);
1438         }
1439     }
1440 
1441     /* test hasMoreChar32Than() with a bogus string */
1442     string.setToBogus();
1443     for(length=-1; length<=1; ++length) {
1444         for(start=-1; start<=length; ++start) {
1445             for(number=-1; number<=((length-start)+2); ++number) {
1446                 _testUnicodeStringHasMoreChar32Than(string, start, length-start, number);
1447             }
1448         }
1449     }
1450 }
1451 
1452 void
TestBogus()1453 UnicodeStringTest::TestBogus() {
1454     UnicodeString   test1("This is a test");
1455     UnicodeString   test2("This is a test");
1456     UnicodeString   test3("Me too!");
1457 
1458     // test isBogus() and setToBogus()
1459     if (test1.isBogus() || test2.isBogus() || test3.isBogus()) {
1460         errln("A string returned true for isBogus()!");
1461     }
1462 
1463     // NULL pointers are treated like empty strings
1464     // use other illegal arguments to make a bogus string
1465     test3.setTo(false, test1.getBuffer(), -2);
1466     if(!test3.isBogus()) {
1467         errln("A bogus string returned false for isBogus()!");
1468     }
1469     if (test1.hashCode() != test2.hashCode() || test1.hashCode() == test3.hashCode()) {
1470         errln("hashCode() failed");
1471     }
1472     if(test3.getBuffer()!=0 || test3.getBuffer(20)!=0 || test3.getTerminatedBuffer()!=0) {
1473         errln("bogus.getBuffer()!=0");
1474     }
1475     if (test1.indexOf(test3) != -1) {
1476         errln("bogus.indexOf() != -1");
1477     }
1478     if (test1.lastIndexOf(test3) != -1) {
1479         errln("bogus.lastIndexOf() != -1");
1480     }
1481     if (test1.caseCompare(test3, U_FOLD_CASE_DEFAULT) != 1 || test3.caseCompare(test1, U_FOLD_CASE_DEFAULT) != -1) {
1482         errln("caseCompare() doesn't work with bogus strings");
1483     }
1484     if (test1.compareCodePointOrder(test3) != 1 || test3.compareCodePointOrder(test1) != -1) {
1485         errln("compareCodePointOrder() doesn't work with bogus strings");
1486     }
1487 
1488     // verify that non-assignment modifications fail and do not revive a bogus string
1489     test3.setToBogus();
1490     test3.append((UChar)0x61);
1491     if(!test3.isBogus() || test3.getBuffer()!=0) {
1492         errln("bogus.append('a') worked but must not");
1493     }
1494 
1495     test3.setToBogus();
1496     test3.findAndReplace(UnicodeString((UChar)0x61), test2);
1497     if(!test3.isBogus() || test3.getBuffer()!=0) {
1498         errln("bogus.findAndReplace() worked but must not");
1499     }
1500 
1501     test3.setToBogus();
1502     test3.trim();
1503     if(!test3.isBogus() || test3.getBuffer()!=0) {
1504         errln("bogus.trim() revived bogus but must not");
1505     }
1506 
1507     test3.setToBogus();
1508     test3.remove(1);
1509     if(!test3.isBogus() || test3.getBuffer()!=0) {
1510         errln("bogus.remove(1) revived bogus but must not");
1511     }
1512 
1513     test3.setToBogus();
1514     if(!test3.setCharAt(0, 0x62).isBogus() || !test3.isEmpty()) {
1515         errln("bogus.setCharAt(0, 'b') worked but must not");
1516     }
1517 
1518     test3.setToBogus();
1519     if(test3.truncate(1) || !test3.isBogus() || !test3.isEmpty()) {
1520         errln("bogus.truncate(1) revived bogus but must not");
1521     }
1522 
1523     // verify that assignments revive a bogus string
1524     test3.setToBogus();
1525     if(!test3.isBogus() || (test3=test1).isBogus() || test3!=test1) {
1526         errln("bogus.operator=() failed");
1527     }
1528 
1529     test3.setToBogus();
1530     if(!test3.isBogus() || test3.fastCopyFrom(test1).isBogus() || test3!=test1) {
1531         errln("bogus.fastCopyFrom() failed");
1532     }
1533 
1534     test3.setToBogus();
1535     if(!test3.isBogus() || test3.setTo(test1).isBogus() || test3!=test1) {
1536         errln("bogus.setTo(UniStr) failed");
1537     }
1538 
1539     test3.setToBogus();
1540     if(!test3.isBogus() || test3.setTo(test1, 0).isBogus() || test3!=test1) {
1541         errln("bogus.setTo(UniStr, 0) failed");
1542     }
1543 
1544     test3.setToBogus();
1545     if(!test3.isBogus() || test3.setTo(test1, 0, 0x7fffffff).isBogus() || test3!=test1) {
1546         errln("bogus.setTo(UniStr, 0, len) failed");
1547     }
1548 
1549     test3.setToBogus();
1550     if(!test3.isBogus() || test3.setTo(test1.getBuffer(), test1.length()).isBogus() || test3!=test1) {
1551         errln("bogus.setTo(const UChar *, len) failed");
1552     }
1553 
1554     test3.setToBogus();
1555     if(!test3.isBogus() || test3.setTo((UChar)0x2028).isBogus() || test3!=UnicodeString((UChar)0x2028)) {
1556         errln("bogus.setTo(UChar) failed");
1557     }
1558 
1559     test3.setToBogus();
1560     if(!test3.isBogus() || test3.setTo((UChar32)0x1d157).isBogus() || test3!=UnicodeString((UChar32)0x1d157)) {
1561         errln("bogus.setTo(UChar32) failed");
1562     }
1563 
1564     test3.setToBogus();
1565     if(!test3.isBogus() || test3.setTo(false, test1.getBuffer(), test1.length()).isBogus() || test3!=test1) {
1566         errln("bogus.setTo(readonly alias) failed");
1567     }
1568 
1569     // writable alias to another string's buffer: very bad idea, just convenient for this test
1570     test3.setToBogus();
1571     if(!test3.isBogus() ||
1572             test3.setTo(const_cast<UChar *>(test1.getBuffer()),
1573                         test1.length(), test1.getCapacity()).isBogus() ||
1574             test3!=test1) {
1575         errln("bogus.setTo(writable alias) failed");
1576     }
1577 
1578     // verify simple, documented ways to turn a bogus string into an empty one
1579     test3.setToBogus();
1580     if(!test3.isBogus() || (test3=UnicodeString()).isBogus() || !test3.isEmpty()) {
1581         errln("bogus.operator=(UnicodeString()) failed");
1582     }
1583 
1584     test3.setToBogus();
1585     if(!test3.isBogus() || test3.setTo(UnicodeString()).isBogus() || !test3.isEmpty()) {
1586         errln("bogus.setTo(UnicodeString()) failed");
1587     }
1588 
1589     test3.setToBogus();
1590     if(test3.remove().isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) {
1591         errln("bogus.remove() failed");
1592     }
1593 
1594     test3.setToBogus();
1595     if(test3.remove(0, INT32_MAX).isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) {
1596         errln("bogus.remove(0, INT32_MAX) failed");
1597     }
1598 
1599     test3.setToBogus();
1600     if(test3.truncate(0) || test3.isBogus() || !test3.isEmpty()) {
1601         errln("bogus.truncate(0) failed");
1602     }
1603 
1604     test3.setToBogus();
1605     if(!test3.isBogus() || test3.setTo((UChar32)-1).isBogus() || !test3.isEmpty()) {
1606         errln("bogus.setTo((UChar32)-1) failed");
1607     }
1608 
1609     static const UChar nul=0;
1610 
1611     test3.setToBogus();
1612     if(!test3.isBogus() || test3.setTo(&nul, 0).isBogus() || !test3.isEmpty()) {
1613         errln("bogus.setTo(&nul, 0) failed");
1614     }
1615 
1616     test3.setToBogus();
1617     if(!test3.isBogus() || test3.getBuffer()!=0) {
1618         errln("setToBogus() failed to make a string bogus");
1619     }
1620 
1621     test3.setToBogus();
1622     if(test1.isBogus() || !(test1=test3).isBogus()) {
1623         errln("normal=bogus failed to make the left string bogus");
1624     }
1625 
1626     // test that NULL primitive input string values are treated like
1627     // empty strings, not errors (bogus)
1628     test2.setTo((UChar32)0x10005);
1629     if(test2.insert(1, nullptr, 1).length()!=2) {
1630         errln("UniStr.insert(...nullptr...) should not modify the string but does");
1631     }
1632 
1633     UErrorCode errorCode=U_ZERO_ERROR;
1634     UnicodeString
1635         test4((const UChar *)NULL),
1636         test5(true, (const UChar *)NULL, 1),
1637         test6((UChar *)NULL, 5, 5),
1638         test7((const char *)NULL, 3, NULL, errorCode);
1639     if(test4.isBogus() || test5.isBogus() || test6.isBogus() || test7.isBogus()) {
1640         errln("a constructor set to bogus for a NULL input string, should be empty");
1641     }
1642 
1643     test4.setTo(NULL, 3);
1644     test5.setTo(true, (const UChar *)NULL, 1);
1645     test6.setTo((UChar *)NULL, 5, 5);
1646     if(test4.isBogus() || test5.isBogus() || test6.isBogus()) {
1647         errln("a setTo() set to bogus for a NULL input string, should be empty");
1648     }
1649 
1650     // test that bogus==bogus<any
1651     if(test1!=test3 || test1.compare(test3)!=0) {
1652         errln("bogus==bogus failed");
1653     }
1654 
1655     test2.remove();
1656     if(test1>=test2 || !(test2>test1) || test1.compare(test2)>=0 || !(test2.compare(test1)>0)) {
1657         errln("bogus<empty failed");
1658     }
1659 
1660     // test that copy constructor of bogus is bogus & clone of bogus is nullptr
1661     {
1662         test3.setToBogus();
1663         UnicodeString test3Copy(test3);
1664         UnicodeString *test3Clone = test3.clone();
1665         assertTrue(WHERE, test3.isBogus());
1666         assertTrue(WHERE, test3Copy.isBogus());
1667         assertTrue(WHERE, test3Clone == nullptr);
1668     }
1669 }
1670 
1671 // StringEnumeration ------------------------------------------------------- ***
1672 // most of StringEnumeration is tested elsewhere
1673 // this test improves code coverage
1674 
// Strings handed out, in order, by TestEnumeration::snext() and used as the
// expected values in UnicodeStringTest::TestStringEnumeration().
// The last two entries are deliberately long to exercise buffer limits.
static const char *const
testEnumStrings[]={
    "a",
    "b",
    "c",
    "this is a long string which helps us test some buffer limits",
    "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"
};
1683 
1684 class TestEnumeration : public StringEnumeration {
1685 public:
TestEnumeration()1686     TestEnumeration() : i(0) {}
1687 
count(UErrorCode &) const1688     virtual int32_t count(UErrorCode& /*status*/) const override {
1689         return UPRV_LENGTHOF(testEnumStrings);
1690     }
1691 
snext(UErrorCode & status)1692     virtual const UnicodeString *snext(UErrorCode &status) override {
1693         if(U_SUCCESS(status) && i<UPRV_LENGTHOF(testEnumStrings)) {
1694             unistr=UnicodeString(testEnumStrings[i++], "");
1695             return &unistr;
1696         }
1697 
1698         return NULL;
1699     }
1700 
reset(UErrorCode &)1701     virtual void reset(UErrorCode& /*status*/) override {
1702         i=0;
1703     }
1704 
getStaticClassID()1705     static inline UClassID getStaticClassID() {
1706         return (UClassID)&fgClassID;
1707     }
getDynamicClassID() const1708     virtual UClassID getDynamicClassID() const override {
1709         return getStaticClassID();
1710     }
1711 
1712 private:
1713     static const char fgClassID;
1714 
1715     int32_t i;
1716 };
1717 
1718 const char TestEnumeration::fgClassID=0;
1719 
1720 void
TestStringEnumeration()1721 UnicodeStringTest::TestStringEnumeration() {
1722     UnicodeString s;
1723     TestEnumeration ten;
1724     int32_t i, length;
1725     UErrorCode status;
1726 
1727     const UChar *pu;
1728     const char *pc;
1729 
1730     // test the next() default implementation and ensureCharsCapacity()
1731     for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1732         status=U_ZERO_ERROR;
1733         pc=ten.next(&length, status);
1734         s=UnicodeString(testEnumStrings[i], "");
1735         if(U_FAILURE(status) || pc==NULL || length!=s.length() || UnicodeString(pc, length, "")!=s) {
1736             errln("StringEnumeration.next(%d) failed", i);
1737         }
1738     }
1739     status=U_ZERO_ERROR;
1740     if(ten.next(&length, status)!=NULL) {
1741         errln("StringEnumeration.next(done)!=NULL");
1742     }
1743 
1744     // test the unext() default implementation
1745     ten.reset(status);
1746     for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1747         status=U_ZERO_ERROR;
1748         pu=ten.unext(&length, status);
1749         s=UnicodeString(testEnumStrings[i], "");
1750         if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(true, pu, length)!=s) {
1751             errln("StringEnumeration.unext(%d) failed", i);
1752         }
1753     }
1754     status=U_ZERO_ERROR;
1755     if(ten.unext(&length, status)!=NULL) {
1756         errln("StringEnumeration.unext(done)!=NULL");
1757     }
1758 
1759     // test that the default clone() implementation works, and returns NULL
1760     if(ten.clone()!=NULL) {
1761         errln("StringEnumeration.clone()!=NULL");
1762     }
1763 
1764     // test that uenum_openFromStringEnumeration() works
1765     // Need a heap allocated string enumeration because it is adopted by the UEnumeration.
1766     StringEnumeration *newTen = new TestEnumeration;
1767     status=U_ZERO_ERROR;
1768     UEnumeration *uten = uenum_openFromStringEnumeration(newTen, &status);
1769     if (uten==NULL || U_FAILURE(status)) {
1770         errln("fail at file %s, line %d, UErrorCode is %s\n", __FILE__, __LINE__, u_errorName(status));
1771         return;
1772     }
1773 
1774     // test  uenum_next()
1775     for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1776         status=U_ZERO_ERROR;
1777         pc=uenum_next(uten, &length, &status);
1778         if(U_FAILURE(status) || pc==NULL || strcmp(pc, testEnumStrings[i]) != 0) {
1779             errln("File %s, line %d, StringEnumeration.next(%d) failed", __FILE__, __LINE__, i);
1780         }
1781     }
1782     status=U_ZERO_ERROR;
1783     if(uenum_next(uten, &length, &status)!=NULL) {
1784         errln("File %s, line %d, uenum_next(done)!=NULL");
1785     }
1786 
1787     // test the uenum_unext()
1788     uenum_reset(uten, &status);
1789     for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1790         status=U_ZERO_ERROR;
1791         pu=uenum_unext(uten, &length, &status);
1792         s=UnicodeString(testEnumStrings[i], "");
1793         if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(true, pu, length)!=s) {
1794             errln("File %s, Line %d, uenum_unext(%d) failed", __FILE__, __LINE__, i);
1795         }
1796     }
1797     status=U_ZERO_ERROR;
1798     if(uenum_unext(uten, &length, &status)!=NULL) {
1799         errln("File %s, Line %d, uenum_unext(done)!=NULL" __FILE__, __LINE__);
1800     }
1801 
1802     uenum_close(uten);
1803 }
1804 
1805 /*
1806  * Namespace test, to make sure that macros like UNICODE_STRING include the
1807  * namespace qualifier.
1808  *
1809  * Define a (bogus) UnicodeString class in another namespace and check for ambiguity.
1810  */
1811 namespace bogus {
1812     class UnicodeString {
1813     public:
1814         enum EInvariant { kInvariant };
UnicodeString()1815         UnicodeString() : i(1) {}
UnicodeString(UBool,const UChar *,int32_t textLength)1816         UnicodeString(UBool /*isTerminated*/, const UChar * /*text*/, int32_t textLength) : i(textLength) {(void)i;}
UnicodeString(const char *,int32_t length,enum EInvariant)1817         UnicodeString(const char * /*src*/, int32_t length, enum EInvariant /*inv*/
1818 ) : i(length) {}
1819     private:
1820         int32_t i;
1821     };
1822 }
1823 
1824 void
TestNameSpace()1825 UnicodeStringTest::TestNameSpace() {
1826     // Provoke name collision unless the UnicodeString macros properly
1827     // qualify the icu::UnicodeString class.
1828     using namespace bogus;
1829 
1830     // Use all UnicodeString macros from unistr.h.
1831     icu::UnicodeString s1=icu::UnicodeString("abc", 3, US_INV);
1832     icu::UnicodeString s2=UNICODE_STRING("def", 3);
1833     icu::UnicodeString s3=UNICODE_STRING_SIMPLE("ghi");
1834 
1835     // Make sure the compiler does not optimize away instantiation of s1, s2, s3.
1836     icu::UnicodeString s4=s1+s2+s3;
1837     if(s4.length()!=9) {
1838         errln("Something wrong with UnicodeString::operator+().");
1839     }
1840 }
1841 
1842 void
TestUTF32()1843 UnicodeStringTest::TestUTF32() {
1844     // Input string length US_STACKBUF_SIZE to cause overflow of the
1845     // initially chosen fStackBuffer due to supplementary characters.
1846     static const UChar32 utf32[] = {
1847         0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a,
1848         0x10000, 0x20000, 0xe0000, 0x10ffff
1849     };
1850     static const UChar expected_utf16[] = {
1851         0x41, 0xfffd, 0x61, 0xfffd, 0xfffd, 0xfffd, 0x5a, 0xd900, 0xdc00, 0x7a,
1852         0xd800, 0xdc00, 0xd840, 0xdc00, 0xdb40, 0xdc00, 0xdbff, 0xdfff
1853     };
1854     UnicodeString from32 = UnicodeString::fromUTF32(utf32, UPRV_LENGTHOF(utf32));
1855     UnicodeString expected(false, expected_utf16, UPRV_LENGTHOF(expected_utf16));
1856     if(from32 != expected) {
1857         errln("UnicodeString::fromUTF32() did not create the expected string.");
1858     }
1859 
1860     static const UChar utf16[] = {
1861         0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
1862     };
1863     static const UChar32 expected_utf32[] = {
1864         0x41, 0xfffd, 0x61, 0xfffd, 0x5a, 0x50000, 0x7a, 0x10000, 0x10ffff
1865     };
1866     UChar32 result32[16];
1867     UErrorCode errorCode = U_ZERO_ERROR;
1868     int32_t length32 =
1869         UnicodeString(false, utf16, UPRV_LENGTHOF(utf16)).
1870         toUTF32(result32, UPRV_LENGTHOF(result32), errorCode);
1871     if( length32 != UPRV_LENGTHOF(expected_utf32) ||
1872         0 != uprv_memcmp(result32, expected_utf32, length32*4) ||
1873         result32[length32] != 0
1874     ) {
1875         errln("UnicodeString::toUTF32() did not create the expected string.");
1876     }
1877 }
1878 
1879 class TestCheckedArrayByteSink : public CheckedArrayByteSink {
1880 public:
TestCheckedArrayByteSink(char * outbuf,int32_t capacity)1881     TestCheckedArrayByteSink(char* outbuf, int32_t capacity)
1882             : CheckedArrayByteSink(outbuf, capacity), calledFlush(false) {}
Flush()1883     virtual void Flush() override { calledFlush = true; }
1884     UBool calledFlush;
1885 };
1886 
1887 void
TestUTF8()1888 UnicodeStringTest::TestUTF8() {
1889     static const uint8_t utf8[] = {
1890         // Code points:
1891         // 0x41, 0xd900,
1892         // 0x61, 0xdc00,
1893         // 0x110000, 0x5a,
1894         // 0x50000, 0x7a,
1895         // 0x10000, 0x20000,
1896         // 0xe0000, 0x10ffff
1897         0x41, 0xed, 0xa4, 0x80,
1898         0x61, 0xed, 0xb0, 0x80,
1899         0xf4, 0x90, 0x80, 0x80, 0x5a,
1900         0xf1, 0x90, 0x80, 0x80, 0x7a,
1901         0xf0, 0x90, 0x80, 0x80, 0xf0, 0xa0, 0x80, 0x80,
1902         0xf3, 0xa0, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
1903     };
1904     static const UChar expected_utf16[] = {
1905         0x41, 0xfffd, 0xfffd, 0xfffd,
1906         0x61, 0xfffd, 0xfffd, 0xfffd,
1907         0xfffd,  0xfffd, 0xfffd, 0xfffd,0x5a,
1908         0xd900, 0xdc00, 0x7a,
1909         0xd800, 0xdc00, 0xd840, 0xdc00,
1910         0xdb40, 0xdc00, 0xdbff, 0xdfff
1911     };
1912     UnicodeString from8 = UnicodeString::fromUTF8(StringPiece((const char *)utf8, (int32_t)sizeof(utf8)));
1913     UnicodeString expected(false, expected_utf16, UPRV_LENGTHOF(expected_utf16));
1914 
1915     if(from8 != expected) {
1916         errln("UnicodeString::fromUTF8(StringPiece) did not create the expected string.");
1917     }
1918     std::string utf8_string((const char *)utf8, sizeof(utf8));
1919     UnicodeString from8b = UnicodeString::fromUTF8(utf8_string);
1920     if(from8b != expected) {
1921         errln("UnicodeString::fromUTF8(std::string) did not create the expected string.");
1922     }
1923 
1924     static const UChar utf16[] = {
1925         0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
1926     };
1927     static const uint8_t expected_utf8[] = {
1928         0x41, 0xef, 0xbf, 0xbd, 0x61, 0xef, 0xbf, 0xbd, 0x5a, 0xf1, 0x90, 0x80, 0x80, 0x7a,
1929         0xf0, 0x90, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
1930     };
1931     UnicodeString us(false, utf16, UPRV_LENGTHOF(utf16));
1932 
1933     char buffer[64];
1934     TestCheckedArrayByteSink sink(buffer, (int32_t)sizeof(buffer));
1935     us.toUTF8(sink);
1936     if( sink.NumberOfBytesWritten() != (int32_t)sizeof(expected_utf8) ||
1937         0 != uprv_memcmp(buffer, expected_utf8, sizeof(expected_utf8))
1938     ) {
1939         errln("UnicodeString::toUTF8() did not create the expected string.");
1940     }
1941     if(!sink.calledFlush) {
1942         errln("UnicodeString::toUTF8(sink) did not sink.Flush().");
1943     }
1944     // Initial contents for testing that toUTF8String() appends.
1945     std::string result8 = "-->";
1946     std::string expected8 = "-->" + std::string((const char *)expected_utf8, sizeof(expected_utf8));
1947     // Use the return value just for testing.
1948     std::string &result8r = us.toUTF8String(result8);
1949     if(result8r != expected8 || &result8r != &result8) {
1950         errln("UnicodeString::toUTF8String() did not create the expected string.");
1951     }
1952 }
1953 
// Test if this compiler supports Return Value Optimization of unnamed temporary objects.
// Returns an unnamed temporary UnicodeString constructed from uchars with
// length -1; the caller compares the result's getBuffer() against uchars to
// find out whether the returned object still refers to the caller's buffer.
static UnicodeString wrapUChars(const UChar *uchars) {
    return UnicodeString(true, uchars, -1);
}
1958 
1959 void
TestReadOnlyAlias()1960 UnicodeStringTest::TestReadOnlyAlias() {
1961     UChar uchars[]={ 0x61, 0x62, 0 };
1962     UnicodeString alias(true, uchars, 2);
1963     if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
1964         errln("UnicodeString read-only-aliasing constructor does not behave as expected.");
1965         return;
1966     }
1967     alias.truncate(1);
1968     if(alias.length()!=1 || alias.getBuffer()!=uchars) {
1969         errln("UnicodeString(read-only-alias).truncate() did not preserve aliasing as expected.");
1970     }
1971     if(alias.getTerminatedBuffer()==uchars) {
1972         errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1973               "did not allocate and copy as expected.");
1974     }
1975     if(uchars[1]!=0x62) {
1976         errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1977               "modified the original buffer.");
1978     }
1979     if(1!=u_strlen(alias.getTerminatedBuffer())) {
1980         errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1981               "does not return a buffer terminated at the proper length.");
1982     }
1983 
1984     alias.setTo(true, uchars, 2);
1985     if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
1986         errln("UnicodeString read-only-aliasing setTo() does not behave as expected.");
1987         return;
1988     }
1989     alias.remove();
1990     if(alias.length()!=0) {
1991         errln("UnicodeString(read-only-alias).remove() did not work.");
1992     }
1993     if(alias.getTerminatedBuffer()==uchars) {
1994         errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
1995               "did not un-alias as expected.");
1996     }
1997     if(uchars[0]!=0x61) {
1998         errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
1999               "modified the original buffer.");
2000     }
2001     if(0!=u_strlen(alias.getTerminatedBuffer())) {
2002         errln("UnicodeString.setTo(read-only-alias).remove().getTerminatedBuffer() "
2003               "does not return a buffer terminated at length 0.");
2004     }
2005 
2006     UnicodeString longString=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789");
2007     alias.setTo(false, longString.getBuffer(), longString.length());
2008     alias.remove(0, 10);
2009     if(longString.compare(10, INT32_MAX, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+10) {
2010         errln("UnicodeString.setTo(read-only-alias).remove(0, 10) did not preserve aliasing as expected.");
2011     }
2012     alias.setTo(false, longString.getBuffer(), longString.length());
2013     alias.remove(27, 99);
2014     if(longString.compare(0, 27, alias)!=0 || alias.getBuffer()!=longString.getBuffer()) {
2015         errln("UnicodeString.setTo(read-only-alias).remove(27, 99) did not preserve aliasing as expected.");
2016     }
2017     alias.setTo(false, longString.getBuffer(), longString.length());
2018     alias.retainBetween(6, 30);
2019     if(longString.compare(6, 24, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+6) {
2020         errln("UnicodeString.setTo(read-only-alias).retainBetween(6, 30) did not preserve aliasing as expected.");
2021     }
2022 
2023     UChar abc[]={ 0x61, 0x62, 0x63, 0 };
2024     UBool hasRVO= wrapUChars(abc).getBuffer()==abc;
2025 
2026     UnicodeString temp;
2027     temp.fastCopyFrom(longString.tempSubString());
2028     if(temp!=longString || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
2029         errln("UnicodeString.tempSubString() failed");
2030     }
2031     temp.fastCopyFrom(longString.tempSubString(-3, 5));
2032     if(longString.compare(0, 5, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
2033         errln("UnicodeString.tempSubString(-3, 5) failed");
2034     }
2035     temp.fastCopyFrom(longString.tempSubString(17));
2036     if(longString.compare(17, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+17)) {
2037         errln("UnicodeString.tempSubString(17) failed");
2038     }
2039     temp.fastCopyFrom(longString.tempSubString(99));
2040     if(!temp.isEmpty()) {
2041         errln("UnicodeString.tempSubString(99) failed");
2042     }
2043     temp.fastCopyFrom(longString.tempSubStringBetween(6));
2044     if(longString.compare(6, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+6)) {
2045         errln("UnicodeString.tempSubStringBetween(6) failed");
2046     }
2047     temp.fastCopyFrom(longString.tempSubStringBetween(8, 18));
2048     if(longString.compare(8, 10, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+8)) {
2049         errln("UnicodeString.tempSubStringBetween(8, 18) failed");
2050     }
2051     UnicodeString bogusString;
2052     bogusString.setToBogus();
2053     temp.fastCopyFrom(bogusString.tempSubStringBetween(8, 18));
2054     if(!temp.isBogus()) {
2055         errln("UnicodeString.setToBogus().tempSubStringBetween(8, 18) failed");
2056     }
2057 }
2058 
2059 void
doTestAppendable(UnicodeString & dest,Appendable & app)2060 UnicodeStringTest::doTestAppendable(UnicodeString &dest, Appendable &app) {
2061     static const UChar cde[3]={ 0x63, 0x64, 0x65 };
2062     static const UChar fg[3]={ 0x66, 0x67, 0 };
2063     if(!app.reserveAppendCapacity(12)) {
2064         errln("Appendable.reserve(12) failed");
2065     }
2066     app.appendCodeUnit(0x61);
2067     app.appendCodePoint(0x62);
2068     app.appendCodePoint(0x50000);
2069     app.appendString(cde, 3);
2070     app.appendString(fg, -1);
2071     UChar scratch[3];
2072     int32_t capacity=-1;
2073     UChar *buffer=app.getAppendBuffer(3, 3, scratch, 3, &capacity);
2074     if(capacity<3) {
2075         errln("Appendable.getAppendBuffer(min=3) returned capacity=%d<3", (int)capacity);
2076         return;
2077     }
2078     static const UChar hij[3]={ 0x68, 0x69, 0x6a };
2079     u_memcpy(buffer, hij, 3);
2080     app.appendString(buffer, 3);
2081     if(dest!=UNICODE_STRING_SIMPLE("ab\\U00050000cdefghij").unescape()) {
2082         errln("Appendable.append(...) failed");
2083     }
2084     buffer=app.getAppendBuffer(0, 3, scratch, 3, &capacity);
2085     if(buffer!=NULL || capacity!=0) {
2086         errln("Appendable.getAppendBuffer(min=0) failed");
2087     }
2088     capacity=1;
2089     buffer=app.getAppendBuffer(3, 3, scratch, 2, &capacity);
2090     if(buffer!=NULL || capacity!=0) {
2091         errln("Appendable.getAppendBuffer(scratch<min) failed");
2092     }
2093 }
2094 
2095 class SimpleAppendable : public Appendable {
2096 public:
SimpleAppendable(UnicodeString & dest)2097     explicit SimpleAppendable(UnicodeString &dest) : str(dest) {}
appendCodeUnit(UChar c)2098     virtual UBool appendCodeUnit(UChar c) override { str.append(c); return true; }
reset()2099     SimpleAppendable &reset() { str.remove(); return *this; }
2100 private:
2101     UnicodeString &str;
2102 };
2103 
2104 void
TestAppendable()2105 UnicodeStringTest::TestAppendable() {
2106     UnicodeString dest;
2107     SimpleAppendable app(dest);
2108     doTestAppendable(dest, app);
2109 }
2110 
2111 void
TestUnicodeStringImplementsAppendable()2112 UnicodeStringTest::TestUnicodeStringImplementsAppendable() {
2113     UnicodeString dest;
2114     UnicodeStringAppendable app(dest);
2115     doTestAppendable(dest, app);
2116 }
2117 
2118 void
TestSizeofUnicodeString()2119 UnicodeStringTest::TestSizeofUnicodeString() {
2120     // See the comments in unistr.h near the declaration of UnicodeString's fields.
2121     // See the API comments for UNISTR_OBJECT_SIZE.
2122     size_t sizeofUniStr=sizeof(UnicodeString);
2123     size_t expected=UNISTR_OBJECT_SIZE;
2124     if(expected!=sizeofUniStr) {
2125         // Possible cause: UNISTR_OBJECT_SIZE may not be a multiple of sizeof(pointer),
2126         // of the compiler might add more internal padding than expected.
2127         errln("sizeof(UnicodeString)=%d, expected UNISTR_OBJECT_SIZE=%d",
2128               (int)sizeofUniStr, (int)expected);
2129     }
2130     if(sizeofUniStr<32) {
2131         errln("sizeof(UnicodeString)=%d < 32, probably too small", (int)sizeofUniStr);
2132     }
2133     // We assume that the entire UnicodeString object,
2134     // minus the vtable pointer and 2 bytes for flags and short length,
2135     // is available for internal storage of UChars.
2136     int32_t expectedStackBufferLength=((int32_t)UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR;
2137     UnicodeString s;
2138     const UChar *emptyBuffer=s.getBuffer();
2139     for(int32_t i=0; i<expectedStackBufferLength; ++i) {
2140         s.append((UChar)0x2e);
2141     }
2142     const UChar *fullBuffer=s.getBuffer();
2143     if(fullBuffer!=emptyBuffer) {
2144         errln("unexpected reallocation when filling with assumed stack buffer size of %d",
2145               expectedStackBufferLength);
2146     }
2147     const UChar *terminatedBuffer=s.getTerminatedBuffer();
2148     if(terminatedBuffer==emptyBuffer) {
2149         errln("unexpected keeping stack buffer when overfilling assumed stack buffer size of %d",
2150               expectedStackBufferLength);
2151     }
2152 }
2153 
2154 // Try to avoid clang -Wself-move warnings from s1 = std::move(s1);
moveFrom(UnicodeString & dest,UnicodeString & src)2155 void moveFrom(UnicodeString &dest, UnicodeString &src) {
2156     dest = std::move(src);
2157 }
2158 
2159 void
TestMoveSwap()2160 UnicodeStringTest::TestMoveSwap() {
2161     static const UChar abc[3] = { 0x61, 0x62, 0x63 };  // "abc"
2162     UnicodeString s1(false, abc, UPRV_LENGTHOF(abc));  // read-only alias
2163     UnicodeString s2(100, 0x7a, 100);  // 100 * 'z' should be on the heap
2164     UnicodeString s3("defg", 4, US_INV);  // in stack buffer
2165     const UChar *p = s2.getBuffer();
2166     s1.swap(s2);
2167     if(s1.getBuffer() != p || s1.length() != 100 || s2.getBuffer() != abc || s2.length() != 3) {
2168         errln("UnicodeString.swap() did not swap");
2169     }
2170     swap(s2, s3);
2171     if(s2 != UNICODE_STRING_SIMPLE("defg") || s3.getBuffer() != abc || s3.length() != 3) {
2172         errln("swap(UnicodeString) did not swap back");
2173     }
2174     UnicodeString s4;
2175     s4 = std::move(s1);
2176     if(s4.getBuffer() != p || s4.length() != 100 || !s1.isBogus()) {
2177         errln("UnicodeString = std::move(heap) did not move");
2178     }
2179     UnicodeString s5;
2180     s5 = std::move(s2);
2181     if(s5 != UNICODE_STRING_SIMPLE("defg")) {
2182         errln("UnicodeString = std::move(stack) did not move");
2183     }
2184     UnicodeString s6;
2185     s6 = std::move(s3);
2186     if(s6.getBuffer() != abc || s6.length() != 3) {
2187         errln("UnicodeString = std::move(alias) did not move");
2188     }
2189     infoln("TestMoveSwap() with rvalue references");
2190     s1 = static_cast<UnicodeString &&>(s6);
2191     if(s1.getBuffer() != abc || s1.length() != 3) {
2192         errln("UnicodeString move assignment operator did not move");
2193     }
2194     UnicodeString s7(static_cast<UnicodeString &&>(s4));
2195     if(s7.getBuffer() != p || s7.length() != 100 || !s4.isBogus()) {
2196         errln("UnicodeString move constructor did not move");
2197     }
2198 
2199     // Move self assignment leaves the object valid but in an undefined state.
2200     // Do it to make sure there is no crash,
2201     // but do not check for any particular resulting value.
2202     moveFrom(s1, s1);
2203     moveFrom(s2, s2);
2204     moveFrom(s3, s3);
2205     moveFrom(s4, s4);
2206     moveFrom(s5, s5);
2207     moveFrom(s6, s6);
2208     moveFrom(s7, s7);
2209     // Simple copy assignment must work.
2210     UnicodeString simple = UNICODE_STRING_SIMPLE("simple");
2211     s1 = s6 = s4 = s7 = simple;
2212     if(s1 != simple || s4 != simple || s6 != simple || s7 != simple) {
2213         errln("UnicodeString copy after self-move did not work");
2214     }
2215 }
2216 
2217 void
TestUInt16Pointers()2218 UnicodeStringTest::TestUInt16Pointers() {
2219     static const uint16_t carr[] = { 0x61, 0x62, 0x63, 0 };
2220     uint16_t arr[4];
2221 
2222     UnicodeString expected(u"abc");
2223     assertEquals("abc from pointer", expected, UnicodeString(carr));
2224     assertEquals("abc from pointer+length", expected, UnicodeString(carr, 3));
2225     assertEquals("abc from read-only-alias pointer", expected, UnicodeString(true, carr, 3));
2226 
2227     UnicodeString alias(arr, 0, 4);
2228     alias.append(u'a').append(u'b').append(u'c');
2229     assertEquals("abc from writable alias", expected, alias);
2230     assertEquals("buffer=abc from writable alias", expected, UnicodeString(arr, 3));
2231 
2232     UErrorCode errorCode = U_ZERO_ERROR;
2233     int32_t length = UnicodeString(u"def").extract(arr, 4, errorCode);
2234     assertSuccess(WHERE, errorCode);
2235     assertEquals("def from extract()", UnicodeString(u"def"), UnicodeString(arr, length));
2236 }
2237 
2238 void
TestWCharPointers()2239 UnicodeStringTest::TestWCharPointers() {
2240 #if U_SIZEOF_WCHAR_T==2
2241     static const wchar_t carr[] = { 0x61, 0x62, 0x63, 0 };
2242     wchar_t arr[4];
2243 
2244     UnicodeString expected(u"abc");
2245     assertEquals("abc from pointer", expected, UnicodeString(carr));
2246     assertEquals("abc from pointer+length", expected, UnicodeString(carr, 3));
2247     assertEquals("abc from read-only-alias pointer", expected, UnicodeString(true, carr, 3));
2248 
2249     UnicodeString alias(arr, 0, 4);
2250     alias.append(u'a').append(u'b').append(u'c');
2251     assertEquals("abc from writable alias", expected, alias);
2252     assertEquals("buffer=abc from writable alias", expected, UnicodeString(arr, 3));
2253 
2254     UErrorCode errorCode = U_ZERO_ERROR;
2255     int32_t length = UnicodeString(u"def").extract(arr, 4, errorCode);
2256     assertSuccess(WHERE, errorCode);
2257     assertEquals("def from extract()", UnicodeString(u"def"), UnicodeString(arr, length));
2258 #endif
2259 }
2260 
2261 void
TestNullPointers()2262 UnicodeStringTest::TestNullPointers() {
2263     assertTrue("empty from nullptr", UnicodeString(nullptr).isEmpty());
2264     assertTrue("empty from nullptr+length", UnicodeString(nullptr, 2).isEmpty());
2265     assertTrue("empty from read-only-alias nullptr", UnicodeString(true, nullptr, 3).isEmpty());
2266 
2267     UnicodeString alias(nullptr, 4, 4);  // empty, no alias
2268     assertTrue("empty from writable alias", alias.isEmpty());
2269     alias.append(u'a').append(u'b').append(u'c');
2270     UnicodeString expected(u"abc");
2271     assertEquals("abc from writable alias", expected, alias);
2272 
2273     UErrorCode errorCode = U_ZERO_ERROR;
2274     UnicodeString(u"def").extract(nullptr, 0, errorCode);
2275     assertEquals("buffer overflow extracting to nullptr", U_BUFFER_OVERFLOW_ERROR, errorCode);
2276 }
2277 
TestUnicodeStringInsertAppendToSelf()2278 void UnicodeStringTest::TestUnicodeStringInsertAppendToSelf() {
2279     IcuTestErrorCode status(*this, "TestUnicodeStringAppendToSelf");
2280 
2281     // Test append operation
2282     UnicodeString str(u"foo ");
2283     str.append(str);
2284     str.append(str);
2285     str.append(str);
2286     assertEquals("", u"foo foo foo foo foo foo foo foo ", str);
2287 
2288     // Test append operation with readonly alias to start
2289     str = UnicodeString(true, u"foo ", 4);
2290     str.append(str);
2291     str.append(str);
2292     str.append(str);
2293     assertEquals("", u"foo foo foo foo foo foo foo foo ", str);
2294 
2295     // Test append operation with aliased substring
2296     str = u"abcde";
2297     UnicodeString sub = str.tempSubString(1, 2);
2298     str.append(sub);
2299     assertEquals("", u"abcdebc", str);
2300 
2301     // Test append operation with double-aliased substring
2302     str = UnicodeString(true, u"abcde", 5);
2303     sub = str.tempSubString(1, 2);
2304     str.append(sub);
2305     assertEquals("", u"abcdebc", str);
2306 
2307     // Test insert operation
2308     str = u"a-*b";
2309     str.insert(2, str);
2310     str.insert(4, str);
2311     str.insert(8, str);
2312     assertEquals("", u"a-a-a-a-a-a-a-a-*b*b*b*b*b*b*b*b", str);
2313 
2314     // Test insert operation with readonly alias to start
2315     str = UnicodeString(true, u"a-*b", 4);
2316     str.insert(2, str);
2317     str.insert(4, str);
2318     str.insert(8, str);
2319     assertEquals("", u"a-a-a-a-a-a-a-a-*b*b*b*b*b*b*b*b", str);
2320 
2321     // Test insert operation with aliased substring
2322     str = u"abcde";
2323     sub = str.tempSubString(1, 3);
2324     str.insert(2, sub);
2325     assertEquals("", u"abbcdcde", str);
2326 
2327     // Test insert operation with double-aliased substring
2328     str = UnicodeString(true, u"abcde", 5);
2329     sub = str.tempSubString(1, 3);
2330     str.insert(2, sub);
2331     assertEquals("", u"abbcdcde", str);
2332 }
2333 
2334 /* <issue: https://github.com/unicode-org/icu/pull/3416> 20250417 begin */
TestLargeMemory()2335 void UnicodeStringTest::TestLargeMemory() {
2336 #if U_PLATFORM_IS_LINUX_BASED || U_PLATFORM_IS_DARWIN_BASED
2337     if(quick) { return; }
2338     IcuTestErrorCode status(*this, "TestLargeMemory");
2339     constexpr uint32_t len = 2147483643;
2340     char16_t *buf = new char16_t[len];
2341     if (buf == nullptr) { return; }
2342     uprv_memset(buf, 0x4e, len * 2);
2343     icu::UnicodeString test(buf, len);
2344     delete [] buf;
2345 #endif
2346 }
2347 /* <issue: https://github.com/unicode-org/icu/pull/3416> 20250417 end */
2348 
TestLargeAppend()2349 void UnicodeStringTest::TestLargeAppend() {
2350     if(quick) return;
2351 
2352     IcuTestErrorCode status(*this, "TestLargeAppend");
2353     // Make a large UnicodeString
2354     int32_t len = 0xAFFFFFF;
2355     UnicodeString str;
2356     char16_t *buf = str.getBuffer(len);
2357     // A fast way to set buffer to valid Unicode.
2358     // 4E4E is a valid unicode character
2359     uprv_memset(buf, 0x4e, len * 2);
2360     str.releaseBuffer(len);
2361     UnicodeString dest;
2362     // Append it 16 times
2363     // 0xAFFFFFF times 16 is 0xA4FFFFF1,
2364     // which is greater than INT32_MAX, which is 0x7FFFFFFF.
2365     int64_t total = 0;
2366     for (int32_t i = 0; i < 16; i++) {
2367         dest.append(str);
2368         total += len;
2369         if (total <= INT32_MAX) {
2370             assertFalse("dest is not bogus", dest.isBogus());
2371         } else {
2372             assertTrue("dest should be bogus", dest.isBogus());
2373         }
2374     }
2375     dest.remove();
2376     total = 0;
2377     for (int32_t i = 0; i < 16; i++) {
2378         dest.append(str);
2379         total += len;
2380         if (total + len <= INT32_MAX) {
2381             assertFalse("dest is not bogus", dest.isBogus());
2382         } else if (total <= INT32_MAX) {
2383             // Check that a string of exactly the maximum size works
2384             UnicodeString str2;
2385             int32_t remain = static_cast<int32_t>(INT32_MAX - total);
2386             char16_t *buf2 = str2.getBuffer(remain);
2387             if (buf2 == nullptr) {
2388                 // if somehow memory allocation fail, return the test
2389                 return;
2390             }
2391             uprv_memset(buf2, 0x4e, remain * 2);
2392             str2.releaseBuffer(remain);
2393             dest.append(str2);
2394             total += remain;
2395             assertEquals("When a string of exactly the maximum size works", (int64_t)INT32_MAX, total);
2396             assertEquals("When a string of exactly the maximum size works", INT32_MAX, dest.length());
2397             assertFalse("dest is not bogus", dest.isBogus());
2398 
2399             // Check that a string size+1 goes bogus
2400             str2.truncate(1);
2401             dest.append(str2);
2402             total++;
2403             assertTrue("dest should be bogus", dest.isBogus());
2404         } else {
2405             assertTrue("dest should be bogus", dest.isBogus());
2406         }
2407     }
2408 }
2409