• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 
9 #include <utility>
10 
11 #include "ustrtest.h"
12 #include "unicode/appendable.h"
13 #include "unicode/std_string.h"
14 #include "unicode/unistr.h"
15 #include "unicode/uchar.h"
16 #include "unicode/ustring.h"
17 #include "unicode/locid.h"
18 #include "unicode/strenum.h"
19 #include "unicode/ucnv.h"
20 #include "unicode/uenum.h"
21 #include "unicode/utf16.h"
22 #include "cmemory.h"
23 #include "charstr.h"
24 
25 #if 0
26 #include "unicode/ustream.h"
27 
28 #include <iostream>
29 using namespace std;
30 
31 #endif
32 
// Destructor. The body is empty: this class performs no cleanup of its own here.
~UnicodeStringTest()33 UnicodeStringTest::~UnicodeStringTest() {}
34 
// Factory for the StringCaseTest suite; only declared here, the definition is
// not in this part of the file.
// NOTE(review): presumably what TESTCASE_AUTO_CREATE_CLASS(StringCaseTest)
// below expands to a call of -- confirm against the macro definition.
35 extern IntlTest *createStringCaseTest();
36 
// Test dispatcher for this suite.
//
// When `exec` is true a suite banner is logged. The rest of the body is a
// table built from the TESTCASE_AUTO_* macros, one entry per test method.
// NOTE(review): `index`, `name` and `par` are not referenced directly; they are
// presumably consumed by the TESTCASE_AUTO_* macros (index selects the entry,
// name receives its label) -- confirm in the header that defines them.
// The order of the entries is presumably what maps an index to a test, so new
// tests should be appended rather than inserted -- TODO confirm.
runIndexedTest(int32_t index,UBool exec,const char * & name,char * par)37 void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char *par)
38 {
39     if (exec) logln("TestSuite UnicodeStringTest: ");
40     TESTCASE_AUTO_BEGIN;
    // Nested suite, created through a factory rather than a member function.
41     TESTCASE_AUTO_CREATE_CLASS(StringCaseTest);
    // Member-function test cases of UnicodeStringTest.
42     TESTCASE_AUTO(TestBasicManipulation);
43     TESTCASE_AUTO(TestCompare);
44     TESTCASE_AUTO(TestExtract);
45     TESTCASE_AUTO(TestRemoveReplace);
46     TESTCASE_AUTO(TestSearching);
47     TESTCASE_AUTO(TestSpacePadding);
48     TESTCASE_AUTO(TestPrefixAndSuffix);
49     TESTCASE_AUTO(TestFindAndReplace);
50     TESTCASE_AUTO(TestBogus);
51     TESTCASE_AUTO(TestReverse);
52     TESTCASE_AUTO(TestMiscellaneous);
53     TESTCASE_AUTO(TestStackAllocation);
54     TESTCASE_AUTO(TestUnescape);
55     TESTCASE_AUTO(TestCountChar32);
56     TESTCASE_AUTO(TestStringEnumeration);
57     TESTCASE_AUTO(TestNameSpace);
58     TESTCASE_AUTO(TestUTF32);
59     TESTCASE_AUTO(TestUTF8);
60     TESTCASE_AUTO(TestReadOnlyAlias);
61     TESTCASE_AUTO(TestAppendable);
62     TESTCASE_AUTO(TestUnicodeStringImplementsAppendable);
63     TESTCASE_AUTO(TestSizeofUnicodeString);
64     TESTCASE_AUTO(TestStartsWithAndEndsWithNulTerminated);
65     TESTCASE_AUTO(TestMoveSwap);
66     TESTCASE_AUTO(TestUInt16Pointers);
67     TESTCASE_AUTO(TestWCharPointers);
68     TESTCASE_AUTO(TestNullPointers);
69     TESTCASE_AUTO(TestUnicodeStringInsertAppendToSelf);
70     TESTCASE_AUTO(TestLargeAppend);
71     TESTCASE_AUTO_END;
72 }
73 
74 void
TestBasicManipulation()75 UnicodeStringTest::TestBasicManipulation()
76 {
77     UnicodeString   test1("Now is the time for all men to come swiftly to the aid of the party.\n");
78     UnicodeString   expectedValue;
79     UnicodeString   *c;
80 
81     c=test1.clone();
82     test1.insert(24, "good ");
83     expectedValue = "Now is the time for all good men to come swiftly to the aid of the party.\n";
84     if (test1 != expectedValue)
85         errln("insert() failed:  expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
86 
87     c->insert(24, "good ");
88     if(*c != expectedValue) {
89         errln("clone()->insert() failed:  expected \"" + expectedValue + "\"\n,got \"" + *c + "\"");
90     }
91     delete c;
92 
93     test1.remove(41, 8);
94     expectedValue = "Now is the time for all good men to come to the aid of the party.\n";
95     if (test1 != expectedValue)
96         errln("remove() failed:  expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
97 
98     test1.replace(58, 6, "ir country");
99     expectedValue = "Now is the time for all good men to come to the aid of their country.\n";
100     if (test1 != expectedValue)
101         errln("replace() failed:  expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
102 
103     UChar     temp[80];
104     test1.extract(0, 15, temp);
105 
106     UnicodeString       test2(temp, 15);
107 
108     expectedValue = "Now is the time";
109     if (test2 != expectedValue)
110         errln("extract() failed:  expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
111 
112     test2 += " for me to go!\n";
113     expectedValue = "Now is the time for me to go!\n";
114     if (test2 != expectedValue)
115         errln("operator+=() failed:  expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
116 
117     if (test1.length() != 70)
118         errln(UnicodeString("length() failed: expected 70, got ") + test1.length());
119     if (test2.length() != 30)
120         errln(UnicodeString("length() failed: expected 30, got ") + test2.length());
121 
122     UnicodeString test3;
123     test3.append((UChar32)0x20402);
124     if(test3 != CharsToUnicodeString("\\uD841\\uDC02")){
125         errln((UnicodeString)"append failed for UChar32, expected \"\\\\ud841\\\\udc02\", got " + prettify(test3));
126     }
127     if(test3.length() != 2){
128         errln(UnicodeString("append or length failed for UChar32, expected 2, got ") + test3.length());
129     }
130     test3.append((UChar32)0x0074);
131     if(test3 != CharsToUnicodeString("\\uD841\\uDC02t")){
132         errln((UnicodeString)"append failed for UChar32, expected \"\\\\uD841\\\\uDC02t\", got " + prettify(test3));
133     }
134     if(test3.length() != 3){
135         errln((UnicodeString)"append or length failed for UChar32, expected 2, got " + test3.length());
136     }
137 
138     // test some UChar32 overloads
139     if( test3.setTo((UChar32)0x10330).length() != 2 ||
140         test3.insert(0, (UChar32)0x20100).length() != 4 ||
141         test3.replace(2, 2, (UChar32)0xe0061).length() != 4 ||
142         (test3 = (UChar32)0x14001).length() != 2
143     ) {
144         errln((UnicodeString)"simple UChar32 overloads for replace, insert, setTo or = failed");
145     }
146 
147     {
148         // test moveIndex32()
149         UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
150 
151         if(
152             s.moveIndex32(2, -1)!=0 ||
153             s.moveIndex32(2, 1)!=4 ||
154             s.moveIndex32(2, 2)!=5 ||
155             s.moveIndex32(5, -2)!=2 ||
156             s.moveIndex32(0, -1)!=0 ||
157             s.moveIndex32(6, 1)!=6
158         ) {
159             errln("UnicodeString::moveIndex32() failed");
160         }
161 
162         if(s.getChar32Start(1)!=0 || s.getChar32Start(2)!=2) {
163             errln("UnicodeString::getChar32Start() failed");
164         }
165 
166         if(s.getChar32Limit(1)!=2 || s.getChar32Limit(2)!=2) {
167             errln("UnicodeString::getChar32Limit() failed");
168         }
169     }
170 
171     {
172         // test new 2.2 constructors and setTo function that parallel Java's substring function.
173         UnicodeString src("Hello folks how are you?");
174         UnicodeString target1("how are you?");
175         if (target1 != UnicodeString(src, 12)) {
176             errln("UnicodeString(const UnicodeString&, int32_t) failed");
177         }
178         UnicodeString target2("folks");
179         if (target2 != UnicodeString(src, 6, 5)) {
180             errln("UnicodeString(const UnicodeString&, int32_t, int32_t) failed");
181         }
182         if (target1 != target2.setTo(src, 12)) {
183             errln("UnicodeString::setTo(const UnicodeString&, int32_t) failed");
184         }
185     }
186 
187     {
188         // op+ is new in ICU 2.8
189         UnicodeString s=UnicodeString("abc", "")+UnicodeString("def", "")+UnicodeString("ghi", "");
190         if(s!=UnicodeString("abcdefghi", "")) {
191             errln("operator+(UniStr, UniStr) failed");
192         }
193     }
194 
195     {
196         // tests for Jitterbug 2360
197         // verify that APIs with source pointer + length accept length == -1
198         // mostly test only where modified, only few functions did not already do this
199         if(UnicodeString("abc", -1, "")!=UnicodeString("abc", "")) {
200             errln("UnicodeString(codepageData, dataLength, codepage) does not work with dataLength==-1");
201         }
202 
203         UChar buffer[10]={ 0x61, 0x62, 0x20ac, 0xd900, 0xdc05, 0,   0x62, 0xffff, 0xdbff, 0xdfff };
204         UnicodeString s, t(buffer, -1, UPRV_LENGTHOF(buffer));
205 
206         if(s.setTo(buffer, -1, UPRV_LENGTHOF(buffer)).length()!=u_strlen(buffer)) {
207             errln("UnicodeString.setTo(buffer, length, capacity) does not work with length==-1");
208         }
209         if(t.length()!=u_strlen(buffer)) {
210             errln("UnicodeString(buffer, length, capacity) does not work with length==-1");
211         }
212 
213         if(0!=s.caseCompare(buffer, -1, U_FOLD_CASE_DEFAULT)) {
214             errln("UnicodeString.caseCompare(const UChar *, length, options) does not work with length==-1");
215         }
216         if(0!=s.caseCompare(0, s.length(), buffer, U_FOLD_CASE_DEFAULT)) {
217             errln("UnicodeString.caseCompare(start, _length, const UChar *, options) does not work");
218         }
219 
220         buffer[u_strlen(buffer)]=0xe4;
221         UnicodeString u(buffer, -1, UPRV_LENGTHOF(buffer));
222         if(s.setTo(buffer, -1, UPRV_LENGTHOF(buffer)).length()!=UPRV_LENGTHOF(buffer)) {
223             errln("UnicodeString.setTo(buffer without NUL, length, capacity) does not work with length==-1");
224         }
225         if(u.length()!=UPRV_LENGTHOF(buffer)) {
226             errln("UnicodeString(buffer without NUL, length, capacity) does not work with length==-1");
227         }
228 
229         static const char cs[]={ 0x61, (char)0xe4, (char)0x85, 0 };
230         UConverter *cnv;
231         UErrorCode errorCode=U_ZERO_ERROR;
232 
233         cnv=ucnv_open("ISO-8859-1", &errorCode);
234         UnicodeString v(cs, -1, cnv, errorCode);
235         ucnv_close(cnv);
236         if(v!=CharsToUnicodeString("a\\xe4\\x85")) {
237             errln("UnicodeString(const char *, length, cnv, errorCode) does not work with length==-1");
238         }
239     }
240 
241 #if U_CHARSET_IS_UTF8
242     {
243         // Test the hardcoded-UTF-8 UnicodeString optimizations.
244         static const uint8_t utf8[]={ 0x61, 0xC3, 0xA4, 0xC3, 0x9F, 0xE4, 0xB8, 0x80, 0 };
245         static const UChar utf16[]={ 0x61, 0xE4, 0xDF, 0x4E00 };
246         UnicodeString from8a = UnicodeString((const char *)utf8);
247         UnicodeString from8b = UnicodeString((const char *)utf8, (int32_t)sizeof(utf8)-1);
248         UnicodeString from16(false, utf16, UPRV_LENGTHOF(utf16));
249         if(from8a != from16 || from8b != from16) {
250             errln("UnicodeString(const char * U_CHARSET_IS_UTF8) failed");
251         }
252         char buffer[16];
253         int32_t length8=from16.extract(0, 0x7fffffff, buffer, (uint32_t)sizeof(buffer));
254         if(length8!=((int32_t)sizeof(utf8)-1) || 0!=uprv_memcmp(buffer, utf8, sizeof(utf8))) {
255             errln("UnicodeString::extract(char * U_CHARSET_IS_UTF8) failed");
256         }
257         length8=from16.extract(1, 2, buffer, (uint32_t)sizeof(buffer));
258         if(length8!=4 || buffer[length8]!=0 || 0!=uprv_memcmp(buffer, utf8+1, length8)) {
259             errln("UnicodeString::extract(substring to char * U_CHARSET_IS_UTF8) failed");
260         }
261     }
262 #endif
263 }
264 
265 void
TestCompare()266 UnicodeStringTest::TestCompare()
267 {
268     UnicodeString   test1("this is a test");
269     UnicodeString   test2("this is a test");
270     UnicodeString   test3("this is a test of the emergency broadcast system");
271     UnicodeString   test4("never say, \"this is a test\"!!");
272 
273     UnicodeString   test5((UChar)0x5000);
274     UnicodeString   test6((UChar)0x5100);
275 
276     UChar         uniChars[] = { 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73,
277                  0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0 };
278     char            chars[] = "this is a test";
279 
280     // test operator== and operator!=
281     if (test1 != test2 || test1 == test3 || test1 == test4)
282         errln("operator== or operator!= failed");
283 
284     // test operator> and operator<
285     if (test1 > test2 || test1 < test2 || !(test1 < test3) || !(test1 > test4) ||
286         !(test5 < test6)
287     ) {
288         errln("operator> or operator< failed");
289     }
290 
291     // test operator>= and operator<=
292     if (!(test1 >= test2) || !(test1 <= test2) || !(test1 <= test3) || !(test1 >= test4))
293         errln("operator>= or operator<= failed");
294 
295     // test compare(UnicodeString)
296     if (test1.compare(test2) != 0 || test1.compare(test3) >= 0 || test1.compare(test4) <= 0)
297         errln("compare(UnicodeString) failed");
298 
299     //test compare(offset, length, UnicodeString)
300     if(test1.compare(0, 14, test2) != 0 ||
301         test3.compare(0, 14, test2) != 0 ||
302         test4.compare(12, 14, test2) != 0 ||
303         test3.compare(0, 18, test1) <=0  )
304         errln("compare(offset, length, UnicodeString) fails");
305 
306     // test compare(UChar*)
307     if (test2.compare(uniChars) != 0 || test3.compare(uniChars) <= 0 || test4.compare(uniChars) >= 0)
308         errln("compare(UChar*) failed");
309 
310     // test compare(char*)
311     if (test2.compare(chars) != 0 || test3.compare(chars) <= 0 || test4.compare(chars) >= 0)
312         errln("compare(char*) failed");
313 
314     // test compare(UChar*, length)
315     if (test1.compare(uniChars, 4) <= 0 || test1.compare(uniChars, 4) <= 0)
316         errln("compare(UChar*, length) failed");
317 
318     // test compare(thisOffset, thisLength, that, thatOffset, thatLength)
319     if (test1.compare(0, 14, test2, 0, 14) != 0
320     || test1.compare(0, 14, test3, 0, 14) != 0
321     || test1.compare(0, 14, test4, 12, 14) != 0)
322         errln("1. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
323 
324     if (test1.compare(10, 4, test2, 0, 4) >= 0
325     || test1.compare(10, 4, test3, 22, 9) <= 0
326     || test1.compare(10, 4, test4, 22, 4) != 0)
327         errln("2. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
328 
329     // test compareBetween
330     if (test1.compareBetween(0, 14, test2, 0, 14) != 0 || test1.compareBetween(0, 14, test3, 0, 14) != 0
331                     || test1.compareBetween(0, 14, test4, 12, 26) != 0)
332         errln("compareBetween failed");
333 
334     if (test1.compareBetween(10, 14, test2, 0, 4) >= 0 || test1.compareBetween(10, 14, test3, 22, 31) <= 0
335                     || test1.compareBetween(10, 14, test4, 22, 26) != 0)
336         errln("compareBetween failed");
337 
338     // test compare() etc. with strings that share a buffer but are not equal
339     test2=test1; // share the buffer, length() too large for the stackBuffer
340     test2.truncate(1); // change only the length, not the buffer
341     if( test1==test2 || test1<=test2 ||
342         test1.compare(test2)<=0 ||
343         test1.compareCodePointOrder(test2)<=0 ||
344         test1.compareCodePointOrder(0, INT32_MAX, test2)<=0 ||
345         test1.compareCodePointOrder(0, INT32_MAX, test2, 0, INT32_MAX)<=0 ||
346         test1.compareCodePointOrderBetween(0, INT32_MAX, test2, 0, INT32_MAX)<=0 ||
347         test1.caseCompare(test2, U_FOLD_CASE_DEFAULT)<=0
348     ) {
349         errln("UnicodeStrings that share a buffer but have different lengths compare as equal");
350     }
351 
352     /* test compareCodePointOrder() */
353     {
354         /* these strings are in ascending order */
355         static const UChar strings[][4]={
356             { 0x61, 0 },                    /* U+0061 */
357             { 0x20ac, 0xd801, 0 },          /* U+20ac U+d801 */
358             { 0x20ac, 0xd800, 0xdc00, 0 },  /* U+20ac U+10000 */
359             { 0xd800, 0 },                  /* U+d800 */
360             { 0xd800, 0xff61, 0 },          /* U+d800 U+ff61 */
361             { 0xdfff, 0 },                  /* U+dfff */
362             { 0xff61, 0xdfff, 0 },          /* U+ff61 U+dfff */
363             { 0xff61, 0xd800, 0xdc02, 0 },  /* U+ff61 U+10002 */
364             { 0xd800, 0xdc02, 0 },          /* U+10002 */
365             { 0xd84d, 0xdc56, 0 }           /* U+23456 */
366         };
367         UnicodeString u[20]; // must be at least as long as strings[]
368         int32_t i;
369 
370         for(i=0; i<UPRV_LENGTHOF(strings); ++i) {
371             u[i]=UnicodeString(true, strings[i], -1);
372         }
373 
374         for(i=0; i<UPRV_LENGTHOF(strings)-1; ++i) {
375             if(u[i].compareCodePointOrder(u[i+1])>=0 || u[i].compareCodePointOrder(0, INT32_MAX, u[i+1].getBuffer())>=0) {
376                 errln("error: UnicodeString::compareCodePointOrder() fails for string %d and the following one\n", i);
377             }
378         }
379     }
380 
381     /* test caseCompare() */
382     {
383         static const UChar
384         _mixed[]=               { 0x61, 0x42, 0x131, 0x3a3, 0xdf,       0x130,       0x49,  0xfb03,           0xd93f, 0xdfff, 0 },
385         _otherDefault[]=        { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x69, 0x307, 0x69,  0x46, 0x66, 0x49, 0xd93f, 0xdfff, 0 },
386         _otherExcludeSpecialI[]={ 0x41, 0x62, 0x131, 0x3c3, 0x53, 0x73, 0x69,        0x131, 0x66, 0x46, 0x69, 0xd93f, 0xdfff, 0 },
387         _different[]=           { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x130,       0x49,  0x46, 0x66, 0x49, 0xd93f, 0xdffd, 0 };
388 
389         UnicodeString
390             mixed(true, _mixed, -1),
391             otherDefault(true, _otherDefault, -1),
392             otherExcludeSpecialI(true, _otherExcludeSpecialI, -1),
393             different(true, _different, -1);
394 
395         int8_t result;
396 
397         /* test caseCompare() */
398         result=mixed.caseCompare(otherDefault, U_FOLD_CASE_DEFAULT);
399         if(result!=0 || 0!=mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_DEFAULT)) {
400             errln("error: mixed.caseCompare(other, default)=%ld instead of 0\n", result);
401         }
402         result=mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
403         if(result!=0) {
404             errln("error: mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=%ld instead of 0\n", result);
405         }
406         result=mixed.caseCompare(otherDefault, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
407         if(result==0 || 0==mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_EXCLUDE_SPECIAL_I)) {
408             errln("error: mixed.caseCompare(other, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=0 instead of !=0\n");
409         }
410 
411         /* test caseCompare() */
412         result=mixed.caseCompare(different, U_FOLD_CASE_DEFAULT);
413         if(result<=0) {
414             errln("error: mixed.caseCompare(different, default)=%ld instead of positive\n", result);
415         }
416 
417         /* test caseCompare() - include the folded sharp s (U+00df) with different lengths */
418         result=mixed.caseCompare(1, 4, different, 1, 5, U_FOLD_CASE_DEFAULT);
419         if(result!=0 || 0!=mixed.caseCompareBetween(1, 5, different, 1, 6, U_FOLD_CASE_DEFAULT)) {
420             errln("error: mixed.caseCompare(mixed, 1, 4, different, 1, 5, default)=%ld instead of 0\n", result);
421         }
422 
423         /* test caseCompare() - stop in the middle of the sharp s (U+00df) */
424         result=mixed.caseCompare(1, 4, different, 1, 4, U_FOLD_CASE_DEFAULT);
425         if(result<=0) {
426             errln("error: mixed.caseCompare(1, 4, different, 1, 4, default)=%ld instead of positive\n", result);
427         }
428     }
429 
430     // test that srcLength=-1 is handled in functions that
431     // take input const UChar */int32_t srcLength (j785)
432     {
433         static const UChar u[]={ 0x61, 0x308, 0x62, 0 };
434         UnicodeString s=UNICODE_STRING("a\\u0308b", 8).unescape();
435 
436         if(s.compare(u, -1)!=0 || s.compare(0, 999, u, 0, -1)!=0) {
437             errln("error UnicodeString::compare(..., const UChar *, srcLength=-1) does not work");
438         }
439 
440         if(s.compareCodePointOrder(u, -1)!=0 || s.compareCodePointOrder(0, 999, u, 0, -1)!=0) {
441             errln("error UnicodeString::compareCodePointOrder(..., const UChar *, srcLength=-1, ...) does not work");
442         }
443 
444         if(s.caseCompare(u, -1, U_FOLD_CASE_DEFAULT)!=0 || s.caseCompare(0, 999, u, 0, -1, U_FOLD_CASE_DEFAULT)!=0) {
445             errln("error UnicodeString::caseCompare(..., const UChar *, srcLength=-1, ...) does not work");
446         }
447 
448         if(s.indexOf(u, 1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0)!=1) {
449             errln("error UnicodeString::indexOf(const UChar *, srcLength=-1, ...) does not work");
450         }
451 
452         if(s.lastIndexOf(u, 1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0)!=1) {
453             errln("error UnicodeString::lastIndexOf(const UChar *, srcLength=-1, ...) does not work");
454         }
455 
456         UnicodeString s2, s3;
457         s2.replace(0, 0, u+1, -1);
458         s3.replace(0, 0, u, 1, -1);
459         if(s.compare(1, 999, s2)!=0 || s2!=s3) {
460             errln("error UnicodeString::replace(..., const UChar *, srcLength=-1, ...) does not work");
461         }
462     }
463 }
464 
465 void
TestExtract()466 UnicodeStringTest::TestExtract()
467 {
468     UnicodeString  test1("Now is the time for all good men to come to the aid of their country.", "");
469     UnicodeString  test2;
470     UChar          test3[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
471     char           test4[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
472     UnicodeString  test5;
473     char           test6[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
474 
475     test1.extract(11, 12, test2);
476     test1.extract(11, 12, test3);
477     if (test1.extract(11, 12, test4) != 12 || test4[12] != 0) {
478         errln("UnicodeString.extract(char *) failed to return the correct size of destination buffer.");
479     }
480 
481     // test proper pinning in extractBetween()
482     test1.extractBetween(-3, 7, test5);
483     if(test5!=UNICODE_STRING("Now is ", 7)) {
484         errln("UnicodeString.extractBetween(-3, 7) did not pin properly.");
485     }
486 
487     test1.extractBetween(11, 23, test5);
488     if (test1.extract(60, 71, test6) != 9) {
489         errln("UnicodeString.extract() failed to return the correct size of destination buffer for end of buffer.");
490     }
491     if (test1.extract(11, 12, test6) != 12) {
492         errln("UnicodeString.extract() failed to return the correct size of destination buffer.");
493     }
494 
495     // convert test4 back to Unicode for comparison
496     UnicodeString test4b(test4, 12);
497 
498     if (test1.extract(11, 12, (char *)NULL) != 12) {
499         errln("UnicodeString.extract(NULL) failed to return the correct size of destination buffer.");
500     }
501     if (test1.extract(11, -1, test6) != 0) {
502         errln("UnicodeString.extract(-1) failed to stop reading the string.");
503     }
504 
505     for (int32_t i = 0; i < 12; i++) {
506         if (test1.charAt((int32_t)(11 + i)) != test2.charAt(i)) {
507             errln(UnicodeString("extracting into a UnicodeString failed at position ") + i);
508             break;
509         }
510         if (test1.charAt((int32_t)(11 + i)) != test3[i]) {
511             errln(UnicodeString("extracting into an array of UChar failed at position ") + i);
512             break;
513         }
514         if (((char)test1.charAt((int32_t)(11 + i))) != test4b.charAt(i)) {
515             errln(UnicodeString("extracting into an array of char failed at position ") + i);
516             break;
517         }
518         if (test1.charAt((int32_t)(11 + i)) != test5.charAt(i)) {
519             errln(UnicodeString("extracting with extractBetween failed at position ") + i);
520             break;
521         }
522     }
523 
524     // test preflighting and overflows with invariant conversion
525     if (test1.extract(0, 10, (char *)NULL, "") != 10) {
526         errln("UnicodeString.extract(0, 10, (char *)NULL, \"\") != 10");
527     }
528 
529     test4[2] = (char)0xff;
530     if (test1.extract(0, 10, test4, 2, "") != 10) {
531         errln("UnicodeString.extract(0, 10, test4, 2, \"\") != 10");
532     }
533     if (test4[2] != (char)0xff) {
534         errln("UnicodeString.extract(0, 10, test4, 2, \"\") overwrote test4[2]");
535     }
536 
537     {
538         // test new, NUL-terminating extract() function
539         UnicodeString s("terminate", "");
540         UChar dest[20]={
541             0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5,
542             0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5
543         };
544         UErrorCode errorCode;
545         int32_t length;
546 
547         errorCode=U_ZERO_ERROR;
548         length=s.extract((UChar *)NULL, 0, errorCode);
549         if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) {
550             errln("UnicodeString.extract(NULL, 0)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)", length, s.length(), u_errorName(errorCode));
551         }
552 
553         errorCode=U_ZERO_ERROR;
554         length=s.extract(dest, s.length()-1, errorCode);
555         if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) {
556             errln("UnicodeString.extract(dest too short)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)",
557                 length, u_errorName(errorCode), s.length());
558         }
559 
560         errorCode=U_ZERO_ERROR;
561         length=s.extract(dest, s.length(), errorCode);
562         if(errorCode!=U_STRING_NOT_TERMINATED_WARNING || length!=s.length()) {
563             errln("UnicodeString.extract(dest just right without NUL)==%d (%s) expected %d (U_STRING_NOT_TERMINATED_WARNING)",
564                 length, u_errorName(errorCode), s.length());
565         }
566         if(dest[length-1]!=s[length-1] || dest[length]!=0xa5) {
567             errln("UnicodeString.extract(dest just right without NUL) did not extract the string correctly");
568         }
569 
570         errorCode=U_ZERO_ERROR;
571         length=s.extract(dest, s.length()+1, errorCode);
572         if(errorCode!=U_ZERO_ERROR || length!=s.length()) {
573             errln("UnicodeString.extract(dest large enough)==%d (%s) expected %d (U_ZERO_ERROR)",
574                 length, u_errorName(errorCode), s.length());
575         }
576         if(dest[length-1]!=s[length-1] || dest[length]!=0 || dest[length+1]!=0xa5) {
577             errln("UnicodeString.extract(dest large enough) did not extract the string correctly");
578         }
579     }
580 
581     {
582         // test new UConverter extract() and constructor
583         UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
584         char buffer[32];
585         static const char expect[]={
586             (char)0xf0, (char)0xaf, (char)0xa6, (char)0x99,
587             (char)0xf0, (char)0x9d, (char)0x85, (char)0x9f,
588             (char)0xc3, (char)0x84,
589             (char)0xe1, (char)0xbb, (char)0x90
590         };
591         UErrorCode errorCode=U_ZERO_ERROR;
592         UConverter *cnv=ucnv_open("UTF-8", &errorCode);
593         int32_t length;
594 
595         if(U_SUCCESS(errorCode)) {
596             // test preflighting
597             if( (length=s.extract(NULL, 0, cnv, errorCode))!=13 ||
598                 errorCode!=U_BUFFER_OVERFLOW_ERROR
599             ) {
600                 errln("UnicodeString::extract(NULL, UConverter) preflighting failed (length=%ld, %s)",
601                       length, u_errorName(errorCode));
602             }
603             errorCode=U_ZERO_ERROR;
604             if( (length=s.extract(buffer, 2, cnv, errorCode))!=13 ||
605                 errorCode!=U_BUFFER_OVERFLOW_ERROR
606             ) {
607                 errln("UnicodeString::extract(too small, UConverter) preflighting failed (length=%ld, %s)",
608                       length, u_errorName(errorCode));
609             }
610 
611             // try error cases
612             errorCode=U_ZERO_ERROR;
613             if( s.extract(NULL, 2, cnv, errorCode)==13 || U_SUCCESS(errorCode)) {
614                 errln("UnicodeString::extract(UConverter) succeeded with an illegal destination");
615             }
616             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
617             if( s.extract(NULL, 0, cnv, errorCode)==13 || U_SUCCESS(errorCode)) {
618                 errln("UnicodeString::extract(UConverter) succeeded with a previous error code");
619             }
620             errorCode=U_ZERO_ERROR;
621 
622             // extract for real
623             if( (length=s.extract(buffer, sizeof(buffer), cnv, errorCode))!=13 ||
624                 uprv_memcmp(buffer, expect, 13)!=0 ||
625                 buffer[13]!=0 ||
626                 U_FAILURE(errorCode)
627             ) {
628                 errln("UnicodeString::extract(UConverter) conversion failed (length=%ld, %s)",
629                       length, u_errorName(errorCode));
630             }
631             // Test again with just the converter name.
632             if( (length=s.extract(0, s.length(), buffer, sizeof(buffer), "UTF-8"))!=13 ||
633                 uprv_memcmp(buffer, expect, 13)!=0 ||
634                 buffer[13]!=0 ||
635                 U_FAILURE(errorCode)
636             ) {
637                 errln("UnicodeString::extract(\"UTF-8\") conversion failed (length=%ld, %s)",
638                       length, u_errorName(errorCode));
639             }
640 
641             // try the constructor
642             UnicodeString t(expect, sizeof(expect), cnv, errorCode);
643             if(U_FAILURE(errorCode) || s!=t) {
644                 errln("UnicodeString(UConverter) conversion failed (%s)",
645                       u_errorName(errorCode));
646             }
647 
648             ucnv_close(cnv);
649         }
650     }
651 }
652 
// Exercises replace()/replaceBetween(), remove()/removeBetween(), setCharAt()
// with operator[], and the no-argument remove(); reports mismatches via errln().
// The steps run on one string (test1) in sequence, so each expectation depends
// on all earlier edits having succeeded.
653 void
TestRemoveReplace()654 UnicodeStringTest::TestRemoveReplace()
655 {
656     UnicodeString   test1("The rain in Spain stays mainly on the plain");
657     UnicodeString   test2("eat SPAMburgers!");
658     UChar         test3[] = { 0x53, 0x50, 0x41, 0x4d, 0x4d, 0 };
659     char            test4[] = "SPAM";
    // test5 is just another name for test1; it is used for the reads in the loop below.
660     UnicodeString&  test5 = test1;
661 
    // One replacement per overload: UnicodeString substring, UChar* + length,
    // NUL-terminated UChar*, char*, and replaceBetween().
662     test1.replace(4, 4, test2, 4, 4);
663     test1.replace(12, 5, test3, 4);
    // Cut test3 down to "SPAM" before it is used as a NUL-terminated source.
664     test3[4] = 0;
665     test1.replace(17, 4, test3);
666     test1.replace(23, 4, test4);
667     test1.replaceBetween(37, 42, test2, 4, 8);
668 
669     if (test1 != "The SPAM in SPAM SPAMs SPAMly on the SPAM")
670         errln("One of the replace methods failed:\n"
671               "  expected \"The SPAM in SPAM SPAMs SPAMly on the SPAM\",\n"
672               "  got \"" + test1 + "\"");
673 
674     test1.remove(21, 1);
675     test1.removeBetween(26, 28);
676 
677     if (test1 != "The SPAM in SPAM SPAM SPAM on the SPAM")
678         errln("One of the remove methods failed:\n"
679               "  expected \"The SPAM in SPAM SPAM SPAM on the SPAM\",\n"
680               "  got \"" + test1 + "\"");
681 
    // Overwrite every code unit that is not 'S', 'P', 'A', 'M' or a space with 'x'.
682     for (int32_t i = 0; i < test1.length(); i++) {
683         if (test5[i] != 0x53 && test5[i] != 0x50 && test5[i] != 0x41 && test5[i] != 0x4d && test5[i] != 0x20) {
684             test1.setCharAt(i, 0x78);
685         }
686     }
687 
688     if (test1 != "xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM")
        // This check follows the setCharAt() loop, so the message names that step.
689         errln("setCharAt() or operator[] failed:\n"
690               "  expected \"xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM\",\n"
691               "  got \"" + test1 + "\"");
692 
693     test1.remove();
694     if (test1.length() != 0)
695         errln("Remove() failed: expected empty string, got \"" + test1 + "\"");
696 }
697 
698 void
TestSearching()699 UnicodeStringTest::TestSearching()
700 {
701     UnicodeString test1("test test ttest tetest testesteststt");
702     UnicodeString test2("test");
703     UChar testChar = 0x74;
704 
705     UChar32 testChar32 = 0x20402;
706     UChar testData[]={
707         //   0       1       2       3       4       5       6       7
708         0xd841, 0xdc02, 0x0071, 0xdc02, 0xd841, 0x0071, 0xd841, 0xdc02,
709 
710         //   8       9      10      11      12      13      14      15
711         0x0071, 0x0072, 0xd841, 0xdc02, 0x0071, 0xd841, 0xdc02, 0x0071,
712 
713         //  16      17      18      19
714         0xdc02, 0xd841, 0x0073, 0x0000
715     };
716     UnicodeString test3(testData);
717     UnicodeString test4(testChar32);
718 
719     uint16_t occurrences = 0;
720     int32_t startPos = 0;
721     for ( ;
722           startPos != -1 && startPos < test1.length();
723           (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
724         ;
725     if (occurrences != 6)
726         errln(UnicodeString("indexOf failed: expected to find 6 occurrences, found ") + occurrences);
727 
728     for ( occurrences = 0, startPos = 10;
729           startPos != -1 && startPos < test1.length();
730           (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
731         ;
732     if (occurrences != 4)
733         errln(UnicodeString("indexOf with starting offset failed: "
734                             "expected to find 4 occurrences, found ") + occurrences);
735 
736     int32_t endPos = 28;
737     for ( occurrences = 0, startPos = 5;
738           startPos != -1 && startPos < test1.length();
739           (startPos = test1.indexOf(test2, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
740         ;
741     if (occurrences != 4)
742         errln(UnicodeString("indexOf with starting and ending offsets failed: "
743                             "expected to find 4 occurrences, found ") + occurrences);
744 
745     //using UChar32 string
746     for ( startPos=0, occurrences=0;
747           startPos != -1 && startPos < test3.length();
748           (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
749         ;
750     if (occurrences != 4)
751         errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences);
752 
753     for ( startPos=10, occurrences=0;
754           startPos != -1 && startPos < test3.length();
755           (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
756         ;
757     if (occurrences != 2)
758         errln(UnicodeString("indexOf failed: expected to find 2 occurrences, found ") + occurrences);
759     //---
760 
761     for ( occurrences = 0, startPos = 0;
762           startPos != -1 && startPos < test1.length();
763           (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
764         ;
765     if (occurrences != 16)
766         errln(UnicodeString("indexOf with character failed: "
767                             "expected to find 16 occurrences, found ") + occurrences);
768 
769     for ( occurrences = 0, startPos = 10;
770           startPos != -1 && startPos < test1.length();
771           (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
772         ;
773     if (occurrences != 12)
774         errln(UnicodeString("indexOf with character & start offset failed: "
775                             "expected to find 12 occurrences, found ") + occurrences);
776 
777     for ( occurrences = 0, startPos = 5, endPos = 28;
778           startPos != -1 && startPos < test1.length();
779           (startPos = test1.indexOf(testChar, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
780         ;
781     if (occurrences != 10)
782         errln(UnicodeString("indexOf with character & start & end offsets failed: "
783                             "expected to find 10 occurrences, found ") + occurrences);
784 
785     //testing for UChar32
786     UnicodeString subString;
787     for( occurrences =0, startPos=0; startPos < test3.length(); startPos +=1){
788         subString.append(test3, startPos, test3.length());
789         if(subString.indexOf(testChar32) != -1 ){
790              ++occurrences;
791         }
792         subString.remove();
793     }
794     if (occurrences != 14)
795         errln((UnicodeString)"indexOf failed: expected to find 14 occurrences, found " + occurrences);
796 
797     for ( occurrences = 0, startPos = 0;
798           startPos != -1 && startPos < test3.length();
799           (startPos = test3.indexOf(testChar32, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
800         ;
801     if (occurrences != 4)
802         errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences);
803 
804     endPos=test3.length();
805     for ( occurrences = 0, startPos = 5;
806           startPos != -1 && startPos < test3.length();
807           (startPos = test3.indexOf(testChar32, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
808         ;
809     if (occurrences != 3)
810         errln((UnicodeString)"indexOf with character & start & end offsets failed: expected to find 2 occurrences, found " + occurrences);
811     //---
812 
813     if(test1.lastIndexOf(test2)!=29) {
814         errln("test1.lastIndexOf(test2)!=29");
815     }
816 
817     if(test1.lastIndexOf(test2, 15)!=29 || test1.lastIndexOf(test2, 29)!=29 || test1.lastIndexOf(test2, 30)!=-1) {
818         errln("test1.lastIndexOf(test2, start) failed");
819     }
820 
821     for ( occurrences = 0, startPos = 32;
822           startPos != -1;
823           (startPos = test1.lastIndexOf(test2, 5, startPos - 5)) != -1 ? ++occurrences : 0)
824         ;
825     if (occurrences != 4)
826         errln(UnicodeString("lastIndexOf with starting and ending offsets failed: "
827                             "expected to find 4 occurrences, found ") + occurrences);
828 
829     for ( occurrences = 0, startPos = 32;
830           startPos != -1;
831           (startPos = test1.lastIndexOf(testChar, 5, startPos - 5)) != -1 ? ++occurrences : 0)
832         ;
833     if (occurrences != 11)
834         errln(UnicodeString("lastIndexOf with character & start & end offsets failed: "
835                             "expected to find 11 occurrences, found ") + occurrences);
836 
837     //testing UChar32
838     startPos=test3.length();
839     for ( occurrences = 0;
840           startPos != -1;
841           (startPos = test3.lastIndexOf(testChar32, 5, startPos - 5)) != -1 ? ++occurrences : 0)
842         ;
843     if (occurrences != 3)
844         errln((UnicodeString)"lastIndexOf with character & start & end offsets failed: expected to find 3 occurrences, found " + occurrences);
845 
846 
847     for ( occurrences = 0, endPos = test3.length();  endPos > 0; endPos -= 1){
848         subString.remove();
849         subString.append(test3, 0, endPos);
850         if(subString.lastIndexOf(testChar32) != -1 ){
851             ++occurrences;
852         }
853     }
854     if (occurrences != 18)
855         errln((UnicodeString)"indexOf failed: expected to find 18 occurrences, found " + occurrences);
856     //---
857 
858     // test that indexOf(UChar32) and lastIndexOf(UChar32)
859     // do not find surrogate code points when they are part of matched pairs
860     // (= part of supplementary code points)
861     // Jitterbug 1542
862     if(test3.indexOf((UChar32)0xd841) != 4 || test3.indexOf((UChar32)0xdc02) != 3) {
863         errln("error: UnicodeString::indexOf(UChar32 surrogate) finds a partial supplementary code point");
864     }
865     if( UnicodeString(test3, 0, 17).lastIndexOf((UChar)0xd841, 0) != 4 ||
866         UnicodeString(test3, 0, 17).lastIndexOf((UChar32)0xd841, 2) != 4 ||
867         test3.lastIndexOf((UChar32)0xd841, 0, 17) != 4 || test3.lastIndexOf((UChar32)0xdc02, 0, 17) != 16
868     ) {
869         errln("error: UnicodeString::lastIndexOf(UChar32 surrogate) finds a partial supplementary code point");
870     }
871 }
872 
873 void
TestSpacePadding()874 UnicodeStringTest::TestSpacePadding()
875 {
876     UnicodeString test1("hello");
877     UnicodeString test2("   there");
878     UnicodeString test3("Hi!  How ya doin'?  Beautiful day, isn't it?");
879     UnicodeString test4;
880     UBool returnVal;
881     UnicodeString expectedValue;
882 
883     returnVal = test1.padLeading(15);
884     expectedValue = "          hello";
885     if (returnVal == false || test1 != expectedValue)
886         errln("padLeading() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
887 
888     returnVal = test2.padTrailing(15);
889     expectedValue = "   there       ";
890     if (returnVal == false || test2 != expectedValue)
891         errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
892 
893     expectedValue = test3;
894     returnVal = test3.padTrailing(15);
895     if (returnVal == true || test3 != expectedValue)
896         errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
897 
898     expectedValue = "hello";
899     test4.setTo(test1).trim();
900 
901     if (test4 != expectedValue || test1 == expectedValue || test4 != expectedValue)
902         errln("trim(UnicodeString&) failed");
903 
904     test1.trim();
905     if (test1 != expectedValue)
906         errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
907 
908     test2.trim();
909     expectedValue = "there";
910     if (test2 != expectedValue)
911         errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
912 
913     test3.trim();
914     expectedValue = "Hi!  How ya doin'?  Beautiful day, isn't it?";
915     if (test3 != expectedValue)
916         errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
917 
918     returnVal = test1.truncate(15);
919     expectedValue = "hello";
920     if (returnVal == true || test1 != expectedValue)
921         errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
922 
923     returnVal = test2.truncate(15);
924     expectedValue = "there";
925     if (returnVal == true || test2 != expectedValue)
926         errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
927 
928     returnVal = test3.truncate(15);
929     expectedValue = "Hi!  How ya doi";
930     if (returnVal == false || test3 != expectedValue)
931         errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
932 }
933 
934 void
TestPrefixAndSuffix()935 UnicodeStringTest::TestPrefixAndSuffix()
936 {
937     UnicodeString test1("Now is the time for all good men to come to the aid of their country.");
938     UnicodeString test2("Now");
939     UnicodeString test3("country.");
940     UnicodeString test4("count");
941 
942     if (!test1.startsWith(test2) || !test1.startsWith(test2, 0, test2.length())) {
943         errln("startsWith() failed: \"" + test2 + "\" should be a prefix of \"" + test1 + "\".");
944     }
945 
946     if (test1.startsWith(test3) ||
947         test1.startsWith(test3.getBuffer(), test3.length()) ||
948         test1.startsWith(test3.getTerminatedBuffer(), 0, -1)
949     ) {
950         errln("startsWith() failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test1 + "\".");
951     }
952 
953     if (test1.endsWith(test2)) {
954         errln("endsWith() failed: \"" + test2 + "\" shouldn't be a suffix of \"" + test1 + "\".");
955     }
956 
957     if (!test1.endsWith(test3)) {
958         errln("endsWith(test3) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
959     }
960     if (!test1.endsWith(test3, 0, INT32_MAX)) {
961         errln("endsWith(test3, 0, INT32_MAX) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
962     }
963 
964     if(!test1.endsWith(test3.getBuffer(), test3.length())) {
965         errln("endsWith(test3.getBuffer(), test3.length()) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
966     }
967     if(!test1.endsWith(test3.getTerminatedBuffer(), 0, -1)) {
968         errln("endsWith(test3.getTerminatedBuffer(), 0, -1) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
969     }
970 
971     if (!test3.startsWith(test4)) {
972         errln("endsWith(test4) failed: \"" + test4 + "\" should be a prefix of \"" + test3 + "\".");
973     }
974 
975     if (test4.startsWith(test3)) {
976         errln("startsWith(test3) failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test4 + "\".");
977     }
978 }
979 
980 void
TestStartsWithAndEndsWithNulTerminated()981 UnicodeStringTest::TestStartsWithAndEndsWithNulTerminated() {
982     UnicodeString test("abcde");
983     const UChar ab[] = { 0x61, 0x62, 0 };
984     const UChar de[] = { 0x64, 0x65, 0 };
985     assertTrue("abcde.startsWith(ab, -1)", test.startsWith(ab, -1));
986     assertTrue("abcde.startsWith(ab, 0, -1)", test.startsWith(ab, 0, -1));
987     assertTrue("abcde.endsWith(de, -1)", test.endsWith(de, -1));
988     assertTrue("abcde.endsWith(de, 0, -1)", test.endsWith(de, 0, -1));
989 }
990 
991 void
TestFindAndReplace()992 UnicodeStringTest::TestFindAndReplace()
993 {
994     UnicodeString test1("One potato, two potato, three potato, four\n");
995     UnicodeString test2("potato");
996     UnicodeString test3("MISSISSIPPI");
997 
998     UnicodeString expectedValue;
999 
1000     test1.findAndReplace(test2, test3);
1001     expectedValue = "One MISSISSIPPI, two MISSISSIPPI, three MISSISSIPPI, four\n";
1002     if (test1 != expectedValue)
1003         errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
1004     test1.findAndReplace(2, 32, test3, test2);
1005     expectedValue = "One potato, two potato, three MISSISSIPPI, four\n";
1006     if (test1 != expectedValue)
1007         errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
1008 }
1009 
1010 void
TestReverse()1011 UnicodeStringTest::TestReverse()
1012 {
1013     UnicodeString test("backwards words say to used I");
1014 
1015     test.reverse();
1016     test.reverse(2, 4);
1017     test.reverse(7, 2);
1018     test.reverse(10, 3);
1019     test.reverse(14, 5);
1020     test.reverse(20, 9);
1021 
1022     if (test != "I used to say words backwards")
1023         errln("reverse() failed:  Expected \"I used to say words backwards\",\n got \""
1024             + test + "\"");
1025 
1026     test=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
1027     test.reverse();
1028     if(test.char32At(0)!=0x1ed0 || test.char32At(1)!=0xc4 || test.char32At(2)!=0x1d15f || test.char32At(4)!=0x2f999) {
1029         errln("reverse() failed with supplementary characters");
1030     }
1031 
1032     // Test case for ticket #8091:
1033     // UnicodeString::reverse() failed to see a lead surrogate in the middle of
1034     // an odd-length string that contains no other lead surrogates.
1035     test=UNICODE_STRING_SIMPLE("ab\\U0001F4A9e").unescape();
1036     UnicodeString expected=UNICODE_STRING_SIMPLE("e\\U0001F4A9ba").unescape();
1037     test.reverse();
1038     if(test!=expected) {
1039         errln("reverse() failed with only lead surrogate in the middle");
1040     }
1041 }
1042 
1043 void
TestMiscellaneous()1044 UnicodeStringTest::TestMiscellaneous()
1045 {
1046     UnicodeString   test1("This is a test");
1047     UnicodeString   test2("This is a test");
1048     UnicodeString   test3("Me too!");
1049 
1050     // test getBuffer(minCapacity) and releaseBuffer()
1051     test1=UnicodeString(); // make sure that it starts with its stackBuffer
1052     UChar *p=test1.getBuffer(20);
1053     if(test1.getCapacity()<20) {
1054         errln("UnicodeString::getBuffer(20).getCapacity()<20");
1055     }
1056 
1057     test1.append((UChar)7); // must not be able to modify the string here
1058     test1.setCharAt(3, 7);
1059     test1.reverse();
1060     if( test1.length()!=0 ||
1061         test1.charAt(0)!=0xffff || test1.charAt(3)!=0xffff ||
1062         test1.getBuffer(10)!=0 || test1.getBuffer()!=0
1063     ) {
1064         errln("UnicodeString::getBuffer(minCapacity) allows read or write access to the UnicodeString");
1065     }
1066 
1067     p[0]=1;
1068     p[1]=2;
1069     p[2]=3;
1070     test1.releaseBuffer(3);
1071     test1.append((UChar)4);
1072 
1073     if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) {
1074         errln("UnicodeString::releaseBuffer(newLength) does not properly reallow access to the UnicodeString");
1075     }
1076 
1077     // test releaseBuffer() without getBuffer(minCapacity) - must not have any effect
1078     test1.releaseBuffer(1);
1079     if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) {
1080         errln("UnicodeString::releaseBuffer(newLength) without getBuffer(minCapacity) changed the UnicodeString");
1081     }
1082 
1083     // test getBuffer(const)
1084     const UChar *q=test1.getBuffer(), *r=test1.getBuffer();
1085     if( test1.length()!=4 ||
1086         q[0]!=1 || q[1]!=2 || q[2]!=3 || q[3]!=4 ||
1087         r[0]!=1 || r[1]!=2 || r[2]!=3 || r[3]!=4
1088     ) {
1089         errln("UnicodeString::getBuffer(const) does not return a usable buffer pointer");
1090     }
1091 
1092     // test releaseBuffer() with a NUL-terminated buffer
1093     test1.getBuffer(20)[2]=0;
1094     test1.releaseBuffer(); // implicit -1
1095     if(test1.length()!=2 || test1.charAt(0)!=1 || test1.charAt(1) !=2) {
1096         errln("UnicodeString::releaseBuffer(-1) does not properly set the length of the UnicodeString");
1097     }
1098 
1099     // test releaseBuffer() with a non-NUL-terminated buffer
1100     p=test1.getBuffer(256);
1101     for(int32_t i=0; i<test1.getCapacity(); ++i) {
1102         p[i]=(UChar)1;      // fill the buffer with all non-NUL code units
1103     }
1104     test1.releaseBuffer();  // implicit -1
1105     if(test1.length()!=test1.getCapacity() || test1.charAt(1)!=1 || test1.charAt(100)!=1 || test1.charAt(test1.getCapacity()-1)!=1) {
1106         errln("UnicodeString::releaseBuffer(-1 but no NUL) does not properly set the length of the UnicodeString");
1107     }
1108 
1109     // test getTerminatedBuffer()
1110     test1=UnicodeString("This is another test.", "");
1111     test2=UnicodeString("This is another test.", "");
1112     q=test1.getTerminatedBuffer();
1113     if(q[test1.length()]!=0 || test1!=test2 || test2.compare(q, -1)!=0) {
1114         errln("getTerminatedBuffer()[length]!=0");
1115     }
1116 
1117     const UChar u[]={ 5, 6, 7, 8, 0 };
1118     test1.setTo(false, u, 3);
1119     q=test1.getTerminatedBuffer();
1120     if(q==u || q[0]!=5 || q[1]!=6 || q[2]!=7 || q[3]!=0) {
1121         errln("UnicodeString(u[3]).getTerminatedBuffer() returns a bad buffer");
1122     }
1123 
1124     test1.setTo(true, u, -1);
1125     q=test1.getTerminatedBuffer();
1126     if(q!=u || test1.length()!=4 || q[3]!=8 || q[4]!=0) {
1127         errln("UnicodeString(u[-1]).getTerminatedBuffer() returns a bad buffer");
1128     }
1129 
1130     // NOTE: Some compilers will optimize u"la" to point to the same static memory
1131     // as u" lila", offset by 3 code units
1132     test1=UnicodeString(true, u"la", 2);
1133     test1.append(UnicodeString(true, u" lila", 5).getTerminatedBuffer(), 0, -1);
1134     assertEquals("UnicodeString::append(const UChar *, start, length) failed",
1135         u"la lila", test1);
1136 
1137     test1.insert(3, UnicodeString(true, u"dudum ", 6), 0, INT32_MAX);
1138     assertEquals("UnicodeString::insert(start, const UniStr &, start, length) failed",
1139         u"la dudum lila", test1);
1140 
1141     static const UChar ucs[]={ 0x68, 0x6d, 0x20, 0 };
1142     test1.insert(9, ucs, -1);
1143     assertEquals("UnicodeString::insert(start, const UChar *, length) failed",
1144         u"la dudum hm lila", test1);
1145 
1146     test1.replace(9, 2, (UChar)0x2b);
1147     assertEquals("UnicodeString::replace(start, length, UChar) failed",
1148         u"la dudum + lila", test1);
1149 
1150     if(test1.hasMetaData() || UnicodeString().hasMetaData()) {
1151         errln("UnicodeString::hasMetaData() returns true");
1152     }
1153 
1154     // test getTerminatedBuffer() on a truncated, shared, heap-allocated string
1155     test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
1156     test1.truncate(36);  // ensure length()<getCapacity()
1157     test2=test1;  // share the buffer
1158     test1.truncate(5);
1159     if(test1.length()!=5 || test1.getTerminatedBuffer()[5]!=0) {
1160         errln("UnicodeString(shared buffer).truncate() failed");
1161     }
1162     if(test2.length()!=36 || test2[5]!=0x66 || u_strlen(test2.getTerminatedBuffer())!=36) {
1163         errln("UnicodeString(shared buffer).truncate().getTerminatedBuffer() "
1164               "modified another copy of the string!");
1165     }
1166     test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
1167     test1.truncate(36);  // ensure length()<getCapacity()
1168     test2=test1;  // share the buffer
1169     test1.remove();
1170     if(test1.length()!=0 || test1.getTerminatedBuffer()[0]!=0) {
1171         errln("UnicodeString(shared buffer).remove() failed");
1172     }
1173     if(test2.length()!=36 || test2[0]!=0x61 || u_strlen(test2.getTerminatedBuffer())!=36) {
1174         errln("UnicodeString(shared buffer).remove().getTerminatedBuffer() "
1175               "modified another copy of the string!");
1176     }
1177 
1178     // ticket #9740
1179     test1.setTo(true, ucs, 3);
1180     assertEquals("length of read-only alias", 3, test1.length());
1181     test1.trim();
1182     assertEquals("length of read-only alias after trim()", 2, test1.length());
1183     assertEquals("length of terminated buffer of read-only alias + trim()",
1184                  2, u_strlen(test1.getTerminatedBuffer()));
1185 }
1186 
1187 void
TestStackAllocation()1188 UnicodeStringTest::TestStackAllocation()
1189 {
1190     UChar           testString[] ={
1191         0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x63, 0x72, 0x61, 0x7a, 0x79, 0x20, 0x74, 0x65, 0x73, 0x74, 0x2e, 0 };
1192     UChar           guardWord = 0x4DED;
1193     UnicodeString*  test = 0;
1194 
1195     test = new  UnicodeString(testString);
1196     if (*test != "This is a crazy test.")
1197         errln("Test string failed to initialize properly.");
1198     if (guardWord != 0x04DED)
1199         errln("Test string initialization overwrote guard word!");
1200 
1201     test->insert(8, "only ");
1202     test->remove(15, 6);
1203     if (*test != "This is only a test.")
1204         errln("Manipulation of test string failed to work right.");
1205     if (guardWord != 0x4DED)
1206         errln("Manipulation of test string overwrote guard word!");
1207 
1208     // we have to deinitialize and release the backing store by calling the destructor
1209     // explicitly, since we can't overload operator delete
1210     delete test;
1211 
1212     UChar workingBuffer[] = {
1213         0x4e, 0x6f, 0x77, 0x20, 0x69, 0x73, 0x20, 0x74, 0x68, 0x65, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x20,
1214         0x66, 0x6f, 0x72, 0x20, 0x61, 0x6c, 0x6c, 0x20, 0x6d, 0x65, 0x6e, 0x20, 0x74, 0x6f, 0x20,
1215         0x63, 0x6f, 0x6d, 0x65, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1216         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1217         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1218     UChar guardWord2 = 0x4DED;
1219 
1220     test = new UnicodeString(workingBuffer, 35, 100);
1221     if (*test != "Now is the time for all men to come")
1222         errln("Stack-allocated backing store failed to initialize correctly.");
1223     if (guardWord2 != 0x4DED)
1224         errln("Stack-allocated backing store overwrote guard word!");
1225 
1226     test->insert(24, "good ");
1227     if (*test != "Now is the time for all good men to come")
1228         errln("insert() on stack-allocated UnicodeString didn't work right");
1229     if (guardWord2 != 0x4DED)
1230         errln("insert() on stack-allocated UnicodeString overwrote guard word!");
1231 
1232     if (workingBuffer[24] != 0x67)
1233         errln("insert() on stack-allocated UnicodeString didn't affect backing store");
1234 
1235     *test += " to the aid of their country.";
1236     if (*test != "Now is the time for all good men to come to the aid of their country.")
1237         errln("Stack-allocated UnicodeString overflow didn't work");
1238     if (guardWord2 != 0x4DED)
1239         errln("Stack-allocated UnicodeString overflow overwrote guard word!");
1240 
1241     *test = "ha!";
1242     if (*test != "ha!")
1243         errln("Assignment to stack-allocated UnicodeString didn't work");
1244     if (workingBuffer[0] != 0x4e)
1245         errln("Change to UnicodeString after overflow are still affecting original buffer");
1246     if (guardWord2 != 0x4DED)
1247         errln("Change to UnicodeString after overflow overwrote guard word!");
1248 
1249     // test read-only aliasing with setTo()
1250     workingBuffer[0] = 0x20ac;
1251     workingBuffer[1] = 0x125;
1252     workingBuffer[2] = 0;
1253     test->setTo(true, workingBuffer, 2);
1254     if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x125) {
1255         errln("UnicodeString.setTo(readonly alias) does not alias correctly");
1256     }
1257 
1258     UnicodeString *c=test->clone();
1259 
1260     workingBuffer[1] = 0x109;
1261     if(test->charAt(1) != 0x109) {
1262         errln("UnicodeString.setTo(readonly alias) made a copy: did not see change in buffer");
1263     }
1264 
1265     if(c->length() != 2 || c->charAt(1) != 0x125) {
1266         errln("clone(alias) did not copy the buffer");
1267     }
1268     delete c;
1269 
1270     test->setTo(true, workingBuffer, -1);
1271     if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x109) {
1272         errln("UnicodeString.setTo(readonly alias, length -1) does not alias correctly");
1273     }
1274 
1275     test->setTo(false, workingBuffer, -1);
1276     if(!test->isBogus()) {
1277         errln("UnicodeString.setTo(unterminated readonly alias, length -1) does not result in isBogus()");
1278     }
1279 
1280     delete test;
1281 
1282     test=new UnicodeString();
1283     UChar buffer[]={0x0061, 0x0062, 0x20ac, 0x0043, 0x0042, 0x0000};
1284     test->setTo(buffer, 4, 10);
1285     if(test->length() !=4 || test->charAt(0) != 0x0061 || test->charAt(1) != 0x0062 ||
1286         test->charAt(2) != 0x20ac || test->charAt(3) != 0x0043){
1287         errln((UnicodeString)"UnicodeString.setTo(UChar*, length, capacity) does not work correctly\n" + prettify(*test));
1288     }
1289     delete test;
1290 
1291 
1292     // test the UChar32 constructor
1293     UnicodeString c32Test((UChar32)0x10ff2a);
1294     if( c32Test.length() != U16_LENGTH(0x10ff2a) ||
1295         c32Test.char32At(c32Test.length() - 1) != 0x10ff2a
1296     ) {
1297         errln("The UnicodeString(UChar32) constructor does not work with a 0x10ff2a filler");
1298     }
1299 
1300     // test the (new) capacity constructor
1301     UnicodeString capTest(5, (UChar32)0x2a, 5);
1302     if( capTest.length() != 5 * U16_LENGTH(0x2a) ||
1303         capTest.char32At(0) != 0x2a ||
1304         capTest.char32At(4) != 0x2a
1305     ) {
1306         errln("The UnicodeString capacity constructor does not work with an ASCII filler");
1307     }
1308 
1309     capTest = UnicodeString(5, (UChar32)0x10ff2a, 5);
1310     if( capTest.length() != 5 * U16_LENGTH(0x10ff2a) ||
1311         capTest.char32At(0) != 0x10ff2a ||
1312         capTest.char32At(4) != 0x10ff2a
1313     ) {
1314         errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
1315     }
1316 
1317     capTest = UnicodeString(5, (UChar32)0, 0);
1318     if(capTest.length() != 0) {
1319         errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
1320     }
1321 }
1322 
1323 /**
1324  * Test the unescape() function.
1325  */
TestUnescape(void)1326 void UnicodeStringTest::TestUnescape(void) {
1327     UnicodeString IN("abc\\u4567 \\n\\r \\U00101234xyz\\x1\\x{5289}\\x1b", -1, US_INV);
1328     UnicodeString OUT("abc");
1329     OUT.append((UChar)0x4567);
1330     OUT.append(" ");
1331     OUT.append((UChar)0xA);
1332     OUT.append((UChar)0xD);
1333     OUT.append(" ");
1334     OUT.append((UChar32)0x00101234);
1335     OUT.append("xyz");
1336     OUT.append((UChar32)1).append((UChar32)0x5289).append((UChar)0x1b);
1337     UnicodeString result = IN.unescape();
1338     if (result != OUT) {
1339         errln("FAIL: " + prettify(IN) + ".unescape() -> " +
1340               prettify(result) + ", expected " +
1341               prettify(OUT));
1342     }
1343 
1344     // test that an empty string is returned in case of an error
1345     if (!UNICODE_STRING("wrong \\u sequence", 17).unescape().isEmpty()) {
1346         errln("FAIL: unescaping of a string with an illegal escape sequence did not return an empty string");
1347     }
1348 
1349     // ICU-21648 limit backslash-uhhhh escapes to ASCII hex digits
1350     UnicodeString euro = UnicodeString(u"\\u20aC").unescape();
1351     assertEquals("ASCII Euro", u"€", euro);
1352     UnicodeString nonASCIIEuro = UnicodeString(u"\\u୨෦aC").unescape();
1353     assertTrue("unescape() accepted non-ASCII digits", nonASCIIEuro.isEmpty());
1354 }
1355 
1356 /* test code point counting functions --------------------------------------- */
1357 
1358 /* reference implementation of UnicodeString::hasMoreChar32Than() */
1359 static int32_t
_refUnicodeStringHasMoreChar32Than(const UnicodeString & s,int32_t start,int32_t length,int32_t number)1360 _refUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) {
1361     int32_t count=s.countChar32(start, length);
1362     return count>number;
1363 }
1364 
1365 /* compare the real function against the reference */
1366 void
_testUnicodeStringHasMoreChar32Than(const UnicodeString & s,int32_t start,int32_t length,int32_t number)1367 UnicodeStringTest::_testUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) {
1368     if(s.hasMoreChar32Than(start, length, number)!=_refUnicodeStringHasMoreChar32Than(s, start, length, number)) {
1369         errln("hasMoreChar32Than(%d, %d, %d)=%hd is wrong\n",
1370                 start, length, number, s.hasMoreChar32Than(start, length, number));
1371     }
1372 }
1373 
1374 void
TestCountChar32(void)1375 UnicodeStringTest::TestCountChar32(void) {
1376     {
1377         UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
1378 
1379         // test countChar32()
1380         // note that this also calls and tests u_countChar32(length>=0)
1381         if(
1382             s.countChar32()!=4 ||
1383             s.countChar32(1)!=4 ||
1384             s.countChar32(2)!=3 ||
1385             s.countChar32(2, 3)!=2 ||
1386             s.countChar32(2, 0)!=0
1387         ) {
1388             errln("UnicodeString::countChar32() failed");
1389         }
1390 
1391         // NUL-terminate the string buffer and test u_countChar32(length=-1)
1392         const UChar *buffer=s.getTerminatedBuffer();
1393         if(
1394             u_countChar32(buffer, -1)!=4 ||
1395             u_countChar32(buffer+1, -1)!=4 ||
1396             u_countChar32(buffer+2, -1)!=3 ||
1397             u_countChar32(buffer+3, -1)!=3 ||
1398             u_countChar32(buffer+4, -1)!=2 ||
1399             u_countChar32(buffer+5, -1)!=1 ||
1400             u_countChar32(buffer+6, -1)!=0
1401         ) {
1402             errln("u_countChar32(length=-1) failed");
1403         }
1404 
1405         // test u_countChar32() with bad input
1406         if(u_countChar32(NULL, 5)!=0 || u_countChar32(buffer, -2)!=0) {
1407             errln("u_countChar32(bad input) failed (returned non-zero counts)");
1408         }
1409     }
1410 
1411     /* test data and variables for hasMoreChar32Than() */
1412     static const UChar str[]={
1413         0x61, 0x62, 0xd800, 0xdc00,
1414         0xd801, 0xdc01, 0x63, 0xd802,
1415         0x64, 0xdc03, 0x65, 0x66,
1416         0xd804, 0xdc04, 0xd805, 0xdc05,
1417         0x67
1418     };
1419     UnicodeString string(str, UPRV_LENGTHOF(str));
1420     int32_t start, length, number;
1421 
1422     /* test hasMoreChar32Than() */
1423     for(length=string.length(); length>=0; --length) {
1424         for(start=0; start<=length; ++start) {
1425             for(number=-1; number<=((length-start)+2); ++number) {
1426                 _testUnicodeStringHasMoreChar32Than(string, start, length-start, number);
1427             }
1428         }
1429     }
1430 
1431     /* test hasMoreChar32Than() with pinning */
1432     for(start=-1; start<=string.length()+1; ++start) {
1433         for(number=-1; number<=((string.length()-start)+2); ++number) {
1434             _testUnicodeStringHasMoreChar32Than(string, start, 0x7fffffff, number);
1435         }
1436     }
1437 
1438     /* test hasMoreChar32Than() with a bogus string */
1439     string.setToBogus();
1440     for(length=-1; length<=1; ++length) {
1441         for(start=-1; start<=length; ++start) {
1442             for(number=-1; number<=((length-start)+2); ++number) {
1443                 _testUnicodeStringHasMoreChar32Than(string, start, length-start, number);
1444             }
1445         }
1446     }
1447 }
1448 
1449 void
TestBogus()1450 UnicodeStringTest::TestBogus() {
1451     UnicodeString   test1("This is a test");
1452     UnicodeString   test2("This is a test");
1453     UnicodeString   test3("Me too!");
1454 
1455     // test isBogus() and setToBogus()
1456     if (test1.isBogus() || test2.isBogus() || test3.isBogus()) {
1457         errln("A string returned true for isBogus()!");
1458     }
1459 
1460     // NULL pointers are treated like empty strings
1461     // use other illegal arguments to make a bogus string
1462     test3.setTo(false, test1.getBuffer(), -2);
1463     if(!test3.isBogus()) {
1464         errln("A bogus string returned false for isBogus()!");
1465     }
1466     if (test1.hashCode() != test2.hashCode() || test1.hashCode() == test3.hashCode()) {
1467         errln("hashCode() failed");
1468     }
1469     if(test3.getBuffer()!=0 || test3.getBuffer(20)!=0 || test3.getTerminatedBuffer()!=0) {
1470         errln("bogus.getBuffer()!=0");
1471     }
1472     if (test1.indexOf(test3) != -1) {
1473         errln("bogus.indexOf() != -1");
1474     }
1475     if (test1.lastIndexOf(test3) != -1) {
1476         errln("bogus.lastIndexOf() != -1");
1477     }
1478     if (test1.caseCompare(test3, U_FOLD_CASE_DEFAULT) != 1 || test3.caseCompare(test1, U_FOLD_CASE_DEFAULT) != -1) {
1479         errln("caseCompare() doesn't work with bogus strings");
1480     }
1481     if (test1.compareCodePointOrder(test3) != 1 || test3.compareCodePointOrder(test1) != -1) {
1482         errln("compareCodePointOrder() doesn't work with bogus strings");
1483     }
1484 
1485     // verify that non-assignment modifications fail and do not revive a bogus string
1486     test3.setToBogus();
1487     test3.append((UChar)0x61);
1488     if(!test3.isBogus() || test3.getBuffer()!=0) {
1489         errln("bogus.append('a') worked but must not");
1490     }
1491 
1492     test3.setToBogus();
1493     test3.findAndReplace(UnicodeString((UChar)0x61), test2);
1494     if(!test3.isBogus() || test3.getBuffer()!=0) {
1495         errln("bogus.findAndReplace() worked but must not");
1496     }
1497 
1498     test3.setToBogus();
1499     test3.trim();
1500     if(!test3.isBogus() || test3.getBuffer()!=0) {
1501         errln("bogus.trim() revived bogus but must not");
1502     }
1503 
1504     test3.setToBogus();
1505     test3.remove(1);
1506     if(!test3.isBogus() || test3.getBuffer()!=0) {
1507         errln("bogus.remove(1) revived bogus but must not");
1508     }
1509 
1510     test3.setToBogus();
1511     if(!test3.setCharAt(0, 0x62).isBogus() || !test3.isEmpty()) {
1512         errln("bogus.setCharAt(0, 'b') worked but must not");
1513     }
1514 
1515     test3.setToBogus();
1516     if(test3.truncate(1) || !test3.isBogus() || !test3.isEmpty()) {
1517         errln("bogus.truncate(1) revived bogus but must not");
1518     }
1519 
1520     // verify that assignments revive a bogus string
1521     test3.setToBogus();
1522     if(!test3.isBogus() || (test3=test1).isBogus() || test3!=test1) {
1523         errln("bogus.operator=() failed");
1524     }
1525 
1526     test3.setToBogus();
1527     if(!test3.isBogus() || test3.fastCopyFrom(test1).isBogus() || test3!=test1) {
1528         errln("bogus.fastCopyFrom() failed");
1529     }
1530 
1531     test3.setToBogus();
1532     if(!test3.isBogus() || test3.setTo(test1).isBogus() || test3!=test1) {
1533         errln("bogus.setTo(UniStr) failed");
1534     }
1535 
1536     test3.setToBogus();
1537     if(!test3.isBogus() || test3.setTo(test1, 0).isBogus() || test3!=test1) {
1538         errln("bogus.setTo(UniStr, 0) failed");
1539     }
1540 
1541     test3.setToBogus();
1542     if(!test3.isBogus() || test3.setTo(test1, 0, 0x7fffffff).isBogus() || test3!=test1) {
1543         errln("bogus.setTo(UniStr, 0, len) failed");
1544     }
1545 
1546     test3.setToBogus();
1547     if(!test3.isBogus() || test3.setTo(test1.getBuffer(), test1.length()).isBogus() || test3!=test1) {
1548         errln("bogus.setTo(const UChar *, len) failed");
1549     }
1550 
1551     test3.setToBogus();
1552     if(!test3.isBogus() || test3.setTo((UChar)0x2028).isBogus() || test3!=UnicodeString((UChar)0x2028)) {
1553         errln("bogus.setTo(UChar) failed");
1554     }
1555 
1556     test3.setToBogus();
1557     if(!test3.isBogus() || test3.setTo((UChar32)0x1d157).isBogus() || test3!=UnicodeString((UChar32)0x1d157)) {
1558         errln("bogus.setTo(UChar32) failed");
1559     }
1560 
1561     test3.setToBogus();
1562     if(!test3.isBogus() || test3.setTo(false, test1.getBuffer(), test1.length()).isBogus() || test3!=test1) {
1563         errln("bogus.setTo(readonly alias) failed");
1564     }
1565 
1566     // writable alias to another string's buffer: very bad idea, just convenient for this test
1567     test3.setToBogus();
1568     if(!test3.isBogus() ||
1569             test3.setTo(const_cast<UChar *>(test1.getBuffer()),
1570                         test1.length(), test1.getCapacity()).isBogus() ||
1571             test3!=test1) {
1572         errln("bogus.setTo(writable alias) failed");
1573     }
1574 
1575     // verify simple, documented ways to turn a bogus string into an empty one
1576     test3.setToBogus();
1577     if(!test3.isBogus() || (test3=UnicodeString()).isBogus() || !test3.isEmpty()) {
1578         errln("bogus.operator=(UnicodeString()) failed");
1579     }
1580 
1581     test3.setToBogus();
1582     if(!test3.isBogus() || test3.setTo(UnicodeString()).isBogus() || !test3.isEmpty()) {
1583         errln("bogus.setTo(UnicodeString()) failed");
1584     }
1585 
1586     test3.setToBogus();
1587     if(test3.remove().isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) {
1588         errln("bogus.remove() failed");
1589     }
1590 
1591     test3.setToBogus();
1592     if(test3.remove(0, INT32_MAX).isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) {
1593         errln("bogus.remove(0, INT32_MAX) failed");
1594     }
1595 
1596     test3.setToBogus();
1597     if(test3.truncate(0) || test3.isBogus() || !test3.isEmpty()) {
1598         errln("bogus.truncate(0) failed");
1599     }
1600 
1601     test3.setToBogus();
1602     if(!test3.isBogus() || test3.setTo((UChar32)-1).isBogus() || !test3.isEmpty()) {
1603         errln("bogus.setTo((UChar32)-1) failed");
1604     }
1605 
1606     static const UChar nul=0;
1607 
1608     test3.setToBogus();
1609     if(!test3.isBogus() || test3.setTo(&nul, 0).isBogus() || !test3.isEmpty()) {
1610         errln("bogus.setTo(&nul, 0) failed");
1611     }
1612 
1613     test3.setToBogus();
1614     if(!test3.isBogus() || test3.getBuffer()!=0) {
1615         errln("setToBogus() failed to make a string bogus");
1616     }
1617 
1618     test3.setToBogus();
1619     if(test1.isBogus() || !(test1=test3).isBogus()) {
1620         errln("normal=bogus failed to make the left string bogus");
1621     }
1622 
1623     // test that NULL primitive input string values are treated like
1624     // empty strings, not errors (bogus)
1625     test2.setTo((UChar32)0x10005);
1626     if(test2.insert(1, nullptr, 1).length()!=2) {
1627         errln("UniStr.insert(...nullptr...) should not modify the string but does");
1628     }
1629 
1630     UErrorCode errorCode=U_ZERO_ERROR;
1631     UnicodeString
1632         test4((const UChar *)NULL),
1633         test5(true, (const UChar *)NULL, 1),
1634         test6((UChar *)NULL, 5, 5),
1635         test7((const char *)NULL, 3, NULL, errorCode);
1636     if(test4.isBogus() || test5.isBogus() || test6.isBogus() || test7.isBogus()) {
1637         errln("a constructor set to bogus for a NULL input string, should be empty");
1638     }
1639 
1640     test4.setTo(NULL, 3);
1641     test5.setTo(true, (const UChar *)NULL, 1);
1642     test6.setTo((UChar *)NULL, 5, 5);
1643     if(test4.isBogus() || test5.isBogus() || test6.isBogus()) {
1644         errln("a setTo() set to bogus for a NULL input string, should be empty");
1645     }
1646 
1647     // test that bogus==bogus<any
1648     if(test1!=test3 || test1.compare(test3)!=0) {
1649         errln("bogus==bogus failed");
1650     }
1651 
1652     test2.remove();
1653     if(test1>=test2 || !(test2>test1) || test1.compare(test2)>=0 || !(test2.compare(test1)>0)) {
1654         errln("bogus<empty failed");
1655     }
1656 
1657     // test that copy constructor of bogus is bogus & clone of bogus is nullptr
1658     {
1659         test3.setToBogus();
1660         UnicodeString test3Copy(test3);
1661         UnicodeString *test3Clone = test3.clone();
1662         assertTrue(WHERE, test3.isBogus());
1663         assertTrue(WHERE, test3Copy.isBogus());
1664         assertTrue(WHERE, test3Clone == nullptr);
1665     }
1666 }
1667 
1668 // StringEnumeration ------------------------------------------------------- ***
1669 // most of StringEnumeration is tested elsewhere
1670 // this test improves code coverage
1671 
// Strings served, in this order, by TestEnumeration below.
// The last two are deliberately long so that the default
// StringEnumeration implementations have to grow their buffers.
static const char *const testEnumStrings[] = {
    "a",
    "b",
    "c",
    "this is a long string which helps us test some buffer limits",
    "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"
};
1680 
1681 class TestEnumeration : public StringEnumeration {
1682 public:
TestEnumeration()1683     TestEnumeration() : i(0) {}
1684 
count(UErrorCode &) const1685     virtual int32_t count(UErrorCode& /*status*/) const override {
1686         return UPRV_LENGTHOF(testEnumStrings);
1687     }
1688 
snext(UErrorCode & status)1689     virtual const UnicodeString *snext(UErrorCode &status) override {
1690         if(U_SUCCESS(status) && i<UPRV_LENGTHOF(testEnumStrings)) {
1691             unistr=UnicodeString(testEnumStrings[i++], "");
1692             return &unistr;
1693         }
1694 
1695         return NULL;
1696     }
1697 
reset(UErrorCode &)1698     virtual void reset(UErrorCode& /*status*/) override {
1699         i=0;
1700     }
1701 
getStaticClassID()1702     static inline UClassID getStaticClassID() {
1703         return (UClassID)&fgClassID;
1704     }
getDynamicClassID() const1705     virtual UClassID getDynamicClassID() const override {
1706         return getStaticClassID();
1707     }
1708 
1709 private:
1710     static const char fgClassID;
1711 
1712     int32_t i;
1713 };
1714 
1715 const char TestEnumeration::fgClassID=0;
1716 
1717 void
TestStringEnumeration()1718 UnicodeStringTest::TestStringEnumeration() {
1719     UnicodeString s;
1720     TestEnumeration ten;
1721     int32_t i, length;
1722     UErrorCode status;
1723 
1724     const UChar *pu;
1725     const char *pc;
1726 
1727     // test the next() default implementation and ensureCharsCapacity()
1728     for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1729         status=U_ZERO_ERROR;
1730         pc=ten.next(&length, status);
1731         s=UnicodeString(testEnumStrings[i], "");
1732         if(U_FAILURE(status) || pc==NULL || length!=s.length() || UnicodeString(pc, length, "")!=s) {
1733             errln("StringEnumeration.next(%d) failed", i);
1734         }
1735     }
1736     status=U_ZERO_ERROR;
1737     if(ten.next(&length, status)!=NULL) {
1738         errln("StringEnumeration.next(done)!=NULL");
1739     }
1740 
1741     // test the unext() default implementation
1742     ten.reset(status);
1743     for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1744         status=U_ZERO_ERROR;
1745         pu=ten.unext(&length, status);
1746         s=UnicodeString(testEnumStrings[i], "");
1747         if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(true, pu, length)!=s) {
1748             errln("StringEnumeration.unext(%d) failed", i);
1749         }
1750     }
1751     status=U_ZERO_ERROR;
1752     if(ten.unext(&length, status)!=NULL) {
1753         errln("StringEnumeration.unext(done)!=NULL");
1754     }
1755 
1756     // test that the default clone() implementation works, and returns NULL
1757     if(ten.clone()!=NULL) {
1758         errln("StringEnumeration.clone()!=NULL");
1759     }
1760 
1761     // test that uenum_openFromStringEnumeration() works
1762     // Need a heap allocated string enumeration because it is adopted by the UEnumeration.
1763     StringEnumeration *newTen = new TestEnumeration;
1764     status=U_ZERO_ERROR;
1765     UEnumeration *uten = uenum_openFromStringEnumeration(newTen, &status);
1766     if (uten==NULL || U_FAILURE(status)) {
1767         errln("fail at file %s, line %d, UErrorCode is %s\n", __FILE__, __LINE__, u_errorName(status));
1768         return;
1769     }
1770 
1771     // test  uenum_next()
1772     for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1773         status=U_ZERO_ERROR;
1774         pc=uenum_next(uten, &length, &status);
1775         if(U_FAILURE(status) || pc==NULL || strcmp(pc, testEnumStrings[i]) != 0) {
1776             errln("File %s, line %d, StringEnumeration.next(%d) failed", __FILE__, __LINE__, i);
1777         }
1778     }
1779     status=U_ZERO_ERROR;
1780     if(uenum_next(uten, &length, &status)!=NULL) {
1781         errln("File %s, line %d, uenum_next(done)!=NULL");
1782     }
1783 
1784     // test the uenum_unext()
1785     uenum_reset(uten, &status);
1786     for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1787         status=U_ZERO_ERROR;
1788         pu=uenum_unext(uten, &length, &status);
1789         s=UnicodeString(testEnumStrings[i], "");
1790         if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(true, pu, length)!=s) {
1791             errln("File %s, Line %d, uenum_unext(%d) failed", __FILE__, __LINE__, i);
1792         }
1793     }
1794     status=U_ZERO_ERROR;
1795     if(uenum_unext(uten, &length, &status)!=NULL) {
1796         errln("File %s, Line %d, uenum_unext(done)!=NULL" __FILE__, __LINE__);
1797     }
1798 
1799     uenum_close(uten);
1800 }
1801 
1802 /*
1803  * Namespace test, to make sure that macros like UNICODE_STRING include the
1804  * namespace qualifier.
1805  *
1806  * Define a (bogus) UnicodeString class in another namespace and check for ambiguity.
1807  */
1808 namespace bogus {
1809     class UnicodeString {
1810     public:
1811         enum EInvariant { kInvariant };
UnicodeString()1812         UnicodeString() : i(1) {}
UnicodeString(UBool,const UChar *,int32_t textLength)1813         UnicodeString(UBool /*isTerminated*/, const UChar * /*text*/, int32_t textLength) : i(textLength) {(void)i;}
UnicodeString(const char *,int32_t length,enum EInvariant)1814         UnicodeString(const char * /*src*/, int32_t length, enum EInvariant /*inv*/
1815 ) : i(length) {}
1816     private:
1817         int32_t i;
1818     };
1819 }
1820 
1821 void
TestNameSpace()1822 UnicodeStringTest::TestNameSpace() {
1823     // Provoke name collision unless the UnicodeString macros properly
1824     // qualify the icu::UnicodeString class.
1825     using namespace bogus;
1826 
1827     // Use all UnicodeString macros from unistr.h.
1828     icu::UnicodeString s1=icu::UnicodeString("abc", 3, US_INV);
1829     icu::UnicodeString s2=UNICODE_STRING("def", 3);
1830     icu::UnicodeString s3=UNICODE_STRING_SIMPLE("ghi");
1831 
1832     // Make sure the compiler does not optimize away instantiation of s1, s2, s3.
1833     icu::UnicodeString s4=s1+s2+s3;
1834     if(s4.length()!=9) {
1835         errln("Something wrong with UnicodeString::operator+().");
1836     }
1837 }
1838 
1839 void
TestUTF32()1840 UnicodeStringTest::TestUTF32() {
1841     // Input string length US_STACKBUF_SIZE to cause overflow of the
1842     // initially chosen fStackBuffer due to supplementary characters.
1843     static const UChar32 utf32[] = {
1844         0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a,
1845         0x10000, 0x20000, 0xe0000, 0x10ffff
1846     };
1847     static const UChar expected_utf16[] = {
1848         0x41, 0xfffd, 0x61, 0xfffd, 0xfffd, 0xfffd, 0x5a, 0xd900, 0xdc00, 0x7a,
1849         0xd800, 0xdc00, 0xd840, 0xdc00, 0xdb40, 0xdc00, 0xdbff, 0xdfff
1850     };
1851     UnicodeString from32 = UnicodeString::fromUTF32(utf32, UPRV_LENGTHOF(utf32));
1852     UnicodeString expected(false, expected_utf16, UPRV_LENGTHOF(expected_utf16));
1853     if(from32 != expected) {
1854         errln("UnicodeString::fromUTF32() did not create the expected string.");
1855     }
1856 
1857     static const UChar utf16[] = {
1858         0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
1859     };
1860     static const UChar32 expected_utf32[] = {
1861         0x41, 0xfffd, 0x61, 0xfffd, 0x5a, 0x50000, 0x7a, 0x10000, 0x10ffff
1862     };
1863     UChar32 result32[16];
1864     UErrorCode errorCode = U_ZERO_ERROR;
1865     int32_t length32 =
1866         UnicodeString(false, utf16, UPRV_LENGTHOF(utf16)).
1867         toUTF32(result32, UPRV_LENGTHOF(result32), errorCode);
1868     if( length32 != UPRV_LENGTHOF(expected_utf32) ||
1869         0 != uprv_memcmp(result32, expected_utf32, length32*4) ||
1870         result32[length32] != 0
1871     ) {
1872         errln("UnicodeString::toUTF32() did not create the expected string.");
1873     }
1874 }
1875 
1876 class TestCheckedArrayByteSink : public CheckedArrayByteSink {
1877 public:
TestCheckedArrayByteSink(char * outbuf,int32_t capacity)1878     TestCheckedArrayByteSink(char* outbuf, int32_t capacity)
1879             : CheckedArrayByteSink(outbuf, capacity), calledFlush(false) {}
Flush()1880     virtual void Flush() override { calledFlush = true; }
1881     UBool calledFlush;
1882 };
1883 
1884 void
TestUTF8()1885 UnicodeStringTest::TestUTF8() {
1886     static const uint8_t utf8[] = {
1887         // Code points:
1888         // 0x41, 0xd900,
1889         // 0x61, 0xdc00,
1890         // 0x110000, 0x5a,
1891         // 0x50000, 0x7a,
1892         // 0x10000, 0x20000,
1893         // 0xe0000, 0x10ffff
1894         0x41, 0xed, 0xa4, 0x80,
1895         0x61, 0xed, 0xb0, 0x80,
1896         0xf4, 0x90, 0x80, 0x80, 0x5a,
1897         0xf1, 0x90, 0x80, 0x80, 0x7a,
1898         0xf0, 0x90, 0x80, 0x80, 0xf0, 0xa0, 0x80, 0x80,
1899         0xf3, 0xa0, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
1900     };
1901     static const UChar expected_utf16[] = {
1902         0x41, 0xfffd, 0xfffd, 0xfffd,
1903         0x61, 0xfffd, 0xfffd, 0xfffd,
1904         0xfffd,  0xfffd, 0xfffd, 0xfffd,0x5a,
1905         0xd900, 0xdc00, 0x7a,
1906         0xd800, 0xdc00, 0xd840, 0xdc00,
1907         0xdb40, 0xdc00, 0xdbff, 0xdfff
1908     };
1909     UnicodeString from8 = UnicodeString::fromUTF8(StringPiece((const char *)utf8, (int32_t)sizeof(utf8)));
1910     UnicodeString expected(false, expected_utf16, UPRV_LENGTHOF(expected_utf16));
1911 
1912     if(from8 != expected) {
1913         errln("UnicodeString::fromUTF8(StringPiece) did not create the expected string.");
1914     }
1915     std::string utf8_string((const char *)utf8, sizeof(utf8));
1916     UnicodeString from8b = UnicodeString::fromUTF8(utf8_string);
1917     if(from8b != expected) {
1918         errln("UnicodeString::fromUTF8(std::string) did not create the expected string.");
1919     }
1920 
1921     static const UChar utf16[] = {
1922         0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
1923     };
1924     static const uint8_t expected_utf8[] = {
1925         0x41, 0xef, 0xbf, 0xbd, 0x61, 0xef, 0xbf, 0xbd, 0x5a, 0xf1, 0x90, 0x80, 0x80, 0x7a,
1926         0xf0, 0x90, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
1927     };
1928     UnicodeString us(false, utf16, UPRV_LENGTHOF(utf16));
1929 
1930     char buffer[64];
1931     TestCheckedArrayByteSink sink(buffer, (int32_t)sizeof(buffer));
1932     us.toUTF8(sink);
1933     if( sink.NumberOfBytesWritten() != (int32_t)sizeof(expected_utf8) ||
1934         0 != uprv_memcmp(buffer, expected_utf8, sizeof(expected_utf8))
1935     ) {
1936         errln("UnicodeString::toUTF8() did not create the expected string.");
1937     }
1938     if(!sink.calledFlush) {
1939         errln("UnicodeString::toUTF8(sink) did not sink.Flush().");
1940     }
1941     // Initial contents for testing that toUTF8String() appends.
1942     std::string result8 = "-->";
1943     std::string expected8 = "-->" + std::string((const char *)expected_utf8, sizeof(expected_utf8));
1944     // Use the return value just for testing.
1945     std::string &result8r = us.toUTF8String(result8);
1946     if(result8r != expected8 || &result8r != &result8) {
1947         errln("UnicodeString::toUTF8String() did not create the expected string.");
1948     }
1949 }
1950 
1951 // Test if this compiler supports Return Value Optimization of unnamed temporary objects.
wrapUChars(const UChar * uchars)1952 static UnicodeString wrapUChars(const UChar *uchars) {
1953     return UnicodeString(true, uchars, -1);
1954 }
1955 
1956 void
TestReadOnlyAlias()1957 UnicodeStringTest::TestReadOnlyAlias() {
1958     UChar uchars[]={ 0x61, 0x62, 0 };
1959     UnicodeString alias(true, uchars, 2);
1960     if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
1961         errln("UnicodeString read-only-aliasing constructor does not behave as expected.");
1962         return;
1963     }
1964     alias.truncate(1);
1965     if(alias.length()!=1 || alias.getBuffer()!=uchars) {
1966         errln("UnicodeString(read-only-alias).truncate() did not preserve aliasing as expected.");
1967     }
1968     if(alias.getTerminatedBuffer()==uchars) {
1969         errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1970               "did not allocate and copy as expected.");
1971     }
1972     if(uchars[1]!=0x62) {
1973         errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1974               "modified the original buffer.");
1975     }
1976     if(1!=u_strlen(alias.getTerminatedBuffer())) {
1977         errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1978               "does not return a buffer terminated at the proper length.");
1979     }
1980 
1981     alias.setTo(true, uchars, 2);
1982     if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
1983         errln("UnicodeString read-only-aliasing setTo() does not behave as expected.");
1984         return;
1985     }
1986     alias.remove();
1987     if(alias.length()!=0) {
1988         errln("UnicodeString(read-only-alias).remove() did not work.");
1989     }
1990     if(alias.getTerminatedBuffer()==uchars) {
1991         errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
1992               "did not un-alias as expected.");
1993     }
1994     if(uchars[0]!=0x61) {
1995         errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
1996               "modified the original buffer.");
1997     }
1998     if(0!=u_strlen(alias.getTerminatedBuffer())) {
1999         errln("UnicodeString.setTo(read-only-alias).remove().getTerminatedBuffer() "
2000               "does not return a buffer terminated at length 0.");
2001     }
2002 
2003     UnicodeString longString=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789");
2004     alias.setTo(false, longString.getBuffer(), longString.length());
2005     alias.remove(0, 10);
2006     if(longString.compare(10, INT32_MAX, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+10) {
2007         errln("UnicodeString.setTo(read-only-alias).remove(0, 10) did not preserve aliasing as expected.");
2008     }
2009     alias.setTo(false, longString.getBuffer(), longString.length());
2010     alias.remove(27, 99);
2011     if(longString.compare(0, 27, alias)!=0 || alias.getBuffer()!=longString.getBuffer()) {
2012         errln("UnicodeString.setTo(read-only-alias).remove(27, 99) did not preserve aliasing as expected.");
2013     }
2014     alias.setTo(false, longString.getBuffer(), longString.length());
2015     alias.retainBetween(6, 30);
2016     if(longString.compare(6, 24, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+6) {
2017         errln("UnicodeString.setTo(read-only-alias).retainBetween(6, 30) did not preserve aliasing as expected.");
2018     }
2019 
2020     UChar abc[]={ 0x61, 0x62, 0x63, 0 };
2021     UBool hasRVO= wrapUChars(abc).getBuffer()==abc;
2022 
2023     UnicodeString temp;
2024     temp.fastCopyFrom(longString.tempSubString());
2025     if(temp!=longString || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
2026         errln("UnicodeString.tempSubString() failed");
2027     }
2028     temp.fastCopyFrom(longString.tempSubString(-3, 5));
2029     if(longString.compare(0, 5, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
2030         errln("UnicodeString.tempSubString(-3, 5) failed");
2031     }
2032     temp.fastCopyFrom(longString.tempSubString(17));
2033     if(longString.compare(17, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+17)) {
2034         errln("UnicodeString.tempSubString(17) failed");
2035     }
2036     temp.fastCopyFrom(longString.tempSubString(99));
2037     if(!temp.isEmpty()) {
2038         errln("UnicodeString.tempSubString(99) failed");
2039     }
2040     temp.fastCopyFrom(longString.tempSubStringBetween(6));
2041     if(longString.compare(6, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+6)) {
2042         errln("UnicodeString.tempSubStringBetween(6) failed");
2043     }
2044     temp.fastCopyFrom(longString.tempSubStringBetween(8, 18));
2045     if(longString.compare(8, 10, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+8)) {
2046         errln("UnicodeString.tempSubStringBetween(8, 18) failed");
2047     }
2048     UnicodeString bogusString;
2049     bogusString.setToBogus();
2050     temp.fastCopyFrom(bogusString.tempSubStringBetween(8, 18));
2051     if(!temp.isBogus()) {
2052         errln("UnicodeString.setToBogus().tempSubStringBetween(8, 18) failed");
2053     }
2054 }
2055 
2056 void
doTestAppendable(UnicodeString & dest,Appendable & app)2057 UnicodeStringTest::doTestAppendable(UnicodeString &dest, Appendable &app) {
2058     static const UChar cde[3]={ 0x63, 0x64, 0x65 };
2059     static const UChar fg[3]={ 0x66, 0x67, 0 };
2060     if(!app.reserveAppendCapacity(12)) {
2061         errln("Appendable.reserve(12) failed");
2062     }
2063     app.appendCodeUnit(0x61);
2064     app.appendCodePoint(0x62);
2065     app.appendCodePoint(0x50000);
2066     app.appendString(cde, 3);
2067     app.appendString(fg, -1);
2068     UChar scratch[3];
2069     int32_t capacity=-1;
2070     UChar *buffer=app.getAppendBuffer(3, 3, scratch, 3, &capacity);
2071     if(capacity<3) {
2072         errln("Appendable.getAppendBuffer(min=3) returned capacity=%d<3", (int)capacity);
2073         return;
2074     }
2075     static const UChar hij[3]={ 0x68, 0x69, 0x6a };
2076     u_memcpy(buffer, hij, 3);
2077     app.appendString(buffer, 3);
2078     if(dest!=UNICODE_STRING_SIMPLE("ab\\U00050000cdefghij").unescape()) {
2079         errln("Appendable.append(...) failed");
2080     }
2081     buffer=app.getAppendBuffer(0, 3, scratch, 3, &capacity);
2082     if(buffer!=NULL || capacity!=0) {
2083         errln("Appendable.getAppendBuffer(min=0) failed");
2084     }
2085     capacity=1;
2086     buffer=app.getAppendBuffer(3, 3, scratch, 2, &capacity);
2087     if(buffer!=NULL || capacity!=0) {
2088         errln("Appendable.getAppendBuffer(scratch<min) failed");
2089     }
2090 }
2091 
2092 class SimpleAppendable : public Appendable {
2093 public:
SimpleAppendable(UnicodeString & dest)2094     explicit SimpleAppendable(UnicodeString &dest) : str(dest) {}
appendCodeUnit(UChar c)2095     virtual UBool appendCodeUnit(UChar c) override { str.append(c); return true; }
reset()2096     SimpleAppendable &reset() { str.remove(); return *this; }
2097 private:
2098     UnicodeString &str;
2099 };
2100 
2101 void
TestAppendable()2102 UnicodeStringTest::TestAppendable() {
2103     UnicodeString dest;
2104     SimpleAppendable app(dest);
2105     doTestAppendable(dest, app);
2106 }
2107 
2108 void
TestUnicodeStringImplementsAppendable()2109 UnicodeStringTest::TestUnicodeStringImplementsAppendable() {
2110     UnicodeString dest;
2111     UnicodeStringAppendable app(dest);
2112     doTestAppendable(dest, app);
2113 }
2114 
2115 void
TestSizeofUnicodeString()2116 UnicodeStringTest::TestSizeofUnicodeString() {
2117     // See the comments in unistr.h near the declaration of UnicodeString's fields.
2118     // See the API comments for UNISTR_OBJECT_SIZE.
2119     size_t sizeofUniStr=sizeof(UnicodeString);
2120     size_t expected=UNISTR_OBJECT_SIZE;
2121     if(expected!=sizeofUniStr) {
2122         // Possible cause: UNISTR_OBJECT_SIZE may not be a multiple of sizeof(pointer),
2123         // of the compiler might add more internal padding than expected.
2124         errln("sizeof(UnicodeString)=%d, expected UNISTR_OBJECT_SIZE=%d",
2125               (int)sizeofUniStr, (int)expected);
2126     }
2127     if(sizeofUniStr<32) {
2128         errln("sizeof(UnicodeString)=%d < 32, probably too small", (int)sizeofUniStr);
2129     }
2130     // We assume that the entire UnicodeString object,
2131     // minus the vtable pointer and 2 bytes for flags and short length,
2132     // is available for internal storage of UChars.
2133     int32_t expectedStackBufferLength=((int32_t)UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR;
2134     UnicodeString s;
2135     const UChar *emptyBuffer=s.getBuffer();
2136     for(int32_t i=0; i<expectedStackBufferLength; ++i) {
2137         s.append((UChar)0x2e);
2138     }
2139     const UChar *fullBuffer=s.getBuffer();
2140     if(fullBuffer!=emptyBuffer) {
2141         errln("unexpected reallocation when filling with assumed stack buffer size of %d",
2142               expectedStackBufferLength);
2143     }
2144     const UChar *terminatedBuffer=s.getTerminatedBuffer();
2145     if(terminatedBuffer==emptyBuffer) {
2146         errln("unexpected keeping stack buffer when overfilling assumed stack buffer size of %d",
2147               expectedStackBufferLength);
2148     }
2149 }
2150 
2151 // Try to avoid clang -Wself-move warnings from s1 = std::move(s1);
moveFrom(UnicodeString & dest,UnicodeString & src)2152 void moveFrom(UnicodeString &dest, UnicodeString &src) {
2153     dest = std::move(src);
2154 }
2155 
2156 void
TestMoveSwap()2157 UnicodeStringTest::TestMoveSwap() {
2158     static const UChar abc[3] = { 0x61, 0x62, 0x63 };  // "abc"
2159     UnicodeString s1(false, abc, UPRV_LENGTHOF(abc));  // read-only alias
2160     UnicodeString s2(100, 0x7a, 100);  // 100 * 'z' should be on the heap
2161     UnicodeString s3("defg", 4, US_INV);  // in stack buffer
2162     const UChar *p = s2.getBuffer();
2163     s1.swap(s2);
2164     if(s1.getBuffer() != p || s1.length() != 100 || s2.getBuffer() != abc || s2.length() != 3) {
2165         errln("UnicodeString.swap() did not swap");
2166     }
2167     swap(s2, s3);
2168     if(s2 != UNICODE_STRING_SIMPLE("defg") || s3.getBuffer() != abc || s3.length() != 3) {
2169         errln("swap(UnicodeString) did not swap back");
2170     }
2171     UnicodeString s4;
2172     s4 = std::move(s1);
2173     if(s4.getBuffer() != p || s4.length() != 100 || !s1.isBogus()) {
2174         errln("UnicodeString = std::move(heap) did not move");
2175     }
2176     UnicodeString s5;
2177     s5 = std::move(s2);
2178     if(s5 != UNICODE_STRING_SIMPLE("defg")) {
2179         errln("UnicodeString = std::move(stack) did not move");
2180     }
2181     UnicodeString s6;
2182     s6 = std::move(s3);
2183     if(s6.getBuffer() != abc || s6.length() != 3) {
2184         errln("UnicodeString = std::move(alias) did not move");
2185     }
2186     infoln("TestMoveSwap() with rvalue references");
2187     s1 = static_cast<UnicodeString &&>(s6);
2188     if(s1.getBuffer() != abc || s1.length() != 3) {
2189         errln("UnicodeString move assignment operator did not move");
2190     }
2191     UnicodeString s7(static_cast<UnicodeString &&>(s4));
2192     if(s7.getBuffer() != p || s7.length() != 100 || !s4.isBogus()) {
2193         errln("UnicodeString move constructor did not move");
2194     }
2195 
2196     // Move self assignment leaves the object valid but in an undefined state.
2197     // Do it to make sure there is no crash,
2198     // but do not check for any particular resulting value.
2199     moveFrom(s1, s1);
2200     moveFrom(s2, s2);
2201     moveFrom(s3, s3);
2202     moveFrom(s4, s4);
2203     moveFrom(s5, s5);
2204     moveFrom(s6, s6);
2205     moveFrom(s7, s7);
2206     // Simple copy assignment must work.
2207     UnicodeString simple = UNICODE_STRING_SIMPLE("simple");
2208     s1 = s6 = s4 = s7 = simple;
2209     if(s1 != simple || s4 != simple || s6 != simple || s7 != simple) {
2210         errln("UnicodeString copy after self-move did not work");
2211     }
2212 }
2213 
2214 void
TestUInt16Pointers()2215 UnicodeStringTest::TestUInt16Pointers() {
2216     static const uint16_t carr[] = { 0x61, 0x62, 0x63, 0 };
2217     uint16_t arr[4];
2218 
2219     UnicodeString expected(u"abc");
2220     assertEquals("abc from pointer", expected, UnicodeString(carr));
2221     assertEquals("abc from pointer+length", expected, UnicodeString(carr, 3));
2222     assertEquals("abc from read-only-alias pointer", expected, UnicodeString(true, carr, 3));
2223 
2224     UnicodeString alias(arr, 0, 4);
2225     alias.append(u'a').append(u'b').append(u'c');
2226     assertEquals("abc from writable alias", expected, alias);
2227     assertEquals("buffer=abc from writable alias", expected, UnicodeString(arr, 3));
2228 
2229     UErrorCode errorCode = U_ZERO_ERROR;
2230     int32_t length = UnicodeString(u"def").extract(arr, 4, errorCode);
2231     assertSuccess(WHERE, errorCode);
2232     assertEquals("def from extract()", UnicodeString(u"def"), UnicodeString(arr, length));
2233 }
2234 
2235 void
TestWCharPointers()2236 UnicodeStringTest::TestWCharPointers() {
2237 #if U_SIZEOF_WCHAR_T==2
2238     static const wchar_t carr[] = { 0x61, 0x62, 0x63, 0 };
2239     wchar_t arr[4];
2240 
2241     UnicodeString expected(u"abc");
2242     assertEquals("abc from pointer", expected, UnicodeString(carr));
2243     assertEquals("abc from pointer+length", expected, UnicodeString(carr, 3));
2244     assertEquals("abc from read-only-alias pointer", expected, UnicodeString(true, carr, 3));
2245 
2246     UnicodeString alias(arr, 0, 4);
2247     alias.append(u'a').append(u'b').append(u'c');
2248     assertEquals("abc from writable alias", expected, alias);
2249     assertEquals("buffer=abc from writable alias", expected, UnicodeString(arr, 3));
2250 
2251     UErrorCode errorCode = U_ZERO_ERROR;
2252     int32_t length = UnicodeString(u"def").extract(arr, 4, errorCode);
2253     assertSuccess(WHERE, errorCode);
2254     assertEquals("def from extract()", UnicodeString(u"def"), UnicodeString(arr, length));
2255 #endif
2256 }
2257 
2258 void
TestNullPointers()2259 UnicodeStringTest::TestNullPointers() {
2260     assertTrue("empty from nullptr", UnicodeString(nullptr).isEmpty());
2261     assertTrue("empty from nullptr+length", UnicodeString(nullptr, 2).isEmpty());
2262     assertTrue("empty from read-only-alias nullptr", UnicodeString(true, nullptr, 3).isEmpty());
2263 
2264     UnicodeString alias(nullptr, 4, 4);  // empty, no alias
2265     assertTrue("empty from writable alias", alias.isEmpty());
2266     alias.append(u'a').append(u'b').append(u'c');
2267     UnicodeString expected(u"abc");
2268     assertEquals("abc from writable alias", expected, alias);
2269 
2270     UErrorCode errorCode = U_ZERO_ERROR;
2271     UnicodeString(u"def").extract(nullptr, 0, errorCode);
2272     assertEquals("buffer overflow extracting to nullptr", U_BUFFER_OVERFLOW_ERROR, errorCode);
2273 }
2274 
TestUnicodeStringInsertAppendToSelf()2275 void UnicodeStringTest::TestUnicodeStringInsertAppendToSelf() {
2276     IcuTestErrorCode status(*this, "TestUnicodeStringAppendToSelf");
2277 
2278     // Test append operation
2279     UnicodeString str(u"foo ");
2280     str.append(str);
2281     str.append(str);
2282     str.append(str);
2283     assertEquals("", u"foo foo foo foo foo foo foo foo ", str);
2284 
2285     // Test append operation with readonly alias to start
2286     str = UnicodeString(true, u"foo ", 4);
2287     str.append(str);
2288     str.append(str);
2289     str.append(str);
2290     assertEquals("", u"foo foo foo foo foo foo foo foo ", str);
2291 
2292     // Test append operation with aliased substring
2293     str = u"abcde";
2294     UnicodeString sub = str.tempSubString(1, 2);
2295     str.append(sub);
2296     assertEquals("", u"abcdebc", str);
2297 
2298     // Test append operation with double-aliased substring
2299     str = UnicodeString(true, u"abcde", 5);
2300     sub = str.tempSubString(1, 2);
2301     str.append(sub);
2302     assertEquals("", u"abcdebc", str);
2303 
2304     // Test insert operation
2305     str = u"a-*b";
2306     str.insert(2, str);
2307     str.insert(4, str);
2308     str.insert(8, str);
2309     assertEquals("", u"a-a-a-a-a-a-a-a-*b*b*b*b*b*b*b*b", str);
2310 
2311     // Test insert operation with readonly alias to start
2312     str = UnicodeString(true, u"a-*b", 4);
2313     str.insert(2, str);
2314     str.insert(4, str);
2315     str.insert(8, str);
2316     assertEquals("", u"a-a-a-a-a-a-a-a-*b*b*b*b*b*b*b*b", str);
2317 
2318     // Test insert operation with aliased substring
2319     str = u"abcde";
2320     sub = str.tempSubString(1, 3);
2321     str.insert(2, sub);
2322     assertEquals("", u"abbcdcde", str);
2323 
2324     // Test insert operation with double-aliased substring
2325     str = UnicodeString(true, u"abcde", 5);
2326     sub = str.tempSubString(1, 3);
2327     str.insert(2, sub);
2328     assertEquals("", u"abbcdcde", str);
2329 }
2330 
TestLargeAppend()2331 void UnicodeStringTest::TestLargeAppend() {
2332     if(quick) return;
2333 
2334     IcuTestErrorCode status(*this, "TestLargeAppend");
2335     // Make a large UnicodeString
2336     int32_t len = 0xAFFFFFF;
2337     UnicodeString str;
2338     char16_t *buf = str.getBuffer(len);
2339     // A fast way to set buffer to valid Unicode.
2340     // 4E4E is a valid unicode character
2341     uprv_memset(buf, 0x4e, len * 2);
2342     str.releaseBuffer(len);
2343     UnicodeString dest;
2344     // Append it 16 times
2345     // 0xAFFFFFF times 16 is 0xA4FFFFF1,
2346     // which is greater than INT32_MAX, which is 0x7FFFFFFF.
2347     int64_t total = 0;
2348     for (int32_t i = 0; i < 16; i++) {
2349         dest.append(str);
2350         total += len;
2351         if (total <= INT32_MAX) {
2352             assertFalse("dest is not bogus", dest.isBogus());
2353         } else {
2354             assertTrue("dest should be bogus", dest.isBogus());
2355         }
2356     }
2357     dest.remove();
2358     total = 0;
2359     for (int32_t i = 0; i < 16; i++) {
2360         dest.append(str);
2361         total += len;
2362         if (total + len <= INT32_MAX) {
2363             assertFalse("dest is not bogus", dest.isBogus());
2364         } else if (total <= INT32_MAX) {
2365             // Check that a string of exactly the maximum size works
2366             UnicodeString str2;
2367             int32_t remain = static_cast<int32_t>(INT32_MAX - total);
2368             char16_t *buf2 = str2.getBuffer(remain);
2369             if (buf2 == nullptr) {
2370                 // if somehow memory allocation fail, return the test
2371                 return;
2372             }
2373             uprv_memset(buf2, 0x4e, remain * 2);
2374             str2.releaseBuffer(remain);
2375             dest.append(str2);
2376             total += remain;
2377             assertEquals("When a string of exactly the maximum size works", (int64_t)INT32_MAX, total);
2378             assertEquals("When a string of exactly the maximum size works", INT32_MAX, dest.length());
2379             assertFalse("dest is not bogus", dest.isBogus());
2380 
2381             // Check that a string size+1 goes bogus
2382             str2.truncate(1);
2383             dest.append(str2);
2384             total++;
2385             assertTrue("dest should be bogus", dest.isBogus());
2386         } else {
2387             assertTrue("dest should be bogus", dest.isBogus());
2388         }
2389     }
2390 }
2391