• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 
9 #include <utility>
10 
11 #include "ustrtest.h"
12 #include "unicode/appendable.h"
13 #include "unicode/std_string.h"
14 #include "unicode/unistr.h"
15 #include "unicode/uchar.h"
16 #include "unicode/ustring.h"
17 #include "unicode/locid.h"
18 #include "unicode/strenum.h"
19 #include "unicode/ucnv.h"
20 #include "unicode/uenum.h"
21 #include "unicode/utf16.h"
22 #include "cmemory.h"
23 #include "charstr.h"
24 
25 #if 0
26 #include "unicode/ustream.h"
27 
28 #include <iostream>
29 using namespace std;
30 
31 #endif
32 
~UnicodeStringTest()33 UnicodeStringTest::~UnicodeStringTest() {}
34 
35 extern IntlTest *createStringCaseTest();
36 
runIndexedTest(int32_t index,UBool exec,const char * & name,char * par)37 void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char *par)
38 {
39     if (exec) logln("TestSuite UnicodeStringTest: ");
40     TESTCASE_AUTO_BEGIN;
41     TESTCASE_AUTO_CREATE_CLASS(StringCaseTest);
42     TESTCASE_AUTO(TestBasicManipulation);
43     TESTCASE_AUTO(TestCompare);
44     TESTCASE_AUTO(TestExtract);
45     TESTCASE_AUTO(TestRemoveReplace);
46     TESTCASE_AUTO(TestSearching);
47     TESTCASE_AUTO(TestSpacePadding);
48     TESTCASE_AUTO(TestPrefixAndSuffix);
49     TESTCASE_AUTO(TestFindAndReplace);
50     TESTCASE_AUTO(TestBogus);
51     TESTCASE_AUTO(TestReverse);
52     TESTCASE_AUTO(TestMiscellaneous);
53     TESTCASE_AUTO(TestStackAllocation);
54     TESTCASE_AUTO(TestUnescape);
55     TESTCASE_AUTO(TestCountChar32);
56     TESTCASE_AUTO(TestStringEnumeration);
57     TESTCASE_AUTO(TestNameSpace);
58     TESTCASE_AUTO(TestUTF32);
59     TESTCASE_AUTO(TestUTF8);
60     TESTCASE_AUTO(TestReadOnlyAlias);
61     TESTCASE_AUTO(TestAppendable);
62     TESTCASE_AUTO(TestUnicodeStringImplementsAppendable);
63     TESTCASE_AUTO(TestSizeofUnicodeString);
64     TESTCASE_AUTO(TestStartsWithAndEndsWithNulTerminated);
65     TESTCASE_AUTO(TestMoveSwap);
66     TESTCASE_AUTO(TestUInt16Pointers);
67     TESTCASE_AUTO(TestWCharPointers);
68     TESTCASE_AUTO(TestNullPointers);
69     TESTCASE_AUTO(TestUnicodeStringInsertAppendToSelf);
70     TESTCASE_AUTO(TestLargeAppend);
71     TESTCASE_AUTO_END;
72 }
73 
74 void
TestBasicManipulation()75 UnicodeStringTest::TestBasicManipulation()
76 {
77     UnicodeString   test1("Now is the time for all men to come swiftly to the aid of the party.\n");
78     UnicodeString   expectedValue;
79     UnicodeString   *c;
80 
81     c=test1.clone();
82     test1.insert(24, "good ");
83     expectedValue = "Now is the time for all good men to come swiftly to the aid of the party.\n";
84     if (test1 != expectedValue)
85         errln("insert() failed:  expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
86 
87     c->insert(24, "good ");
88     if(*c != expectedValue) {
89         errln("clone()->insert() failed:  expected \"" + expectedValue + "\"\n,got \"" + *c + "\"");
90     }
91     delete c;
92 
93     test1.remove(41, 8);
94     expectedValue = "Now is the time for all good men to come to the aid of the party.\n";
95     if (test1 != expectedValue)
96         errln("remove() failed:  expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
97 
98     test1.replace(58, 6, "ir country");
99     expectedValue = "Now is the time for all good men to come to the aid of their country.\n";
100     if (test1 != expectedValue)
101         errln("replace() failed:  expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
102 
103     UChar     temp[80];
104     test1.extract(0, 15, temp);
105 
106     UnicodeString       test2(temp, 15);
107 
108     expectedValue = "Now is the time";
109     if (test2 != expectedValue)
110         errln("extract() failed:  expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
111 
112     test2 += " for me to go!\n";
113     expectedValue = "Now is the time for me to go!\n";
114     if (test2 != expectedValue)
115         errln("operator+=() failed:  expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
116 
117     if (test1.length() != 70)
118         errln(UnicodeString("length() failed: expected 70, got ") + test1.length());
119     if (test2.length() != 30)
120         errln(UnicodeString("length() failed: expected 30, got ") + test2.length());
121 
122     UnicodeString test3;
123     test3.append((UChar32)0x20402);
124     if(test3 != CharsToUnicodeString("\\uD841\\uDC02")){
125         errln((UnicodeString)"append failed for UChar32, expected \"\\\\ud841\\\\udc02\", got " + prettify(test3));
126     }
127     if(test3.length() != 2){
128         errln(UnicodeString("append or length failed for UChar32, expected 2, got ") + test3.length());
129     }
130     test3.append((UChar32)0x0074);
131     if(test3 != CharsToUnicodeString("\\uD841\\uDC02t")){
132         errln((UnicodeString)"append failed for UChar32, expected \"\\\\uD841\\\\uDC02t\", got " + prettify(test3));
133     }
134     if(test3.length() != 3){
135         errln((UnicodeString)"append or length failed for UChar32, expected 2, got " + test3.length());
136     }
137 
138     // test some UChar32 overloads
139     if( test3.setTo((UChar32)0x10330).length() != 2 ||
140         test3.insert(0, (UChar32)0x20100).length() != 4 ||
141         test3.replace(2, 2, (UChar32)0xe0061).length() != 4 ||
142         (test3 = (UChar32)0x14001).length() != 2
143     ) {
144         errln((UnicodeString)"simple UChar32 overloads for replace, insert, setTo or = failed");
145     }
146 
147     {
148         // test moveIndex32()
149         UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
150 
151         if(
152             s.moveIndex32(2, -1)!=0 ||
153             s.moveIndex32(2, 1)!=4 ||
154             s.moveIndex32(2, 2)!=5 ||
155             s.moveIndex32(5, -2)!=2 ||
156             s.moveIndex32(0, -1)!=0 ||
157             s.moveIndex32(6, 1)!=6
158         ) {
159             errln("UnicodeString::moveIndex32() failed");
160         }
161 
162         if(s.getChar32Start(1)!=0 || s.getChar32Start(2)!=2) {
163             errln("UnicodeString::getChar32Start() failed");
164         }
165 
166         if(s.getChar32Limit(1)!=2 || s.getChar32Limit(2)!=2) {
167             errln("UnicodeString::getChar32Limit() failed");
168         }
169     }
170 
171     {
172         // test new 2.2 constructors and setTo function that parallel Java's substring function.
173         UnicodeString src("Hello folks how are you?");
174         UnicodeString target1("how are you?");
175         if (target1 != UnicodeString(src, 12)) {
176             errln("UnicodeString(const UnicodeString&, int32_t) failed");
177         }
178         UnicodeString target2("folks");
179         if (target2 != UnicodeString(src, 6, 5)) {
180             errln("UnicodeString(const UnicodeString&, int32_t, int32_t) failed");
181         }
182         if (target1 != target2.setTo(src, 12)) {
183             errln("UnicodeString::setTo(const UnicodeString&, int32_t) failed");
184         }
185     }
186 
187     {
188         // op+ is new in ICU 2.8
189         UnicodeString s=UnicodeString("abc", "")+UnicodeString("def", "")+UnicodeString("ghi", "");
190         if(s!=UnicodeString("abcdefghi", "")) {
191             errln("operator+(UniStr, UniStr) failed");
192         }
193     }
194 
195     {
196         // tests for Jitterbug 2360
197         // verify that APIs with source pointer + length accept length == -1
198         // mostly test only where modified, only few functions did not already do this
199         if(UnicodeString("abc", -1, "")!=UnicodeString("abc", "")) {
200             errln("UnicodeString(codepageData, dataLength, codepage) does not work with dataLength==-1");
201         }
202 
203         UChar buffer[10]={ 0x61, 0x62, 0x20ac, 0xd900, 0xdc05, 0,   0x62, 0xffff, 0xdbff, 0xdfff };
204         UnicodeString s, t(buffer, -1, UPRV_LENGTHOF(buffer));
205 
206         if(s.setTo(buffer, -1, UPRV_LENGTHOF(buffer)).length()!=u_strlen(buffer)) {
207             errln("UnicodeString.setTo(buffer, length, capacity) does not work with length==-1");
208         }
209         if(t.length()!=u_strlen(buffer)) {
210             errln("UnicodeString(buffer, length, capacity) does not work with length==-1");
211         }
212 
213         if(0!=s.caseCompare(buffer, -1, U_FOLD_CASE_DEFAULT)) {
214             errln("UnicodeString.caseCompare(const UChar *, length, options) does not work with length==-1");
215         }
216         if(0!=s.caseCompare(0, s.length(), buffer, U_FOLD_CASE_DEFAULT)) {
217             errln("UnicodeString.caseCompare(start, _length, const UChar *, options) does not work");
218         }
219 
220         buffer[u_strlen(buffer)]=0xe4;
221         UnicodeString u(buffer, -1, UPRV_LENGTHOF(buffer));
222         if(s.setTo(buffer, -1, UPRV_LENGTHOF(buffer)).length()!=UPRV_LENGTHOF(buffer)) {
223             errln("UnicodeString.setTo(buffer without NUL, length, capacity) does not work with length==-1");
224         }
225         if(u.length()!=UPRV_LENGTHOF(buffer)) {
226             errln("UnicodeString(buffer without NUL, length, capacity) does not work with length==-1");
227         }
228 
229         static const char cs[]={ 0x61, (char)0xe4, (char)0x85, 0 };
230         UConverter *cnv;
231         UErrorCode errorCode=U_ZERO_ERROR;
232 
233         cnv=ucnv_open("ISO-8859-1", &errorCode);
234         UnicodeString v(cs, -1, cnv, errorCode);
235         ucnv_close(cnv);
236         if(v!=CharsToUnicodeString("a\\xe4\\x85")) {
237             errln("UnicodeString(const char *, length, cnv, errorCode) does not work with length==-1");
238         }
239     }
240 
241 #if U_CHARSET_IS_UTF8
242     {
243         // Test the hardcoded-UTF-8 UnicodeString optimizations.
244         static const uint8_t utf8[]={ 0x61, 0xC3, 0xA4, 0xC3, 0x9F, 0xE4, 0xB8, 0x80, 0 };
245         static const UChar utf16[]={ 0x61, 0xE4, 0xDF, 0x4E00 };
246         UnicodeString from8a = UnicodeString((const char *)utf8);
247         UnicodeString from8b = UnicodeString((const char *)utf8, (int32_t)sizeof(utf8)-1);
248         UnicodeString from16(FALSE, utf16, UPRV_LENGTHOF(utf16));
249         if(from8a != from16 || from8b != from16) {
250             errln("UnicodeString(const char * U_CHARSET_IS_UTF8) failed");
251         }
252         char buffer[16];
253         int32_t length8=from16.extract(0, 0x7fffffff, buffer, (uint32_t)sizeof(buffer));
254         if(length8!=((int32_t)sizeof(utf8)-1) || 0!=uprv_memcmp(buffer, utf8, sizeof(utf8))) {
255             errln("UnicodeString::extract(char * U_CHARSET_IS_UTF8) failed");
256         }
257         length8=from16.extract(1, 2, buffer, (uint32_t)sizeof(buffer));
258         if(length8!=4 || buffer[length8]!=0 || 0!=uprv_memcmp(buffer, utf8+1, length8)) {
259             errln("UnicodeString::extract(substring to char * U_CHARSET_IS_UTF8) failed");
260         }
261     }
262 #endif
263 }
264 
265 void
TestCompare()266 UnicodeStringTest::TestCompare()
267 {
268     UnicodeString   test1("this is a test");
269     UnicodeString   test2("this is a test");
270     UnicodeString   test3("this is a test of the emergency broadcast system");
271     UnicodeString   test4("never say, \"this is a test\"!!");
272 
273     UnicodeString   test5((UChar)0x5000);
274     UnicodeString   test6((UChar)0x5100);
275 
276     UChar         uniChars[] = { 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73,
277                  0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0 };
278     char            chars[] = "this is a test";
279 
280     // test operator== and operator!=
281     if (test1 != test2 || test1 == test3 || test1 == test4)
282         errln("operator== or operator!= failed");
283 
284     // test operator> and operator<
285     if (test1 > test2 || test1 < test2 || !(test1 < test3) || !(test1 > test4) ||
286         !(test5 < test6)
287     ) {
288         errln("operator> or operator< failed");
289     }
290 
291     // test operator>= and operator<=
292     if (!(test1 >= test2) || !(test1 <= test2) || !(test1 <= test3) || !(test1 >= test4))
293         errln("operator>= or operator<= failed");
294 
295     // test compare(UnicodeString)
296     if (test1.compare(test2) != 0 || test1.compare(test3) >= 0 || test1.compare(test4) <= 0)
297         errln("compare(UnicodeString) failed");
298 
299     //test compare(offset, length, UnicodeString)
300     if(test1.compare(0, 14, test2) != 0 ||
301         test3.compare(0, 14, test2) != 0 ||
302         test4.compare(12, 14, test2) != 0 ||
303         test3.compare(0, 18, test1) <=0  )
304         errln("compare(offset, length, UnicodeString) fails");
305 
306     // test compare(UChar*)
307     if (test2.compare(uniChars) != 0 || test3.compare(uniChars) <= 0 || test4.compare(uniChars) >= 0)
308         errln("compare(UChar*) failed");
309 
310     // test compare(char*)
311     if (test2.compare(chars) != 0 || test3.compare(chars) <= 0 || test4.compare(chars) >= 0)
312         errln("compare(char*) failed");
313 
314     // test compare(UChar*, length)
315     if (test1.compare(uniChars, 4) <= 0 || test1.compare(uniChars, 4) <= 0)
316         errln("compare(UChar*, length) failed");
317 
318     // test compare(thisOffset, thisLength, that, thatOffset, thatLength)
319     if (test1.compare(0, 14, test2, 0, 14) != 0
320     || test1.compare(0, 14, test3, 0, 14) != 0
321     || test1.compare(0, 14, test4, 12, 14) != 0)
322         errln("1. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
323 
324     if (test1.compare(10, 4, test2, 0, 4) >= 0
325     || test1.compare(10, 4, test3, 22, 9) <= 0
326     || test1.compare(10, 4, test4, 22, 4) != 0)
327         errln("2. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
328 
329     // test compareBetween
330     if (test1.compareBetween(0, 14, test2, 0, 14) != 0 || test1.compareBetween(0, 14, test3, 0, 14) != 0
331                     || test1.compareBetween(0, 14, test4, 12, 26) != 0)
332         errln("compareBetween failed");
333 
334     if (test1.compareBetween(10, 14, test2, 0, 4) >= 0 || test1.compareBetween(10, 14, test3, 22, 31) <= 0
335                     || test1.compareBetween(10, 14, test4, 22, 26) != 0)
336         errln("compareBetween failed");
337 
338     // test compare() etc. with strings that share a buffer but are not equal
339     test2=test1; // share the buffer, length() too large for the stackBuffer
340     test2.truncate(1); // change only the length, not the buffer
341     if( test1==test2 || test1<=test2 ||
342         test1.compare(test2)<=0 ||
343         test1.compareCodePointOrder(test2)<=0 ||
344         test1.compareCodePointOrder(0, INT32_MAX, test2)<=0 ||
345         test1.compareCodePointOrder(0, INT32_MAX, test2, 0, INT32_MAX)<=0 ||
346         test1.compareCodePointOrderBetween(0, INT32_MAX, test2, 0, INT32_MAX)<=0 ||
347         test1.caseCompare(test2, U_FOLD_CASE_DEFAULT)<=0
348     ) {
349         errln("UnicodeStrings that share a buffer but have different lengths compare as equal");
350     }
351 
352     /* test compareCodePointOrder() */
353     {
354         /* these strings are in ascending order */
355         static const UChar strings[][4]={
356             { 0x61, 0 },                    /* U+0061 */
357             { 0x20ac, 0xd801, 0 },          /* U+20ac U+d801 */
358             { 0x20ac, 0xd800, 0xdc00, 0 },  /* U+20ac U+10000 */
359             { 0xd800, 0 },                  /* U+d800 */
360             { 0xd800, 0xff61, 0 },          /* U+d800 U+ff61 */
361             { 0xdfff, 0 },                  /* U+dfff */
362             { 0xff61, 0xdfff, 0 },          /* U+ff61 U+dfff */
363             { 0xff61, 0xd800, 0xdc02, 0 },  /* U+ff61 U+10002 */
364             { 0xd800, 0xdc02, 0 },          /* U+10002 */
365             { 0xd84d, 0xdc56, 0 }           /* U+23456 */
366         };
367         UnicodeString u[20]; // must be at least as long as strings[]
368         int32_t i;
369 
370         for(i=0; i<UPRV_LENGTHOF(strings); ++i) {
371             u[i]=UnicodeString(TRUE, strings[i], -1);
372         }
373 
374         for(i=0; i<UPRV_LENGTHOF(strings)-1; ++i) {
375             if(u[i].compareCodePointOrder(u[i+1])>=0 || u[i].compareCodePointOrder(0, INT32_MAX, u[i+1].getBuffer())>=0) {
376                 errln("error: UnicodeString::compareCodePointOrder() fails for string %d and the following one\n", i);
377             }
378         }
379     }
380 
381     /* test caseCompare() */
382     {
383         static const UChar
384         _mixed[]=               { 0x61, 0x42, 0x131, 0x3a3, 0xdf,       0x130,       0x49,  0xfb03,           0xd93f, 0xdfff, 0 },
385         _otherDefault[]=        { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x69, 0x307, 0x69,  0x46, 0x66, 0x49, 0xd93f, 0xdfff, 0 },
386         _otherExcludeSpecialI[]={ 0x41, 0x62, 0x131, 0x3c3, 0x53, 0x73, 0x69,        0x131, 0x66, 0x46, 0x69, 0xd93f, 0xdfff, 0 },
387         _different[]=           { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x130,       0x49,  0x46, 0x66, 0x49, 0xd93f, 0xdffd, 0 };
388 
389         UnicodeString
390             mixed(TRUE, _mixed, -1),
391             otherDefault(TRUE, _otherDefault, -1),
392             otherExcludeSpecialI(TRUE, _otherExcludeSpecialI, -1),
393             different(TRUE, _different, -1);
394 
395         int8_t result;
396 
397         /* test caseCompare() */
398         result=mixed.caseCompare(otherDefault, U_FOLD_CASE_DEFAULT);
399         if(result!=0 || 0!=mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_DEFAULT)) {
400             errln("error: mixed.caseCompare(other, default)=%ld instead of 0\n", result);
401         }
402         result=mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
403         if(result!=0) {
404             errln("error: mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=%ld instead of 0\n", result);
405         }
406         result=mixed.caseCompare(otherDefault, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
407         if(result==0 || 0==mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_EXCLUDE_SPECIAL_I)) {
408             errln("error: mixed.caseCompare(other, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=0 instead of !=0\n");
409         }
410 
411         /* test caseCompare() */
412         result=mixed.caseCompare(different, U_FOLD_CASE_DEFAULT);
413         if(result<=0) {
414             errln("error: mixed.caseCompare(different, default)=%ld instead of positive\n", result);
415         }
416 
417         /* test caseCompare() - include the folded sharp s (U+00df) with different lengths */
418         result=mixed.caseCompare(1, 4, different, 1, 5, U_FOLD_CASE_DEFAULT);
419         if(result!=0 || 0!=mixed.caseCompareBetween(1, 5, different, 1, 6, U_FOLD_CASE_DEFAULT)) {
420             errln("error: mixed.caseCompare(mixed, 1, 4, different, 1, 5, default)=%ld instead of 0\n", result);
421         }
422 
423         /* test caseCompare() - stop in the middle of the sharp s (U+00df) */
424         result=mixed.caseCompare(1, 4, different, 1, 4, U_FOLD_CASE_DEFAULT);
425         if(result<=0) {
426             errln("error: mixed.caseCompare(1, 4, different, 1, 4, default)=%ld instead of positive\n", result);
427         }
428     }
429 
430     // test that srcLength=-1 is handled in functions that
431     // take input const UChar */int32_t srcLength (j785)
432     {
433         static const UChar u[]={ 0x61, 0x308, 0x62, 0 };
434         UnicodeString s=UNICODE_STRING("a\\u0308b", 8).unescape();
435 
436         if(s.compare(u, -1)!=0 || s.compare(0, 999, u, 0, -1)!=0) {
437             errln("error UnicodeString::compare(..., const UChar *, srcLength=-1) does not work");
438         }
439 
440         if(s.compareCodePointOrder(u, -1)!=0 || s.compareCodePointOrder(0, 999, u, 0, -1)!=0) {
441             errln("error UnicodeString::compareCodePointOrder(..., const UChar *, srcLength=-1, ...) does not work");
442         }
443 
444         if(s.caseCompare(u, -1, U_FOLD_CASE_DEFAULT)!=0 || s.caseCompare(0, 999, u, 0, -1, U_FOLD_CASE_DEFAULT)!=0) {
445             errln("error UnicodeString::caseCompare(..., const UChar *, srcLength=-1, ...) does not work");
446         }
447 
448         if(s.indexOf(u, 1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0)!=1) {
449             errln("error UnicodeString::indexOf(const UChar *, srcLength=-1, ...) does not work");
450         }
451 
452         if(s.lastIndexOf(u, 1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0)!=1) {
453             errln("error UnicodeString::lastIndexOf(const UChar *, srcLength=-1, ...) does not work");
454         }
455 
456         UnicodeString s2, s3;
457         s2.replace(0, 0, u+1, -1);
458         s3.replace(0, 0, u, 1, -1);
459         if(s.compare(1, 999, s2)!=0 || s2!=s3) {
460             errln("error UnicodeString::replace(..., const UChar *, srcLength=-1, ...) does not work");
461         }
462     }
463 }
464 
465 void
TestExtract()466 UnicodeStringTest::TestExtract()
467 {
468     UnicodeString  test1("Now is the time for all good men to come to the aid of their country.", "");
469     UnicodeString  test2;
470     UChar          test3[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
471     char           test4[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
472     UnicodeString  test5;
473     char           test6[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
474 
475     test1.extract(11, 12, test2);
476     test1.extract(11, 12, test3);
477     if (test1.extract(11, 12, test4) != 12 || test4[12] != 0) {
478         errln("UnicodeString.extract(char *) failed to return the correct size of destination buffer.");
479     }
480 
481     // test proper pinning in extractBetween()
482     test1.extractBetween(-3, 7, test5);
483     if(test5!=UNICODE_STRING("Now is ", 7)) {
484         errln("UnicodeString.extractBetween(-3, 7) did not pin properly.");
485     }
486 
487     test1.extractBetween(11, 23, test5);
488     if (test1.extract(60, 71, test6) != 9) {
489         errln("UnicodeString.extract() failed to return the correct size of destination buffer for end of buffer.");
490     }
491     if (test1.extract(11, 12, test6) != 12) {
492         errln("UnicodeString.extract() failed to return the correct size of destination buffer.");
493     }
494 
495     // convert test4 back to Unicode for comparison
496     UnicodeString test4b(test4, 12);
497 
498     if (test1.extract(11, 12, (char *)NULL) != 12) {
499         errln("UnicodeString.extract(NULL) failed to return the correct size of destination buffer.");
500     }
501     if (test1.extract(11, -1, test6) != 0) {
502         errln("UnicodeString.extract(-1) failed to stop reading the string.");
503     }
504 
505     for (int32_t i = 0; i < 12; i++) {
506         if (test1.charAt((int32_t)(11 + i)) != test2.charAt(i)) {
507             errln(UnicodeString("extracting into a UnicodeString failed at position ") + i);
508             break;
509         }
510         if (test1.charAt((int32_t)(11 + i)) != test3[i]) {
511             errln(UnicodeString("extracting into an array of UChar failed at position ") + i);
512             break;
513         }
514         if (((char)test1.charAt((int32_t)(11 + i))) != test4b.charAt(i)) {
515             errln(UnicodeString("extracting into an array of char failed at position ") + i);
516             break;
517         }
518         if (test1.charAt((int32_t)(11 + i)) != test5.charAt(i)) {
519             errln(UnicodeString("extracting with extractBetween failed at position ") + i);
520             break;
521         }
522     }
523 
524     // test preflighting and overflows with invariant conversion
525     if (test1.extract(0, 10, (char *)NULL, "") != 10) {
526         errln("UnicodeString.extract(0, 10, (char *)NULL, \"\") != 10");
527     }
528 
529     test4[2] = (char)0xff;
530     if (test1.extract(0, 10, test4, 2, "") != 10) {
531         errln("UnicodeString.extract(0, 10, test4, 2, \"\") != 10");
532     }
533     if (test4[2] != (char)0xff) {
534         errln("UnicodeString.extract(0, 10, test4, 2, \"\") overwrote test4[2]");
535     }
536 
537     {
538         // test new, NUL-terminating extract() function
539         UnicodeString s("terminate", "");
540         UChar dest[20]={
541             0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5,
542             0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5
543         };
544         UErrorCode errorCode;
545         int32_t length;
546 
547         errorCode=U_ZERO_ERROR;
548         length=s.extract((UChar *)NULL, 0, errorCode);
549         if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) {
550             errln("UnicodeString.extract(NULL, 0)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)", length, s.length(), u_errorName(errorCode));
551         }
552 
553         errorCode=U_ZERO_ERROR;
554         length=s.extract(dest, s.length()-1, errorCode);
555         if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) {
556             errln("UnicodeString.extract(dest too short)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)",
557                 length, u_errorName(errorCode), s.length());
558         }
559 
560         errorCode=U_ZERO_ERROR;
561         length=s.extract(dest, s.length(), errorCode);
562         if(errorCode!=U_STRING_NOT_TERMINATED_WARNING || length!=s.length()) {
563             errln("UnicodeString.extract(dest just right without NUL)==%d (%s) expected %d (U_STRING_NOT_TERMINATED_WARNING)",
564                 length, u_errorName(errorCode), s.length());
565         }
566         if(dest[length-1]!=s[length-1] || dest[length]!=0xa5) {
567             errln("UnicodeString.extract(dest just right without NUL) did not extract the string correctly");
568         }
569 
570         errorCode=U_ZERO_ERROR;
571         length=s.extract(dest, s.length()+1, errorCode);
572         if(errorCode!=U_ZERO_ERROR || length!=s.length()) {
573             errln("UnicodeString.extract(dest large enough)==%d (%s) expected %d (U_ZERO_ERROR)",
574                 length, u_errorName(errorCode), s.length());
575         }
576         if(dest[length-1]!=s[length-1] || dest[length]!=0 || dest[length+1]!=0xa5) {
577             errln("UnicodeString.extract(dest large enough) did not extract the string correctly");
578         }
579     }
580 
581     {
582         // test new UConverter extract() and constructor
583         UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
584         char buffer[32];
585         static const char expect[]={
586             (char)0xf0, (char)0xaf, (char)0xa6, (char)0x99,
587             (char)0xf0, (char)0x9d, (char)0x85, (char)0x9f,
588             (char)0xc3, (char)0x84,
589             (char)0xe1, (char)0xbb, (char)0x90
590         };
591         UErrorCode errorCode=U_ZERO_ERROR;
592         UConverter *cnv=ucnv_open("UTF-8", &errorCode);
593         int32_t length;
594 
595         if(U_SUCCESS(errorCode)) {
596             // test preflighting
597             if( (length=s.extract(NULL, 0, cnv, errorCode))!=13 ||
598                 errorCode!=U_BUFFER_OVERFLOW_ERROR
599             ) {
600                 errln("UnicodeString::extract(NULL, UConverter) preflighting failed (length=%ld, %s)",
601                       length, u_errorName(errorCode));
602             }
603             errorCode=U_ZERO_ERROR;
604             if( (length=s.extract(buffer, 2, cnv, errorCode))!=13 ||
605                 errorCode!=U_BUFFER_OVERFLOW_ERROR
606             ) {
607                 errln("UnicodeString::extract(too small, UConverter) preflighting failed (length=%ld, %s)",
608                       length, u_errorName(errorCode));
609             }
610 
611             // try error cases
612             errorCode=U_ZERO_ERROR;
613             if( s.extract(NULL, 2, cnv, errorCode)==13 || U_SUCCESS(errorCode)) {
614                 errln("UnicodeString::extract(UConverter) succeeded with an illegal destination");
615             }
616             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
617             if( s.extract(NULL, 0, cnv, errorCode)==13 || U_SUCCESS(errorCode)) {
618                 errln("UnicodeString::extract(UConverter) succeeded with a previous error code");
619             }
620             errorCode=U_ZERO_ERROR;
621 
622             // extract for real
623             if( (length=s.extract(buffer, sizeof(buffer), cnv, errorCode))!=13 ||
624                 uprv_memcmp(buffer, expect, 13)!=0 ||
625                 buffer[13]!=0 ||
626                 U_FAILURE(errorCode)
627             ) {
628                 errln("UnicodeString::extract(UConverter) conversion failed (length=%ld, %s)",
629                       length, u_errorName(errorCode));
630             }
631             // Test again with just the converter name.
632             if( (length=s.extract(0, s.length(), buffer, sizeof(buffer), "UTF-8"))!=13 ||
633                 uprv_memcmp(buffer, expect, 13)!=0 ||
634                 buffer[13]!=0 ||
635                 U_FAILURE(errorCode)
636             ) {
637                 errln("UnicodeString::extract(\"UTF-8\") conversion failed (length=%ld, %s)",
638                       length, u_errorName(errorCode));
639             }
640 
641             // try the constructor
642             UnicodeString t(expect, sizeof(expect), cnv, errorCode);
643             if(U_FAILURE(errorCode) || s!=t) {
644                 errln("UnicodeString(UConverter) conversion failed (%s)",
645                       u_errorName(errorCode));
646             }
647 
648             ucnv_close(cnv);
649         }
650     }
651 }
652 
653 void
TestRemoveReplace()654 UnicodeStringTest::TestRemoveReplace()
655 {
656     UnicodeString   test1("The rain in Spain stays mainly on the plain");
657     UnicodeString   test2("eat SPAMburgers!");
658     UChar         test3[] = { 0x53, 0x50, 0x41, 0x4d, 0x4d, 0 };
659     char            test4[] = "SPAM";
660     UnicodeString&  test5 = test1;
661 
662     test1.replace(4, 4, test2, 4, 4);
663     test1.replace(12, 5, test3, 4);
664     test3[4] = 0;
665     test1.replace(17, 4, test3);
666     test1.replace(23, 4, test4);
667     test1.replaceBetween(37, 42, test2, 4, 8);
668 
669     if (test1 != "The SPAM in SPAM SPAMs SPAMly on the SPAM")
670         errln("One of the replace methods failed:\n"
671               "  expected \"The SPAM in SPAM SPAMs SPAMly on the SPAM\",\n"
672               "  got \"" + test1 + "\"");
673 
674     test1.remove(21, 1);
675     test1.removeBetween(26, 28);
676 
677     if (test1 != "The SPAM in SPAM SPAM SPAM on the SPAM")
678         errln("One of the remove methods failed:\n"
679               "  expected \"The SPAM in SPAM SPAM SPAM on the SPAM\",\n"
680               "  got \"" + test1 + "\"");
681 
682     for (int32_t i = 0; i < test1.length(); i++) {
683         if (test5[i] != 0x53 && test5[i] != 0x50 && test5[i] != 0x41 && test5[i] != 0x4d && test5[i] != 0x20) {
684             test1.setCharAt(i, 0x78);
685         }
686     }
687 
688     if (test1 != "xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM")
689         errln("One of the remove methods failed:\n"
690               "  expected \"xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM\",\n"
691               "  got \"" + test1 + "\"");
692 
693     test1.remove();
694     if (test1.length() != 0)
695         errln("Remove() failed: expected empty string, got \"" + test1 + "\"");
696 }
697 
698 void
TestSearching()699 UnicodeStringTest::TestSearching()
700 {
701     UnicodeString test1("test test ttest tetest testesteststt");
702     UnicodeString test2("test");
703     UChar testChar = 0x74;
704 
705     UChar32 testChar32 = 0x20402;
706     UChar testData[]={
707         //   0       1       2       3       4       5       6       7
708         0xd841, 0xdc02, 0x0071, 0xdc02, 0xd841, 0x0071, 0xd841, 0xdc02,
709 
710         //   8       9      10      11      12      13      14      15
711         0x0071, 0x0072, 0xd841, 0xdc02, 0x0071, 0xd841, 0xdc02, 0x0071,
712 
713         //  16      17      18      19
714         0xdc02, 0xd841, 0x0073, 0x0000
715     };
716     UnicodeString test3(testData);
717     UnicodeString test4(testChar32);
718 
719     uint16_t occurrences = 0;
720     int32_t startPos = 0;
721     for ( ;
722           startPos != -1 && startPos < test1.length();
723           (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
724         ;
725     if (occurrences != 6)
726         errln(UnicodeString("indexOf failed: expected to find 6 occurrences, found ") + occurrences);
727 
728     for ( occurrences = 0, startPos = 10;
729           startPos != -1 && startPos < test1.length();
730           (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
731         ;
732     if (occurrences != 4)
733         errln(UnicodeString("indexOf with starting offset failed: "
734                             "expected to find 4 occurrences, found ") + occurrences);
735 
736     int32_t endPos = 28;
737     for ( occurrences = 0, startPos = 5;
738           startPos != -1 && startPos < test1.length();
739           (startPos = test1.indexOf(test2, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
740         ;
741     if (occurrences != 4)
742         errln(UnicodeString("indexOf with starting and ending offsets failed: "
743                             "expected to find 4 occurrences, found ") + occurrences);
744 
745     //using UChar32 string
746     for ( startPos=0, occurrences=0;
747           startPos != -1 && startPos < test3.length();
748           (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
749         ;
750     if (occurrences != 4)
751         errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences);
752 
753     for ( startPos=10, occurrences=0;
754           startPos != -1 && startPos < test3.length();
755           (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
756         ;
757     if (occurrences != 2)
758         errln(UnicodeString("indexOf failed: expected to find 2 occurrences, found ") + occurrences);
759     //---
760 
761     for ( occurrences = 0, startPos = 0;
762           startPos != -1 && startPos < test1.length();
763           (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
764         ;
765     if (occurrences != 16)
766         errln(UnicodeString("indexOf with character failed: "
767                             "expected to find 16 occurrences, found ") + occurrences);
768 
769     for ( occurrences = 0, startPos = 10;
770           startPos != -1 && startPos < test1.length();
771           (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
772         ;
773     if (occurrences != 12)
774         errln(UnicodeString("indexOf with character & start offset failed: "
775                             "expected to find 12 occurrences, found ") + occurrences);
776 
777     for ( occurrences = 0, startPos = 5, endPos = 28;
778           startPos != -1 && startPos < test1.length();
779           (startPos = test1.indexOf(testChar, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
780         ;
781     if (occurrences != 10)
782         errln(UnicodeString("indexOf with character & start & end offsets failed: "
783                             "expected to find 10 occurrences, found ") + occurrences);
784 
785     //testing for UChar32
786     UnicodeString subString;
787     for( occurrences =0, startPos=0; startPos < test3.length(); startPos +=1){
788         subString.append(test3, startPos, test3.length());
789         if(subString.indexOf(testChar32) != -1 ){
790              ++occurrences;
791         }
792         subString.remove();
793     }
794     if (occurrences != 14)
795         errln((UnicodeString)"indexOf failed: expected to find 14 occurrences, found " + occurrences);
796 
797     for ( occurrences = 0, startPos = 0;
798           startPos != -1 && startPos < test3.length();
799           (startPos = test3.indexOf(testChar32, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
800         ;
801     if (occurrences != 4)
802         errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences);
803 
804     endPos=test3.length();
805     for ( occurrences = 0, startPos = 5;
806           startPos != -1 && startPos < test3.length();
807           (startPos = test3.indexOf(testChar32, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
808         ;
809     if (occurrences != 3)
810         errln((UnicodeString)"indexOf with character & start & end offsets failed: expected to find 2 occurrences, found " + occurrences);
811     //---
812 
813     if(test1.lastIndexOf(test2)!=29) {
814         errln("test1.lastIndexOf(test2)!=29");
815     }
816 
817     if(test1.lastIndexOf(test2, 15)!=29 || test1.lastIndexOf(test2, 29)!=29 || test1.lastIndexOf(test2, 30)!=-1) {
818         errln("test1.lastIndexOf(test2, start) failed");
819     }
820 
821     for ( occurrences = 0, startPos = 32;
822           startPos != -1;
823           (startPos = test1.lastIndexOf(test2, 5, startPos - 5)) != -1 ? ++occurrences : 0)
824         ;
825     if (occurrences != 4)
826         errln(UnicodeString("lastIndexOf with starting and ending offsets failed: "
827                             "expected to find 4 occurrences, found ") + occurrences);
828 
829     for ( occurrences = 0, startPos = 32;
830           startPos != -1;
831           (startPos = test1.lastIndexOf(testChar, 5, startPos - 5)) != -1 ? ++occurrences : 0)
832         ;
833     if (occurrences != 11)
834         errln(UnicodeString("lastIndexOf with character & start & end offsets failed: "
835                             "expected to find 11 occurrences, found ") + occurrences);
836 
837     //testing UChar32
838     startPos=test3.length();
839     for ( occurrences = 0;
840           startPos != -1;
841           (startPos = test3.lastIndexOf(testChar32, 5, startPos - 5)) != -1 ? ++occurrences : 0)
842         ;
843     if (occurrences != 3)
844         errln((UnicodeString)"lastIndexOf with character & start & end offsets failed: expected to find 3 occurrences, found " + occurrences);
845 
846 
847     for ( occurrences = 0, endPos = test3.length();  endPos > 0; endPos -= 1){
848         subString.remove();
849         subString.append(test3, 0, endPos);
850         if(subString.lastIndexOf(testChar32) != -1 ){
851             ++occurrences;
852         }
853     }
854     if (occurrences != 18)
855         errln((UnicodeString)"indexOf failed: expected to find 18 occurrences, found " + occurrences);
856     //---
857 
858     // test that indexOf(UChar32) and lastIndexOf(UChar32)
859     // do not find surrogate code points when they are part of matched pairs
860     // (= part of supplementary code points)
861     // Jitterbug 1542
862     if(test3.indexOf((UChar32)0xd841) != 4 || test3.indexOf((UChar32)0xdc02) != 3) {
863         errln("error: UnicodeString::indexOf(UChar32 surrogate) finds a partial supplementary code point");
864     }
865     if( UnicodeString(test3, 0, 17).lastIndexOf((UChar)0xd841, 0) != 4 ||
866         UnicodeString(test3, 0, 17).lastIndexOf((UChar32)0xd841, 2) != 4 ||
867         test3.lastIndexOf((UChar32)0xd841, 0, 17) != 4 || test3.lastIndexOf((UChar32)0xdc02, 0, 17) != 16
868     ) {
869         errln("error: UnicodeString::lastIndexOf(UChar32 surrogate) finds a partial supplementary code point");
870     }
871 }
872 
873 void
TestSpacePadding()874 UnicodeStringTest::TestSpacePadding()
875 {
876     UnicodeString test1("hello");
877     UnicodeString test2("   there");
878     UnicodeString test3("Hi!  How ya doin'?  Beautiful day, isn't it?");
879     UnicodeString test4;
880     UBool returnVal;
881     UnicodeString expectedValue;
882 
883     returnVal = test1.padLeading(15);
884     expectedValue = "          hello";
885     if (returnVal == FALSE || test1 != expectedValue)
886         errln("padLeading() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
887 
888     returnVal = test2.padTrailing(15);
889     expectedValue = "   there       ";
890     if (returnVal == FALSE || test2 != expectedValue)
891         errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
892 
893     expectedValue = test3;
894     returnVal = test3.padTrailing(15);
895     if (returnVal == TRUE || test3 != expectedValue)
896         errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
897 
898     expectedValue = "hello";
899     test4.setTo(test1).trim();
900 
901     if (test4 != expectedValue || test1 == expectedValue || test4 != expectedValue)
902         errln("trim(UnicodeString&) failed");
903 
904     test1.trim();
905     if (test1 != expectedValue)
906         errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
907 
908     test2.trim();
909     expectedValue = "there";
910     if (test2 != expectedValue)
911         errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
912 
913     test3.trim();
914     expectedValue = "Hi!  How ya doin'?  Beautiful day, isn't it?";
915     if (test3 != expectedValue)
916         errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
917 
918     returnVal = test1.truncate(15);
919     expectedValue = "hello";
920     if (returnVal == TRUE || test1 != expectedValue)
921         errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
922 
923     returnVal = test2.truncate(15);
924     expectedValue = "there";
925     if (returnVal == TRUE || test2 != expectedValue)
926         errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
927 
928     returnVal = test3.truncate(15);
929     expectedValue = "Hi!  How ya doi";
930     if (returnVal == FALSE || test3 != expectedValue)
931         errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
932 }
933 
934 void
TestPrefixAndSuffix()935 UnicodeStringTest::TestPrefixAndSuffix()
936 {
937     UnicodeString test1("Now is the time for all good men to come to the aid of their country.");
938     UnicodeString test2("Now");
939     UnicodeString test3("country.");
940     UnicodeString test4("count");
941 
942     if (!test1.startsWith(test2) || !test1.startsWith(test2, 0, test2.length())) {
943         errln("startsWith() failed: \"" + test2 + "\" should be a prefix of \"" + test1 + "\".");
944     }
945 
946     if (test1.startsWith(test3) ||
947         test1.startsWith(test3.getBuffer(), test3.length()) ||
948         test1.startsWith(test3.getTerminatedBuffer(), 0, -1)
949     ) {
950         errln("startsWith() failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test1 + "\".");
951     }
952 
953     if (test1.endsWith(test2)) {
954         errln("endsWith() failed: \"" + test2 + "\" shouldn't be a suffix of \"" + test1 + "\".");
955     }
956 
957     if (!test1.endsWith(test3)) {
958         errln("endsWith(test3) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
959     }
960     if (!test1.endsWith(test3, 0, INT32_MAX)) {
961         errln("endsWith(test3, 0, INT32_MAX) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
962     }
963 
964     if(!test1.endsWith(test3.getBuffer(), test3.length())) {
965         errln("endsWith(test3.getBuffer(), test3.length()) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
966     }
967     if(!test1.endsWith(test3.getTerminatedBuffer(), 0, -1)) {
968         errln("endsWith(test3.getTerminatedBuffer(), 0, -1) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
969     }
970 
971     if (!test3.startsWith(test4)) {
972         errln("endsWith(test4) failed: \"" + test4 + "\" should be a prefix of \"" + test3 + "\".");
973     }
974 
975     if (test4.startsWith(test3)) {
976         errln("startsWith(test3) failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test4 + "\".");
977     }
978 }
979 
980 void
TestStartsWithAndEndsWithNulTerminated()981 UnicodeStringTest::TestStartsWithAndEndsWithNulTerminated() {
982     UnicodeString test("abcde");
983     const UChar ab[] = { 0x61, 0x62, 0 };
984     const UChar de[] = { 0x64, 0x65, 0 };
985     assertTrue("abcde.startsWith(ab, -1)", test.startsWith(ab, -1));
986     assertTrue("abcde.startsWith(ab, 0, -1)", test.startsWith(ab, 0, -1));
987     assertTrue("abcde.endsWith(de, -1)", test.endsWith(de, -1));
988     assertTrue("abcde.endsWith(de, 0, -1)", test.endsWith(de, 0, -1));
989 }
990 
991 void
TestFindAndReplace()992 UnicodeStringTest::TestFindAndReplace()
993 {
994     UnicodeString test1("One potato, two potato, three potato, four\n");
995     UnicodeString test2("potato");
996     UnicodeString test3("MISSISSIPPI");
997 
998     UnicodeString expectedValue;
999 
1000     test1.findAndReplace(test2, test3);
1001     expectedValue = "One MISSISSIPPI, two MISSISSIPPI, three MISSISSIPPI, four\n";
1002     if (test1 != expectedValue)
1003         errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
1004     test1.findAndReplace(2, 32, test3, test2);
1005     expectedValue = "One potato, two potato, three MISSISSIPPI, four\n";
1006     if (test1 != expectedValue)
1007         errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
1008 }
1009 
1010 void
TestReverse()1011 UnicodeStringTest::TestReverse()
1012 {
1013     UnicodeString test("backwards words say to used I");
1014 
1015     test.reverse();
1016     test.reverse(2, 4);
1017     test.reverse(7, 2);
1018     test.reverse(10, 3);
1019     test.reverse(14, 5);
1020     test.reverse(20, 9);
1021 
1022     if (test != "I used to say words backwards")
1023         errln("reverse() failed:  Expected \"I used to say words backwards\",\n got \""
1024             + test + "\"");
1025 
1026     test=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
1027     test.reverse();
1028     if(test.char32At(0)!=0x1ed0 || test.char32At(1)!=0xc4 || test.char32At(2)!=0x1d15f || test.char32At(4)!=0x2f999) {
1029         errln("reverse() failed with supplementary characters");
1030     }
1031 
1032     // Test case for ticket #8091:
1033     // UnicodeString::reverse() failed to see a lead surrogate in the middle of
1034     // an odd-length string that contains no other lead surrogates.
1035     test=UNICODE_STRING_SIMPLE("ab\\U0001F4A9e").unescape();
1036     UnicodeString expected=UNICODE_STRING_SIMPLE("e\\U0001F4A9ba").unescape();
1037     test.reverse();
1038     if(test!=expected) {
1039         errln("reverse() failed with only lead surrogate in the middle");
1040     }
1041 }
1042 
1043 void
TestMiscellaneous()1044 UnicodeStringTest::TestMiscellaneous()
1045 {
1046     UnicodeString   test1("This is a test");
1047     UnicodeString   test2("This is a test");
1048     UnicodeString   test3("Me too!");
1049 
1050     // test getBuffer(minCapacity) and releaseBuffer()
1051     test1=UnicodeString(); // make sure that it starts with its stackBuffer
1052     UChar *p=test1.getBuffer(20);
1053     if(test1.getCapacity()<20) {
1054         errln("UnicodeString::getBuffer(20).getCapacity()<20");
1055     }
1056 
1057     test1.append((UChar)7); // must not be able to modify the string here
1058     test1.setCharAt(3, 7);
1059     test1.reverse();
1060     if( test1.length()!=0 ||
1061         test1.charAt(0)!=0xffff || test1.charAt(3)!=0xffff ||
1062         test1.getBuffer(10)!=0 || test1.getBuffer()!=0
1063     ) {
1064         errln("UnicodeString::getBuffer(minCapacity) allows read or write access to the UnicodeString");
1065     }
1066 
1067     p[0]=1;
1068     p[1]=2;
1069     p[2]=3;
1070     test1.releaseBuffer(3);
1071     test1.append((UChar)4);
1072 
1073     if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) {
1074         errln("UnicodeString::releaseBuffer(newLength) does not properly reallow access to the UnicodeString");
1075     }
1076 
1077     // test releaseBuffer() without getBuffer(minCapacity) - must not have any effect
1078     test1.releaseBuffer(1);
1079     if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) {
1080         errln("UnicodeString::releaseBuffer(newLength) without getBuffer(minCapacity) changed the UnicodeString");
1081     }
1082 
1083     // test getBuffer(const)
1084     const UChar *q=test1.getBuffer(), *r=test1.getBuffer();
1085     if( test1.length()!=4 ||
1086         q[0]!=1 || q[1]!=2 || q[2]!=3 || q[3]!=4 ||
1087         r[0]!=1 || r[1]!=2 || r[2]!=3 || r[3]!=4
1088     ) {
1089         errln("UnicodeString::getBuffer(const) does not return a usable buffer pointer");
1090     }
1091 
1092     // test releaseBuffer() with a NUL-terminated buffer
1093     test1.getBuffer(20)[2]=0;
1094     test1.releaseBuffer(); // implicit -1
1095     if(test1.length()!=2 || test1.charAt(0)!=1 || test1.charAt(1) !=2) {
1096         errln("UnicodeString::releaseBuffer(-1) does not properly set the length of the UnicodeString");
1097     }
1098 
1099     // test releaseBuffer() with a non-NUL-terminated buffer
1100     p=test1.getBuffer(256);
1101     for(int32_t i=0; i<test1.getCapacity(); ++i) {
1102         p[i]=(UChar)1;      // fill the buffer with all non-NUL code units
1103     }
1104     test1.releaseBuffer();  // implicit -1
1105     if(test1.length()!=test1.getCapacity() || test1.charAt(1)!=1 || test1.charAt(100)!=1 || test1.charAt(test1.getCapacity()-1)!=1) {
1106         errln("UnicodeString::releaseBuffer(-1 but no NUL) does not properly set the length of the UnicodeString");
1107     }
1108 
1109     // test getTerminatedBuffer()
1110     test1=UnicodeString("This is another test.", "");
1111     test2=UnicodeString("This is another test.", "");
1112     q=test1.getTerminatedBuffer();
1113     if(q[test1.length()]!=0 || test1!=test2 || test2.compare(q, -1)!=0) {
1114         errln("getTerminatedBuffer()[length]!=0");
1115     }
1116 
1117     const UChar u[]={ 5, 6, 7, 8, 0 };
1118     test1.setTo(FALSE, u, 3);
1119     q=test1.getTerminatedBuffer();
1120     if(q==u || q[0]!=5 || q[1]!=6 || q[2]!=7 || q[3]!=0) {
1121         errln("UnicodeString(u[3]).getTerminatedBuffer() returns a bad buffer");
1122     }
1123 
1124     test1.setTo(TRUE, u, -1);
1125     q=test1.getTerminatedBuffer();
1126     if(q!=u || test1.length()!=4 || q[3]!=8 || q[4]!=0) {
1127         errln("UnicodeString(u[-1]).getTerminatedBuffer() returns a bad buffer");
1128     }
1129 
1130     // NOTE: Some compilers will optimize u"la" to point to the same static memory
1131     // as u" lila", offset by 3 code units
1132     test1=UnicodeString(TRUE, u"la", 2);
1133     test1.append(UnicodeString(TRUE, u" lila", 5).getTerminatedBuffer(), 0, -1);
1134     assertEquals("UnicodeString::append(const UChar *, start, length) failed",
1135         u"la lila", test1);
1136 
1137     test1.insert(3, UnicodeString(TRUE, u"dudum ", 6), 0, INT32_MAX);
1138     assertEquals("UnicodeString::insert(start, const UniStr &, start, length) failed",
1139         u"la dudum lila", test1);
1140 
1141     static const UChar ucs[]={ 0x68, 0x6d, 0x20, 0 };
1142     test1.insert(9, ucs, -1);
1143     assertEquals("UnicodeString::insert(start, const UChar *, length) failed",
1144         u"la dudum hm lila", test1);
1145 
1146     test1.replace(9, 2, (UChar)0x2b);
1147     assertEquals("UnicodeString::replace(start, length, UChar) failed",
1148         u"la dudum + lila", test1);
1149 
1150     if(test1.hasMetaData() || UnicodeString().hasMetaData()) {
1151         errln("UnicodeString::hasMetaData() returns TRUE");
1152     }
1153 
1154     // test getTerminatedBuffer() on a truncated, shared, heap-allocated string
1155     test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
1156     test1.truncate(36);  // ensure length()<getCapacity()
1157     test2=test1;  // share the buffer
1158     test1.truncate(5);
1159     if(test1.length()!=5 || test1.getTerminatedBuffer()[5]!=0) {
1160         errln("UnicodeString(shared buffer).truncate() failed");
1161     }
1162     if(test2.length()!=36 || test2[5]!=0x66 || u_strlen(test2.getTerminatedBuffer())!=36) {
1163         errln("UnicodeString(shared buffer).truncate().getTerminatedBuffer() "
1164               "modified another copy of the string!");
1165     }
1166     test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
1167     test1.truncate(36);  // ensure length()<getCapacity()
1168     test2=test1;  // share the buffer
1169     test1.remove();
1170     if(test1.length()!=0 || test1.getTerminatedBuffer()[0]!=0) {
1171         errln("UnicodeString(shared buffer).remove() failed");
1172     }
1173     if(test2.length()!=36 || test2[0]!=0x61 || u_strlen(test2.getTerminatedBuffer())!=36) {
1174         errln("UnicodeString(shared buffer).remove().getTerminatedBuffer() "
1175               "modified another copy of the string!");
1176     }
1177 
1178     // ticket #9740
1179     test1.setTo(TRUE, ucs, 3);
1180     assertEquals("length of read-only alias", 3, test1.length());
1181     test1.trim();
1182     assertEquals("length of read-only alias after trim()", 2, test1.length());
1183     assertEquals("length of terminated buffer of read-only alias + trim()",
1184                  2, u_strlen(test1.getTerminatedBuffer()));
1185 }
1186 
1187 void
TestStackAllocation()1188 UnicodeStringTest::TestStackAllocation()
1189 {
1190     UChar           testString[] ={
1191         0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x63, 0x72, 0x61, 0x7a, 0x79, 0x20, 0x74, 0x65, 0x73, 0x74, 0x2e, 0 };
1192     UChar           guardWord = 0x4DED;
1193     UnicodeString*  test = 0;
1194 
1195     test = new  UnicodeString(testString);
1196     if (*test != "This is a crazy test.")
1197         errln("Test string failed to initialize properly.");
1198     if (guardWord != 0x04DED)
1199         errln("Test string initialization overwrote guard word!");
1200 
1201     test->insert(8, "only ");
1202     test->remove(15, 6);
1203     if (*test != "This is only a test.")
1204         errln("Manipulation of test string failed to work right.");
1205     if (guardWord != 0x4DED)
1206         errln("Manipulation of test string overwrote guard word!");
1207 
1208     // we have to deinitialize and release the backing store by calling the destructor
1209     // explicitly, since we can't overload operator delete
1210     delete test;
1211 
1212     UChar workingBuffer[] = {
1213         0x4e, 0x6f, 0x77, 0x20, 0x69, 0x73, 0x20, 0x74, 0x68, 0x65, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x20,
1214         0x66, 0x6f, 0x72, 0x20, 0x61, 0x6c, 0x6c, 0x20, 0x6d, 0x65, 0x6e, 0x20, 0x74, 0x6f, 0x20,
1215         0x63, 0x6f, 0x6d, 0x65, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1216         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1217         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1218     UChar guardWord2 = 0x4DED;
1219 
1220     test = new UnicodeString(workingBuffer, 35, 100);
1221     if (*test != "Now is the time for all men to come")
1222         errln("Stack-allocated backing store failed to initialize correctly.");
1223     if (guardWord2 != 0x4DED)
1224         errln("Stack-allocated backing store overwrote guard word!");
1225 
1226     test->insert(24, "good ");
1227     if (*test != "Now is the time for all good men to come")
1228         errln("insert() on stack-allocated UnicodeString didn't work right");
1229     if (guardWord2 != 0x4DED)
1230         errln("insert() on stack-allocated UnicodeString overwrote guard word!");
1231 
1232     if (workingBuffer[24] != 0x67)
1233         errln("insert() on stack-allocated UnicodeString didn't affect backing store");
1234 
1235     *test += " to the aid of their country.";
1236     if (*test != "Now is the time for all good men to come to the aid of their country.")
1237         errln("Stack-allocated UnicodeString overflow didn't work");
1238     if (guardWord2 != 0x4DED)
1239         errln("Stack-allocated UnicodeString overflow overwrote guard word!");
1240 
1241     *test = "ha!";
1242     if (*test != "ha!")
1243         errln("Assignment to stack-allocated UnicodeString didn't work");
1244     if (workingBuffer[0] != 0x4e)
1245         errln("Change to UnicodeString after overflow are still affecting original buffer");
1246     if (guardWord2 != 0x4DED)
1247         errln("Change to UnicodeString after overflow overwrote guard word!");
1248 
1249     // test read-only aliasing with setTo()
1250     workingBuffer[0] = 0x20ac;
1251     workingBuffer[1] = 0x125;
1252     workingBuffer[2] = 0;
1253     test->setTo(TRUE, workingBuffer, 2);
1254     if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x125) {
1255         errln("UnicodeString.setTo(readonly alias) does not alias correctly");
1256     }
1257 
1258     UnicodeString *c=test->clone();
1259 
1260     workingBuffer[1] = 0x109;
1261     if(test->charAt(1) != 0x109) {
1262         errln("UnicodeString.setTo(readonly alias) made a copy: did not see change in buffer");
1263     }
1264 
1265     if(c->length() != 2 || c->charAt(1) != 0x125) {
1266         errln("clone(alias) did not copy the buffer");
1267     }
1268     delete c;
1269 
1270     test->setTo(TRUE, workingBuffer, -1);
1271     if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x109) {
1272         errln("UnicodeString.setTo(readonly alias, length -1) does not alias correctly");
1273     }
1274 
1275     test->setTo(FALSE, workingBuffer, -1);
1276     if(!test->isBogus()) {
1277         errln("UnicodeString.setTo(unterminated readonly alias, length -1) does not result in isBogus()");
1278     }
1279 
1280     delete test;
1281 
1282     test=new UnicodeString();
1283     UChar buffer[]={0x0061, 0x0062, 0x20ac, 0x0043, 0x0042, 0x0000};
1284     test->setTo(buffer, 4, 10);
1285     if(test->length() !=4 || test->charAt(0) != 0x0061 || test->charAt(1) != 0x0062 ||
1286         test->charAt(2) != 0x20ac || test->charAt(3) != 0x0043){
1287         errln((UnicodeString)"UnicodeString.setTo(UChar*, length, capacity) does not work correctly\n" + prettify(*test));
1288     }
1289     delete test;
1290 
1291 
1292     // test the UChar32 constructor
1293     UnicodeString c32Test((UChar32)0x10ff2a);
1294     if( c32Test.length() != U16_LENGTH(0x10ff2a) ||
1295         c32Test.char32At(c32Test.length() - 1) != 0x10ff2a
1296     ) {
1297         errln("The UnicodeString(UChar32) constructor does not work with a 0x10ff2a filler");
1298     }
1299 
1300     // test the (new) capacity constructor
1301     UnicodeString capTest(5, (UChar32)0x2a, 5);
1302     if( capTest.length() != 5 * U16_LENGTH(0x2a) ||
1303         capTest.char32At(0) != 0x2a ||
1304         capTest.char32At(4) != 0x2a
1305     ) {
1306         errln("The UnicodeString capacity constructor does not work with an ASCII filler");
1307     }
1308 
1309     capTest = UnicodeString(5, (UChar32)0x10ff2a, 5);
1310     if( capTest.length() != 5 * U16_LENGTH(0x10ff2a) ||
1311         capTest.char32At(0) != 0x10ff2a ||
1312         capTest.char32At(4) != 0x10ff2a
1313     ) {
1314         errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
1315     }
1316 
1317     capTest = UnicodeString(5, (UChar32)0, 0);
1318     if(capTest.length() != 0) {
1319         errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
1320     }
1321 }
1322 
1323 /**
1324  * Test the unescape() function.
1325  */
TestUnescape(void)1326 void UnicodeStringTest::TestUnescape(void) {
1327     UnicodeString IN("abc\\u4567 \\n\\r \\U00101234xyz\\x1\\x{5289}\\x1b", -1, US_INV);
1328     UnicodeString OUT("abc");
1329     OUT.append((UChar)0x4567);
1330     OUT.append(" ");
1331     OUT.append((UChar)0xA);
1332     OUT.append((UChar)0xD);
1333     OUT.append(" ");
1334     OUT.append((UChar32)0x00101234);
1335     OUT.append("xyz");
1336     OUT.append((UChar32)1).append((UChar32)0x5289).append((UChar)0x1b);
1337     UnicodeString result = IN.unescape();
1338     if (result != OUT) {
1339         errln("FAIL: " + prettify(IN) + ".unescape() -> " +
1340               prettify(result) + ", expected " +
1341               prettify(OUT));
1342     }
1343 
1344     // test that an empty string is returned in case of an error
1345     if (!UNICODE_STRING("wrong \\u sequence", 17).unescape().isEmpty()) {
1346         errln("FAIL: unescaping of a string with an illegal escape sequence did not return an empty string");
1347     }
1348 
1349     // ICU-21648 limit backslash-uhhhh escapes to ASCII hex digits
1350     UnicodeString euro = UnicodeString(u"\\u20aC").unescape();
1351     assertEquals("ASCII Euro", u"€", euro);
1352     UnicodeString nonASCIIEuro = UnicodeString(u"\\u୨෦aC").unescape();
1353     assertTrue("unescape() accepted non-ASCII digits", nonASCIIEuro.isEmpty());
1354 }
1355 
1356 /* test code point counting functions --------------------------------------- */
1357 
1358 /* reference implementation of UnicodeString::hasMoreChar32Than() */
1359 static int32_t
_refUnicodeStringHasMoreChar32Than(const UnicodeString & s,int32_t start,int32_t length,int32_t number)1360 _refUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) {
1361     int32_t count=s.countChar32(start, length);
1362     return count>number;
1363 }
1364 
1365 /* compare the real function against the reference */
1366 void
_testUnicodeStringHasMoreChar32Than(const UnicodeString & s,int32_t start,int32_t length,int32_t number)1367 UnicodeStringTest::_testUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) {
1368     if(s.hasMoreChar32Than(start, length, number)!=_refUnicodeStringHasMoreChar32Than(s, start, length, number)) {
1369         errln("hasMoreChar32Than(%d, %d, %d)=%hd is wrong\n",
1370                 start, length, number, s.hasMoreChar32Than(start, length, number));
1371     }
1372 }
1373 
1374 void
TestCountChar32(void)1375 UnicodeStringTest::TestCountChar32(void) {
1376     {
1377         UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
1378 
1379         // test countChar32()
1380         // note that this also calls and tests u_countChar32(length>=0)
1381         if(
1382             s.countChar32()!=4 ||
1383             s.countChar32(1)!=4 ||
1384             s.countChar32(2)!=3 ||
1385             s.countChar32(2, 3)!=2 ||
1386             s.countChar32(2, 0)!=0
1387         ) {
1388             errln("UnicodeString::countChar32() failed");
1389         }
1390 
1391         // NUL-terminate the string buffer and test u_countChar32(length=-1)
1392         const UChar *buffer=s.getTerminatedBuffer();
1393         if(
1394             u_countChar32(buffer, -1)!=4 ||
1395             u_countChar32(buffer+1, -1)!=4 ||
1396             u_countChar32(buffer+2, -1)!=3 ||
1397             u_countChar32(buffer+3, -1)!=3 ||
1398             u_countChar32(buffer+4, -1)!=2 ||
1399             u_countChar32(buffer+5, -1)!=1 ||
1400             u_countChar32(buffer+6, -1)!=0
1401         ) {
1402             errln("u_countChar32(length=-1) failed");
1403         }
1404 
1405         // test u_countChar32() with bad input
1406         if(u_countChar32(NULL, 5)!=0 || u_countChar32(buffer, -2)!=0) {
1407             errln("u_countChar32(bad input) failed (returned non-zero counts)");
1408         }
1409     }
1410 
1411     /* test data and variables for hasMoreChar32Than() */
1412     static const UChar str[]={
1413         0x61, 0x62, 0xd800, 0xdc00,
1414         0xd801, 0xdc01, 0x63, 0xd802,
1415         0x64, 0xdc03, 0x65, 0x66,
1416         0xd804, 0xdc04, 0xd805, 0xdc05,
1417         0x67
1418     };
1419     UnicodeString string(str, UPRV_LENGTHOF(str));
1420     int32_t start, length, number;
1421 
1422     /* test hasMoreChar32Than() */
1423     for(length=string.length(); length>=0; --length) {
1424         for(start=0; start<=length; ++start) {
1425             for(number=-1; number<=((length-start)+2); ++number) {
1426                 _testUnicodeStringHasMoreChar32Than(string, start, length-start, number);
1427             }
1428         }
1429     }
1430 
1431     /* test hasMoreChar32Than() with pinning */
1432     for(start=-1; start<=string.length()+1; ++start) {
1433         for(number=-1; number<=((string.length()-start)+2); ++number) {
1434             _testUnicodeStringHasMoreChar32Than(string, start, 0x7fffffff, number);
1435         }
1436     }
1437 
1438     /* test hasMoreChar32Than() with a bogus string */
1439     string.setToBogus();
1440     for(length=-1; length<=1; ++length) {
1441         for(start=-1; start<=length; ++start) {
1442             for(number=-1; number<=((length-start)+2); ++number) {
1443                 _testUnicodeStringHasMoreChar32Than(string, start, length-start, number);
1444             }
1445         }
1446     }
1447 }
1448 
// Tests the "bogus" state of UnicodeString: how a string becomes bogus,
// which modifications must leave it bogus, which assignments must revive it,
// how NULL inputs are treated, and how bogus strings compare with normal and
// empty strings.
// The checks share test1/test2/test3 and mutate them step by step, so the
// order of the statements matters.
void
UnicodeStringTest::TestBogus() {
    UnicodeString   test1("This is a test");
    UnicodeString   test2("This is a test");
    UnicodeString   test3("Me too!");

    // test isBogus() and setToBogus()
    if (test1.isBogus() || test2.isBogus() || test3.isBogus()) {
        errln("A string returned TRUE for isBogus()!");
    }

    // NULL pointers are treated like empty strings
    // use other illegal arguments to make a bogus string
    test3.setTo(FALSE, test1.getBuffer(), -2);
    if(!test3.isBogus()) {
        errln("A bogus string returned FALSE for isBogus()!");
    }
    // test1 and test2 have equal contents; test3 is bogus at this point
    if (test1.hashCode() != test2.hashCode() || test1.hashCode() == test3.hashCode()) {
        errln("hashCode() failed");
    }
    if(test3.getBuffer()!=0 || test3.getBuffer(20)!=0 || test3.getTerminatedBuffer()!=0) {
        errln("bogus.getBuffer()!=0");
    }
    if (test1.indexOf(test3) != -1) {
        errln("bogus.indexOf() != -1");
    }
    if (test1.lastIndexOf(test3) != -1) {
        errln("bogus.lastIndexOf() != -1");
    }
    if (test1.caseCompare(test3, U_FOLD_CASE_DEFAULT) != 1 || test3.caseCompare(test1, U_FOLD_CASE_DEFAULT) != -1) {
        errln("caseCompare() doesn't work with bogus strings");
    }
    if (test1.compareCodePointOrder(test3) != 1 || test3.compareCodePointOrder(test1) != -1) {
        errln("compareCodePointOrder() doesn't work with bogus strings");
    }

    // verify that non-assignment modifications fail and do not revive a bogus string
    test3.setToBogus();
    test3.append((UChar)0x61);
    if(!test3.isBogus() || test3.getBuffer()!=0) {
        errln("bogus.append('a') worked but must not");
    }

    test3.setToBogus();
    test3.findAndReplace(UnicodeString((UChar)0x61), test2);
    if(!test3.isBogus() || test3.getBuffer()!=0) {
        errln("bogus.findAndReplace() worked but must not");
    }

    test3.setToBogus();
    test3.trim();
    if(!test3.isBogus() || test3.getBuffer()!=0) {
        errln("bogus.trim() revived bogus but must not");
    }

    test3.setToBogus();
    test3.remove(1);
    if(!test3.isBogus() || test3.getBuffer()!=0) {
        errln("bogus.remove(1) revived bogus but must not");
    }

    test3.setToBogus();
    if(!test3.setCharAt(0, 0x62).isBogus() || !test3.isEmpty()) {
        errln("bogus.setCharAt(0, 'b') worked but must not");
    }

    test3.setToBogus();
    if(test3.truncate(1) || !test3.isBogus() || !test3.isEmpty()) {
        errln("bogus.truncate(1) revived bogus but must not");
    }

    // verify that assignments revive a bogus string
    // (each check first confirms that test3 is bogus, then assigns, then compares)
    test3.setToBogus();
    if(!test3.isBogus() || (test3=test1).isBogus() || test3!=test1) {
        errln("bogus.operator=() failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.fastCopyFrom(test1).isBogus() || test3!=test1) {
        errln("bogus.fastCopyFrom() failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(test1).isBogus() || test3!=test1) {
        errln("bogus.setTo(UniStr) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(test1, 0).isBogus() || test3!=test1) {
        errln("bogus.setTo(UniStr, 0) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(test1, 0, 0x7fffffff).isBogus() || test3!=test1) {
        errln("bogus.setTo(UniStr, 0, len) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(test1.getBuffer(), test1.length()).isBogus() || test3!=test1) {
        errln("bogus.setTo(const UChar *, len) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo((UChar)0x2028).isBogus() || test3!=UnicodeString((UChar)0x2028)) {
        errln("bogus.setTo(UChar) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo((UChar32)0x1d157).isBogus() || test3!=UnicodeString((UChar32)0x1d157)) {
        errln("bogus.setTo(UChar32) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(FALSE, test1.getBuffer(), test1.length()).isBogus() || test3!=test1) {
        errln("bogus.setTo(readonly alias) failed");
    }

    // writable alias to another string's buffer: very bad idea, just convenient for this test
    test3.setToBogus();
    if(!test3.isBogus() ||
            test3.setTo(const_cast<UChar *>(test1.getBuffer()),
                        test1.length(), test1.getCapacity()).isBogus() ||
            test3!=test1) {
        errln("bogus.setTo(writable alias) failed");
    }

    // verify simple, documented ways to turn a bogus string into an empty one
    test3.setToBogus();
    if(!test3.isBogus() || (test3=UnicodeString()).isBogus() || !test3.isEmpty()) {
        errln("bogus.operator=(UnicodeString()) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(UnicodeString()).isBogus() || !test3.isEmpty()) {
        errln("bogus.setTo(UnicodeString()) failed");
    }

    test3.setToBogus();
    if(test3.remove().isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) {
        errln("bogus.remove() failed");
    }

    test3.setToBogus();
    if(test3.remove(0, INT32_MAX).isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) {
        errln("bogus.remove(0, INT32_MAX) failed");
    }

    test3.setToBogus();
    if(test3.truncate(0) || test3.isBogus() || !test3.isEmpty()) {
        errln("bogus.truncate(0) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo((UChar32)-1).isBogus() || !test3.isEmpty()) {
        errln("bogus.setTo((UChar32)-1) failed");
    }

    static const UChar nul=0;

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(&nul, 0).isBogus() || !test3.isEmpty()) {
        errln("bogus.setTo(&nul, 0) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.getBuffer()!=0) {
        errln("setToBogus() failed to make a string bogus");
    }

    // assigning a bogus string must make the target bogus as well;
    // test1 and test3 are both left bogus for the comparisons at the end
    test3.setToBogus();
    if(test1.isBogus() || !(test1=test3).isBogus()) {
        errln("normal=bogus failed to make the left string bogus");
    }

    // test that NULL primitive input string values are treated like
    // empty strings, not errors (bogus)
    // (U+10005 is a supplementary code point, so test2 has length 2 here)
    test2.setTo((UChar32)0x10005);
    if(test2.insert(1, nullptr, 1).length()!=2) {
        errln("UniStr.insert(...nullptr...) should not modify the string but does");
    }

    UErrorCode errorCode=U_ZERO_ERROR;
    UnicodeString
        test4((const UChar *)NULL),
        test5(TRUE, (const UChar *)NULL, 1),
        test6((UChar *)NULL, 5, 5),
        test7((const char *)NULL, 3, NULL, errorCode);
    if(test4.isBogus() || test5.isBogus() || test6.isBogus() || test7.isBogus()) {
        errln("a constructor set to bogus for a NULL input string, should be empty");
    }

    test4.setTo(NULL, 3);
    test5.setTo(TRUE, (const UChar *)NULL, 1);
    test6.setTo((UChar *)NULL, 5, 5);
    if(test4.isBogus() || test5.isBogus() || test6.isBogus()) {
        errln("a setTo() set to bogus for a NULL input string, should be empty");
    }

    // test that bogus==bogus<any
    if(test1!=test3 || test1.compare(test3)!=0) {
        errln("bogus==bogus failed");
    }

    // test2 becomes an empty (non-bogus) string; the bogus test1 must sort before it
    test2.remove();
    if(test1>=test2 || !(test2>test1) || test1.compare(test2)>=0 || !(test2.compare(test1)>0)) {
        errln("bogus<empty failed");
    }
}
1657 
1658 // StringEnumeration ------------------------------------------------------- ***
1659 // most of StringEnumeration is tested elsewhere
1660 // this test improves code coverage
1661 
// Strings produced by TestEnumeration, in enumeration order.
// The last two entries are much longer than the first three; per the text of
// the fourth entry they help test some buffer limits.
static const char *const testEnumStrings[]={
    "a", "b", "c",
    "this is a long string which helps us test some buffer limits",
    "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"
};
1670 
1671 class TestEnumeration : public StringEnumeration {
1672 public:
TestEnumeration()1673     TestEnumeration() : i(0) {}
1674 
count(UErrorCode &) const1675     virtual int32_t count(UErrorCode& /*status*/) const override {
1676         return UPRV_LENGTHOF(testEnumStrings);
1677     }
1678 
snext(UErrorCode & status)1679     virtual const UnicodeString *snext(UErrorCode &status) override {
1680         if(U_SUCCESS(status) && i<UPRV_LENGTHOF(testEnumStrings)) {
1681             unistr=UnicodeString(testEnumStrings[i++], "");
1682             return &unistr;
1683         }
1684 
1685         return NULL;
1686     }
1687 
reset(UErrorCode &)1688     virtual void reset(UErrorCode& /*status*/) override {
1689         i=0;
1690     }
1691 
getStaticClassID()1692     static inline UClassID getStaticClassID() {
1693         return (UClassID)&fgClassID;
1694     }
getDynamicClassID() const1695     virtual UClassID getDynamicClassID() const override {
1696         return getStaticClassID();
1697     }
1698 
1699 private:
1700     static const char fgClassID;
1701 
1702     int32_t i;
1703 };
1704 
1705 const char TestEnumeration::fgClassID=0;
1706 
1707 void
TestStringEnumeration()1708 UnicodeStringTest::TestStringEnumeration() {
1709     UnicodeString s;
1710     TestEnumeration ten;
1711     int32_t i, length;
1712     UErrorCode status;
1713 
1714     const UChar *pu;
1715     const char *pc;
1716 
1717     // test the next() default implementation and ensureCharsCapacity()
1718     for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1719         status=U_ZERO_ERROR;
1720         pc=ten.next(&length, status);
1721         s=UnicodeString(testEnumStrings[i], "");
1722         if(U_FAILURE(status) || pc==NULL || length!=s.length() || UnicodeString(pc, length, "")!=s) {
1723             errln("StringEnumeration.next(%d) failed", i);
1724         }
1725     }
1726     status=U_ZERO_ERROR;
1727     if(ten.next(&length, status)!=NULL) {
1728         errln("StringEnumeration.next(done)!=NULL");
1729     }
1730 
1731     // test the unext() default implementation
1732     ten.reset(status);
1733     for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1734         status=U_ZERO_ERROR;
1735         pu=ten.unext(&length, status);
1736         s=UnicodeString(testEnumStrings[i], "");
1737         if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(TRUE, pu, length)!=s) {
1738             errln("StringEnumeration.unext(%d) failed", i);
1739         }
1740     }
1741     status=U_ZERO_ERROR;
1742     if(ten.unext(&length, status)!=NULL) {
1743         errln("StringEnumeration.unext(done)!=NULL");
1744     }
1745 
1746     // test that the default clone() implementation works, and returns NULL
1747     if(ten.clone()!=NULL) {
1748         errln("StringEnumeration.clone()!=NULL");
1749     }
1750 
1751     // test that uenum_openFromStringEnumeration() works
1752     // Need a heap allocated string enumeration because it is adopted by the UEnumeration.
1753     StringEnumeration *newTen = new TestEnumeration;
1754     status=U_ZERO_ERROR;
1755     UEnumeration *uten = uenum_openFromStringEnumeration(newTen, &status);
1756     if (uten==NULL || U_FAILURE(status)) {
1757         errln("fail at file %s, line %d, UErrorCode is %s\n", __FILE__, __LINE__, u_errorName(status));
1758         return;
1759     }
1760 
1761     // test  uenum_next()
1762     for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1763         status=U_ZERO_ERROR;
1764         pc=uenum_next(uten, &length, &status);
1765         if(U_FAILURE(status) || pc==NULL || strcmp(pc, testEnumStrings[i]) != 0) {
1766             errln("File %s, line %d, StringEnumeration.next(%d) failed", __FILE__, __LINE__, i);
1767         }
1768     }
1769     status=U_ZERO_ERROR;
1770     if(uenum_next(uten, &length, &status)!=NULL) {
1771         errln("File %s, line %d, uenum_next(done)!=NULL");
1772     }
1773 
1774     // test the uenum_unext()
1775     uenum_reset(uten, &status);
1776     for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1777         status=U_ZERO_ERROR;
1778         pu=uenum_unext(uten, &length, &status);
1779         s=UnicodeString(testEnumStrings[i], "");
1780         if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(TRUE, pu, length)!=s) {
1781             errln("File %s, Line %d, uenum_unext(%d) failed", __FILE__, __LINE__, i);
1782         }
1783     }
1784     status=U_ZERO_ERROR;
1785     if(uenum_unext(uten, &length, &status)!=NULL) {
1786         errln("File %s, Line %d, uenum_unext(done)!=NULL" __FILE__, __LINE__);
1787     }
1788 
1789     uenum_close(uten);
1790 }
1791 
1792 /*
1793  * Namespace test, to make sure that macros like UNICODE_STRING include the
1794  * namespace qualifier.
1795  *
1796  * Define a (bogus) UnicodeString class in another namespace and check for ambiguity.
1797  */
1798 namespace bogus {
1799     class UnicodeString {
1800     public:
1801         enum EInvariant { kInvariant };
UnicodeString()1802         UnicodeString() : i(1) {}
UnicodeString(UBool,const UChar *,int32_t textLength)1803         UnicodeString(UBool /*isTerminated*/, const UChar * /*text*/, int32_t textLength) : i(textLength) {(void)i;}
UnicodeString(const char *,int32_t length,enum EInvariant)1804         UnicodeString(const char * /*src*/, int32_t length, enum EInvariant /*inv*/
1805 ) : i(length) {}
1806     private:
1807         int32_t i;
1808     };
1809 }
1810 
1811 void
TestNameSpace()1812 UnicodeStringTest::TestNameSpace() {
1813     // Provoke name collision unless the UnicodeString macros properly
1814     // qualify the icu::UnicodeString class.
1815     using namespace bogus;
1816 
1817     // Use all UnicodeString macros from unistr.h.
1818     icu::UnicodeString s1=icu::UnicodeString("abc", 3, US_INV);
1819     icu::UnicodeString s2=UNICODE_STRING("def", 3);
1820     icu::UnicodeString s3=UNICODE_STRING_SIMPLE("ghi");
1821 
1822     // Make sure the compiler does not optimize away instantiation of s1, s2, s3.
1823     icu::UnicodeString s4=s1+s2+s3;
1824     if(s4.length()!=9) {
1825         errln("Something wrong with UnicodeString::operator+().");
1826     }
1827 }
1828 
1829 void
TestUTF32()1830 UnicodeStringTest::TestUTF32() {
1831     // Input string length US_STACKBUF_SIZE to cause overflow of the
1832     // initially chosen fStackBuffer due to supplementary characters.
1833     static const UChar32 utf32[] = {
1834         0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a,
1835         0x10000, 0x20000, 0xe0000, 0x10ffff
1836     };
1837     static const UChar expected_utf16[] = {
1838         0x41, 0xfffd, 0x61, 0xfffd, 0xfffd, 0xfffd, 0x5a, 0xd900, 0xdc00, 0x7a,
1839         0xd800, 0xdc00, 0xd840, 0xdc00, 0xdb40, 0xdc00, 0xdbff, 0xdfff
1840     };
1841     UnicodeString from32 = UnicodeString::fromUTF32(utf32, UPRV_LENGTHOF(utf32));
1842     UnicodeString expected(FALSE, expected_utf16, UPRV_LENGTHOF(expected_utf16));
1843     if(from32 != expected) {
1844         errln("UnicodeString::fromUTF32() did not create the expected string.");
1845     }
1846 
1847     static const UChar utf16[] = {
1848         0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
1849     };
1850     static const UChar32 expected_utf32[] = {
1851         0x41, 0xfffd, 0x61, 0xfffd, 0x5a, 0x50000, 0x7a, 0x10000, 0x10ffff
1852     };
1853     UChar32 result32[16];
1854     UErrorCode errorCode = U_ZERO_ERROR;
1855     int32_t length32 =
1856         UnicodeString(FALSE, utf16, UPRV_LENGTHOF(utf16)).
1857         toUTF32(result32, UPRV_LENGTHOF(result32), errorCode);
1858     if( length32 != UPRV_LENGTHOF(expected_utf32) ||
1859         0 != uprv_memcmp(result32, expected_utf32, length32*4) ||
1860         result32[length32] != 0
1861     ) {
1862         errln("UnicodeString::toUTF32() did not create the expected string.");
1863     }
1864 }
1865 
1866 class TestCheckedArrayByteSink : public CheckedArrayByteSink {
1867 public:
TestCheckedArrayByteSink(char * outbuf,int32_t capacity)1868     TestCheckedArrayByteSink(char* outbuf, int32_t capacity)
1869             : CheckedArrayByteSink(outbuf, capacity), calledFlush(FALSE) {}
Flush()1870     virtual void Flush() override { calledFlush = TRUE; }
1871     UBool calledFlush;
1872 };
1873 
1874 void
TestUTF8()1875 UnicodeStringTest::TestUTF8() {
1876     static const uint8_t utf8[] = {
1877         // Code points:
1878         // 0x41, 0xd900,
1879         // 0x61, 0xdc00,
1880         // 0x110000, 0x5a,
1881         // 0x50000, 0x7a,
1882         // 0x10000, 0x20000,
1883         // 0xe0000, 0x10ffff
1884         0x41, 0xed, 0xa4, 0x80,
1885         0x61, 0xed, 0xb0, 0x80,
1886         0xf4, 0x90, 0x80, 0x80, 0x5a,
1887         0xf1, 0x90, 0x80, 0x80, 0x7a,
1888         0xf0, 0x90, 0x80, 0x80, 0xf0, 0xa0, 0x80, 0x80,
1889         0xf3, 0xa0, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
1890     };
1891     static const UChar expected_utf16[] = {
1892         0x41, 0xfffd, 0xfffd, 0xfffd,
1893         0x61, 0xfffd, 0xfffd, 0xfffd,
1894         0xfffd,  0xfffd, 0xfffd, 0xfffd,0x5a,
1895         0xd900, 0xdc00, 0x7a,
1896         0xd800, 0xdc00, 0xd840, 0xdc00,
1897         0xdb40, 0xdc00, 0xdbff, 0xdfff
1898     };
1899     UnicodeString from8 = UnicodeString::fromUTF8(StringPiece((const char *)utf8, (int32_t)sizeof(utf8)));
1900     UnicodeString expected(FALSE, expected_utf16, UPRV_LENGTHOF(expected_utf16));
1901 
1902     if(from8 != expected) {
1903         errln("UnicodeString::fromUTF8(StringPiece) did not create the expected string.");
1904     }
1905     std::string utf8_string((const char *)utf8, sizeof(utf8));
1906     UnicodeString from8b = UnicodeString::fromUTF8(utf8_string);
1907     if(from8b != expected) {
1908         errln("UnicodeString::fromUTF8(std::string) did not create the expected string.");
1909     }
1910 
1911     static const UChar utf16[] = {
1912         0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
1913     };
1914     static const uint8_t expected_utf8[] = {
1915         0x41, 0xef, 0xbf, 0xbd, 0x61, 0xef, 0xbf, 0xbd, 0x5a, 0xf1, 0x90, 0x80, 0x80, 0x7a,
1916         0xf0, 0x90, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
1917     };
1918     UnicodeString us(FALSE, utf16, UPRV_LENGTHOF(utf16));
1919 
1920     char buffer[64];
1921     TestCheckedArrayByteSink sink(buffer, (int32_t)sizeof(buffer));
1922     us.toUTF8(sink);
1923     if( sink.NumberOfBytesWritten() != (int32_t)sizeof(expected_utf8) ||
1924         0 != uprv_memcmp(buffer, expected_utf8, sizeof(expected_utf8))
1925     ) {
1926         errln("UnicodeString::toUTF8() did not create the expected string.");
1927     }
1928     if(!sink.calledFlush) {
1929         errln("UnicodeString::toUTF8(sink) did not sink.Flush().");
1930     }
1931     // Initial contents for testing that toUTF8String() appends.
1932     std::string result8 = "-->";
1933     std::string expected8 = "-->" + std::string((const char *)expected_utf8, sizeof(expected_utf8));
1934     // Use the return value just for testing.
1935     std::string &result8r = us.toUTF8String(result8);
1936     if(result8r != expected8 || &result8r != &result8) {
1937         errln("UnicodeString::toUTF8String() did not create the expected string.");
1938     }
1939 }
1940 
1941 // Test if this compiler supports Return Value Optimization of unnamed temporary objects.
wrapUChars(const UChar * uchars)1942 static UnicodeString wrapUChars(const UChar *uchars) {
1943     return UnicodeString(TRUE, uchars, -1);
1944 }
1945 
// Tests read-only aliasing of an external UChar buffer via the aliasing
// constructor and setTo(): which operations are expected to keep pointing
// into the original buffer (truncate, removing a prefix or suffix,
// retainBetween), when getTerminatedBuffer() is expected to stop aliasing,
// and that the original buffer is never modified. Also tests tempSubString()
// and tempSubStringBetween().
// Several checks depend on the state left by the previous call, so the order
// of the statements matters.
void
UnicodeStringTest::TestReadOnlyAlias() {
    UChar uchars[]={ 0x61, 0x62, 0 };
    UnicodeString alias(TRUE, uchars, 2);
    if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
        errln("UnicodeString read-only-aliasing constructor does not behave as expected.");
        return;
    }
    // after truncate(1) the terminator would have to go at uchars[1], which
    // holds 0x62: getTerminatedBuffer() is expected to return a different
    // buffer and leave uchars untouched
    alias.truncate(1);
    if(alias.length()!=1 || alias.getBuffer()!=uchars) {
        errln("UnicodeString(read-only-alias).truncate() did not preserve aliasing as expected.");
    }
    if(alias.getTerminatedBuffer()==uchars) {
        errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
              "did not allocate and copy as expected.");
    }
    if(uchars[1]!=0x62) {
        errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
              "modified the original buffer.");
    }
    if(1!=u_strlen(alias.getTerminatedBuffer())) {
        errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
              "does not return a buffer terminated at the proper length.");
    }

    // same scenario through setTo(), then emptying the string with remove()
    alias.setTo(TRUE, uchars, 2);
    if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
        errln("UnicodeString read-only-aliasing setTo() does not behave as expected.");
        return;
    }
    alias.remove();
    if(alias.length()!=0) {
        errln("UnicodeString(read-only-alias).remove() did not work.");
    }
    if(alias.getTerminatedBuffer()==uchars) {
        errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
              "did not un-alias as expected.");
    }
    if(uchars[0]!=0x61) {
        errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
              "modified the original buffer.");
    }
    if(0!=u_strlen(alias.getTerminatedBuffer())) {
        errln("UnicodeString.setTo(read-only-alias).remove().getTerminatedBuffer() "
              "does not return a buffer terminated at length 0.");
    }

    // removing a prefix or a suffix, or retaining a middle part, of a
    // read-only alias is expected to keep pointing into the aliased buffer
    UnicodeString longString=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789");
    alias.setTo(FALSE, longString.getBuffer(), longString.length());
    alias.remove(0, 10);
    if(longString.compare(10, INT32_MAX, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+10) {
        errln("UnicodeString.setTo(read-only-alias).remove(0, 10) did not preserve aliasing as expected.");
    }
    alias.setTo(FALSE, longString.getBuffer(), longString.length());
    alias.remove(27, 99);
    if(longString.compare(0, 27, alias)!=0 || alias.getBuffer()!=longString.getBuffer()) {
        errln("UnicodeString.setTo(read-only-alias).remove(27, 99) did not preserve aliasing as expected.");
    }
    alias.setTo(FALSE, longString.getBuffer(), longString.length());
    alias.retainBetween(6, 30);
    if(longString.compare(6, 24, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+6) {
        errln("UnicodeString.setTo(read-only-alias).retainBetween(6, 30) did not preserve aliasing as expected.");
    }

    // hasRVO is TRUE when the string returned by value from wrapUChars() still
    // points at abc; the buffer pointer checks below are enforced only then
    UChar abc[]={ 0x61, 0x62, 0x63, 0 };
    UBool hasRVO= wrapUChars(abc).getBuffer()==abc;

    UnicodeString temp;
    temp.fastCopyFrom(longString.tempSubString());
    if(temp!=longString || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
        errln("UnicodeString.tempSubString() failed");
    }
    // a negative start is expected to behave like start 0
    temp.fastCopyFrom(longString.tempSubString(-3, 5));
    if(longString.compare(0, 5, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
        errln("UnicodeString.tempSubString(-3, 5) failed");
    }
    temp.fastCopyFrom(longString.tempSubString(17));
    if(longString.compare(17, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+17)) {
        errln("UnicodeString.tempSubString(17) failed");
    }
    // a start beyond the end of the string is expected to yield an empty string
    temp.fastCopyFrom(longString.tempSubString(99));
    if(!temp.isEmpty()) {
        errln("UnicodeString.tempSubString(99) failed");
    }
    temp.fastCopyFrom(longString.tempSubStringBetween(6));
    if(longString.compare(6, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+6)) {
        errln("UnicodeString.tempSubStringBetween(6) failed");
    }
    temp.fastCopyFrom(longString.tempSubStringBetween(8, 18));
    if(longString.compare(8, 10, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+8)) {
        errln("UnicodeString.tempSubStringBetween(8, 18) failed");
    }
    // a substring of a bogus string is expected to be bogus as well
    UnicodeString bogusString;
    bogusString.setToBogus();
    temp.fastCopyFrom(bogusString.tempSubStringBetween(8, 18));
    if(!temp.isBogus()) {
        errln("UnicodeString.setToBogus().tempSubStringBetween(8, 18) failed");
    }
}
2045 
2046 void
doTestAppendable(UnicodeString & dest,Appendable & app)2047 UnicodeStringTest::doTestAppendable(UnicodeString &dest, Appendable &app) {
2048     static const UChar cde[3]={ 0x63, 0x64, 0x65 };
2049     static const UChar fg[3]={ 0x66, 0x67, 0 };
2050     if(!app.reserveAppendCapacity(12)) {
2051         errln("Appendable.reserve(12) failed");
2052     }
2053     app.appendCodeUnit(0x61);
2054     app.appendCodePoint(0x62);
2055     app.appendCodePoint(0x50000);
2056     app.appendString(cde, 3);
2057     app.appendString(fg, -1);
2058     UChar scratch[3];
2059     int32_t capacity=-1;
2060     UChar *buffer=app.getAppendBuffer(3, 3, scratch, 3, &capacity);
2061     if(capacity<3) {
2062         errln("Appendable.getAppendBuffer(min=3) returned capacity=%d<3", (int)capacity);
2063         return;
2064     }
2065     static const UChar hij[3]={ 0x68, 0x69, 0x6a };
2066     u_memcpy(buffer, hij, 3);
2067     app.appendString(buffer, 3);
2068     if(dest!=UNICODE_STRING_SIMPLE("ab\\U00050000cdefghij").unescape()) {
2069         errln("Appendable.append(...) failed");
2070     }
2071     buffer=app.getAppendBuffer(0, 3, scratch, 3, &capacity);
2072     if(buffer!=NULL || capacity!=0) {
2073         errln("Appendable.getAppendBuffer(min=0) failed");
2074     }
2075     capacity=1;
2076     buffer=app.getAppendBuffer(3, 3, scratch, 2, &capacity);
2077     if(buffer!=NULL || capacity!=0) {
2078         errln("Appendable.getAppendBuffer(scratch<min) failed");
2079     }
2080 }
2081 
2082 class SimpleAppendable : public Appendable {
2083 public:
SimpleAppendable(UnicodeString & dest)2084     explicit SimpleAppendable(UnicodeString &dest) : str(dest) {}
appendCodeUnit(UChar c)2085     virtual UBool appendCodeUnit(UChar c) override { str.append(c); return TRUE; }
reset()2086     SimpleAppendable &reset() { str.remove(); return *this; }
2087 private:
2088     UnicodeString &str;
2089 };
2090 
2091 void
TestAppendable()2092 UnicodeStringTest::TestAppendable() {
2093     UnicodeString dest;
2094     SimpleAppendable app(dest);
2095     doTestAppendable(dest, app);
2096 }
2097 
2098 void
TestUnicodeStringImplementsAppendable()2099 UnicodeStringTest::TestUnicodeStringImplementsAppendable() {
2100     UnicodeString dest;
2101     UnicodeStringAppendable app(dest);
2102     doTestAppendable(dest, app);
2103 }
2104 
2105 void
TestSizeofUnicodeString()2106 UnicodeStringTest::TestSizeofUnicodeString() {
2107     // See the comments in unistr.h near the declaration of UnicodeString's fields.
2108     // See the API comments for UNISTR_OBJECT_SIZE.
2109     size_t sizeofUniStr=sizeof(UnicodeString);
2110     size_t expected=UNISTR_OBJECT_SIZE;
2111     if(expected!=sizeofUniStr) {
2112         // Possible cause: UNISTR_OBJECT_SIZE may not be a multiple of sizeof(pointer),
2113         // of the compiler might add more internal padding than expected.
2114         errln("sizeof(UnicodeString)=%d, expected UNISTR_OBJECT_SIZE=%d",
2115               (int)sizeofUniStr, (int)expected);
2116     }
2117     if(sizeofUniStr<32) {
2118         errln("sizeof(UnicodeString)=%d < 32, probably too small", (int)sizeofUniStr);
2119     }
2120     // We assume that the entire UnicodeString object,
2121     // minus the vtable pointer and 2 bytes for flags and short length,
2122     // is available for internal storage of UChars.
2123     int32_t expectedStackBufferLength=((int32_t)UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR;
2124     UnicodeString s;
2125     const UChar *emptyBuffer=s.getBuffer();
2126     for(int32_t i=0; i<expectedStackBufferLength; ++i) {
2127         s.append((UChar)0x2e);
2128     }
2129     const UChar *fullBuffer=s.getBuffer();
2130     if(fullBuffer!=emptyBuffer) {
2131         errln("unexpected reallocation when filling with assumed stack buffer size of %d",
2132               expectedStackBufferLength);
2133     }
2134     const UChar *terminatedBuffer=s.getTerminatedBuffer();
2135     if(terminatedBuffer==emptyBuffer) {
2136         errln("unexpected keeping stack buffer when overfilling assumed stack buffer size of %d",
2137               expectedStackBufferLength);
2138     }
2139 }
2140 
// Try to avoid clang -Wself-move warnings from s1 = std::move(s1);
// Move-assigns src to dest. The two strings arrive as separate reference
// parameters, presumably so that a caller can pass the same object for both
// without writing the self-move expression literally.
void moveFrom(UnicodeString &dest, UnicodeString &src) {
    dest = std::move(src);
}
2145 
2146 void
TestMoveSwap()2147 UnicodeStringTest::TestMoveSwap() {
2148     static const UChar abc[3] = { 0x61, 0x62, 0x63 };  // "abc"
2149     UnicodeString s1(FALSE, abc, UPRV_LENGTHOF(abc));  // read-only alias
2150     UnicodeString s2(100, 0x7a, 100);  // 100 * 'z' should be on the heap
2151     UnicodeString s3("defg", 4, US_INV);  // in stack buffer
2152     const UChar *p = s2.getBuffer();
2153     s1.swap(s2);
2154     if(s1.getBuffer() != p || s1.length() != 100 || s2.getBuffer() != abc || s2.length() != 3) {
2155         errln("UnicodeString.swap() did not swap");
2156     }
2157     swap(s2, s3);
2158     if(s2 != UNICODE_STRING_SIMPLE("defg") || s3.getBuffer() != abc || s3.length() != 3) {
2159         errln("swap(UnicodeString) did not swap back");
2160     }
2161     UnicodeString s4;
2162     s4 = std::move(s1);
2163     if(s4.getBuffer() != p || s4.length() != 100 || !s1.isBogus()) {
2164         errln("UnicodeString = std::move(heap) did not move");
2165     }
2166     UnicodeString s5;
2167     s5 = std::move(s2);
2168     if(s5 != UNICODE_STRING_SIMPLE("defg")) {
2169         errln("UnicodeString = std::move(stack) did not move");
2170     }
2171     UnicodeString s6;
2172     s6 = std::move(s3);
2173     if(s6.getBuffer() != abc || s6.length() != 3) {
2174         errln("UnicodeString = std::move(alias) did not move");
2175     }
2176     infoln("TestMoveSwap() with rvalue references");
2177     s1 = static_cast<UnicodeString &&>(s6);
2178     if(s1.getBuffer() != abc || s1.length() != 3) {
2179         errln("UnicodeString move assignment operator did not move");
2180     }
2181     UnicodeString s7(static_cast<UnicodeString &&>(s4));
2182     if(s7.getBuffer() != p || s7.length() != 100 || !s4.isBogus()) {
2183         errln("UnicodeString move constructor did not move");
2184     }
2185 
2186     // Move self assignment leaves the object valid but in an undefined state.
2187     // Do it to make sure there is no crash,
2188     // but do not check for any particular resulting value.
2189     moveFrom(s1, s1);
2190     moveFrom(s2, s2);
2191     moveFrom(s3, s3);
2192     moveFrom(s4, s4);
2193     moveFrom(s5, s5);
2194     moveFrom(s6, s6);
2195     moveFrom(s7, s7);
2196     // Simple copy assignment must work.
2197     UnicodeString simple = UNICODE_STRING_SIMPLE("simple");
2198     s1 = s6 = s4 = s7 = simple;
2199     if(s1 != simple || s4 != simple || s6 != simple || s7 != simple) {
2200         errln("UnicodeString copy after self-move did not work");
2201     }
2202 }
2203 
2204 void
TestUInt16Pointers()2205 UnicodeStringTest::TestUInt16Pointers() {
2206     static const uint16_t carr[] = { 0x61, 0x62, 0x63, 0 };
2207     uint16_t arr[4];
2208 
2209     UnicodeString expected(u"abc");
2210     assertEquals("abc from pointer", expected, UnicodeString(carr));
2211     assertEquals("abc from pointer+length", expected, UnicodeString(carr, 3));
2212     assertEquals("abc from read-only-alias pointer", expected, UnicodeString(TRUE, carr, 3));
2213 
2214     UnicodeString alias(arr, 0, 4);
2215     alias.append(u'a').append(u'b').append(u'c');
2216     assertEquals("abc from writable alias", expected, alias);
2217     assertEquals("buffer=abc from writable alias", expected, UnicodeString(arr, 3));
2218 
2219     UErrorCode errorCode = U_ZERO_ERROR;
2220     int32_t length = UnicodeString(u"def").extract(arr, 4, errorCode);
2221     assertSuccess(WHERE, errorCode);
2222     assertEquals("def from extract()", UnicodeString(u"def"), UnicodeString(arr, length));
2223 }
2224 
2225 void
TestWCharPointers()2226 UnicodeStringTest::TestWCharPointers() {
2227 #if U_SIZEOF_WCHAR_T==2
2228     static const wchar_t carr[] = { 0x61, 0x62, 0x63, 0 };
2229     wchar_t arr[4];
2230 
2231     UnicodeString expected(u"abc");
2232     assertEquals("abc from pointer", expected, UnicodeString(carr));
2233     assertEquals("abc from pointer+length", expected, UnicodeString(carr, 3));
2234     assertEquals("abc from read-only-alias pointer", expected, UnicodeString(TRUE, carr, 3));
2235 
2236     UnicodeString alias(arr, 0, 4);
2237     alias.append(u'a').append(u'b').append(u'c');
2238     assertEquals("abc from writable alias", expected, alias);
2239     assertEquals("buffer=abc from writable alias", expected, UnicodeString(arr, 3));
2240 
2241     UErrorCode errorCode = U_ZERO_ERROR;
2242     int32_t length = UnicodeString(u"def").extract(arr, 4, errorCode);
2243     assertSuccess(WHERE, errorCode);
2244     assertEquals("def from extract()", UnicodeString(u"def"), UnicodeString(arr, length));
2245 #endif
2246 }
2247 
2248 void
TestNullPointers()2249 UnicodeStringTest::TestNullPointers() {
2250     assertTrue("empty from nullptr", UnicodeString(nullptr).isEmpty());
2251     assertTrue("empty from nullptr+length", UnicodeString(nullptr, 2).isEmpty());
2252     assertTrue("empty from read-only-alias nullptr", UnicodeString(TRUE, nullptr, 3).isEmpty());
2253 
2254     UnicodeString alias(nullptr, 4, 4);  // empty, no alias
2255     assertTrue("empty from writable alias", alias.isEmpty());
2256     alias.append(u'a').append(u'b').append(u'c');
2257     UnicodeString expected(u"abc");
2258     assertEquals("abc from writable alias", expected, alias);
2259 
2260     UErrorCode errorCode = U_ZERO_ERROR;
2261     UnicodeString(u"def").extract(nullptr, 0, errorCode);
2262     assertEquals("buffer overflow extracting to nullptr", U_BUFFER_OVERFLOW_ERROR, errorCode);
2263 }
2264 
TestUnicodeStringInsertAppendToSelf()2265 void UnicodeStringTest::TestUnicodeStringInsertAppendToSelf() {
2266     IcuTestErrorCode status(*this, "TestUnicodeStringAppendToSelf");
2267 
2268     // Test append operation
2269     UnicodeString str(u"foo ");
2270     str.append(str);
2271     str.append(str);
2272     str.append(str);
2273     assertEquals("", u"foo foo foo foo foo foo foo foo ", str);
2274 
2275     // Test append operation with readonly alias to start
2276     str = UnicodeString(TRUE, u"foo ", 4);
2277     str.append(str);
2278     str.append(str);
2279     str.append(str);
2280     assertEquals("", u"foo foo foo foo foo foo foo foo ", str);
2281 
2282     // Test append operation with aliased substring
2283     str = u"abcde";
2284     UnicodeString sub = str.tempSubString(1, 2);
2285     str.append(sub);
2286     assertEquals("", u"abcdebc", str);
2287 
2288     // Test append operation with double-aliased substring
2289     str = UnicodeString(TRUE, u"abcde", 5);
2290     sub = str.tempSubString(1, 2);
2291     str.append(sub);
2292     assertEquals("", u"abcdebc", str);
2293 
2294     // Test insert operation
2295     str = u"a-*b";
2296     str.insert(2, str);
2297     str.insert(4, str);
2298     str.insert(8, str);
2299     assertEquals("", u"a-a-a-a-a-a-a-a-*b*b*b*b*b*b*b*b", str);
2300 
2301     // Test insert operation with readonly alias to start
2302     str = UnicodeString(TRUE, u"a-*b", 4);
2303     str.insert(2, str);
2304     str.insert(4, str);
2305     str.insert(8, str);
2306     assertEquals("", u"a-a-a-a-a-a-a-a-*b*b*b*b*b*b*b*b", str);
2307 
2308     // Test insert operation with aliased substring
2309     str = u"abcde";
2310     sub = str.tempSubString(1, 3);
2311     str.insert(2, sub);
2312     assertEquals("", u"abbcdcde", str);
2313 
2314     // Test insert operation with double-aliased substring
2315     str = UnicodeString(TRUE, u"abcde", 5);
2316     sub = str.tempSubString(1, 3);
2317     str.insert(2, sub);
2318     assertEquals("", u"abbcdcde", str);
2319 }
2320 
TestLargeAppend()2321 void UnicodeStringTest::TestLargeAppend() {
2322     if(quick) return;
2323 
2324     IcuTestErrorCode status(*this, "TestLargeAppend");
2325     // Make a large UnicodeString
2326     int32_t len = 0xAFFFFFF;
2327     UnicodeString str;
2328     char16_t *buf = str.getBuffer(len);
2329     // A fast way to set buffer to valid Unicode.
2330     // 4E4E is a valid unicode character
2331     uprv_memset(buf, 0x4e, len * 2);
2332     str.releaseBuffer(len);
2333     UnicodeString dest;
2334     // Append it 16 times
2335     // 0xAFFFFFF times 16 is 0xA4FFFFF1,
2336     // which is greater than INT32_MAX, which is 0x7FFFFFFF.
2337     int64_t total = 0;
2338     for (int32_t i = 0; i < 16; i++) {
2339         dest.append(str);
2340         total += len;
2341         if (total <= INT32_MAX) {
2342             assertFalse("dest is not bogus", dest.isBogus());
2343         } else {
2344             assertTrue("dest should be bogus", dest.isBogus());
2345         }
2346     }
2347     dest.remove();
2348     total = 0;
2349     for (int32_t i = 0; i < 16; i++) {
2350         dest.append(str);
2351         total += len;
2352         if (total + len <= INT32_MAX) {
2353             assertFalse("dest is not bogus", dest.isBogus());
2354         } else if (total <= INT32_MAX) {
2355             // Check that a string of exactly the maximum size works
2356             UnicodeString str2;
2357             int32_t remain = INT32_MAX - total;
2358             char16_t *buf2 = str2.getBuffer(remain);
2359             if (buf2 == nullptr) {
2360                 // if somehow memory allocation fail, return the test
2361                 return;
2362             }
2363             uprv_memset(buf2, 0x4e, remain * 2);
2364             str2.releaseBuffer(remain);
2365             dest.append(str2);
2366             total += remain;
2367             assertEquals("When a string of exactly the maximum size works", (int64_t)INT32_MAX, total);
2368             assertEquals("When a string of exactly the maximum size works", INT32_MAX, dest.length());
2369             assertFalse("dest is not bogus", dest.isBogus());
2370 
2371             // Check that a string size+1 goes bogus
2372             str2.truncate(1);
2373             dest.append(str2);
2374             total++;
2375             assertTrue("dest should be bogus", dest.isBogus());
2376         } else {
2377             assertTrue("dest should be bogus", dest.isBogus());
2378         }
2379     }
2380 }
2381