• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 
9 #include <utility>
10 
11 #include "ustrtest.h"
12 #include "unicode/appendable.h"
13 #include "unicode/std_string.h"
14 #include "unicode/unistr.h"
15 #include "unicode/uchar.h"
16 #include "unicode/ustring.h"
17 #include "unicode/locid.h"
18 #include "unicode/strenum.h"
19 #include "unicode/ucnv.h"
20 #include "unicode/uenum.h"
21 #include "unicode/utf16.h"
22 #include "cmemory.h"
23 #include "charstr.h"
24 
25 #if 0
26 #include "unicode/ustream.h"
27 
28 #include <iostream>
29 using namespace std;
30 
31 #endif
32 
~UnicodeStringTest()33 UnicodeStringTest::~UnicodeStringTest() {}
34 
// Factory function defined in another translation unit; returns an IntlTest*.
// NOTE(review): presumably consumed by TESTCASE_AUTO_CREATE_CLASS(StringCaseTest)
// in runIndexedTest() via token pasting - confirm against the macro definition.
35 extern IntlTest *createStringCaseTest();
36 
// Test-suite entry point: registers the test cases of UnicodeStringTest.
//
// @param index  selects one of the test cases listed below
// @param exec   when true, a suite banner is logged
// @param name   in/out reference to a test-case name string
// @param par    extra parameter string; not referenced directly in this body
//
// NOTE(review): the TESTCASE_AUTO* macros are defined outside this file
// (presumably intltest.h). They appear to map `index` onto the entries in
// listing order, so the order of the lines below should be treated as
// significant - confirm against the macro definitions before reordering.
runIndexedTest(int32_t index,UBool exec,const char * & name,char * par)37 void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char *par)
38 {
39     if (exec) logln("TestSuite UnicodeStringTest: ");
40     TESTCASE_AUTO_BEGIN;
    // Nested suite created through the extern createStringCaseTest() factory
    // (assumption based on the macro name - TODO confirm).
41     TESTCASE_AUTO_CREATE_CLASS(StringCaseTest);
    // Member test functions of UnicodeStringTest, one entry per test.
42     TESTCASE_AUTO(TestBasicManipulation);
43     TESTCASE_AUTO(TestCompare);
44     TESTCASE_AUTO(TestExtract);
45     TESTCASE_AUTO(TestRemoveReplace);
46     TESTCASE_AUTO(TestSearching);
47     TESTCASE_AUTO(TestSpacePadding);
48     TESTCASE_AUTO(TestPrefixAndSuffix);
49     TESTCASE_AUTO(TestFindAndReplace);
50     TESTCASE_AUTO(TestBogus);
51     TESTCASE_AUTO(TestReverse);
52     TESTCASE_AUTO(TestMiscellaneous);
53     TESTCASE_AUTO(TestStackAllocation);
54     TESTCASE_AUTO(TestUnescape);
55     TESTCASE_AUTO(TestCountChar32);
56     TESTCASE_AUTO(TestStringEnumeration);
57     TESTCASE_AUTO(TestNameSpace);
58     TESTCASE_AUTO(TestUTF32);
59     TESTCASE_AUTO(TestUTF8);
60     TESTCASE_AUTO(TestReadOnlyAlias);
61     TESTCASE_AUTO(TestAppendable);
62     TESTCASE_AUTO(TestUnicodeStringImplementsAppendable);
63     TESTCASE_AUTO(TestSizeofUnicodeString);
64     TESTCASE_AUTO(TestStartsWithAndEndsWithNulTerminated);
65     TESTCASE_AUTO(TestMoveSwap);
66     TESTCASE_AUTO(TestUInt16Pointers);
67     TESTCASE_AUTO(TestWCharPointers);
68     TESTCASE_AUTO(TestNullPointers);
69     TESTCASE_AUTO(TestUnicodeStringInsertAppendToSelf);
70     TESTCASE_AUTO(TestLargeAppend);
71     TESTCASE_AUTO_END;
72 }
73 
74 void
TestBasicManipulation()75 UnicodeStringTest::TestBasicManipulation()
76 {
77     UnicodeString   test1("Now is the time for all men to come swiftly to the aid of the party.\n");
78     UnicodeString   expectedValue;
79     UnicodeString   *c;
80 
81     c=test1.clone();
82     test1.insert(24, "good ");
83     expectedValue = "Now is the time for all good men to come swiftly to the aid of the party.\n";
84     if (test1 != expectedValue)
85         errln("insert() failed:  expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
86 
87     c->insert(24, "good ");
88     if(*c != expectedValue) {
89         errln("clone()->insert() failed:  expected \"" + expectedValue + "\"\n,got \"" + *c + "\"");
90     }
91     delete c;
92 
93     test1.remove(41, 8);
94     expectedValue = "Now is the time for all good men to come to the aid of the party.\n";
95     if (test1 != expectedValue)
96         errln("remove() failed:  expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
97 
98     test1.replace(58, 6, "ir country");
99     expectedValue = "Now is the time for all good men to come to the aid of their country.\n";
100     if (test1 != expectedValue)
101         errln("replace() failed:  expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
102 
103     UChar     temp[80];
104     test1.extract(0, 15, temp);
105 
106     UnicodeString       test2(temp, 15);
107 
108     expectedValue = "Now is the time";
109     if (test2 != expectedValue)
110         errln("extract() failed:  expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
111 
112     test2 += " for me to go!\n";
113     expectedValue = "Now is the time for me to go!\n";
114     if (test2 != expectedValue)
115         errln("operator+=() failed:  expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
116 
117     if (test1.length() != 70)
118         errln(UnicodeString("length() failed: expected 70, got ") + test1.length());
119     if (test2.length() != 30)
120         errln(UnicodeString("length() failed: expected 30, got ") + test2.length());
121 
122     UnicodeString test3;
123     test3.append((UChar32)0x20402);
124     if(test3 != CharsToUnicodeString("\\uD841\\uDC02")){
125         errln((UnicodeString)"append failed for UChar32, expected \"\\\\ud841\\\\udc02\", got " + prettify(test3));
126     }
127     if(test3.length() != 2){
128         errln(UnicodeString("append or length failed for UChar32, expected 2, got ") + test3.length());
129     }
130     test3.append((UChar32)0x0074);
131     if(test3 != CharsToUnicodeString("\\uD841\\uDC02t")){
132         errln((UnicodeString)"append failed for UChar32, expected \"\\\\uD841\\\\uDC02t\", got " + prettify(test3));
133     }
134     if(test3.length() != 3){
135         errln((UnicodeString)"append or length failed for UChar32, expected 2, got " + test3.length());
136     }
137 
138     // test some UChar32 overloads
139     if( test3.setTo((UChar32)0x10330).length() != 2 ||
140         test3.insert(0, (UChar32)0x20100).length() != 4 ||
141         test3.replace(2, 2, (UChar32)0xe0061).length() != 4 ||
142         (test3 = (UChar32)0x14001).length() != 2
143     ) {
144         errln((UnicodeString)"simple UChar32 overloads for replace, insert, setTo or = failed");
145     }
146 
147     {
148         // test moveIndex32()
149         UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
150 
151         if(
152             s.moveIndex32(2, -1)!=0 ||
153             s.moveIndex32(2, 1)!=4 ||
154             s.moveIndex32(2, 2)!=5 ||
155             s.moveIndex32(5, -2)!=2 ||
156             s.moveIndex32(0, -1)!=0 ||
157             s.moveIndex32(6, 1)!=6
158         ) {
159             errln("UnicodeString::moveIndex32() failed");
160         }
161 
162         if(s.getChar32Start(1)!=0 || s.getChar32Start(2)!=2) {
163             errln("UnicodeString::getChar32Start() failed");
164         }
165 
166         if(s.getChar32Limit(1)!=2 || s.getChar32Limit(2)!=2) {
167             errln("UnicodeString::getChar32Limit() failed");
168         }
169     }
170 
171     {
172         // test new 2.2 constructors and setTo function that parallel Java's substring function.
173         UnicodeString src("Hello folks how are you?");
174         UnicodeString target1("how are you?");
175         if (target1 != UnicodeString(src, 12)) {
176             errln("UnicodeString(const UnicodeString&, int32_t) failed");
177         }
178         UnicodeString target2("folks");
179         if (target2 != UnicodeString(src, 6, 5)) {
180             errln("UnicodeString(const UnicodeString&, int32_t, int32_t) failed");
181         }
182         if (target1 != target2.setTo(src, 12)) {
183             errln("UnicodeString::setTo(const UnicodeString&, int32_t) failed");
184         }
185     }
186 
187     {
188         // op+ is new in ICU 2.8
189         UnicodeString s=UnicodeString("abc", "")+UnicodeString("def", "")+UnicodeString("ghi", "");
190         if(s!=UnicodeString("abcdefghi", "")) {
191             errln("operator+(UniStr, UniStr) failed");
192         }
193     }
194 
195     {
196         // tests for Jitterbug 2360
197         // verify that APIs with source pointer + length accept length == -1
198         // mostly test only where modified, only few functions did not already do this
199         if(UnicodeString("abc", -1, "")!=UnicodeString("abc", "")) {
200             errln("UnicodeString(codepageData, dataLength, codepage) does not work with dataLength==-1");
201         }
202 
203         UChar buffer[10]={ 0x61, 0x62, 0x20ac, 0xd900, 0xdc05, 0,   0x62, 0xffff, 0xdbff, 0xdfff };
204         UnicodeString s, t(buffer, -1, UPRV_LENGTHOF(buffer));
205 
206         if(s.setTo(buffer, -1, UPRV_LENGTHOF(buffer)).length()!=u_strlen(buffer)) {
207             errln("UnicodeString.setTo(buffer, length, capacity) does not work with length==-1");
208         }
209         if(t.length()!=u_strlen(buffer)) {
210             errln("UnicodeString(buffer, length, capacity) does not work with length==-1");
211         }
212 
213         if(0!=s.caseCompare(buffer, -1, U_FOLD_CASE_DEFAULT)) {
214             errln("UnicodeString.caseCompare(const UChar *, length, options) does not work with length==-1");
215         }
216         if(0!=s.caseCompare(0, s.length(), buffer, U_FOLD_CASE_DEFAULT)) {
217             errln("UnicodeString.caseCompare(start, _length, const UChar *, options) does not work");
218         }
219 
220         buffer[u_strlen(buffer)]=0xe4;
221         UnicodeString u(buffer, -1, UPRV_LENGTHOF(buffer));
222         if(s.setTo(buffer, -1, UPRV_LENGTHOF(buffer)).length()!=UPRV_LENGTHOF(buffer)) {
223             errln("UnicodeString.setTo(buffer without NUL, length, capacity) does not work with length==-1");
224         }
225         if(u.length()!=UPRV_LENGTHOF(buffer)) {
226             errln("UnicodeString(buffer without NUL, length, capacity) does not work with length==-1");
227         }
228 
229         static const char cs[]={ 0x61, (char)0xe4, (char)0x85, 0 };
230         UConverter *cnv;
231         UErrorCode errorCode=U_ZERO_ERROR;
232 
233         cnv=ucnv_open("ISO-8859-1", &errorCode);
234         UnicodeString v(cs, -1, cnv, errorCode);
235         ucnv_close(cnv);
236         if(v!=CharsToUnicodeString("a\\xe4\\x85")) {
237             errln("UnicodeString(const char *, length, cnv, errorCode) does not work with length==-1");
238         }
239     }
240 
241 #if U_CHARSET_IS_UTF8
242     {
243         // Test the hardcoded-UTF-8 UnicodeString optimizations.
244         static const uint8_t utf8[]={ 0x61, 0xC3, 0xA4, 0xC3, 0x9F, 0xE4, 0xB8, 0x80, 0 };
245         static const UChar utf16[]={ 0x61, 0xE4, 0xDF, 0x4E00 };
246         UnicodeString from8a = UnicodeString((const char *)utf8);
247         UnicodeString from8b = UnicodeString((const char *)utf8, (int32_t)sizeof(utf8)-1);
248         UnicodeString from16(FALSE, utf16, UPRV_LENGTHOF(utf16));
249         if(from8a != from16 || from8b != from16) {
250             errln("UnicodeString(const char * U_CHARSET_IS_UTF8) failed");
251         }
252         char buffer[16];
253         int32_t length8=from16.extract(0, 0x7fffffff, buffer, (uint32_t)sizeof(buffer));
254         if(length8!=((int32_t)sizeof(utf8)-1) || 0!=uprv_memcmp(buffer, utf8, sizeof(utf8))) {
255             errln("UnicodeString::extract(char * U_CHARSET_IS_UTF8) failed");
256         }
257         length8=from16.extract(1, 2, buffer, (uint32_t)sizeof(buffer));
258         if(length8!=4 || buffer[length8]!=0 || 0!=uprv_memcmp(buffer, utf8+1, length8)) {
259             errln("UnicodeString::extract(substring to char * U_CHARSET_IS_UTF8) failed");
260         }
261     }
262 #endif
263 }
264 
265 void
TestCompare()266 UnicodeStringTest::TestCompare()
267 {
268     UnicodeString   test1("this is a test");
269     UnicodeString   test2("this is a test");
270     UnicodeString   test3("this is a test of the emergency broadcast system");
271     UnicodeString   test4("never say, \"this is a test\"!!");
272 
273     UnicodeString   test5((UChar)0x5000);
274     UnicodeString   test6((UChar)0x5100);
275 
276     UChar         uniChars[] = { 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73,
277                  0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0 };
278     char            chars[] = "this is a test";
279 
280     // test operator== and operator!=
281     if (test1 != test2 || test1 == test3 || test1 == test4)
282         errln("operator== or operator!= failed");
283 
284     // test operator> and operator<
285     if (test1 > test2 || test1 < test2 || !(test1 < test3) || !(test1 > test4) ||
286         !(test5 < test6)
287     ) {
288         errln("operator> or operator< failed");
289     }
290 
291     // test operator>= and operator<=
292     if (!(test1 >= test2) || !(test1 <= test2) || !(test1 <= test3) || !(test1 >= test4))
293         errln("operator>= or operator<= failed");
294 
295     // test compare(UnicodeString)
296     if (test1.compare(test2) != 0 || test1.compare(test3) >= 0 || test1.compare(test4) <= 0)
297         errln("compare(UnicodeString) failed");
298 
299     //test compare(offset, length, UnicodeString)
300     if(test1.compare(0, 14, test2) != 0 ||
301         test3.compare(0, 14, test2) != 0 ||
302         test4.compare(12, 14, test2) != 0 ||
303         test3.compare(0, 18, test1) <=0  )
304         errln("compare(offset, length, UnicodeString) failes");
305 
306     // test compare(UChar*)
307     if (test2.compare(uniChars) != 0 || test3.compare(uniChars) <= 0 || test4.compare(uniChars) >= 0)
308         errln("compare(UChar*) failed");
309 
310     // test compare(char*)
311     if (test2.compare(chars) != 0 || test3.compare(chars) <= 0 || test4.compare(chars) >= 0)
312         errln("compare(char*) failed");
313 
314     // test compare(UChar*, length)
315     if (test1.compare(uniChars, 4) <= 0 || test1.compare(uniChars, 4) <= 0)
316         errln("compare(UChar*, length) failed");
317 
318     // test compare(thisOffset, thisLength, that, thatOffset, thatLength)
319     if (test1.compare(0, 14, test2, 0, 14) != 0
320     || test1.compare(0, 14, test3, 0, 14) != 0
321     || test1.compare(0, 14, test4, 12, 14) != 0)
322         errln("1. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
323 
324     if (test1.compare(10, 4, test2, 0, 4) >= 0
325     || test1.compare(10, 4, test3, 22, 9) <= 0
326     || test1.compare(10, 4, test4, 22, 4) != 0)
327         errln("2. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
328 
329     // test compareBetween
330     if (test1.compareBetween(0, 14, test2, 0, 14) != 0 || test1.compareBetween(0, 14, test3, 0, 14) != 0
331                     || test1.compareBetween(0, 14, test4, 12, 26) != 0)
332         errln("compareBetween failed");
333 
334     if (test1.compareBetween(10, 14, test2, 0, 4) >= 0 || test1.compareBetween(10, 14, test3, 22, 31) <= 0
335                     || test1.compareBetween(10, 14, test4, 22, 26) != 0)
336         errln("compareBetween failed");
337 
338     // test compare() etc. with strings that share a buffer but are not equal
339     test2=test1; // share the buffer, length() too large for the stackBuffer
340     test2.truncate(1); // change only the length, not the buffer
341     if( test1==test2 || test1<=test2 ||
342         test1.compare(test2)<=0 ||
343         test1.compareCodePointOrder(test2)<=0 ||
344         test1.compareCodePointOrder(0, INT32_MAX, test2)<=0 ||
345         test1.compareCodePointOrder(0, INT32_MAX, test2, 0, INT32_MAX)<=0 ||
346         test1.compareCodePointOrderBetween(0, INT32_MAX, test2, 0, INT32_MAX)<=0 ||
347         test1.caseCompare(test2, U_FOLD_CASE_DEFAULT)<=0
348     ) {
349         errln("UnicodeStrings that share a buffer but have different lengths compare as equal");
350     }
351 
352     /* test compareCodePointOrder() */
353     {
354         /* these strings are in ascending order */
355         static const UChar strings[][4]={
356             { 0x61, 0 },                    /* U+0061 */
357             { 0x20ac, 0xd801, 0 },          /* U+20ac U+d801 */
358             { 0x20ac, 0xd800, 0xdc00, 0 },  /* U+20ac U+10000 */
359             { 0xd800, 0 },                  /* U+d800 */
360             { 0xd800, 0xff61, 0 },          /* U+d800 U+ff61 */
361             { 0xdfff, 0 },                  /* U+dfff */
362             { 0xff61, 0xdfff, 0 },          /* U+ff61 U+dfff */
363             { 0xff61, 0xd800, 0xdc02, 0 },  /* U+ff61 U+10002 */
364             { 0xd800, 0xdc02, 0 },          /* U+10002 */
365             { 0xd84d, 0xdc56, 0 }           /* U+23456 */
366         };
367         UnicodeString u[20]; // must be at least as long as strings[]
368         int32_t i;
369 
370         for(i=0; i<UPRV_LENGTHOF(strings); ++i) {
371             u[i]=UnicodeString(TRUE, strings[i], -1);
372         }
373 
374         for(i=0; i<UPRV_LENGTHOF(strings)-1; ++i) {
375             if(u[i].compareCodePointOrder(u[i+1])>=0 || u[i].compareCodePointOrder(0, INT32_MAX, u[i+1].getBuffer())>=0) {
376                 errln("error: UnicodeString::compareCodePointOrder() fails for string %d and the following one\n", i);
377             }
378         }
379     }
380 
381     /* test caseCompare() */
382     {
383         static const UChar
384         _mixed[]=               { 0x61, 0x42, 0x131, 0x3a3, 0xdf,       0x130,       0x49,  0xfb03,           0xd93f, 0xdfff, 0 },
385         _otherDefault[]=        { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x69, 0x307, 0x69,  0x46, 0x66, 0x49, 0xd93f, 0xdfff, 0 },
386         _otherExcludeSpecialI[]={ 0x41, 0x62, 0x131, 0x3c3, 0x53, 0x73, 0x69,        0x131, 0x66, 0x46, 0x69, 0xd93f, 0xdfff, 0 },
387         _different[]=           { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x130,       0x49,  0x46, 0x66, 0x49, 0xd93f, 0xdffd, 0 };
388 
389         UnicodeString
390             mixed(TRUE, _mixed, -1),
391             otherDefault(TRUE, _otherDefault, -1),
392             otherExcludeSpecialI(TRUE, _otherExcludeSpecialI, -1),
393             different(TRUE, _different, -1);
394 
395         int8_t result;
396 
397         /* test caseCompare() */
398         result=mixed.caseCompare(otherDefault, U_FOLD_CASE_DEFAULT);
399         if(result!=0 || 0!=mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_DEFAULT)) {
400             errln("error: mixed.caseCompare(other, default)=%ld instead of 0\n", result);
401         }
402         result=mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
403         if(result!=0) {
404             errln("error: mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=%ld instead of 0\n", result);
405         }
406         result=mixed.caseCompare(otherDefault, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
407         if(result==0 || 0==mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_EXCLUDE_SPECIAL_I)) {
408             errln("error: mixed.caseCompare(other, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=0 instead of !=0\n");
409         }
410 
411         /* test caseCompare() */
412         result=mixed.caseCompare(different, U_FOLD_CASE_DEFAULT);
413         if(result<=0) {
414             errln("error: mixed.caseCompare(different, default)=%ld instead of positive\n", result);
415         }
416 
417         /* test caseCompare() - include the folded sharp s (U+00df) with different lengths */
418         result=mixed.caseCompare(1, 4, different, 1, 5, U_FOLD_CASE_DEFAULT);
419         if(result!=0 || 0!=mixed.caseCompareBetween(1, 5, different, 1, 6, U_FOLD_CASE_DEFAULT)) {
420             errln("error: mixed.caseCompare(mixed, 1, 4, different, 1, 5, default)=%ld instead of 0\n", result);
421         }
422 
423         /* test caseCompare() - stop in the middle of the sharp s (U+00df) */
424         result=mixed.caseCompare(1, 4, different, 1, 4, U_FOLD_CASE_DEFAULT);
425         if(result<=0) {
426             errln("error: mixed.caseCompare(1, 4, different, 1, 4, default)=%ld instead of positive\n", result);
427         }
428     }
429 
430     // test that srcLength=-1 is handled in functions that
431     // take input const UChar */int32_t srcLength (j785)
432     {
433         static const UChar u[]={ 0x61, 0x308, 0x62, 0 };
434         UnicodeString s=UNICODE_STRING("a\\u0308b", 8).unescape();
435 
436         if(s.compare(u, -1)!=0 || s.compare(0, 999, u, 0, -1)!=0) {
437             errln("error UnicodeString::compare(..., const UChar *, srcLength=-1) does not work");
438         }
439 
440         if(s.compareCodePointOrder(u, -1)!=0 || s.compareCodePointOrder(0, 999, u, 0, -1)!=0) {
441             errln("error UnicodeString::compareCodePointOrder(..., const UChar *, srcLength=-1, ...) does not work");
442         }
443 
444         if(s.caseCompare(u, -1, U_FOLD_CASE_DEFAULT)!=0 || s.caseCompare(0, 999, u, 0, -1, U_FOLD_CASE_DEFAULT)!=0) {
445             errln("error UnicodeString::caseCompare(..., const UChar *, srcLength=-1, ...) does not work");
446         }
447 
448         if(s.indexOf(u, 1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0)!=1) {
449             errln("error UnicodeString::indexOf(const UChar *, srcLength=-1, ...) does not work");
450         }
451 
452         if(s.lastIndexOf(u, 1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0)!=1) {
453             errln("error UnicodeString::lastIndexOf(const UChar *, srcLength=-1, ...) does not work");
454         }
455 
456         UnicodeString s2, s3;
457         s2.replace(0, 0, u+1, -1);
458         s3.replace(0, 0, u, 1, -1);
459         if(s.compare(1, 999, s2)!=0 || s2!=s3) {
460             errln("error UnicodeString::replace(..., const UChar *, srcLength=-1, ...) does not work");
461         }
462     }
463 }
464 
465 void
TestExtract()466 UnicodeStringTest::TestExtract()
467 {
468     UnicodeString  test1("Now is the time for all good men to come to the aid of their country.", "");
469     UnicodeString  test2;
470     UChar          test3[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
471     char           test4[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
472     UnicodeString  test5;
473     char           test6[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
474 
475     test1.extract(11, 12, test2);
476     test1.extract(11, 12, test3);
477     if (test1.extract(11, 12, test4) != 12 || test4[12] != 0) {
478         errln("UnicodeString.extract(char *) failed to return the correct size of destination buffer.");
479     }
480 
481     // test proper pinning in extractBetween()
482     test1.extractBetween(-3, 7, test5);
483     if(test5!=UNICODE_STRING("Now is ", 7)) {
484         errln("UnicodeString.extractBetween(-3, 7) did not pin properly.");
485     }
486 
487     test1.extractBetween(11, 23, test5);
488     if (test1.extract(60, 71, test6) != 9) {
489         errln("UnicodeString.extract() failed to return the correct size of destination buffer for end of buffer.");
490     }
491     if (test1.extract(11, 12, test6) != 12) {
492         errln("UnicodeString.extract() failed to return the correct size of destination buffer.");
493     }
494 
495     // convert test4 back to Unicode for comparison
496     UnicodeString test4b(test4, 12);
497 
498     if (test1.extract(11, 12, (char *)NULL) != 12) {
499         errln("UnicodeString.extract(NULL) failed to return the correct size of destination buffer.");
500     }
501     if (test1.extract(11, -1, test6) != 0) {
502         errln("UnicodeString.extract(-1) failed to stop reading the string.");
503     }
504 
505     for (int32_t i = 0; i < 12; i++) {
506         if (test1.charAt((int32_t)(11 + i)) != test2.charAt(i)) {
507             errln(UnicodeString("extracting into a UnicodeString failed at position ") + i);
508             break;
509         }
510         if (test1.charAt((int32_t)(11 + i)) != test3[i]) {
511             errln(UnicodeString("extracting into an array of UChar failed at position ") + i);
512             break;
513         }
514         if (((char)test1.charAt((int32_t)(11 + i))) != test4b.charAt(i)) {
515             errln(UnicodeString("extracting into an array of char failed at position ") + i);
516             break;
517         }
518         if (test1.charAt((int32_t)(11 + i)) != test5.charAt(i)) {
519             errln(UnicodeString("extracting with extractBetween failed at position ") + i);
520             break;
521         }
522     }
523 
524     // test preflighting and overflows with invariant conversion
525     if (test1.extract(0, 10, (char *)NULL, "") != 10) {
526         errln("UnicodeString.extract(0, 10, (char *)NULL, \"\") != 10");
527     }
528 
529     test4[2] = (char)0xff;
530     if (test1.extract(0, 10, test4, 2, "") != 10) {
531         errln("UnicodeString.extract(0, 10, test4, 2, \"\") != 10");
532     }
533     if (test4[2] != (char)0xff) {
534         errln("UnicodeString.extract(0, 10, test4, 2, \"\") overwrote test4[2]");
535     }
536 
537     {
538         // test new, NUL-terminating extract() function
539         UnicodeString s("terminate", "");
540         UChar dest[20]={
541             0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5,
542             0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5
543         };
544         UErrorCode errorCode;
545         int32_t length;
546 
547         errorCode=U_ZERO_ERROR;
548         length=s.extract((UChar *)NULL, 0, errorCode);
549         if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) {
550             errln("UnicodeString.extract(NULL, 0)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)", length, s.length(), u_errorName(errorCode));
551         }
552 
553         errorCode=U_ZERO_ERROR;
554         length=s.extract(dest, s.length()-1, errorCode);
555         if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) {
556             errln("UnicodeString.extract(dest too short)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)",
557                 length, u_errorName(errorCode), s.length());
558         }
559 
560         errorCode=U_ZERO_ERROR;
561         length=s.extract(dest, s.length(), errorCode);
562         if(errorCode!=U_STRING_NOT_TERMINATED_WARNING || length!=s.length()) {
563             errln("UnicodeString.extract(dest just right without NUL)==%d (%s) expected %d (U_STRING_NOT_TERMINATED_WARNING)",
564                 length, u_errorName(errorCode), s.length());
565         }
566         if(dest[length-1]!=s[length-1] || dest[length]!=0xa5) {
567             errln("UnicodeString.extract(dest just right without NUL) did not extract the string correctly");
568         }
569 
570         errorCode=U_ZERO_ERROR;
571         length=s.extract(dest, s.length()+1, errorCode);
572         if(errorCode!=U_ZERO_ERROR || length!=s.length()) {
573             errln("UnicodeString.extract(dest large enough)==%d (%s) expected %d (U_ZERO_ERROR)",
574                 length, u_errorName(errorCode), s.length());
575         }
576         if(dest[length-1]!=s[length-1] || dest[length]!=0 || dest[length+1]!=0xa5) {
577             errln("UnicodeString.extract(dest large enough) did not extract the string correctly");
578         }
579     }
580 
581     {
582         // test new UConverter extract() and constructor
583         UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
584         char buffer[32];
585         static const char expect[]={
586             (char)0xf0, (char)0xaf, (char)0xa6, (char)0x99,
587             (char)0xf0, (char)0x9d, (char)0x85, (char)0x9f,
588             (char)0xc3, (char)0x84,
589             (char)0xe1, (char)0xbb, (char)0x90
590         };
591         UErrorCode errorCode=U_ZERO_ERROR;
592         UConverter *cnv=ucnv_open("UTF-8", &errorCode);
593         int32_t length;
594 
595         if(U_SUCCESS(errorCode)) {
596             // test preflighting
597             if( (length=s.extract(NULL, 0, cnv, errorCode))!=13 ||
598                 errorCode!=U_BUFFER_OVERFLOW_ERROR
599             ) {
600                 errln("UnicodeString::extract(NULL, UConverter) preflighting failed (length=%ld, %s)",
601                       length, u_errorName(errorCode));
602             }
603             errorCode=U_ZERO_ERROR;
604             if( (length=s.extract(buffer, 2, cnv, errorCode))!=13 ||
605                 errorCode!=U_BUFFER_OVERFLOW_ERROR
606             ) {
607                 errln("UnicodeString::extract(too small, UConverter) preflighting failed (length=%ld, %s)",
608                       length, u_errorName(errorCode));
609             }
610 
611             // try error cases
612             errorCode=U_ZERO_ERROR;
613             if( s.extract(NULL, 2, cnv, errorCode)==13 || U_SUCCESS(errorCode)) {
614                 errln("UnicodeString::extract(UConverter) succeeded with an illegal destination");
615             }
616             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
617             if( s.extract(NULL, 0, cnv, errorCode)==13 || U_SUCCESS(errorCode)) {
618                 errln("UnicodeString::extract(UConverter) succeeded with a previous error code");
619             }
620             errorCode=U_ZERO_ERROR;
621 
622             // extract for real
623             if( (length=s.extract(buffer, sizeof(buffer), cnv, errorCode))!=13 ||
624                 uprv_memcmp(buffer, expect, 13)!=0 ||
625                 buffer[13]!=0 ||
626                 U_FAILURE(errorCode)
627             ) {
628                 errln("UnicodeString::extract(UConverter) conversion failed (length=%ld, %s)",
629                       length, u_errorName(errorCode));
630             }
631             // Test again with just the converter name.
632             if( (length=s.extract(0, s.length(), buffer, sizeof(buffer), "UTF-8"))!=13 ||
633                 uprv_memcmp(buffer, expect, 13)!=0 ||
634                 buffer[13]!=0 ||
635                 U_FAILURE(errorCode)
636             ) {
637                 errln("UnicodeString::extract(\"UTF-8\") conversion failed (length=%ld, %s)",
638                       length, u_errorName(errorCode));
639             }
640 
641             // try the constructor
642             UnicodeString t(expect, sizeof(expect), cnv, errorCode);
643             if(U_FAILURE(errorCode) || s!=t) {
644                 errln("UnicodeString(UConverter) conversion failed (%s)",
645                       u_errorName(errorCode));
646             }
647 
648             ucnv_close(cnv);
649         }
650     }
651 }
652 
653 void
TestRemoveReplace()654 UnicodeStringTest::TestRemoveReplace()
655 {
656     UnicodeString   test1("The rain in Spain stays mainly on the plain");
657     UnicodeString   test2("eat SPAMburgers!");
658     UChar         test3[] = { 0x53, 0x50, 0x41, 0x4d, 0x4d, 0 };
659     char            test4[] = "SPAM";
660     UnicodeString&  test5 = test1;
661 
662     test1.replace(4, 4, test2, 4, 4);
663     test1.replace(12, 5, test3, 4);
664     test3[4] = 0;
665     test1.replace(17, 4, test3);
666     test1.replace(23, 4, test4);
667     test1.replaceBetween(37, 42, test2, 4, 8);
668 
669     if (test1 != "The SPAM in SPAM SPAMs SPAMly on the SPAM")
670         errln("One of the replace methods failed:\n"
671               "  expected \"The SPAM in SPAM SPAMs SPAMly on the SPAM\",\n"
672               "  got \"" + test1 + "\"");
673 
674     test1.remove(21, 1);
675     test1.removeBetween(26, 28);
676 
677     if (test1 != "The SPAM in SPAM SPAM SPAM on the SPAM")
678         errln("One of the remove methods failed:\n"
679               "  expected \"The SPAM in SPAM SPAM SPAM on the SPAM\",\n"
680               "  got \"" + test1 + "\"");
681 
682     for (int32_t i = 0; i < test1.length(); i++) {
683         if (test5[i] != 0x53 && test5[i] != 0x50 && test5[i] != 0x41 && test5[i] != 0x4d && test5[i] != 0x20) {
684             test1.setCharAt(i, 0x78);
685         }
686     }
687 
688     if (test1 != "xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM")
689         errln("One of the remove methods failed:\n"
690               "  expected \"xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM\",\n"
691               "  got \"" + test1 + "\"");
692 
693     test1.remove();
694     if (test1.length() != 0)
695         errln("Remove() failed: expected empty string, got \"" + test1 + "\"");
696 }
697 
698 void
TestSearching()699 UnicodeStringTest::TestSearching()
700 {
701     UnicodeString test1("test test ttest tetest testesteststt");
702     UnicodeString test2("test");
703     UChar testChar = 0x74;
704 
705     UChar32 testChar32 = 0x20402;
706     UChar testData[]={
707         //   0       1       2       3       4       5       6       7
708         0xd841, 0xdc02, 0x0071, 0xdc02, 0xd841, 0x0071, 0xd841, 0xdc02,
709 
710         //   8       9      10      11      12      13      14      15
711         0x0071, 0x0072, 0xd841, 0xdc02, 0x0071, 0xd841, 0xdc02, 0x0071,
712 
713         //  16      17      18      19
714         0xdc02, 0xd841, 0x0073, 0x0000
715     };
716     UnicodeString test3(testData);
717     UnicodeString test4(testChar32);
718 
719     uint16_t occurrences = 0;
720     int32_t startPos = 0;
721     for ( ;
722           startPos != -1 && startPos < test1.length();
723           (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
724         ;
725     if (occurrences != 6)
726         errln(UnicodeString("indexOf failed: expected to find 6 occurrences, found ") + occurrences);
727 
728     for ( occurrences = 0, startPos = 10;
729           startPos != -1 && startPos < test1.length();
730           (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
731         ;
732     if (occurrences != 4)
733         errln(UnicodeString("indexOf with starting offset failed: "
734                             "expected to find 4 occurrences, found ") + occurrences);
735 
736     int32_t endPos = 28;
737     for ( occurrences = 0, startPos = 5;
738           startPos != -1 && startPos < test1.length();
739           (startPos = test1.indexOf(test2, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
740         ;
741     if (occurrences != 4)
742         errln(UnicodeString("indexOf with starting and ending offsets failed: "
743                             "expected to find 4 occurrences, found ") + occurrences);
744 
745     //using UChar32 string
746     for ( startPos=0, occurrences=0;
747           startPos != -1 && startPos < test3.length();
748           (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
749         ;
750     if (occurrences != 4)
751         errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences);
752 
753     for ( startPos=10, occurrences=0;
754           startPos != -1 && startPos < test3.length();
755           (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
756         ;
757     if (occurrences != 2)
758         errln(UnicodeString("indexOf failed: expected to find 2 occurrences, found ") + occurrences);
759     //---
760 
761     for ( occurrences = 0, startPos = 0;
762           startPos != -1 && startPos < test1.length();
763           (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
764         ;
765     if (occurrences != 16)
766         errln(UnicodeString("indexOf with character failed: "
767                             "expected to find 16 occurrences, found ") + occurrences);
768 
769     for ( occurrences = 0, startPos = 10;
770           startPos != -1 && startPos < test1.length();
771           (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
772         ;
773     if (occurrences != 12)
774         errln(UnicodeString("indexOf with character & start offset failed: "
775                             "expected to find 12 occurrences, found ") + occurrences);
776 
777     for ( occurrences = 0, startPos = 5, endPos = 28;
778           startPos != -1 && startPos < test1.length();
779           (startPos = test1.indexOf(testChar, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
780         ;
781     if (occurrences != 10)
782         errln(UnicodeString("indexOf with character & start & end offsets failed: "
783                             "expected to find 10 occurrences, found ") + occurrences);
784 
785     //testing for UChar32
786     UnicodeString subString;
787     for( occurrences =0, startPos=0; startPos < test3.length(); startPos +=1){
788         subString.append(test3, startPos, test3.length());
789         if(subString.indexOf(testChar32) != -1 ){
790              ++occurrences;
791         }
792         subString.remove();
793     }
794     if (occurrences != 14)
795         errln((UnicodeString)"indexOf failed: expected to find 14 occurrences, found " + occurrences);
796 
797     for ( occurrences = 0, startPos = 0;
798           startPos != -1 && startPos < test3.length();
799           (startPos = test3.indexOf(testChar32, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
800         ;
801     if (occurrences != 4)
802         errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences);
803 
804     endPos=test3.length();
805     for ( occurrences = 0, startPos = 5;
806           startPos != -1 && startPos < test3.length();
807           (startPos = test3.indexOf(testChar32, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
808         ;
809     if (occurrences != 3)
810         errln((UnicodeString)"indexOf with character & start & end offsets failed: expected to find 2 occurrences, found " + occurrences);
811     //---
812 
813     if(test1.lastIndexOf(test2)!=29) {
814         errln("test1.lastIndexOf(test2)!=29");
815     }
816 
817     if(test1.lastIndexOf(test2, 15)!=29 || test1.lastIndexOf(test2, 29)!=29 || test1.lastIndexOf(test2, 30)!=-1) {
818         errln("test1.lastIndexOf(test2, start) failed");
819     }
820 
821     for ( occurrences = 0, startPos = 32;
822           startPos != -1;
823           (startPos = test1.lastIndexOf(test2, 5, startPos - 5)) != -1 ? ++occurrences : 0)
824         ;
825     if (occurrences != 4)
826         errln(UnicodeString("lastIndexOf with starting and ending offsets failed: "
827                             "expected to find 4 occurrences, found ") + occurrences);
828 
829     for ( occurrences = 0, startPos = 32;
830           startPos != -1;
831           (startPos = test1.lastIndexOf(testChar, 5, startPos - 5)) != -1 ? ++occurrences : 0)
832         ;
833     if (occurrences != 11)
834         errln(UnicodeString("lastIndexOf with character & start & end offsets failed: "
835                             "expected to find 11 occurrences, found ") + occurrences);
836 
837     //testing UChar32
838     startPos=test3.length();
839     for ( occurrences = 0;
840           startPos != -1;
841           (startPos = test3.lastIndexOf(testChar32, 5, startPos - 5)) != -1 ? ++occurrences : 0)
842         ;
843     if (occurrences != 3)
844         errln((UnicodeString)"lastIndexOf with character & start & end offsets failed: expected to find 3 occurrences, found " + occurrences);
845 
846 
847     for ( occurrences = 0, endPos = test3.length();  endPos > 0; endPos -= 1){
848         subString.remove();
849         subString.append(test3, 0, endPos);
850         if(subString.lastIndexOf(testChar32) != -1 ){
851             ++occurrences;
852         }
853     }
854     if (occurrences != 18)
855         errln((UnicodeString)"indexOf failed: expected to find 18 occurrences, found " + occurrences);
856     //---
857 
858     // test that indexOf(UChar32) and lastIndexOf(UChar32)
859     // do not find surrogate code points when they are part of matched pairs
860     // (= part of supplementary code points)
861     // Jitterbug 1542
862     if(test3.indexOf((UChar32)0xd841) != 4 || test3.indexOf((UChar32)0xdc02) != 3) {
863         errln("error: UnicodeString::indexOf(UChar32 surrogate) finds a partial supplementary code point");
864     }
865     if( UnicodeString(test3, 0, 17).lastIndexOf((UChar)0xd841, 0) != 4 ||
866         UnicodeString(test3, 0, 17).lastIndexOf((UChar32)0xd841, 2) != 4 ||
867         test3.lastIndexOf((UChar32)0xd841, 0, 17) != 4 || test3.lastIndexOf((UChar32)0xdc02, 0, 17) != 16
868     ) {
869         errln("error: UnicodeString::lastIndexOf(UChar32 surrogate) finds a partial supplementary code point");
870     }
871 }
872 
873 void
TestSpacePadding()874 UnicodeStringTest::TestSpacePadding()
875 {
876     UnicodeString test1("hello");
877     UnicodeString test2("   there");
878     UnicodeString test3("Hi!  How ya doin'?  Beautiful day, isn't it?");
879     UnicodeString test4;
880     UBool returnVal;
881     UnicodeString expectedValue;
882 
883     returnVal = test1.padLeading(15);
884     expectedValue = "          hello";
885     if (returnVal == FALSE || test1 != expectedValue)
886         errln("padLeading() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
887 
888     returnVal = test2.padTrailing(15);
889     expectedValue = "   there       ";
890     if (returnVal == FALSE || test2 != expectedValue)
891         errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
892 
893     expectedValue = test3;
894     returnVal = test3.padTrailing(15);
895     if (returnVal == TRUE || test3 != expectedValue)
896         errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
897 
898     expectedValue = "hello";
899     test4.setTo(test1).trim();
900 
901     if (test4 != expectedValue || test1 == expectedValue || test4 != expectedValue)
902         errln("trim(UnicodeString&) failed");
903 
904     test1.trim();
905     if (test1 != expectedValue)
906         errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
907 
908     test2.trim();
909     expectedValue = "there";
910     if (test2 != expectedValue)
911         errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
912 
913     test3.trim();
914     expectedValue = "Hi!  How ya doin'?  Beautiful day, isn't it?";
915     if (test3 != expectedValue)
916         errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
917 
918     returnVal = test1.truncate(15);
919     expectedValue = "hello";
920     if (returnVal == TRUE || test1 != expectedValue)
921         errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
922 
923     returnVal = test2.truncate(15);
924     expectedValue = "there";
925     if (returnVal == TRUE || test2 != expectedValue)
926         errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
927 
928     returnVal = test3.truncate(15);
929     expectedValue = "Hi!  How ya doi";
930     if (returnVal == FALSE || test3 != expectedValue)
931         errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
932 }
933 
934 void
TestPrefixAndSuffix()935 UnicodeStringTest::TestPrefixAndSuffix()
936 {
937     UnicodeString test1("Now is the time for all good men to come to the aid of their country.");
938     UnicodeString test2("Now");
939     UnicodeString test3("country.");
940     UnicodeString test4("count");
941 
942     if (!test1.startsWith(test2) || !test1.startsWith(test2, 0, test2.length())) {
943         errln("startsWith() failed: \"" + test2 + "\" should be a prefix of \"" + test1 + "\".");
944     }
945 
946     if (test1.startsWith(test3) ||
947         test1.startsWith(test3.getBuffer(), test3.length()) ||
948         test1.startsWith(test3.getTerminatedBuffer(), 0, -1)
949     ) {
950         errln("startsWith() failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test1 + "\".");
951     }
952 
953     if (test1.endsWith(test2)) {
954         errln("endsWith() failed: \"" + test2 + "\" shouldn't be a suffix of \"" + test1 + "\".");
955     }
956 
957     if (!test1.endsWith(test3)) {
958         errln("endsWith(test3) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
959     }
960     if (!test1.endsWith(test3, 0, INT32_MAX)) {
961         errln("endsWith(test3, 0, INT32_MAX) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
962     }
963 
964     if(!test1.endsWith(test3.getBuffer(), test3.length())) {
965         errln("endsWith(test3.getBuffer(), test3.length()) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
966     }
967     if(!test1.endsWith(test3.getTerminatedBuffer(), 0, -1)) {
968         errln("endsWith(test3.getTerminatedBuffer(), 0, -1) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
969     }
970 
971     if (!test3.startsWith(test4)) {
972         errln("endsWith(test4) failed: \"" + test4 + "\" should be a prefix of \"" + test3 + "\".");
973     }
974 
975     if (test4.startsWith(test3)) {
976         errln("startsWith(test3) failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test4 + "\".");
977     }
978 }
979 
980 void
TestStartsWithAndEndsWithNulTerminated()981 UnicodeStringTest::TestStartsWithAndEndsWithNulTerminated() {
982     UnicodeString test("abcde");
983     const UChar ab[] = { 0x61, 0x62, 0 };
984     const UChar de[] = { 0x64, 0x65, 0 };
985     assertTrue("abcde.startsWith(ab, -1)", test.startsWith(ab, -1));
986     assertTrue("abcde.startsWith(ab, 0, -1)", test.startsWith(ab, 0, -1));
987     assertTrue("abcde.endsWith(de, -1)", test.endsWith(de, -1));
988     assertTrue("abcde.endsWith(de, 0, -1)", test.endsWith(de, 0, -1));
989 }
990 
991 void
TestFindAndReplace()992 UnicodeStringTest::TestFindAndReplace()
993 {
994     UnicodeString test1("One potato, two potato, three potato, four\n");
995     UnicodeString test2("potato");
996     UnicodeString test3("MISSISSIPPI");
997 
998     UnicodeString expectedValue;
999 
1000     test1.findAndReplace(test2, test3);
1001     expectedValue = "One MISSISSIPPI, two MISSISSIPPI, three MISSISSIPPI, four\n";
1002     if (test1 != expectedValue)
1003         errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
1004     test1.findAndReplace(2, 32, test3, test2);
1005     expectedValue = "One potato, two potato, three MISSISSIPPI, four\n";
1006     if (test1 != expectedValue)
1007         errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
1008 }
1009 
1010 void
TestReverse()1011 UnicodeStringTest::TestReverse()
1012 {
1013     UnicodeString test("backwards words say to used I");
1014 
1015     test.reverse();
1016     test.reverse(2, 4);
1017     test.reverse(7, 2);
1018     test.reverse(10, 3);
1019     test.reverse(14, 5);
1020     test.reverse(20, 9);
1021 
1022     if (test != "I used to say words backwards")
1023         errln("reverse() failed:  Expected \"I used to say words backwards\",\n got \""
1024             + test + "\"");
1025 
1026     test=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
1027     test.reverse();
1028     if(test.char32At(0)!=0x1ed0 || test.char32At(1)!=0xc4 || test.char32At(2)!=0x1d15f || test.char32At(4)!=0x2f999) {
1029         errln("reverse() failed with supplementary characters");
1030     }
1031 
1032     // Test case for ticket #8091:
1033     // UnicodeString::reverse() failed to see a lead surrogate in the middle of
1034     // an odd-length string that contains no other lead surrogates.
1035     test=UNICODE_STRING_SIMPLE("ab\\U0001F4A9e").unescape();
1036     UnicodeString expected=UNICODE_STRING_SIMPLE("e\\U0001F4A9ba").unescape();
1037     test.reverse();
1038     if(test!=expected) {
1039         errln("reverse() failed with only lead surrogate in the middle");
1040     }
1041 }
1042 
1043 void
TestMiscellaneous()1044 UnicodeStringTest::TestMiscellaneous()
1045 {
1046     UnicodeString   test1("This is a test");
1047     UnicodeString   test2("This is a test");
1048     UnicodeString   test3("Me too!");
1049 
1050     // test getBuffer(minCapacity) and releaseBuffer()
1051     test1=UnicodeString(); // make sure that it starts with its stackBuffer
1052     UChar *p=test1.getBuffer(20);
1053     if(test1.getCapacity()<20) {
1054         errln("UnicodeString::getBuffer(20).getCapacity()<20");
1055     }
1056 
1057     test1.append((UChar)7); // must not be able to modify the string here
1058     test1.setCharAt(3, 7);
1059     test1.reverse();
1060     if( test1.length()!=0 ||
1061         test1.charAt(0)!=0xffff || test1.charAt(3)!=0xffff ||
1062         test1.getBuffer(10)!=0 || test1.getBuffer()!=0
1063     ) {
1064         errln("UnicodeString::getBuffer(minCapacity) allows read or write access to the UnicodeString");
1065     }
1066 
1067     p[0]=1;
1068     p[1]=2;
1069     p[2]=3;
1070     test1.releaseBuffer(3);
1071     test1.append((UChar)4);
1072 
1073     if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) {
1074         errln("UnicodeString::releaseBuffer(newLength) does not properly reallow access to the UnicodeString");
1075     }
1076 
1077     // test releaseBuffer() without getBuffer(minCapacity) - must not have any effect
1078     test1.releaseBuffer(1);
1079     if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) {
1080         errln("UnicodeString::releaseBuffer(newLength) without getBuffer(minCapacity) changed the UnicodeString");
1081     }
1082 
1083     // test getBuffer(const)
1084     const UChar *q=test1.getBuffer(), *r=test1.getBuffer();
1085     if( test1.length()!=4 ||
1086         q[0]!=1 || q[1]!=2 || q[2]!=3 || q[3]!=4 ||
1087         r[0]!=1 || r[1]!=2 || r[2]!=3 || r[3]!=4
1088     ) {
1089         errln("UnicodeString::getBuffer(const) does not return a usable buffer pointer");
1090     }
1091 
1092     // test releaseBuffer() with a NUL-terminated buffer
1093     test1.getBuffer(20)[2]=0;
1094     test1.releaseBuffer(); // implicit -1
1095     if(test1.length()!=2 || test1.charAt(0)!=1 || test1.charAt(1) !=2) {
1096         errln("UnicodeString::releaseBuffer(-1) does not properly set the length of the UnicodeString");
1097     }
1098 
1099     // test releaseBuffer() with a non-NUL-terminated buffer
1100     p=test1.getBuffer(256);
1101     for(int32_t i=0; i<test1.getCapacity(); ++i) {
1102         p[i]=(UChar)1;      // fill the buffer with all non-NUL code units
1103     }
1104     test1.releaseBuffer();  // implicit -1
1105     if(test1.length()!=test1.getCapacity() || test1.charAt(1)!=1 || test1.charAt(100)!=1 || test1.charAt(test1.getCapacity()-1)!=1) {
1106         errln("UnicodeString::releaseBuffer(-1 but no NUL) does not properly set the length of the UnicodeString");
1107     }
1108 
1109     // test getTerminatedBuffer()
1110     test1=UnicodeString("This is another test.", "");
1111     test2=UnicodeString("This is another test.", "");
1112     q=test1.getTerminatedBuffer();
1113     if(q[test1.length()]!=0 || test1!=test2 || test2.compare(q, -1)!=0) {
1114         errln("getTerminatedBuffer()[length]!=0");
1115     }
1116 
1117     const UChar u[]={ 5, 6, 7, 8, 0 };
1118     test1.setTo(FALSE, u, 3);
1119     q=test1.getTerminatedBuffer();
1120     if(q==u || q[0]!=5 || q[1]!=6 || q[2]!=7 || q[3]!=0) {
1121         errln("UnicodeString(u[3]).getTerminatedBuffer() returns a bad buffer");
1122     }
1123 
1124     test1.setTo(TRUE, u, -1);
1125     q=test1.getTerminatedBuffer();
1126     if(q!=u || test1.length()!=4 || q[3]!=8 || q[4]!=0) {
1127         errln("UnicodeString(u[-1]).getTerminatedBuffer() returns a bad buffer");
1128     }
1129 
1130     // NOTE: Some compilers will optimize u"la" to point to the same static memory
1131     // as u" lila", offset by 3 code units
1132     test1=UnicodeString(TRUE, u"la", 2);
1133     test1.append(UnicodeString(TRUE, u" lila", 5).getTerminatedBuffer(), 0, -1);
1134     assertEquals("UnicodeString::append(const UChar *, start, length) failed",
1135         u"la lila", test1);
1136 
1137     test1.insert(3, UnicodeString(TRUE, u"dudum ", 6), 0, INT32_MAX);
1138     assertEquals("UnicodeString::insert(start, const UniStr &, start, length) failed",
1139         u"la dudum lila", test1);
1140 
1141     static const UChar ucs[]={ 0x68, 0x6d, 0x20, 0 };
1142     test1.insert(9, ucs, -1);
1143     assertEquals("UnicodeString::insert(start, const UChar *, length) failed",
1144         u"la dudum hm lila", test1);
1145 
1146     test1.replace(9, 2, (UChar)0x2b);
1147     assertEquals("UnicodeString::replace(start, length, UChar) failed",
1148         u"la dudum + lila", test1);
1149 
1150     if(test1.hasMetaData() || UnicodeString().hasMetaData()) {
1151         errln("UnicodeString::hasMetaData() returns TRUE");
1152     }
1153 
1154     // test getTerminatedBuffer() on a truncated, shared, heap-allocated string
1155     test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
1156     test1.truncate(36);  // ensure length()<getCapacity()
1157     test2=test1;  // share the buffer
1158     test1.truncate(5);
1159     if(test1.length()!=5 || test1.getTerminatedBuffer()[5]!=0) {
1160         errln("UnicodeString(shared buffer).truncate() failed");
1161     }
1162     if(test2.length()!=36 || test2[5]!=0x66 || u_strlen(test2.getTerminatedBuffer())!=36) {
1163         errln("UnicodeString(shared buffer).truncate().getTerminatedBuffer() "
1164               "modified another copy of the string!");
1165     }
1166     test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
1167     test1.truncate(36);  // ensure length()<getCapacity()
1168     test2=test1;  // share the buffer
1169     test1.remove();
1170     if(test1.length()!=0 || test1.getTerminatedBuffer()[0]!=0) {
1171         errln("UnicodeString(shared buffer).remove() failed");
1172     }
1173     if(test2.length()!=36 || test2[0]!=0x61 || u_strlen(test2.getTerminatedBuffer())!=36) {
1174         errln("UnicodeString(shared buffer).remove().getTerminatedBuffer() "
1175               "modified another copy of the string!");
1176     }
1177 
1178     // ticket #9740
1179     test1.setTo(TRUE, ucs, 3);
1180     assertEquals("length of read-only alias", 3, test1.length());
1181     test1.trim();
1182     assertEquals("length of read-only alias after trim()", 2, test1.length());
1183     assertEquals("length of terminated buffer of read-only alias + trim()",
1184                  2, u_strlen(test1.getTerminatedBuffer()));
1185 }
1186 
1187 void
TestStackAllocation()1188 UnicodeStringTest::TestStackAllocation()
1189 {
1190     UChar           testString[] ={
1191         0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x63, 0x72, 0x61, 0x7a, 0x79, 0x20, 0x74, 0x65, 0x73, 0x74, 0x2e, 0 };
1192     UChar           guardWord = 0x4DED;
1193     UnicodeString*  test = 0;
1194 
1195     test = new  UnicodeString(testString);
1196     if (*test != "This is a crazy test.")
1197         errln("Test string failed to initialize properly.");
1198     if (guardWord != 0x04DED)
1199         errln("Test string initialization overwrote guard word!");
1200 
1201     test->insert(8, "only ");
1202     test->remove(15, 6);
1203     if (*test != "This is only a test.")
1204         errln("Manipulation of test string failed to work right.");
1205     if (guardWord != 0x4DED)
1206         errln("Manipulation of test string overwrote guard word!");
1207 
1208     // we have to deinitialize and release the backing store by calling the destructor
1209     // explicitly, since we can't overload operator delete
1210     delete test;
1211 
1212     UChar workingBuffer[] = {
1213         0x4e, 0x6f, 0x77, 0x20, 0x69, 0x73, 0x20, 0x74, 0x68, 0x65, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x20,
1214         0x66, 0x6f, 0x72, 0x20, 0x61, 0x6c, 0x6c, 0x20, 0x6d, 0x65, 0x6e, 0x20, 0x74, 0x6f, 0x20,
1215         0x63, 0x6f, 0x6d, 0x65, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1216         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1217         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1218     UChar guardWord2 = 0x4DED;
1219 
1220     test = new UnicodeString(workingBuffer, 35, 100);
1221     if (*test != "Now is the time for all men to come")
1222         errln("Stack-allocated backing store failed to initialize correctly.");
1223     if (guardWord2 != 0x4DED)
1224         errln("Stack-allocated backing store overwrote guard word!");
1225 
1226     test->insert(24, "good ");
1227     if (*test != "Now is the time for all good men to come")
1228         errln("insert() on stack-allocated UnicodeString didn't work right");
1229     if (guardWord2 != 0x4DED)
1230         errln("insert() on stack-allocated UnicodeString overwrote guard word!");
1231 
1232     if (workingBuffer[24] != 0x67)
1233         errln("insert() on stack-allocated UnicodeString didn't affect backing store");
1234 
1235     *test += " to the aid of their country.";
1236     if (*test != "Now is the time for all good men to come to the aid of their country.")
1237         errln("Stack-allocated UnicodeString overflow didn't work");
1238     if (guardWord2 != 0x4DED)
1239         errln("Stack-allocated UnicodeString overflow overwrote guard word!");
1240 
1241     *test = "ha!";
1242     if (*test != "ha!")
1243         errln("Assignment to stack-allocated UnicodeString didn't work");
1244     if (workingBuffer[0] != 0x4e)
1245         errln("Change to UnicodeString after overflow are still affecting original buffer");
1246     if (guardWord2 != 0x4DED)
1247         errln("Change to UnicodeString after overflow overwrote guard word!");
1248 
1249     // test read-only aliasing with setTo()
1250     workingBuffer[0] = 0x20ac;
1251     workingBuffer[1] = 0x125;
1252     workingBuffer[2] = 0;
1253     test->setTo(TRUE, workingBuffer, 2);
1254     if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x125) {
1255         errln("UnicodeString.setTo(readonly alias) does not alias correctly");
1256     }
1257 
1258     UnicodeString *c=test->clone();
1259 
1260     workingBuffer[1] = 0x109;
1261     if(test->charAt(1) != 0x109) {
1262         errln("UnicodeString.setTo(readonly alias) made a copy: did not see change in buffer");
1263     }
1264 
1265     if(c->length() != 2 || c->charAt(1) != 0x125) {
1266         errln("clone(alias) did not copy the buffer");
1267     }
1268     delete c;
1269 
1270     test->setTo(TRUE, workingBuffer, -1);
1271     if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x109) {
1272         errln("UnicodeString.setTo(readonly alias, length -1) does not alias correctly");
1273     }
1274 
1275     test->setTo(FALSE, workingBuffer, -1);
1276     if(!test->isBogus()) {
1277         errln("UnicodeString.setTo(unterminated readonly alias, length -1) does not result in isBogus()");
1278     }
1279 
1280     delete test;
1281 
1282     test=new UnicodeString();
1283     UChar buffer[]={0x0061, 0x0062, 0x20ac, 0x0043, 0x0042, 0x0000};
1284     test->setTo(buffer, 4, 10);
1285     if(test->length() !=4 || test->charAt(0) != 0x0061 || test->charAt(1) != 0x0062 ||
1286         test->charAt(2) != 0x20ac || test->charAt(3) != 0x0043){
1287         errln((UnicodeString)"UnicodeString.setTo(UChar*, length, capacity) does not work correctly\n" + prettify(*test));
1288     }
1289     delete test;
1290 
1291 
1292     // test the UChar32 constructor
1293     UnicodeString c32Test((UChar32)0x10ff2a);
1294     if( c32Test.length() != U16_LENGTH(0x10ff2a) ||
1295         c32Test.char32At(c32Test.length() - 1) != 0x10ff2a
1296     ) {
1297         errln("The UnicodeString(UChar32) constructor does not work with a 0x10ff2a filler");
1298     }
1299 
1300     // test the (new) capacity constructor
1301     UnicodeString capTest(5, (UChar32)0x2a, 5);
1302     if( capTest.length() != 5 * U16_LENGTH(0x2a) ||
1303         capTest.char32At(0) != 0x2a ||
1304         capTest.char32At(4) != 0x2a
1305     ) {
1306         errln("The UnicodeString capacity constructor does not work with an ASCII filler");
1307     }
1308 
1309     capTest = UnicodeString(5, (UChar32)0x10ff2a, 5);
1310     if( capTest.length() != 5 * U16_LENGTH(0x10ff2a) ||
1311         capTest.char32At(0) != 0x10ff2a ||
1312         capTest.char32At(4) != 0x10ff2a
1313     ) {
1314         errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
1315     }
1316 
1317     capTest = UnicodeString(5, (UChar32)0, 0);
1318     if(capTest.length() != 0) {
1319         errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
1320     }
1321 }
1322 
1323 /**
1324  * Test the unescape() function.
1325  */
TestUnescape(void)1326 void UnicodeStringTest::TestUnescape(void) {
1327     UnicodeString IN("abc\\u4567 \\n\\r \\U00101234xyz\\x1\\x{5289}\\x1b", -1, US_INV);
1328     UnicodeString OUT("abc");
1329     OUT.append((UChar)0x4567);
1330     OUT.append(" ");
1331     OUT.append((UChar)0xA);
1332     OUT.append((UChar)0xD);
1333     OUT.append(" ");
1334     OUT.append((UChar32)0x00101234);
1335     OUT.append("xyz");
1336     OUT.append((UChar32)1).append((UChar32)0x5289).append((UChar)0x1b);
1337     UnicodeString result = IN.unescape();
1338     if (result != OUT) {
1339         errln("FAIL: " + prettify(IN) + ".unescape() -> " +
1340               prettify(result) + ", expected " +
1341               prettify(OUT));
1342     }
1343 
1344     // test that an empty string is returned in case of an error
1345     if (!UNICODE_STRING("wrong \\u sequence", 17).unescape().isEmpty()) {
1346         errln("FAIL: unescaping of a string with an illegal escape sequence did not return an empty string");
1347     }
1348 }
1349 
1350 /* test code point counting functions --------------------------------------- */
1351 
1352 /* reference implementation of UnicodeString::hasMoreChar32Than() */
1353 static int32_t
_refUnicodeStringHasMoreChar32Than(const UnicodeString & s,int32_t start,int32_t length,int32_t number)1354 _refUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) {
1355     int32_t count=s.countChar32(start, length);
1356     return count>number;
1357 }
1358 
1359 /* compare the real function against the reference */
1360 void
_testUnicodeStringHasMoreChar32Than(const UnicodeString & s,int32_t start,int32_t length,int32_t number)1361 UnicodeStringTest::_testUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) {
1362     if(s.hasMoreChar32Than(start, length, number)!=_refUnicodeStringHasMoreChar32Than(s, start, length, number)) {
1363         errln("hasMoreChar32Than(%d, %d, %d)=%hd is wrong\n",
1364                 start, length, number, s.hasMoreChar32Than(start, length, number));
1365     }
1366 }
1367 
1368 void
TestCountChar32(void)1369 UnicodeStringTest::TestCountChar32(void) {
1370     {
1371         UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
1372 
1373         // test countChar32()
1374         // note that this also calls and tests u_countChar32(length>=0)
1375         if(
1376             s.countChar32()!=4 ||
1377             s.countChar32(1)!=4 ||
1378             s.countChar32(2)!=3 ||
1379             s.countChar32(2, 3)!=2 ||
1380             s.countChar32(2, 0)!=0
1381         ) {
1382             errln("UnicodeString::countChar32() failed");
1383         }
1384 
1385         // NUL-terminate the string buffer and test u_countChar32(length=-1)
1386         const UChar *buffer=s.getTerminatedBuffer();
1387         if(
1388             u_countChar32(buffer, -1)!=4 ||
1389             u_countChar32(buffer+1, -1)!=4 ||
1390             u_countChar32(buffer+2, -1)!=3 ||
1391             u_countChar32(buffer+3, -1)!=3 ||
1392             u_countChar32(buffer+4, -1)!=2 ||
1393             u_countChar32(buffer+5, -1)!=1 ||
1394             u_countChar32(buffer+6, -1)!=0
1395         ) {
1396             errln("u_countChar32(length=-1) failed");
1397         }
1398 
1399         // test u_countChar32() with bad input
1400         if(u_countChar32(NULL, 5)!=0 || u_countChar32(buffer, -2)!=0) {
1401             errln("u_countChar32(bad input) failed (returned non-zero counts)");
1402         }
1403     }
1404 
1405     /* test data and variables for hasMoreChar32Than() */
1406     static const UChar str[]={
1407         0x61, 0x62, 0xd800, 0xdc00,
1408         0xd801, 0xdc01, 0x63, 0xd802,
1409         0x64, 0xdc03, 0x65, 0x66,
1410         0xd804, 0xdc04, 0xd805, 0xdc05,
1411         0x67
1412     };
1413     UnicodeString string(str, UPRV_LENGTHOF(str));
1414     int32_t start, length, number;
1415 
1416     /* test hasMoreChar32Than() */
1417     for(length=string.length(); length>=0; --length) {
1418         for(start=0; start<=length; ++start) {
1419             for(number=-1; number<=((length-start)+2); ++number) {
1420                 _testUnicodeStringHasMoreChar32Than(string, start, length-start, number);
1421             }
1422         }
1423     }
1424 
1425     /* test hasMoreChar32Than() with pinning */
1426     for(start=-1; start<=string.length()+1; ++start) {
1427         for(number=-1; number<=((string.length()-start)+2); ++number) {
1428             _testUnicodeStringHasMoreChar32Than(string, start, 0x7fffffff, number);
1429         }
1430     }
1431 
1432     /* test hasMoreChar32Than() with a bogus string */
1433     string.setToBogus();
1434     for(length=-1; length<=1; ++length) {
1435         for(start=-1; start<=length; ++start) {
1436             for(number=-1; number<=((length-start)+2); ++number) {
1437                 _testUnicodeStringHasMoreChar32Than(string, start, length-start, number);
1438             }
1439         }
1440     }
1441 }
1442 
1443 void
TestBogus()1444 UnicodeStringTest::TestBogus() {
1445     UnicodeString   test1("This is a test");
1446     UnicodeString   test2("This is a test");
1447     UnicodeString   test3("Me too!");
1448 
1449     // test isBogus() and setToBogus()
1450     if (test1.isBogus() || test2.isBogus() || test3.isBogus()) {
1451         errln("A string returned TRUE for isBogus()!");
1452     }
1453 
1454     // NULL pointers are treated like empty strings
1455     // use other illegal arguments to make a bogus string
1456     test3.setTo(FALSE, test1.getBuffer(), -2);
1457     if(!test3.isBogus()) {
1458         errln("A bogus string returned FALSE for isBogus()!");
1459     }
1460     if (test1.hashCode() != test2.hashCode() || test1.hashCode() == test3.hashCode()) {
1461         errln("hashCode() failed");
1462     }
1463     if(test3.getBuffer()!=0 || test3.getBuffer(20)!=0 || test3.getTerminatedBuffer()!=0) {
1464         errln("bogus.getBuffer()!=0");
1465     }
1466     if (test1.indexOf(test3) != -1) {
1467         errln("bogus.indexOf() != -1");
1468     }
1469     if (test1.lastIndexOf(test3) != -1) {
1470         errln("bogus.lastIndexOf() != -1");
1471     }
1472     if (test1.caseCompare(test3, U_FOLD_CASE_DEFAULT) != 1 || test3.caseCompare(test1, U_FOLD_CASE_DEFAULT) != -1) {
1473         errln("caseCompare() doesn't work with bogus strings");
1474     }
1475     if (test1.compareCodePointOrder(test3) != 1 || test3.compareCodePointOrder(test1) != -1) {
1476         errln("compareCodePointOrder() doesn't work with bogus strings");
1477     }
1478 
1479     // verify that non-assignment modifications fail and do not revive a bogus string
1480     test3.setToBogus();
1481     test3.append((UChar)0x61);
1482     if(!test3.isBogus() || test3.getBuffer()!=0) {
1483         errln("bogus.append('a') worked but must not");
1484     }
1485 
1486     test3.setToBogus();
1487     test3.findAndReplace(UnicodeString((UChar)0x61), test2);
1488     if(!test3.isBogus() || test3.getBuffer()!=0) {
1489         errln("bogus.findAndReplace() worked but must not");
1490     }
1491 
1492     test3.setToBogus();
1493     test3.trim();
1494     if(!test3.isBogus() || test3.getBuffer()!=0) {
1495         errln("bogus.trim() revived bogus but must not");
1496     }
1497 
1498     test3.setToBogus();
1499     test3.remove(1);
1500     if(!test3.isBogus() || test3.getBuffer()!=0) {
1501         errln("bogus.remove(1) revived bogus but must not");
1502     }
1503 
1504     test3.setToBogus();
1505     if(!test3.setCharAt(0, 0x62).isBogus() || !test3.isEmpty()) {
1506         errln("bogus.setCharAt(0, 'b') worked but must not");
1507     }
1508 
1509     test3.setToBogus();
1510     if(test3.truncate(1) || !test3.isBogus() || !test3.isEmpty()) {
1511         errln("bogus.truncate(1) revived bogus but must not");
1512     }
1513 
1514     // verify that assignments revive a bogus string
1515     test3.setToBogus();
1516     if(!test3.isBogus() || (test3=test1).isBogus() || test3!=test1) {
1517         errln("bogus.operator=() failed");
1518     }
1519 
1520     test3.setToBogus();
1521     if(!test3.isBogus() || test3.fastCopyFrom(test1).isBogus() || test3!=test1) {
1522         errln("bogus.fastCopyFrom() failed");
1523     }
1524 
1525     test3.setToBogus();
1526     if(!test3.isBogus() || test3.setTo(test1).isBogus() || test3!=test1) {
1527         errln("bogus.setTo(UniStr) failed");
1528     }
1529 
1530     test3.setToBogus();
1531     if(!test3.isBogus() || test3.setTo(test1, 0).isBogus() || test3!=test1) {
1532         errln("bogus.setTo(UniStr, 0) failed");
1533     }
1534 
1535     test3.setToBogus();
1536     if(!test3.isBogus() || test3.setTo(test1, 0, 0x7fffffff).isBogus() || test3!=test1) {
1537         errln("bogus.setTo(UniStr, 0, len) failed");
1538     }
1539 
1540     test3.setToBogus();
1541     if(!test3.isBogus() || test3.setTo(test1.getBuffer(), test1.length()).isBogus() || test3!=test1) {
1542         errln("bogus.setTo(const UChar *, len) failed");
1543     }
1544 
1545     test3.setToBogus();
1546     if(!test3.isBogus() || test3.setTo((UChar)0x2028).isBogus() || test3!=UnicodeString((UChar)0x2028)) {
1547         errln("bogus.setTo(UChar) failed");
1548     }
1549 
1550     test3.setToBogus();
1551     if(!test3.isBogus() || test3.setTo((UChar32)0x1d157).isBogus() || test3!=UnicodeString((UChar32)0x1d157)) {
1552         errln("bogus.setTo(UChar32) failed");
1553     }
1554 
1555     test3.setToBogus();
1556     if(!test3.isBogus() || test3.setTo(FALSE, test1.getBuffer(), test1.length()).isBogus() || test3!=test1) {
1557         errln("bogus.setTo(readonly alias) failed");
1558     }
1559 
1560     // writable alias to another string's buffer: very bad idea, just convenient for this test
1561     test3.setToBogus();
1562     if(!test3.isBogus() ||
1563             test3.setTo(const_cast<UChar *>(test1.getBuffer()),
1564                         test1.length(), test1.getCapacity()).isBogus() ||
1565             test3!=test1) {
1566         errln("bogus.setTo(writable alias) failed");
1567     }
1568 
1569     // verify simple, documented ways to turn a bogus string into an empty one
1570     test3.setToBogus();
1571     if(!test3.isBogus() || (test3=UnicodeString()).isBogus() || !test3.isEmpty()) {
1572         errln("bogus.operator=(UnicodeString()) failed");
1573     }
1574 
1575     test3.setToBogus();
1576     if(!test3.isBogus() || test3.setTo(UnicodeString()).isBogus() || !test3.isEmpty()) {
1577         errln("bogus.setTo(UnicodeString()) failed");
1578     }
1579 
1580     test3.setToBogus();
1581     if(test3.remove().isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) {
1582         errln("bogus.remove() failed");
1583     }
1584 
1585     test3.setToBogus();
1586     if(test3.remove(0, INT32_MAX).isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) {
1587         errln("bogus.remove(0, INT32_MAX) failed");
1588     }
1589 
1590     test3.setToBogus();
1591     if(test3.truncate(0) || test3.isBogus() || !test3.isEmpty()) {
1592         errln("bogus.truncate(0) failed");
1593     }
1594 
1595     test3.setToBogus();
1596     if(!test3.isBogus() || test3.setTo((UChar32)-1).isBogus() || !test3.isEmpty()) {
1597         errln("bogus.setTo((UChar32)-1) failed");
1598     }
1599 
1600     static const UChar nul=0;
1601 
1602     test3.setToBogus();
1603     if(!test3.isBogus() || test3.setTo(&nul, 0).isBogus() || !test3.isEmpty()) {
1604         errln("bogus.setTo(&nul, 0) failed");
1605     }
1606 
1607     test3.setToBogus();
1608     if(!test3.isBogus() || test3.getBuffer()!=0) {
1609         errln("setToBogus() failed to make a string bogus");
1610     }
1611 
1612     test3.setToBogus();
1613     if(test1.isBogus() || !(test1=test3).isBogus()) {
1614         errln("normal=bogus failed to make the left string bogus");
1615     }
1616 
1617     // test that NULL primitive input string values are treated like
1618     // empty strings, not errors (bogus)
1619     test2.setTo((UChar32)0x10005);
1620     if(test2.insert(1, nullptr, 1).length()!=2) {
1621         errln("UniStr.insert(...nullptr...) should not modify the string but does");
1622     }
1623 
1624     UErrorCode errorCode=U_ZERO_ERROR;
1625     UnicodeString
1626         test4((const UChar *)NULL),
1627         test5(TRUE, (const UChar *)NULL, 1),
1628         test6((UChar *)NULL, 5, 5),
1629         test7((const char *)NULL, 3, NULL, errorCode);
1630     if(test4.isBogus() || test5.isBogus() || test6.isBogus() || test7.isBogus()) {
1631         errln("a constructor set to bogus for a NULL input string, should be empty");
1632     }
1633 
1634     test4.setTo(NULL, 3);
1635     test5.setTo(TRUE, (const UChar *)NULL, 1);
1636     test6.setTo((UChar *)NULL, 5, 5);
1637     if(test4.isBogus() || test5.isBogus() || test6.isBogus()) {
1638         errln("a setTo() set to bogus for a NULL input string, should be empty");
1639     }
1640 
1641     // test that bogus==bogus<any
1642     if(test1!=test3 || test1.compare(test3)!=0) {
1643         errln("bogus==bogus failed");
1644     }
1645 
1646     test2.remove();
1647     if(test1>=test2 || !(test2>test1) || test1.compare(test2)>=0 || !(test2.compare(test1)>0)) {
1648         errln("bogus<empty failed");
1649     }
1650 }
1651 
1652 // StringEnumeration ------------------------------------------------------- ***
1653 // most of StringEnumeration is tested elsewhere
1654 // this test improves code coverage
1655 
// Strings returned, in this order, by TestEnumeration below;
// the last two are long in order to exercise buffer limits.
static const char *const testEnumStrings[] = {
    "a", "b", "c",
    "this is a long string which helps us test some buffer limits",
    "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"
};
1664 
1665 class TestEnumeration : public StringEnumeration {
1666 public:
TestEnumeration()1667     TestEnumeration() : i(0) {}
1668 
count(UErrorCode &) const1669     virtual int32_t count(UErrorCode& /*status*/) const {
1670         return UPRV_LENGTHOF(testEnumStrings);
1671     }
1672 
snext(UErrorCode & status)1673     virtual const UnicodeString *snext(UErrorCode &status) {
1674         if(U_SUCCESS(status) && i<UPRV_LENGTHOF(testEnumStrings)) {
1675             unistr=UnicodeString(testEnumStrings[i++], "");
1676             return &unistr;
1677         }
1678 
1679         return NULL;
1680     }
1681 
reset(UErrorCode &)1682     virtual void reset(UErrorCode& /*status*/) {
1683         i=0;
1684     }
1685 
getStaticClassID()1686     static inline UClassID getStaticClassID() {
1687         return (UClassID)&fgClassID;
1688     }
getDynamicClassID() const1689     virtual UClassID getDynamicClassID() const {
1690         return getStaticClassID();
1691     }
1692 
1693 private:
1694     static const char fgClassID;
1695 
1696     int32_t i;
1697 };
1698 
1699 const char TestEnumeration::fgClassID=0;
1700 
1701 void
TestStringEnumeration()1702 UnicodeStringTest::TestStringEnumeration() {
1703     UnicodeString s;
1704     TestEnumeration ten;
1705     int32_t i, length;
1706     UErrorCode status;
1707 
1708     const UChar *pu;
1709     const char *pc;
1710 
1711     // test the next() default implementation and ensureCharsCapacity()
1712     for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1713         status=U_ZERO_ERROR;
1714         pc=ten.next(&length, status);
1715         s=UnicodeString(testEnumStrings[i], "");
1716         if(U_FAILURE(status) || pc==NULL || length!=s.length() || UnicodeString(pc, length, "")!=s) {
1717             errln("StringEnumeration.next(%d) failed", i);
1718         }
1719     }
1720     status=U_ZERO_ERROR;
1721     if(ten.next(&length, status)!=NULL) {
1722         errln("StringEnumeration.next(done)!=NULL");
1723     }
1724 
1725     // test the unext() default implementation
1726     ten.reset(status);
1727     for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1728         status=U_ZERO_ERROR;
1729         pu=ten.unext(&length, status);
1730         s=UnicodeString(testEnumStrings[i], "");
1731         if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(TRUE, pu, length)!=s) {
1732             errln("StringEnumeration.unext(%d) failed", i);
1733         }
1734     }
1735     status=U_ZERO_ERROR;
1736     if(ten.unext(&length, status)!=NULL) {
1737         errln("StringEnumeration.unext(done)!=NULL");
1738     }
1739 
1740     // test that the default clone() implementation works, and returns NULL
1741     if(ten.clone()!=NULL) {
1742         errln("StringEnumeration.clone()!=NULL");
1743     }
1744 
1745     // test that uenum_openFromStringEnumeration() works
1746     // Need a heap allocated string enumeration because it is adopted by the UEnumeration.
1747     StringEnumeration *newTen = new TestEnumeration;
1748     status=U_ZERO_ERROR;
1749     UEnumeration *uten = uenum_openFromStringEnumeration(newTen, &status);
1750     if (uten==NULL || U_FAILURE(status)) {
1751         errln("fail at file %s, line %d, UErrorCode is %s\n", __FILE__, __LINE__, u_errorName(status));
1752         return;
1753     }
1754 
1755     // test  uenum_next()
1756     for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1757         status=U_ZERO_ERROR;
1758         pc=uenum_next(uten, &length, &status);
1759         if(U_FAILURE(status) || pc==NULL || strcmp(pc, testEnumStrings[i]) != 0) {
1760             errln("File %s, line %d, StringEnumeration.next(%d) failed", __FILE__, __LINE__, i);
1761         }
1762     }
1763     status=U_ZERO_ERROR;
1764     if(uenum_next(uten, &length, &status)!=NULL) {
1765         errln("File %s, line %d, uenum_next(done)!=NULL");
1766     }
1767 
1768     // test the uenum_unext()
1769     uenum_reset(uten, &status);
1770     for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1771         status=U_ZERO_ERROR;
1772         pu=uenum_unext(uten, &length, &status);
1773         s=UnicodeString(testEnumStrings[i], "");
1774         if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(TRUE, pu, length)!=s) {
1775             errln("File %s, Line %d, uenum_unext(%d) failed", __FILE__, __LINE__, i);
1776         }
1777     }
1778     status=U_ZERO_ERROR;
1779     if(uenum_unext(uten, &length, &status)!=NULL) {
1780         errln("File %s, Line %d, uenum_unext(done)!=NULL" __FILE__, __LINE__);
1781     }
1782 
1783     uenum_close(uten);
1784 }
1785 
1786 /*
1787  * Namespace test, to make sure that macros like UNICODE_STRING include the
1788  * namespace qualifier.
1789  *
1790  * Define a (bogus) UnicodeString class in another namespace and check for ambiguity.
1791  */
1792 namespace bogus {
1793     class UnicodeString {
1794     public:
1795         enum EInvariant { kInvariant };
UnicodeString()1796         UnicodeString() : i(1) {}
UnicodeString(UBool,const UChar *,int32_t textLength)1797         UnicodeString(UBool /*isTerminated*/, const UChar * /*text*/, int32_t textLength) : i(textLength) {(void)i;}
UnicodeString(const char *,int32_t length,enum EInvariant)1798         UnicodeString(const char * /*src*/, int32_t length, enum EInvariant /*inv*/
1799 ) : i(length) {}
1800     private:
1801         int32_t i;
1802     };
1803 }
1804 
1805 void
TestNameSpace()1806 UnicodeStringTest::TestNameSpace() {
1807     // Provoke name collision unless the UnicodeString macros properly
1808     // qualify the icu::UnicodeString class.
1809     using namespace bogus;
1810 
1811     // Use all UnicodeString macros from unistr.h.
1812     icu::UnicodeString s1=icu::UnicodeString("abc", 3, US_INV);
1813     icu::UnicodeString s2=UNICODE_STRING("def", 3);
1814     icu::UnicodeString s3=UNICODE_STRING_SIMPLE("ghi");
1815 
1816     // Make sure the compiler does not optimize away instantiation of s1, s2, s3.
1817     icu::UnicodeString s4=s1+s2+s3;
1818     if(s4.length()!=9) {
1819         errln("Something wrong with UnicodeString::operator+().");
1820     }
1821 }
1822 
1823 void
TestUTF32()1824 UnicodeStringTest::TestUTF32() {
1825     // Input string length US_STACKBUF_SIZE to cause overflow of the
1826     // initially chosen fStackBuffer due to supplementary characters.
1827     static const UChar32 utf32[] = {
1828         0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a,
1829         0x10000, 0x20000, 0xe0000, 0x10ffff
1830     };
1831     static const UChar expected_utf16[] = {
1832         0x41, 0xfffd, 0x61, 0xfffd, 0xfffd, 0xfffd, 0x5a, 0xd900, 0xdc00, 0x7a,
1833         0xd800, 0xdc00, 0xd840, 0xdc00, 0xdb40, 0xdc00, 0xdbff, 0xdfff
1834     };
1835     UnicodeString from32 = UnicodeString::fromUTF32(utf32, UPRV_LENGTHOF(utf32));
1836     UnicodeString expected(FALSE, expected_utf16, UPRV_LENGTHOF(expected_utf16));
1837     if(from32 != expected) {
1838         errln("UnicodeString::fromUTF32() did not create the expected string.");
1839     }
1840 
1841     static const UChar utf16[] = {
1842         0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
1843     };
1844     static const UChar32 expected_utf32[] = {
1845         0x41, 0xfffd, 0x61, 0xfffd, 0x5a, 0x50000, 0x7a, 0x10000, 0x10ffff
1846     };
1847     UChar32 result32[16];
1848     UErrorCode errorCode = U_ZERO_ERROR;
1849     int32_t length32 =
1850         UnicodeString(FALSE, utf16, UPRV_LENGTHOF(utf16)).
1851         toUTF32(result32, UPRV_LENGTHOF(result32), errorCode);
1852     if( length32 != UPRV_LENGTHOF(expected_utf32) ||
1853         0 != uprv_memcmp(result32, expected_utf32, length32*4) ||
1854         result32[length32] != 0
1855     ) {
1856         errln("UnicodeString::toUTF32() did not create the expected string.");
1857     }
1858 }
1859 
1860 class TestCheckedArrayByteSink : public CheckedArrayByteSink {
1861 public:
TestCheckedArrayByteSink(char * outbuf,int32_t capacity)1862     TestCheckedArrayByteSink(char* outbuf, int32_t capacity)
1863             : CheckedArrayByteSink(outbuf, capacity), calledFlush(FALSE) {}
Flush()1864     virtual void Flush() { calledFlush = TRUE; }
1865     UBool calledFlush;
1866 };
1867 
1868 void
TestUTF8()1869 UnicodeStringTest::TestUTF8() {
1870     static const uint8_t utf8[] = {
1871         // Code points:
1872         // 0x41, 0xd900,
1873         // 0x61, 0xdc00,
1874         // 0x110000, 0x5a,
1875         // 0x50000, 0x7a,
1876         // 0x10000, 0x20000,
1877         // 0xe0000, 0x10ffff
1878         0x41, 0xed, 0xa4, 0x80,
1879         0x61, 0xed, 0xb0, 0x80,
1880         0xf4, 0x90, 0x80, 0x80, 0x5a,
1881         0xf1, 0x90, 0x80, 0x80, 0x7a,
1882         0xf0, 0x90, 0x80, 0x80, 0xf0, 0xa0, 0x80, 0x80,
1883         0xf3, 0xa0, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
1884     };
1885     static const UChar expected_utf16[] = {
1886         0x41, 0xfffd, 0xfffd, 0xfffd,
1887         0x61, 0xfffd, 0xfffd, 0xfffd,
1888         0xfffd,  0xfffd, 0xfffd, 0xfffd,0x5a,
1889         0xd900, 0xdc00, 0x7a,
1890         0xd800, 0xdc00, 0xd840, 0xdc00,
1891         0xdb40, 0xdc00, 0xdbff, 0xdfff
1892     };
1893     UnicodeString from8 = UnicodeString::fromUTF8(StringPiece((const char *)utf8, (int32_t)sizeof(utf8)));
1894     UnicodeString expected(FALSE, expected_utf16, UPRV_LENGTHOF(expected_utf16));
1895 
1896     if(from8 != expected) {
1897         errln("UnicodeString::fromUTF8(StringPiece) did not create the expected string.");
1898     }
1899     std::string utf8_string((const char *)utf8, sizeof(utf8));
1900     UnicodeString from8b = UnicodeString::fromUTF8(utf8_string);
1901     if(from8b != expected) {
1902         errln("UnicodeString::fromUTF8(std::string) did not create the expected string.");
1903     }
1904 
1905     static const UChar utf16[] = {
1906         0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
1907     };
1908     static const uint8_t expected_utf8[] = {
1909         0x41, 0xef, 0xbf, 0xbd, 0x61, 0xef, 0xbf, 0xbd, 0x5a, 0xf1, 0x90, 0x80, 0x80, 0x7a,
1910         0xf0, 0x90, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
1911     };
1912     UnicodeString us(FALSE, utf16, UPRV_LENGTHOF(utf16));
1913 
1914     char buffer[64];
1915     TestCheckedArrayByteSink sink(buffer, (int32_t)sizeof(buffer));
1916     us.toUTF8(sink);
1917     if( sink.NumberOfBytesWritten() != (int32_t)sizeof(expected_utf8) ||
1918         0 != uprv_memcmp(buffer, expected_utf8, sizeof(expected_utf8))
1919     ) {
1920         errln("UnicodeString::toUTF8() did not create the expected string.");
1921     }
1922     if(!sink.calledFlush) {
1923         errln("UnicodeString::toUTF8(sink) did not sink.Flush().");
1924     }
1925     // Initial contents for testing that toUTF8String() appends.
1926     std::string result8 = "-->";
1927     std::string expected8 = "-->" + std::string((const char *)expected_utf8, sizeof(expected_utf8));
1928     // Use the return value just for testing.
1929     std::string &result8r = us.toUTF8String(result8);
1930     if(result8r != expected8 || &result8r != &result8) {
1931         errln("UnicodeString::toUTF8String() did not create the expected string.");
1932     }
1933 }
1934 
1935 // Test if this compiler supports Return Value Optimization of unnamed temporary objects.
wrapUChars(const UChar * uchars)1936 static UnicodeString wrapUChars(const UChar *uchars) {
1937     return UnicodeString(TRUE, uchars, -1);
1938 }
1939 
1940 void
TestReadOnlyAlias()1941 UnicodeStringTest::TestReadOnlyAlias() {
1942     UChar uchars[]={ 0x61, 0x62, 0 };
1943     UnicodeString alias(TRUE, uchars, 2);
1944     if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
1945         errln("UnicodeString read-only-aliasing constructor does not behave as expected.");
1946         return;
1947     }
1948     alias.truncate(1);
1949     if(alias.length()!=1 || alias.getBuffer()!=uchars) {
1950         errln("UnicodeString(read-only-alias).truncate() did not preserve aliasing as expected.");
1951     }
1952     if(alias.getTerminatedBuffer()==uchars) {
1953         errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1954               "did not allocate and copy as expected.");
1955     }
1956     if(uchars[1]!=0x62) {
1957         errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1958               "modified the original buffer.");
1959     }
1960     if(1!=u_strlen(alias.getTerminatedBuffer())) {
1961         errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1962               "does not return a buffer terminated at the proper length.");
1963     }
1964 
1965     alias.setTo(TRUE, uchars, 2);
1966     if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
1967         errln("UnicodeString read-only-aliasing setTo() does not behave as expected.");
1968         return;
1969     }
1970     alias.remove();
1971     if(alias.length()!=0) {
1972         errln("UnicodeString(read-only-alias).remove() did not work.");
1973     }
1974     if(alias.getTerminatedBuffer()==uchars) {
1975         errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
1976               "did not un-alias as expected.");
1977     }
1978     if(uchars[0]!=0x61) {
1979         errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
1980               "modified the original buffer.");
1981     }
1982     if(0!=u_strlen(alias.getTerminatedBuffer())) {
1983         errln("UnicodeString.setTo(read-only-alias).remove().getTerminatedBuffer() "
1984               "does not return a buffer terminated at length 0.");
1985     }
1986 
1987     UnicodeString longString=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789");
1988     alias.setTo(FALSE, longString.getBuffer(), longString.length());
1989     alias.remove(0, 10);
1990     if(longString.compare(10, INT32_MAX, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+10) {
1991         errln("UnicodeString.setTo(read-only-alias).remove(0, 10) did not preserve aliasing as expected.");
1992     }
1993     alias.setTo(FALSE, longString.getBuffer(), longString.length());
1994     alias.remove(27, 99);
1995     if(longString.compare(0, 27, alias)!=0 || alias.getBuffer()!=longString.getBuffer()) {
1996         errln("UnicodeString.setTo(read-only-alias).remove(27, 99) did not preserve aliasing as expected.");
1997     }
1998     alias.setTo(FALSE, longString.getBuffer(), longString.length());
1999     alias.retainBetween(6, 30);
2000     if(longString.compare(6, 24, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+6) {
2001         errln("UnicodeString.setTo(read-only-alias).retainBetween(6, 30) did not preserve aliasing as expected.");
2002     }
2003 
2004     UChar abc[]={ 0x61, 0x62, 0x63, 0 };
2005     UBool hasRVO= wrapUChars(abc).getBuffer()==abc;
2006 
2007     UnicodeString temp;
2008     temp.fastCopyFrom(longString.tempSubString());
2009     if(temp!=longString || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
2010         errln("UnicodeString.tempSubString() failed");
2011     }
2012     temp.fastCopyFrom(longString.tempSubString(-3, 5));
2013     if(longString.compare(0, 5, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
2014         errln("UnicodeString.tempSubString(-3, 5) failed");
2015     }
2016     temp.fastCopyFrom(longString.tempSubString(17));
2017     if(longString.compare(17, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+17)) {
2018         errln("UnicodeString.tempSubString(17) failed");
2019     }
2020     temp.fastCopyFrom(longString.tempSubString(99));
2021     if(!temp.isEmpty()) {
2022         errln("UnicodeString.tempSubString(99) failed");
2023     }
2024     temp.fastCopyFrom(longString.tempSubStringBetween(6));
2025     if(longString.compare(6, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+6)) {
2026         errln("UnicodeString.tempSubStringBetween(6) failed");
2027     }
2028     temp.fastCopyFrom(longString.tempSubStringBetween(8, 18));
2029     if(longString.compare(8, 10, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+8)) {
2030         errln("UnicodeString.tempSubStringBetween(8, 18) failed");
2031     }
2032     UnicodeString bogusString;
2033     bogusString.setToBogus();
2034     temp.fastCopyFrom(bogusString.tempSubStringBetween(8, 18));
2035     if(!temp.isBogus()) {
2036         errln("UnicodeString.setToBogus().tempSubStringBetween(8, 18) failed");
2037     }
2038 }
2039 
2040 void
doTestAppendable(UnicodeString & dest,Appendable & app)2041 UnicodeStringTest::doTestAppendable(UnicodeString &dest, Appendable &app) {
2042     static const UChar cde[3]={ 0x63, 0x64, 0x65 };
2043     static const UChar fg[3]={ 0x66, 0x67, 0 };
2044     if(!app.reserveAppendCapacity(12)) {
2045         errln("Appendable.reserve(12) failed");
2046     }
2047     app.appendCodeUnit(0x61);
2048     app.appendCodePoint(0x62);
2049     app.appendCodePoint(0x50000);
2050     app.appendString(cde, 3);
2051     app.appendString(fg, -1);
2052     UChar scratch[3];
2053     int32_t capacity=-1;
2054     UChar *buffer=app.getAppendBuffer(3, 3, scratch, 3, &capacity);
2055     if(capacity<3) {
2056         errln("Appendable.getAppendBuffer(min=3) returned capacity=%d<3", (int)capacity);
2057         return;
2058     }
2059     static const UChar hij[3]={ 0x68, 0x69, 0x6a };
2060     u_memcpy(buffer, hij, 3);
2061     app.appendString(buffer, 3);
2062     if(dest!=UNICODE_STRING_SIMPLE("ab\\U00050000cdefghij").unescape()) {
2063         errln("Appendable.append(...) failed");
2064     }
2065     buffer=app.getAppendBuffer(0, 3, scratch, 3, &capacity);
2066     if(buffer!=NULL || capacity!=0) {
2067         errln("Appendable.getAppendBuffer(min=0) failed");
2068     }
2069     capacity=1;
2070     buffer=app.getAppendBuffer(3, 3, scratch, 2, &capacity);
2071     if(buffer!=NULL || capacity!=0) {
2072         errln("Appendable.getAppendBuffer(scratch<min) failed");
2073     }
2074 }
2075 
2076 class SimpleAppendable : public Appendable {
2077 public:
SimpleAppendable(UnicodeString & dest)2078     explicit SimpleAppendable(UnicodeString &dest) : str(dest) {}
appendCodeUnit(UChar c)2079     virtual UBool appendCodeUnit(UChar c) { str.append(c); return TRUE; }
reset()2080     SimpleAppendable &reset() { str.remove(); return *this; }
2081 private:
2082     UnicodeString &str;
2083 };
2084 
2085 void
TestAppendable()2086 UnicodeStringTest::TestAppendable() {
2087     UnicodeString dest;
2088     SimpleAppendable app(dest);
2089     doTestAppendable(dest, app);
2090 }
2091 
2092 void
TestUnicodeStringImplementsAppendable()2093 UnicodeStringTest::TestUnicodeStringImplementsAppendable() {
2094     UnicodeString dest;
2095     UnicodeStringAppendable app(dest);
2096     doTestAppendable(dest, app);
2097 }
2098 
2099 void
TestSizeofUnicodeString()2100 UnicodeStringTest::TestSizeofUnicodeString() {
2101     // See the comments in unistr.h near the declaration of UnicodeString's fields.
2102     // See the API comments for UNISTR_OBJECT_SIZE.
2103     size_t sizeofUniStr=sizeof(UnicodeString);
2104     size_t expected=UNISTR_OBJECT_SIZE;
2105     if(expected!=sizeofUniStr) {
2106         // Possible cause: UNISTR_OBJECT_SIZE may not be a multiple of sizeof(pointer),
2107         // of the compiler might add more internal padding than expected.
2108         errln("sizeof(UnicodeString)=%d, expected UNISTR_OBJECT_SIZE=%d",
2109               (int)sizeofUniStr, (int)expected);
2110     }
2111     if(sizeofUniStr<32) {
2112         errln("sizeof(UnicodeString)=%d < 32, probably too small", (int)sizeofUniStr);
2113     }
2114     // We assume that the entire UnicodeString object,
2115     // minus the vtable pointer and 2 bytes for flags and short length,
2116     // is available for internal storage of UChars.
2117     int32_t expectedStackBufferLength=((int32_t)UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR;
2118     UnicodeString s;
2119     const UChar *emptyBuffer=s.getBuffer();
2120     for(int32_t i=0; i<expectedStackBufferLength; ++i) {
2121         s.append((UChar)0x2e);
2122     }
2123     const UChar *fullBuffer=s.getBuffer();
2124     if(fullBuffer!=emptyBuffer) {
2125         errln("unexpected reallocation when filling with assumed stack buffer size of %d",
2126               expectedStackBufferLength);
2127     }
2128     const UChar *terminatedBuffer=s.getTerminatedBuffer();
2129     if(terminatedBuffer==emptyBuffer) {
2130         errln("unexpected keeping stack buffer when overfilling assumed stack buffer size of %d",
2131               expectedStackBufferLength);
2132     }
2133 }
2134 
2135 // Try to avoid clang -Wself-move warnings from s1 = std::move(s1);
moveFrom(UnicodeString & dest,UnicodeString & src)2136 void moveFrom(UnicodeString &dest, UnicodeString &src) {
2137     dest = std::move(src);
2138 }
2139 
2140 void
TestMoveSwap()2141 UnicodeStringTest::TestMoveSwap() {
2142     static const UChar abc[3] = { 0x61, 0x62, 0x63 };  // "abc"
2143     UnicodeString s1(FALSE, abc, UPRV_LENGTHOF(abc));  // read-only alias
2144     UnicodeString s2(100, 0x7a, 100);  // 100 * 'z' should be on the heap
2145     UnicodeString s3("defg", 4, US_INV);  // in stack buffer
2146     const UChar *p = s2.getBuffer();
2147     s1.swap(s2);
2148     if(s1.getBuffer() != p || s1.length() != 100 || s2.getBuffer() != abc || s2.length() != 3) {
2149         errln("UnicodeString.swap() did not swap");
2150     }
2151     swap(s2, s3);
2152     if(s2 != UNICODE_STRING_SIMPLE("defg") || s3.getBuffer() != abc || s3.length() != 3) {
2153         errln("swap(UnicodeString) did not swap back");
2154     }
2155     UnicodeString s4;
2156     s4 = std::move(s1);
2157     if(s4.getBuffer() != p || s4.length() != 100 || !s1.isBogus()) {
2158         errln("UnicodeString = std::move(heap) did not move");
2159     }
2160     UnicodeString s5;
2161     s5 = std::move(s2);
2162     if(s5 != UNICODE_STRING_SIMPLE("defg")) {
2163         errln("UnicodeString = std::move(stack) did not move");
2164     }
2165     UnicodeString s6;
2166     s6 = std::move(s3);
2167     if(s6.getBuffer() != abc || s6.length() != 3) {
2168         errln("UnicodeString = std::move(alias) did not move");
2169     }
2170     infoln("TestMoveSwap() with rvalue references");
2171     s1 = static_cast<UnicodeString &&>(s6);
2172     if(s1.getBuffer() != abc || s1.length() != 3) {
2173         errln("UnicodeString move assignment operator did not move");
2174     }
2175     UnicodeString s7(static_cast<UnicodeString &&>(s4));
2176     if(s7.getBuffer() != p || s7.length() != 100 || !s4.isBogus()) {
2177         errln("UnicodeString move constructor did not move");
2178     }
2179 
2180     // Move self assignment leaves the object valid but in an undefined state.
2181     // Do it to make sure there is no crash,
2182     // but do not check for any particular resulting value.
2183     moveFrom(s1, s1);
2184     moveFrom(s2, s2);
2185     moveFrom(s3, s3);
2186     moveFrom(s4, s4);
2187     moveFrom(s5, s5);
2188     moveFrom(s6, s6);
2189     moveFrom(s7, s7);
2190     // Simple copy assignment must work.
2191     UnicodeString simple = UNICODE_STRING_SIMPLE("simple");
2192     s1 = s6 = s4 = s7 = simple;
2193     if(s1 != simple || s4 != simple || s6 != simple || s7 != simple) {
2194         errln("UnicodeString copy after self-move did not work");
2195     }
2196 }
2197 
2198 void
TestUInt16Pointers()2199 UnicodeStringTest::TestUInt16Pointers() {
2200     static const uint16_t carr[] = { 0x61, 0x62, 0x63, 0 };
2201     uint16_t arr[4];
2202 
2203     UnicodeString expected(u"abc");
2204     assertEquals("abc from pointer", expected, UnicodeString(carr));
2205     assertEquals("abc from pointer+length", expected, UnicodeString(carr, 3));
2206     assertEquals("abc from read-only-alias pointer", expected, UnicodeString(TRUE, carr, 3));
2207 
2208     UnicodeString alias(arr, 0, 4);
2209     alias.append(u'a').append(u'b').append(u'c');
2210     assertEquals("abc from writable alias", expected, alias);
2211     assertEquals("buffer=abc from writable alias", expected, UnicodeString(arr, 3));
2212 
2213     UErrorCode errorCode = U_ZERO_ERROR;
2214     int32_t length = UnicodeString(u"def").extract(arr, 4, errorCode);
2215     assertSuccess(WHERE, errorCode);
2216     assertEquals("def from extract()", UnicodeString(u"def"), UnicodeString(arr, length));
2217 }
2218 
2219 void
TestWCharPointers()2220 UnicodeStringTest::TestWCharPointers() {
2221 #if U_SIZEOF_WCHAR_T==2
2222     static const wchar_t carr[] = { 0x61, 0x62, 0x63, 0 };
2223     wchar_t arr[4];
2224 
2225     UnicodeString expected(u"abc");
2226     assertEquals("abc from pointer", expected, UnicodeString(carr));
2227     assertEquals("abc from pointer+length", expected, UnicodeString(carr, 3));
2228     assertEquals("abc from read-only-alias pointer", expected, UnicodeString(TRUE, carr, 3));
2229 
2230     UnicodeString alias(arr, 0, 4);
2231     alias.append(u'a').append(u'b').append(u'c');
2232     assertEquals("abc from writable alias", expected, alias);
2233     assertEquals("buffer=abc from writable alias", expected, UnicodeString(arr, 3));
2234 
2235     UErrorCode errorCode = U_ZERO_ERROR;
2236     int32_t length = UnicodeString(u"def").extract(arr, 4, errorCode);
2237     assertSuccess(WHERE, errorCode);
2238     assertEquals("def from extract()", UnicodeString(u"def"), UnicodeString(arr, length));
2239 #endif
2240 }
2241 
2242 void
TestNullPointers()2243 UnicodeStringTest::TestNullPointers() {
2244     assertTrue("empty from nullptr", UnicodeString(nullptr).isEmpty());
2245     assertTrue("empty from nullptr+length", UnicodeString(nullptr, 2).isEmpty());
2246     assertTrue("empty from read-only-alias nullptr", UnicodeString(TRUE, nullptr, 3).isEmpty());
2247 
2248     UnicodeString alias(nullptr, 4, 4);  // empty, no alias
2249     assertTrue("empty from writable alias", alias.isEmpty());
2250     alias.append(u'a').append(u'b').append(u'c');
2251     UnicodeString expected(u"abc");
2252     assertEquals("abc from writable alias", expected, alias);
2253 
2254     UErrorCode errorCode = U_ZERO_ERROR;
2255     UnicodeString(u"def").extract(nullptr, 0, errorCode);
2256     assertEquals("buffer overflow extracting to nullptr", U_BUFFER_OVERFLOW_ERROR, errorCode);
2257 }
2258 
TestUnicodeStringInsertAppendToSelf()2259 void UnicodeStringTest::TestUnicodeStringInsertAppendToSelf() {
2260     IcuTestErrorCode status(*this, "TestUnicodeStringAppendToSelf");
2261 
2262     // Test append operation
2263     UnicodeString str(u"foo ");
2264     str.append(str);
2265     str.append(str);
2266     str.append(str);
2267     assertEquals("", u"foo foo foo foo foo foo foo foo ", str);
2268 
2269     // Test append operation with readonly alias to start
2270     str = UnicodeString(TRUE, u"foo ", 4);
2271     str.append(str);
2272     str.append(str);
2273     str.append(str);
2274     assertEquals("", u"foo foo foo foo foo foo foo foo ", str);
2275 
2276     // Test append operation with aliased substring
2277     str = u"abcde";
2278     UnicodeString sub = str.tempSubString(1, 2);
2279     str.append(sub);
2280     assertEquals("", u"abcdebc", str);
2281 
2282     // Test append operation with double-aliased substring
2283     str = UnicodeString(TRUE, u"abcde", 5);
2284     sub = str.tempSubString(1, 2);
2285     str.append(sub);
2286     assertEquals("", u"abcdebc", str);
2287 
2288     // Test insert operation
2289     str = u"a-*b";
2290     str.insert(2, str);
2291     str.insert(4, str);
2292     str.insert(8, str);
2293     assertEquals("", u"a-a-a-a-a-a-a-a-*b*b*b*b*b*b*b*b", str);
2294 
2295     // Test insert operation with readonly alias to start
2296     str = UnicodeString(TRUE, u"a-*b", 4);
2297     str.insert(2, str);
2298     str.insert(4, str);
2299     str.insert(8, str);
2300     assertEquals("", u"a-a-a-a-a-a-a-a-*b*b*b*b*b*b*b*b", str);
2301 
2302     // Test insert operation with aliased substring
2303     str = u"abcde";
2304     sub = str.tempSubString(1, 3);
2305     str.insert(2, sub);
2306     assertEquals("", u"abbcdcde", str);
2307 
2308     // Test insert operation with double-aliased substring
2309     str = UnicodeString(TRUE, u"abcde", 5);
2310     sub = str.tempSubString(1, 3);
2311     str.insert(2, sub);
2312     assertEquals("", u"abbcdcde", str);
2313 }
2314 
TestLargeAppend()2315 void UnicodeStringTest::TestLargeAppend() {
2316     if(quick) return;
2317 
2318     IcuTestErrorCode status(*this, "TestLargeAppend");
2319     // Make a large UnicodeString
2320     int32_t len = 0xAFFFFFF;
2321     UnicodeString str;
2322     char16_t *buf = str.getBuffer(len);
2323     // A fast way to set buffer to valid Unicode.
2324     // 4E4E is a valid unicode character
2325     uprv_memset(buf, 0x4e, len * 2);
2326     str.releaseBuffer(len);
2327     UnicodeString dest;
2328     // Append it 16 times
2329     // 0xAFFFFFF times 16 is 0xA4FFFFF1,
2330     // which is greater than INT32_MAX, which is 0x7FFFFFFF.
2331     int64_t total = 0;
2332     for (int32_t i = 0; i < 16; i++) {
2333         dest.append(str);
2334         total += len;
2335         if (total <= INT32_MAX) {
2336             assertFalse("dest is not bogus", dest.isBogus());
2337         } else {
2338             assertTrue("dest should be bogus", dest.isBogus());
2339         }
2340     }
2341     dest.remove();
2342     total = 0;
2343     for (int32_t i = 0; i < 16; i++) {
2344         dest.append(str);
2345         total += len;
2346         if (total + len <= INT32_MAX) {
2347             assertFalse("dest is not bogus", dest.isBogus());
2348         } else if (total <= INT32_MAX) {
2349             // Check that a string of exactly the maximum size works
2350             UnicodeString str2;
2351             int32_t remain = INT32_MAX - total;
2352             char16_t *buf2 = str2.getBuffer(remain);
2353             if (buf2 == nullptr) {
2354                 // if somehow memory allocation fail, return the test
2355                 return;
2356             }
2357             uprv_memset(buf2, 0x4e, remain * 2);
2358             str2.releaseBuffer(remain);
2359             dest.append(str2);
2360             total += remain;
2361             assertEquals("When a string of exactly the maximum size works", (int64_t)INT32_MAX, total);
2362             assertEquals("When a string of exactly the maximum size works", INT32_MAX, dest.length());
2363             assertFalse("dest is not bogus", dest.isBogus());
2364 
2365             // Check that a string size+1 goes bogus
2366             str2.truncate(1);
2367             dest.append(str2);
2368             total++;
2369             assertTrue("dest should be bogus", dest.isBogus());
2370         } else {
2371             assertTrue("dest should be bogus", dest.isBogus());
2372         }
2373     }
2374 }
2375