• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 
9 #include "ustrtest.h"
10 #include "unicode/appendable.h"
11 #include "unicode/std_string.h"
12 #include "unicode/unistr.h"
13 #include "unicode/uchar.h"
14 #include "unicode/ustring.h"
15 #include "unicode/locid.h"
16 #include "unicode/strenum.h"
17 #include "unicode/ucnv.h"
18 #include "unicode/uenum.h"
19 #include "unicode/utf16.h"
20 #include "cmemory.h"
21 #include "charstr.h"
22 
23 #if 0
24 #include "unicode/ustream.h"
25 
26 #include <iostream>
27 using namespace std;
28 
29 #endif
30 
~UnicodeStringTest()31 UnicodeStringTest::~UnicodeStringTest() {}
32 
33 extern IntlTest *createStringCaseTest();
34 
runIndexedTest(int32_t index,UBool exec,const char * & name,char * par)35 void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char *par)
36 {
37     if (exec) logln("TestSuite UnicodeStringTest: ");
38     TESTCASE_AUTO_BEGIN;
39     TESTCASE_AUTO_CREATE_CLASS(StringCaseTest);
40     TESTCASE_AUTO(TestBasicManipulation);
41     TESTCASE_AUTO(TestCompare);
42     TESTCASE_AUTO(TestExtract);
43     TESTCASE_AUTO(TestRemoveReplace);
44     TESTCASE_AUTO(TestSearching);
45     TESTCASE_AUTO(TestSpacePadding);
46     TESTCASE_AUTO(TestPrefixAndSuffix);
47     TESTCASE_AUTO(TestFindAndReplace);
48     TESTCASE_AUTO(TestBogus);
49     TESTCASE_AUTO(TestReverse);
50     TESTCASE_AUTO(TestMiscellaneous);
51     TESTCASE_AUTO(TestStackAllocation);
52     TESTCASE_AUTO(TestUnescape);
53     TESTCASE_AUTO(TestCountChar32);
54     TESTCASE_AUTO(TestStringEnumeration);
55     TESTCASE_AUTO(TestNameSpace);
56     TESTCASE_AUTO(TestUTF32);
57     TESTCASE_AUTO(TestUTF8);
58     TESTCASE_AUTO(TestReadOnlyAlias);
59     TESTCASE_AUTO(TestAppendable);
60     TESTCASE_AUTO(TestUnicodeStringImplementsAppendable);
61     TESTCASE_AUTO(TestSizeofUnicodeString);
62     TESTCASE_AUTO(TestStartsWithAndEndsWithNulTerminated);
63     TESTCASE_AUTO(TestMoveSwap);
64     TESTCASE_AUTO(TestUInt16Pointers);
65     TESTCASE_AUTO(TestWCharPointers);
66     TESTCASE_AUTO(TestNullPointers);
67     TESTCASE_AUTO_END;
68 }
69 
70 void
TestBasicManipulation()71 UnicodeStringTest::TestBasicManipulation()
72 {
73     UnicodeString   test1("Now is the time for all men to come swiftly to the aid of the party.\n");
74     UnicodeString   expectedValue;
75     UnicodeString   *c;
76 
77     c=(UnicodeString *)test1.clone();
78     test1.insert(24, "good ");
79     expectedValue = "Now is the time for all good men to come swiftly to the aid of the party.\n";
80     if (test1 != expectedValue)
81         errln("insert() failed:  expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
82 
83     c->insert(24, "good ");
84     if(*c != expectedValue) {
85         errln("clone()->insert() failed:  expected \"" + expectedValue + "\"\n,got \"" + *c + "\"");
86     }
87     delete c;
88 
89     test1.remove(41, 8);
90     expectedValue = "Now is the time for all good men to come to the aid of the party.\n";
91     if (test1 != expectedValue)
92         errln("remove() failed:  expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
93 
94     test1.replace(58, 6, "ir country");
95     expectedValue = "Now is the time for all good men to come to the aid of their country.\n";
96     if (test1 != expectedValue)
97         errln("replace() failed:  expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
98 
99     UChar     temp[80];
100     test1.extract(0, 15, temp);
101 
102     UnicodeString       test2(temp, 15);
103 
104     expectedValue = "Now is the time";
105     if (test2 != expectedValue)
106         errln("extract() failed:  expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
107 
108     test2 += " for me to go!\n";
109     expectedValue = "Now is the time for me to go!\n";
110     if (test2 != expectedValue)
111         errln("operator+=() failed:  expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
112 
113     if (test1.length() != 70)
114         errln(UnicodeString("length() failed: expected 70, got ") + test1.length());
115     if (test2.length() != 30)
116         errln(UnicodeString("length() failed: expected 30, got ") + test2.length());
117 
118     UnicodeString test3;
119     test3.append((UChar32)0x20402);
120     if(test3 != CharsToUnicodeString("\\uD841\\uDC02")){
121         errln((UnicodeString)"append failed for UChar32, expected \"\\\\ud841\\\\udc02\", got " + prettify(test3));
122     }
123     if(test3.length() != 2){
124         errln(UnicodeString("append or length failed for UChar32, expected 2, got ") + test3.length());
125     }
126     test3.append((UChar32)0x0074);
127     if(test3 != CharsToUnicodeString("\\uD841\\uDC02t")){
128         errln((UnicodeString)"append failed for UChar32, expected \"\\\\uD841\\\\uDC02t\", got " + prettify(test3));
129     }
130     if(test3.length() != 3){
131         errln((UnicodeString)"append or length failed for UChar32, expected 2, got " + test3.length());
132     }
133 
134     // test some UChar32 overloads
135     if( test3.setTo((UChar32)0x10330).length() != 2 ||
136         test3.insert(0, (UChar32)0x20100).length() != 4 ||
137         test3.replace(2, 2, (UChar32)0xe0061).length() != 4 ||
138         (test3 = (UChar32)0x14001).length() != 2
139     ) {
140         errln((UnicodeString)"simple UChar32 overloads for replace, insert, setTo or = failed");
141     }
142 
143     {
144         // test moveIndex32()
145         UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
146 
147         if(
148             s.moveIndex32(2, -1)!=0 ||
149             s.moveIndex32(2, 1)!=4 ||
150             s.moveIndex32(2, 2)!=5 ||
151             s.moveIndex32(5, -2)!=2 ||
152             s.moveIndex32(0, -1)!=0 ||
153             s.moveIndex32(6, 1)!=6
154         ) {
155             errln("UnicodeString::moveIndex32() failed");
156         }
157 
158         if(s.getChar32Start(1)!=0 || s.getChar32Start(2)!=2) {
159             errln("UnicodeString::getChar32Start() failed");
160         }
161 
162         if(s.getChar32Limit(1)!=2 || s.getChar32Limit(2)!=2) {
163             errln("UnicodeString::getChar32Limit() failed");
164         }
165     }
166 
167     {
168         // test new 2.2 constructors and setTo function that parallel Java's substring function.
169         UnicodeString src("Hello folks how are you?");
170         UnicodeString target1("how are you?");
171         if (target1 != UnicodeString(src, 12)) {
172             errln("UnicodeString(const UnicodeString&, int32_t) failed");
173         }
174         UnicodeString target2("folks");
175         if (target2 != UnicodeString(src, 6, 5)) {
176             errln("UnicodeString(const UnicodeString&, int32_t, int32_t) failed");
177         }
178         if (target1 != target2.setTo(src, 12)) {
179             errln("UnicodeString::setTo(const UnicodeString&, int32_t) failed");
180         }
181     }
182 
183     {
184         // op+ is new in ICU 2.8
185         UnicodeString s=UnicodeString("abc", "")+UnicodeString("def", "")+UnicodeString("ghi", "");
186         if(s!=UnicodeString("abcdefghi", "")) {
187             errln("operator+(UniStr, UniStr) failed");
188         }
189     }
190 
191     {
192         // tests for Jitterbug 2360
193         // verify that APIs with source pointer + length accept length == -1
194         // mostly test only where modified, only few functions did not already do this
195         if(UnicodeString("abc", -1, "")!=UnicodeString("abc", "")) {
196             errln("UnicodeString(codepageData, dataLength, codepage) does not work with dataLength==-1");
197         }
198 
199         UChar buffer[10]={ 0x61, 0x62, 0x20ac, 0xd900, 0xdc05, 0,   0x62, 0xffff, 0xdbff, 0xdfff };
200         UnicodeString s, t(buffer, -1, UPRV_LENGTHOF(buffer));
201 
202         if(s.setTo(buffer, -1, UPRV_LENGTHOF(buffer)).length()!=u_strlen(buffer)) {
203             errln("UnicodeString.setTo(buffer, length, capacity) does not work with length==-1");
204         }
205         if(t.length()!=u_strlen(buffer)) {
206             errln("UnicodeString(buffer, length, capacity) does not work with length==-1");
207         }
208 
209         if(0!=s.caseCompare(buffer, -1, U_FOLD_CASE_DEFAULT)) {
210             errln("UnicodeString.caseCompare(const UChar *, length, options) does not work with length==-1");
211         }
212         if(0!=s.caseCompare(0, s.length(), buffer, U_FOLD_CASE_DEFAULT)) {
213             errln("UnicodeString.caseCompare(start, _length, const UChar *, options) does not work");
214         }
215 
216         buffer[u_strlen(buffer)]=0xe4;
217         UnicodeString u(buffer, -1, UPRV_LENGTHOF(buffer));
218         if(s.setTo(buffer, -1, UPRV_LENGTHOF(buffer)).length()!=UPRV_LENGTHOF(buffer)) {
219             errln("UnicodeString.setTo(buffer without NUL, length, capacity) does not work with length==-1");
220         }
221         if(u.length()!=UPRV_LENGTHOF(buffer)) {
222             errln("UnicodeString(buffer without NUL, length, capacity) does not work with length==-1");
223         }
224 
225         static const char cs[]={ 0x61, (char)0xe4, (char)0x85, 0 };
226         UConverter *cnv;
227         UErrorCode errorCode=U_ZERO_ERROR;
228 
229         cnv=ucnv_open("ISO-8859-1", &errorCode);
230         UnicodeString v(cs, -1, cnv, errorCode);
231         ucnv_close(cnv);
232         if(v!=CharsToUnicodeString("a\\xe4\\x85")) {
233             errln("UnicodeString(const char *, length, cnv, errorCode) does not work with length==-1");
234         }
235     }
236 
237 #if U_CHARSET_IS_UTF8
238     {
239         // Test the hardcoded-UTF-8 UnicodeString optimizations.
240         static const uint8_t utf8[]={ 0x61, 0xC3, 0xA4, 0xC3, 0x9F, 0xE4, 0xB8, 0x80, 0 };
241         static const UChar utf16[]={ 0x61, 0xE4, 0xDF, 0x4E00 };
242         UnicodeString from8a = UnicodeString((const char *)utf8);
243         UnicodeString from8b = UnicodeString((const char *)utf8, (int32_t)sizeof(utf8)-1);
244         UnicodeString from16(FALSE, utf16, UPRV_LENGTHOF(utf16));
245         if(from8a != from16 || from8b != from16) {
246             errln("UnicodeString(const char * U_CHARSET_IS_UTF8) failed");
247         }
248         char buffer[16];
249         int32_t length8=from16.extract(0, 0x7fffffff, buffer, (uint32_t)sizeof(buffer));
250         if(length8!=((int32_t)sizeof(utf8)-1) || 0!=uprv_memcmp(buffer, utf8, sizeof(utf8))) {
251             errln("UnicodeString::extract(char * U_CHARSET_IS_UTF8) failed");
252         }
253         length8=from16.extract(1, 2, buffer, (uint32_t)sizeof(buffer));
254         if(length8!=4 || buffer[length8]!=0 || 0!=uprv_memcmp(buffer, utf8+1, length8)) {
255             errln("UnicodeString::extract(substring to char * U_CHARSET_IS_UTF8) failed");
256         }
257     }
258 #endif
259 }
260 
261 void
TestCompare()262 UnicodeStringTest::TestCompare()
263 {
264     UnicodeString   test1("this is a test");
265     UnicodeString   test2("this is a test");
266     UnicodeString   test3("this is a test of the emergency broadcast system");
267     UnicodeString   test4("never say, \"this is a test\"!!");
268 
269     UnicodeString   test5((UChar)0x5000);
270     UnicodeString   test6((UChar)0x5100);
271 
272     UChar         uniChars[] = { 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73,
273                  0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0 };
274     char            chars[] = "this is a test";
275 
276     // test operator== and operator!=
277     if (test1 != test2 || test1 == test3 || test1 == test4)
278         errln("operator== or operator!= failed");
279 
280     // test operator> and operator<
281     if (test1 > test2 || test1 < test2 || !(test1 < test3) || !(test1 > test4) ||
282         !(test5 < test6)
283     ) {
284         errln("operator> or operator< failed");
285     }
286 
287     // test operator>= and operator<=
288     if (!(test1 >= test2) || !(test1 <= test2) || !(test1 <= test3) || !(test1 >= test4))
289         errln("operator>= or operator<= failed");
290 
291     // test compare(UnicodeString)
292     if (test1.compare(test2) != 0 || test1.compare(test3) >= 0 || test1.compare(test4) <= 0)
293         errln("compare(UnicodeString) failed");
294 
295     //test compare(offset, length, UnicodeString)
296     if(test1.compare(0, 14, test2) != 0 ||
297         test3.compare(0, 14, test2) != 0 ||
298         test4.compare(12, 14, test2) != 0 ||
299         test3.compare(0, 18, test1) <=0  )
300         errln("compare(offset, length, UnicodeString) failes");
301 
302     // test compare(UChar*)
303     if (test2.compare(uniChars) != 0 || test3.compare(uniChars) <= 0 || test4.compare(uniChars) >= 0)
304         errln("compare(UChar*) failed");
305 
306     // test compare(char*)
307     if (test2.compare(chars) != 0 || test3.compare(chars) <= 0 || test4.compare(chars) >= 0)
308         errln("compare(char*) failed");
309 
310     // test compare(UChar*, length)
311     if (test1.compare(uniChars, 4) <= 0 || test1.compare(uniChars, 4) <= 0)
312         errln("compare(UChar*, length) failed");
313 
314     // test compare(thisOffset, thisLength, that, thatOffset, thatLength)
315     if (test1.compare(0, 14, test2, 0, 14) != 0
316     || test1.compare(0, 14, test3, 0, 14) != 0
317     || test1.compare(0, 14, test4, 12, 14) != 0)
318         errln("1. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
319 
320     if (test1.compare(10, 4, test2, 0, 4) >= 0
321     || test1.compare(10, 4, test3, 22, 9) <= 0
322     || test1.compare(10, 4, test4, 22, 4) != 0)
323         errln("2. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
324 
325     // test compareBetween
326     if (test1.compareBetween(0, 14, test2, 0, 14) != 0 || test1.compareBetween(0, 14, test3, 0, 14) != 0
327                     || test1.compareBetween(0, 14, test4, 12, 26) != 0)
328         errln("compareBetween failed");
329 
330     if (test1.compareBetween(10, 14, test2, 0, 4) >= 0 || test1.compareBetween(10, 14, test3, 22, 31) <= 0
331                     || test1.compareBetween(10, 14, test4, 22, 26) != 0)
332         errln("compareBetween failed");
333 
334     // test compare() etc. with strings that share a buffer but are not equal
335     test2=test1; // share the buffer, length() too large for the stackBuffer
336     test2.truncate(1); // change only the length, not the buffer
337     if( test1==test2 || test1<=test2 ||
338         test1.compare(test2)<=0 ||
339         test1.compareCodePointOrder(test2)<=0 ||
340         test1.compareCodePointOrder(0, INT32_MAX, test2)<=0 ||
341         test1.compareCodePointOrder(0, INT32_MAX, test2, 0, INT32_MAX)<=0 ||
342         test1.compareCodePointOrderBetween(0, INT32_MAX, test2, 0, INT32_MAX)<=0 ||
343         test1.caseCompare(test2, U_FOLD_CASE_DEFAULT)<=0
344     ) {
345         errln("UnicodeStrings that share a buffer but have different lengths compare as equal");
346     }
347 
348     /* test compareCodePointOrder() */
349     {
350         /* these strings are in ascending order */
351         static const UChar strings[][4]={
352             { 0x61, 0 },                    /* U+0061 */
353             { 0x20ac, 0xd801, 0 },          /* U+20ac U+d801 */
354             { 0x20ac, 0xd800, 0xdc00, 0 },  /* U+20ac U+10000 */
355             { 0xd800, 0 },                  /* U+d800 */
356             { 0xd800, 0xff61, 0 },          /* U+d800 U+ff61 */
357             { 0xdfff, 0 },                  /* U+dfff */
358             { 0xff61, 0xdfff, 0 },          /* U+ff61 U+dfff */
359             { 0xff61, 0xd800, 0xdc02, 0 },  /* U+ff61 U+10002 */
360             { 0xd800, 0xdc02, 0 },          /* U+10002 */
361             { 0xd84d, 0xdc56, 0 }           /* U+23456 */
362         };
363         UnicodeString u[20]; // must be at least as long as strings[]
364         int32_t i;
365 
366         for(i=0; i<UPRV_LENGTHOF(strings); ++i) {
367             u[i]=UnicodeString(TRUE, strings[i], -1);
368         }
369 
370         for(i=0; i<UPRV_LENGTHOF(strings)-1; ++i) {
371             if(u[i].compareCodePointOrder(u[i+1])>=0 || u[i].compareCodePointOrder(0, INT32_MAX, u[i+1].getBuffer())>=0) {
372                 errln("error: UnicodeString::compareCodePointOrder() fails for string %d and the following one\n", i);
373             }
374         }
375     }
376 
377     /* test caseCompare() */
378     {
379         static const UChar
380         _mixed[]=               { 0x61, 0x42, 0x131, 0x3a3, 0xdf,       0x130,       0x49,  0xfb03,           0xd93f, 0xdfff, 0 },
381         _otherDefault[]=        { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x69, 0x307, 0x69,  0x46, 0x66, 0x49, 0xd93f, 0xdfff, 0 },
382         _otherExcludeSpecialI[]={ 0x41, 0x62, 0x131, 0x3c3, 0x53, 0x73, 0x69,        0x131, 0x66, 0x46, 0x69, 0xd93f, 0xdfff, 0 },
383         _different[]=           { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x130,       0x49,  0x46, 0x66, 0x49, 0xd93f, 0xdffd, 0 };
384 
385         UnicodeString
386             mixed(TRUE, _mixed, -1),
387             otherDefault(TRUE, _otherDefault, -1),
388             otherExcludeSpecialI(TRUE, _otherExcludeSpecialI, -1),
389             different(TRUE, _different, -1);
390 
391         int8_t result;
392 
393         /* test caseCompare() */
394         result=mixed.caseCompare(otherDefault, U_FOLD_CASE_DEFAULT);
395         if(result!=0 || 0!=mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_DEFAULT)) {
396             errln("error: mixed.caseCompare(other, default)=%ld instead of 0\n", result);
397         }
398         result=mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
399         if(result!=0) {
400             errln("error: mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=%ld instead of 0\n", result);
401         }
402         result=mixed.caseCompare(otherDefault, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
403         if(result==0 || 0==mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_EXCLUDE_SPECIAL_I)) {
404             errln("error: mixed.caseCompare(other, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=0 instead of !=0\n");
405         }
406 
407         /* test caseCompare() */
408         result=mixed.caseCompare(different, U_FOLD_CASE_DEFAULT);
409         if(result<=0) {
410             errln("error: mixed.caseCompare(different, default)=%ld instead of positive\n", result);
411         }
412 
413         /* test caseCompare() - include the folded sharp s (U+00df) with different lengths */
414         result=mixed.caseCompare(1, 4, different, 1, 5, U_FOLD_CASE_DEFAULT);
415         if(result!=0 || 0!=mixed.caseCompareBetween(1, 5, different, 1, 6, U_FOLD_CASE_DEFAULT)) {
416             errln("error: mixed.caseCompare(mixed, 1, 4, different, 1, 5, default)=%ld instead of 0\n", result);
417         }
418 
419         /* test caseCompare() - stop in the middle of the sharp s (U+00df) */
420         result=mixed.caseCompare(1, 4, different, 1, 4, U_FOLD_CASE_DEFAULT);
421         if(result<=0) {
422             errln("error: mixed.caseCompare(1, 4, different, 1, 4, default)=%ld instead of positive\n", result);
423         }
424     }
425 
426     // test that srcLength=-1 is handled in functions that
427     // take input const UChar */int32_t srcLength (j785)
428     {
429         static const UChar u[]={ 0x61, 0x308, 0x62, 0 };
430         UnicodeString s=UNICODE_STRING("a\\u0308b", 8).unescape();
431 
432         if(s.compare(u, -1)!=0 || s.compare(0, 999, u, 0, -1)!=0) {
433             errln("error UnicodeString::compare(..., const UChar *, srcLength=-1) does not work");
434         }
435 
436         if(s.compareCodePointOrder(u, -1)!=0 || s.compareCodePointOrder(0, 999, u, 0, -1)!=0) {
437             errln("error UnicodeString::compareCodePointOrder(..., const UChar *, srcLength=-1, ...) does not work");
438         }
439 
440         if(s.caseCompare(u, -1, U_FOLD_CASE_DEFAULT)!=0 || s.caseCompare(0, 999, u, 0, -1, U_FOLD_CASE_DEFAULT)!=0) {
441             errln("error UnicodeString::caseCompare(..., const UChar *, srcLength=-1, ...) does not work");
442         }
443 
444         if(s.indexOf(u, 1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0)!=1) {
445             errln("error UnicodeString::indexOf(const UChar *, srcLength=-1, ...) does not work");
446         }
447 
448         if(s.lastIndexOf(u, 1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0)!=1) {
449             errln("error UnicodeString::lastIndexOf(const UChar *, srcLength=-1, ...) does not work");
450         }
451 
452         UnicodeString s2, s3;
453         s2.replace(0, 0, u+1, -1);
454         s3.replace(0, 0, u, 1, -1);
455         if(s.compare(1, 999, s2)!=0 || s2!=s3) {
456             errln("error UnicodeString::replace(..., const UChar *, srcLength=-1, ...) does not work");
457         }
458     }
459 }
460 
461 void
TestExtract()462 UnicodeStringTest::TestExtract()
463 {
464     UnicodeString  test1("Now is the time for all good men to come to the aid of their country.", "");
465     UnicodeString  test2;
466     UChar          test3[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
467     char           test4[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
468     UnicodeString  test5;
469     char           test6[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
470 
471     test1.extract(11, 12, test2);
472     test1.extract(11, 12, test3);
473     if (test1.extract(11, 12, test4) != 12 || test4[12] != 0) {
474         errln("UnicodeString.extract(char *) failed to return the correct size of destination buffer.");
475     }
476 
477     // test proper pinning in extractBetween()
478     test1.extractBetween(-3, 7, test5);
479     if(test5!=UNICODE_STRING("Now is ", 7)) {
480         errln("UnicodeString.extractBetween(-3, 7) did not pin properly.");
481     }
482 
483     test1.extractBetween(11, 23, test5);
484     if (test1.extract(60, 71, test6) != 9) {
485         errln("UnicodeString.extract() failed to return the correct size of destination buffer for end of buffer.");
486     }
487     if (test1.extract(11, 12, test6) != 12) {
488         errln("UnicodeString.extract() failed to return the correct size of destination buffer.");
489     }
490 
491     // convert test4 back to Unicode for comparison
492     UnicodeString test4b(test4, 12);
493 
494     if (test1.extract(11, 12, (char *)NULL) != 12) {
495         errln("UnicodeString.extract(NULL) failed to return the correct size of destination buffer.");
496     }
497     if (test1.extract(11, -1, test6) != 0) {
498         errln("UnicodeString.extract(-1) failed to stop reading the string.");
499     }
500 
501     for (int32_t i = 0; i < 12; i++) {
502         if (test1.charAt((int32_t)(11 + i)) != test2.charAt(i)) {
503             errln(UnicodeString("extracting into a UnicodeString failed at position ") + i);
504             break;
505         }
506         if (test1.charAt((int32_t)(11 + i)) != test3[i]) {
507             errln(UnicodeString("extracting into an array of UChar failed at position ") + i);
508             break;
509         }
510         if (((char)test1.charAt((int32_t)(11 + i))) != test4b.charAt(i)) {
511             errln(UnicodeString("extracting into an array of char failed at position ") + i);
512             break;
513         }
514         if (test1.charAt((int32_t)(11 + i)) != test5.charAt(i)) {
515             errln(UnicodeString("extracting with extractBetween failed at position ") + i);
516             break;
517         }
518     }
519 
520     // test preflighting and overflows with invariant conversion
521     if (test1.extract(0, 10, (char *)NULL, "") != 10) {
522         errln("UnicodeString.extract(0, 10, (char *)NULL, \"\") != 10");
523     }
524 
525     test4[2] = (char)0xff;
526     if (test1.extract(0, 10, test4, 2, "") != 10) {
527         errln("UnicodeString.extract(0, 10, test4, 2, \"\") != 10");
528     }
529     if (test4[2] != (char)0xff) {
530         errln("UnicodeString.extract(0, 10, test4, 2, \"\") overwrote test4[2]");
531     }
532 
533     {
534         // test new, NUL-terminating extract() function
535         UnicodeString s("terminate", "");
536         UChar dest[20]={
537             0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5,
538             0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5
539         };
540         UErrorCode errorCode;
541         int32_t length;
542 
543         errorCode=U_ZERO_ERROR;
544         length=s.extract((UChar *)NULL, 0, errorCode);
545         if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) {
546             errln("UnicodeString.extract(NULL, 0)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)", length, s.length(), u_errorName(errorCode));
547         }
548 
549         errorCode=U_ZERO_ERROR;
550         length=s.extract(dest, s.length()-1, errorCode);
551         if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) {
552             errln("UnicodeString.extract(dest too short)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)",
553                 length, u_errorName(errorCode), s.length());
554         }
555 
556         errorCode=U_ZERO_ERROR;
557         length=s.extract(dest, s.length(), errorCode);
558         if(errorCode!=U_STRING_NOT_TERMINATED_WARNING || length!=s.length()) {
559             errln("UnicodeString.extract(dest just right without NUL)==%d (%s) expected %d (U_STRING_NOT_TERMINATED_WARNING)",
560                 length, u_errorName(errorCode), s.length());
561         }
562         if(dest[length-1]!=s[length-1] || dest[length]!=0xa5) {
563             errln("UnicodeString.extract(dest just right without NUL) did not extract the string correctly");
564         }
565 
566         errorCode=U_ZERO_ERROR;
567         length=s.extract(dest, s.length()+1, errorCode);
568         if(errorCode!=U_ZERO_ERROR || length!=s.length()) {
569             errln("UnicodeString.extract(dest large enough)==%d (%s) expected %d (U_ZERO_ERROR)",
570                 length, u_errorName(errorCode), s.length());
571         }
572         if(dest[length-1]!=s[length-1] || dest[length]!=0 || dest[length+1]!=0xa5) {
573             errln("UnicodeString.extract(dest large enough) did not extract the string correctly");
574         }
575     }
576 
577     {
578         // test new UConverter extract() and constructor
579         UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
580         char buffer[32];
581         static const char expect[]={
582             (char)0xf0, (char)0xaf, (char)0xa6, (char)0x99,
583             (char)0xf0, (char)0x9d, (char)0x85, (char)0x9f,
584             (char)0xc3, (char)0x84,
585             (char)0xe1, (char)0xbb, (char)0x90
586         };
587         UErrorCode errorCode=U_ZERO_ERROR;
588         UConverter *cnv=ucnv_open("UTF-8", &errorCode);
589         int32_t length;
590 
591         if(U_SUCCESS(errorCode)) {
592             // test preflighting
593             if( (length=s.extract(NULL, 0, cnv, errorCode))!=13 ||
594                 errorCode!=U_BUFFER_OVERFLOW_ERROR
595             ) {
596                 errln("UnicodeString::extract(NULL, UConverter) preflighting failed (length=%ld, %s)",
597                       length, u_errorName(errorCode));
598             }
599             errorCode=U_ZERO_ERROR;
600             if( (length=s.extract(buffer, 2, cnv, errorCode))!=13 ||
601                 errorCode!=U_BUFFER_OVERFLOW_ERROR
602             ) {
603                 errln("UnicodeString::extract(too small, UConverter) preflighting failed (length=%ld, %s)",
604                       length, u_errorName(errorCode));
605             }
606 
607             // try error cases
608             errorCode=U_ZERO_ERROR;
609             if( s.extract(NULL, 2, cnv, errorCode)==13 || U_SUCCESS(errorCode)) {
610                 errln("UnicodeString::extract(UConverter) succeeded with an illegal destination");
611             }
612             errorCode=U_ILLEGAL_ARGUMENT_ERROR;
613             if( s.extract(NULL, 0, cnv, errorCode)==13 || U_SUCCESS(errorCode)) {
614                 errln("UnicodeString::extract(UConverter) succeeded with a previous error code");
615             }
616             errorCode=U_ZERO_ERROR;
617 
618             // extract for real
619             if( (length=s.extract(buffer, sizeof(buffer), cnv, errorCode))!=13 ||
620                 uprv_memcmp(buffer, expect, 13)!=0 ||
621                 buffer[13]!=0 ||
622                 U_FAILURE(errorCode)
623             ) {
624                 errln("UnicodeString::extract(UConverter) conversion failed (length=%ld, %s)",
625                       length, u_errorName(errorCode));
626             }
627             // Test again with just the converter name.
628             if( (length=s.extract(0, s.length(), buffer, sizeof(buffer), "UTF-8"))!=13 ||
629                 uprv_memcmp(buffer, expect, 13)!=0 ||
630                 buffer[13]!=0 ||
631                 U_FAILURE(errorCode)
632             ) {
633                 errln("UnicodeString::extract(\"UTF-8\") conversion failed (length=%ld, %s)",
634                       length, u_errorName(errorCode));
635             }
636 
637             // try the constructor
638             UnicodeString t(expect, sizeof(expect), cnv, errorCode);
639             if(U_FAILURE(errorCode) || s!=t) {
640                 errln("UnicodeString(UConverter) conversion failed (%s)",
641                       u_errorName(errorCode));
642             }
643 
644             ucnv_close(cnv);
645         }
646     }
647 }
648 
649 void
TestRemoveReplace()650 UnicodeStringTest::TestRemoveReplace()
651 {
652     UnicodeString   test1("The rain in Spain stays mainly on the plain");
653     UnicodeString   test2("eat SPAMburgers!");
654     UChar         test3[] = { 0x53, 0x50, 0x41, 0x4d, 0x4d, 0 };
655     char            test4[] = "SPAM";
656     UnicodeString&  test5 = test1;
657 
658     test1.replace(4, 4, test2, 4, 4);
659     test1.replace(12, 5, test3, 4);
660     test3[4] = 0;
661     test1.replace(17, 4, test3);
662     test1.replace(23, 4, test4);
663     test1.replaceBetween(37, 42, test2, 4, 8);
664 
665     if (test1 != "The SPAM in SPAM SPAMs SPAMly on the SPAM")
666         errln("One of the replace methods failed:\n"
667               "  expected \"The SPAM in SPAM SPAMs SPAMly on the SPAM\",\n"
668               "  got \"" + test1 + "\"");
669 
670     test1.remove(21, 1);
671     test1.removeBetween(26, 28);
672 
673     if (test1 != "The SPAM in SPAM SPAM SPAM on the SPAM")
674         errln("One of the remove methods failed:\n"
675               "  expected \"The SPAM in SPAM SPAM SPAM on the SPAM\",\n"
676               "  got \"" + test1 + "\"");
677 
678     for (int32_t i = 0; i < test1.length(); i++) {
679         if (test5[i] != 0x53 && test5[i] != 0x50 && test5[i] != 0x41 && test5[i] != 0x4d && test5[i] != 0x20) {
680             test1.setCharAt(i, 0x78);
681         }
682     }
683 
684     if (test1 != "xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM")
685         errln("One of the remove methods failed:\n"
686               "  expected \"xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM\",\n"
687               "  got \"" + test1 + "\"");
688 
689     test1.remove();
690     if (test1.length() != 0)
691         errln("Remove() failed: expected empty string, got \"" + test1 + "\"");
692 }
693 
694 void
TestSearching()695 UnicodeStringTest::TestSearching()
696 {
697     UnicodeString test1("test test ttest tetest testesteststt");
698     UnicodeString test2("test");
699     UChar testChar = 0x74;
700 
701     UChar32 testChar32 = 0x20402;
702     UChar testData[]={
703         //   0       1       2       3       4       5       6       7
704         0xd841, 0xdc02, 0x0071, 0xdc02, 0xd841, 0x0071, 0xd841, 0xdc02,
705 
706         //   8       9      10      11      12      13      14      15
707         0x0071, 0x0072, 0xd841, 0xdc02, 0x0071, 0xd841, 0xdc02, 0x0071,
708 
709         //  16      17      18      19
710         0xdc02, 0xd841, 0x0073, 0x0000
711     };
712     UnicodeString test3(testData);
713     UnicodeString test4(testChar32);
714 
715     uint16_t occurrences = 0;
716     int32_t startPos = 0;
717     for ( ;
718           startPos != -1 && startPos < test1.length();
719           (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
720         ;
721     if (occurrences != 6)
722         errln(UnicodeString("indexOf failed: expected to find 6 occurrences, found ") + occurrences);
723 
724     for ( occurrences = 0, startPos = 10;
725           startPos != -1 && startPos < test1.length();
726           (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
727         ;
728     if (occurrences != 4)
729         errln(UnicodeString("indexOf with starting offset failed: "
730                             "expected to find 4 occurrences, found ") + occurrences);
731 
732     int32_t endPos = 28;
733     for ( occurrences = 0, startPos = 5;
734           startPos != -1 && startPos < test1.length();
735           (startPos = test1.indexOf(test2, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
736         ;
737     if (occurrences != 4)
738         errln(UnicodeString("indexOf with starting and ending offsets failed: "
739                             "expected to find 4 occurrences, found ") + occurrences);
740 
741     //using UChar32 string
742     for ( startPos=0, occurrences=0;
743           startPos != -1 && startPos < test3.length();
744           (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
745         ;
746     if (occurrences != 4)
747         errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences);
748 
749     for ( startPos=10, occurrences=0;
750           startPos != -1 && startPos < test3.length();
751           (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
752         ;
753     if (occurrences != 2)
754         errln(UnicodeString("indexOf failed: expected to find 2 occurrences, found ") + occurrences);
755     //---
756 
757     for ( occurrences = 0, startPos = 0;
758           startPos != -1 && startPos < test1.length();
759           (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
760         ;
761     if (occurrences != 16)
762         errln(UnicodeString("indexOf with character failed: "
763                             "expected to find 16 occurrences, found ") + occurrences);
764 
765     for ( occurrences = 0, startPos = 10;
766           startPos != -1 && startPos < test1.length();
767           (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
768         ;
769     if (occurrences != 12)
770         errln(UnicodeString("indexOf with character & start offset failed: "
771                             "expected to find 12 occurrences, found ") + occurrences);
772 
773     for ( occurrences = 0, startPos = 5, endPos = 28;
774           startPos != -1 && startPos < test1.length();
775           (startPos = test1.indexOf(testChar, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
776         ;
777     if (occurrences != 10)
778         errln(UnicodeString("indexOf with character & start & end offsets failed: "
779                             "expected to find 10 occurrences, found ") + occurrences);
780 
781     //testing for UChar32
782     UnicodeString subString;
783     for( occurrences =0, startPos=0; startPos < test3.length(); startPos +=1){
784         subString.append(test3, startPos, test3.length());
785         if(subString.indexOf(testChar32) != -1 ){
786              ++occurrences;
787         }
788         subString.remove();
789     }
790     if (occurrences != 14)
791         errln((UnicodeString)"indexOf failed: expected to find 14 occurrences, found " + occurrences);
792 
793     for ( occurrences = 0, startPos = 0;
794           startPos != -1 && startPos < test3.length();
795           (startPos = test3.indexOf(testChar32, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
796         ;
797     if (occurrences != 4)
798         errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences);
799 
800     endPos=test3.length();
801     for ( occurrences = 0, startPos = 5;
802           startPos != -1 && startPos < test3.length();
803           (startPos = test3.indexOf(testChar32, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
804         ;
805     if (occurrences != 3)
806         errln((UnicodeString)"indexOf with character & start & end offsets failed: expected to find 2 occurrences, found " + occurrences);
807     //---
808 
809     if(test1.lastIndexOf(test2)!=29) {
810         errln("test1.lastIndexOf(test2)!=29");
811     }
812 
813     if(test1.lastIndexOf(test2, 15)!=29 || test1.lastIndexOf(test2, 29)!=29 || test1.lastIndexOf(test2, 30)!=-1) {
814         errln("test1.lastIndexOf(test2, start) failed");
815     }
816 
817     for ( occurrences = 0, startPos = 32;
818           startPos != -1;
819           (startPos = test1.lastIndexOf(test2, 5, startPos - 5)) != -1 ? ++occurrences : 0)
820         ;
821     if (occurrences != 4)
822         errln(UnicodeString("lastIndexOf with starting and ending offsets failed: "
823                             "expected to find 4 occurrences, found ") + occurrences);
824 
825     for ( occurrences = 0, startPos = 32;
826           startPos != -1;
827           (startPos = test1.lastIndexOf(testChar, 5, startPos - 5)) != -1 ? ++occurrences : 0)
828         ;
829     if (occurrences != 11)
830         errln(UnicodeString("lastIndexOf with character & start & end offsets failed: "
831                             "expected to find 11 occurrences, found ") + occurrences);
832 
833     //testing UChar32
834     startPos=test3.length();
835     for ( occurrences = 0;
836           startPos != -1;
837           (startPos = test3.lastIndexOf(testChar32, 5, startPos - 5)) != -1 ? ++occurrences : 0)
838         ;
839     if (occurrences != 3)
840         errln((UnicodeString)"lastIndexOf with character & start & end offsets failed: expected to find 3 occurrences, found " + occurrences);
841 
842 
843     for ( occurrences = 0, endPos = test3.length();  endPos > 0; endPos -= 1){
844         subString.remove();
845         subString.append(test3, 0, endPos);
846         if(subString.lastIndexOf(testChar32) != -1 ){
847             ++occurrences;
848         }
849     }
850     if (occurrences != 18)
851         errln((UnicodeString)"indexOf failed: expected to find 18 occurrences, found " + occurrences);
852     //---
853 
854     // test that indexOf(UChar32) and lastIndexOf(UChar32)
855     // do not find surrogate code points when they are part of matched pairs
856     // (= part of supplementary code points)
857     // Jitterbug 1542
858     if(test3.indexOf((UChar32)0xd841) != 4 || test3.indexOf((UChar32)0xdc02) != 3) {
859         errln("error: UnicodeString::indexOf(UChar32 surrogate) finds a partial supplementary code point");
860     }
861     if( UnicodeString(test3, 0, 17).lastIndexOf((UChar)0xd841, 0) != 4 ||
862         UnicodeString(test3, 0, 17).lastIndexOf((UChar32)0xd841, 2) != 4 ||
863         test3.lastIndexOf((UChar32)0xd841, 0, 17) != 4 || test3.lastIndexOf((UChar32)0xdc02, 0, 17) != 16
864     ) {
865         errln("error: UnicodeString::lastIndexOf(UChar32 surrogate) finds a partial supplementary code point");
866     }
867 }
868 
869 void
TestSpacePadding()870 UnicodeStringTest::TestSpacePadding()
871 {
872     UnicodeString test1("hello");
873     UnicodeString test2("   there");
874     UnicodeString test3("Hi!  How ya doin'?  Beautiful day, isn't it?");
875     UnicodeString test4;
876     UBool returnVal;
877     UnicodeString expectedValue;
878 
879     returnVal = test1.padLeading(15);
880     expectedValue = "          hello";
881     if (returnVal == FALSE || test1 != expectedValue)
882         errln("padLeading() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
883 
884     returnVal = test2.padTrailing(15);
885     expectedValue = "   there       ";
886     if (returnVal == FALSE || test2 != expectedValue)
887         errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
888 
889     expectedValue = test3;
890     returnVal = test3.padTrailing(15);
891     if (returnVal == TRUE || test3 != expectedValue)
892         errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
893 
894     expectedValue = "hello";
895     test4.setTo(test1).trim();
896 
897     if (test4 != expectedValue || test1 == expectedValue || test4 != expectedValue)
898         errln("trim(UnicodeString&) failed");
899 
900     test1.trim();
901     if (test1 != expectedValue)
902         errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
903 
904     test2.trim();
905     expectedValue = "there";
906     if (test2 != expectedValue)
907         errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
908 
909     test3.trim();
910     expectedValue = "Hi!  How ya doin'?  Beautiful day, isn't it?";
911     if (test3 != expectedValue)
912         errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
913 
914     returnVal = test1.truncate(15);
915     expectedValue = "hello";
916     if (returnVal == TRUE || test1 != expectedValue)
917         errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
918 
919     returnVal = test2.truncate(15);
920     expectedValue = "there";
921     if (returnVal == TRUE || test2 != expectedValue)
922         errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
923 
924     returnVal = test3.truncate(15);
925     expectedValue = "Hi!  How ya doi";
926     if (returnVal == FALSE || test3 != expectedValue)
927         errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
928 }
929 
930 void
TestPrefixAndSuffix()931 UnicodeStringTest::TestPrefixAndSuffix()
932 {
933     UnicodeString test1("Now is the time for all good men to come to the aid of their country.");
934     UnicodeString test2("Now");
935     UnicodeString test3("country.");
936     UnicodeString test4("count");
937 
938     if (!test1.startsWith(test2) || !test1.startsWith(test2, 0, test2.length())) {
939         errln("startsWith() failed: \"" + test2 + "\" should be a prefix of \"" + test1 + "\".");
940     }
941 
942     if (test1.startsWith(test3) ||
943         test1.startsWith(test3.getBuffer(), test3.length()) ||
944         test1.startsWith(test3.getTerminatedBuffer(), 0, -1)
945     ) {
946         errln("startsWith() failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test1 + "\".");
947     }
948 
949     if (test1.endsWith(test2)) {
950         errln("endsWith() failed: \"" + test2 + "\" shouldn't be a suffix of \"" + test1 + "\".");
951     }
952 
953     if (!test1.endsWith(test3)) {
954         errln("endsWith(test3) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
955     }
956     if (!test1.endsWith(test3, 0, INT32_MAX)) {
957         errln("endsWith(test3, 0, INT32_MAX) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
958     }
959 
960     if(!test1.endsWith(test3.getBuffer(), test3.length())) {
961         errln("endsWith(test3.getBuffer(), test3.length()) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
962     }
963     if(!test1.endsWith(test3.getTerminatedBuffer(), 0, -1)) {
964         errln("endsWith(test3.getTerminatedBuffer(), 0, -1) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
965     }
966 
967     if (!test3.startsWith(test4)) {
968         errln("endsWith(test4) failed: \"" + test4 + "\" should be a prefix of \"" + test3 + "\".");
969     }
970 
971     if (test4.startsWith(test3)) {
972         errln("startsWith(test3) failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test4 + "\".");
973     }
974 }
975 
976 void
TestStartsWithAndEndsWithNulTerminated()977 UnicodeStringTest::TestStartsWithAndEndsWithNulTerminated() {
978     UnicodeString test("abcde");
979     const UChar ab[] = { 0x61, 0x62, 0 };
980     const UChar de[] = { 0x64, 0x65, 0 };
981     assertTrue("abcde.startsWith(ab, -1)", test.startsWith(ab, -1));
982     assertTrue("abcde.startsWith(ab, 0, -1)", test.startsWith(ab, 0, -1));
983     assertTrue("abcde.endsWith(de, -1)", test.endsWith(de, -1));
984     assertTrue("abcde.endsWith(de, 0, -1)", test.endsWith(de, 0, -1));
985 }
986 
987 void
TestFindAndReplace()988 UnicodeStringTest::TestFindAndReplace()
989 {
990     UnicodeString test1("One potato, two potato, three potato, four\n");
991     UnicodeString test2("potato");
992     UnicodeString test3("MISSISSIPPI");
993 
994     UnicodeString expectedValue;
995 
996     test1.findAndReplace(test2, test3);
997     expectedValue = "One MISSISSIPPI, two MISSISSIPPI, three MISSISSIPPI, four\n";
998     if (test1 != expectedValue)
999         errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
1000     test1.findAndReplace(2, 32, test3, test2);
1001     expectedValue = "One potato, two potato, three MISSISSIPPI, four\n";
1002     if (test1 != expectedValue)
1003         errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
1004 }
1005 
1006 void
TestReverse()1007 UnicodeStringTest::TestReverse()
1008 {
1009     UnicodeString test("backwards words say to used I");
1010 
1011     test.reverse();
1012     test.reverse(2, 4);
1013     test.reverse(7, 2);
1014     test.reverse(10, 3);
1015     test.reverse(14, 5);
1016     test.reverse(20, 9);
1017 
1018     if (test != "I used to say words backwards")
1019         errln("reverse() failed:  Expected \"I used to say words backwards\",\n got \""
1020             + test + "\"");
1021 
1022     test=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
1023     test.reverse();
1024     if(test.char32At(0)!=0x1ed0 || test.char32At(1)!=0xc4 || test.char32At(2)!=0x1d15f || test.char32At(4)!=0x2f999) {
1025         errln("reverse() failed with supplementary characters");
1026     }
1027 
1028     // Test case for ticket #8091:
1029     // UnicodeString::reverse() failed to see a lead surrogate in the middle of
1030     // an odd-length string that contains no other lead surrogates.
1031     test=UNICODE_STRING_SIMPLE("ab\\U0001F4A9e").unescape();
1032     UnicodeString expected=UNICODE_STRING_SIMPLE("e\\U0001F4A9ba").unescape();
1033     test.reverse();
1034     if(test!=expected) {
1035         errln("reverse() failed with only lead surrogate in the middle");
1036     }
1037 }
1038 
1039 void
TestMiscellaneous()1040 UnicodeStringTest::TestMiscellaneous()
1041 {
1042     UnicodeString   test1("This is a test");
1043     UnicodeString   test2("This is a test");
1044     UnicodeString   test3("Me too!");
1045 
1046     // test getBuffer(minCapacity) and releaseBuffer()
1047     test1=UnicodeString(); // make sure that it starts with its stackBuffer
1048     UChar *p=test1.getBuffer(20);
1049     if(test1.getCapacity()<20) {
1050         errln("UnicodeString::getBuffer(20).getCapacity()<20");
1051     }
1052 
1053     test1.append((UChar)7); // must not be able to modify the string here
1054     test1.setCharAt(3, 7);
1055     test1.reverse();
1056     if( test1.length()!=0 ||
1057         test1.charAt(0)!=0xffff || test1.charAt(3)!=0xffff ||
1058         test1.getBuffer(10)!=0 || test1.getBuffer()!=0
1059     ) {
1060         errln("UnicodeString::getBuffer(minCapacity) allows read or write access to the UnicodeString");
1061     }
1062 
1063     p[0]=1;
1064     p[1]=2;
1065     p[2]=3;
1066     test1.releaseBuffer(3);
1067     test1.append((UChar)4);
1068 
1069     if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) {
1070         errln("UnicodeString::releaseBuffer(newLength) does not properly reallow access to the UnicodeString");
1071     }
1072 
1073     // test releaseBuffer() without getBuffer(minCapacity) - must not have any effect
1074     test1.releaseBuffer(1);
1075     if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) {
1076         errln("UnicodeString::releaseBuffer(newLength) without getBuffer(minCapacity) changed the UnicodeString");
1077     }
1078 
1079     // test getBuffer(const)
1080     const UChar *q=test1.getBuffer(), *r=test1.getBuffer();
1081     if( test1.length()!=4 ||
1082         q[0]!=1 || q[1]!=2 || q[2]!=3 || q[3]!=4 ||
1083         r[0]!=1 || r[1]!=2 || r[2]!=3 || r[3]!=4
1084     ) {
1085         errln("UnicodeString::getBuffer(const) does not return a usable buffer pointer");
1086     }
1087 
1088     // test releaseBuffer() with a NUL-terminated buffer
1089     test1.getBuffer(20)[2]=0;
1090     test1.releaseBuffer(); // implicit -1
1091     if(test1.length()!=2 || test1.charAt(0)!=1 || test1.charAt(1) !=2) {
1092         errln("UnicodeString::releaseBuffer(-1) does not properly set the length of the UnicodeString");
1093     }
1094 
1095     // test releaseBuffer() with a non-NUL-terminated buffer
1096     p=test1.getBuffer(256);
1097     for(int32_t i=0; i<test1.getCapacity(); ++i) {
1098         p[i]=(UChar)1;      // fill the buffer with all non-NUL code units
1099     }
1100     test1.releaseBuffer();  // implicit -1
1101     if(test1.length()!=test1.getCapacity() || test1.charAt(1)!=1 || test1.charAt(100)!=1 || test1.charAt(test1.getCapacity()-1)!=1) {
1102         errln("UnicodeString::releaseBuffer(-1 but no NUL) does not properly set the length of the UnicodeString");
1103     }
1104 
1105     // test getTerminatedBuffer()
1106     test1=UnicodeString("This is another test.", "");
1107     test2=UnicodeString("This is another test.", "");
1108     q=test1.getTerminatedBuffer();
1109     if(q[test1.length()]!=0 || test1!=test2 || test2.compare(q, -1)!=0) {
1110         errln("getTerminatedBuffer()[length]!=0");
1111     }
1112 
1113     const UChar u[]={ 5, 6, 7, 8, 0 };
1114     test1.setTo(FALSE, u, 3);
1115     q=test1.getTerminatedBuffer();
1116     if(q==u || q[0]!=5 || q[1]!=6 || q[2]!=7 || q[3]!=0) {
1117         errln("UnicodeString(u[3]).getTerminatedBuffer() returns a bad buffer");
1118     }
1119 
1120     test1.setTo(TRUE, u, -1);
1121     q=test1.getTerminatedBuffer();
1122     if(q!=u || test1.length()!=4 || q[3]!=8 || q[4]!=0) {
1123         errln("UnicodeString(u[-1]).getTerminatedBuffer() returns a bad buffer");
1124     }
1125 
1126     test1=UNICODE_STRING("la", 2);
1127     test1.append(UNICODE_STRING(" lila", 5).getTerminatedBuffer(), 0, -1);
1128     if(test1!=UNICODE_STRING("la lila", 7)) {
1129         errln("UnicodeString::append(const UChar *, start, length) failed");
1130     }
1131 
1132     test1.insert(3, UNICODE_STRING("dudum ", 6), 0, INT32_MAX);
1133     if(test1!=UNICODE_STRING("la dudum lila", 13)) {
1134         errln("UnicodeString::insert(start, const UniStr &, start, length) failed");
1135     }
1136 
1137     static const UChar ucs[]={ 0x68, 0x6d, 0x20, 0 };
1138     test1.insert(9, ucs, -1);
1139     if(test1!=UNICODE_STRING("la dudum hm lila", 16)) {
1140         errln("UnicodeString::insert(start, const UChar *, length) failed");
1141     }
1142 
1143     test1.replace(9, 2, (UChar)0x2b);
1144     if(test1!=UNICODE_STRING("la dudum + lila", 15)) {
1145         errln("UnicodeString::replace(start, length, UChar) failed");
1146     }
1147 
1148     if(test1.hasMetaData() || UnicodeString().hasMetaData()) {
1149         errln("UnicodeString::hasMetaData() returns TRUE");
1150     }
1151 
1152     // test getTerminatedBuffer() on a truncated, shared, heap-allocated string
1153     test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
1154     test1.truncate(36);  // ensure length()<getCapacity()
1155     test2=test1;  // share the buffer
1156     test1.truncate(5);
1157     if(test1.length()!=5 || test1.getTerminatedBuffer()[5]!=0) {
1158         errln("UnicodeString(shared buffer).truncate() failed");
1159     }
1160     if(test2.length()!=36 || test2[5]!=0x66 || u_strlen(test2.getTerminatedBuffer())!=36) {
1161         errln("UnicodeString(shared buffer).truncate().getTerminatedBuffer() "
1162               "modified another copy of the string!");
1163     }
1164     test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
1165     test1.truncate(36);  // ensure length()<getCapacity()
1166     test2=test1;  // share the buffer
1167     test1.remove();
1168     if(test1.length()!=0 || test1.getTerminatedBuffer()[0]!=0) {
1169         errln("UnicodeString(shared buffer).remove() failed");
1170     }
1171     if(test2.length()!=36 || test2[0]!=0x61 || u_strlen(test2.getTerminatedBuffer())!=36) {
1172         errln("UnicodeString(shared buffer).remove().getTerminatedBuffer() "
1173               "modified another copy of the string!");
1174     }
1175 
1176     // ticket #9740
1177     test1.setTo(TRUE, ucs, 3);
1178     assertEquals("length of read-only alias", 3, test1.length());
1179     test1.trim();
1180     assertEquals("length of read-only alias after trim()", 2, test1.length());
1181     assertEquals("length of terminated buffer of read-only alias + trim()",
1182                  2, u_strlen(test1.getTerminatedBuffer()));
1183 }
1184 
1185 void
TestStackAllocation()1186 UnicodeStringTest::TestStackAllocation()
1187 {
1188     UChar           testString[] ={
1189         0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x63, 0x72, 0x61, 0x7a, 0x79, 0x20, 0x74, 0x65, 0x73, 0x74, 0x2e, 0 };
1190     UChar           guardWord = 0x4DED;
1191     UnicodeString*  test = 0;
1192 
1193     test = new  UnicodeString(testString);
1194     if (*test != "This is a crazy test.")
1195         errln("Test string failed to initialize properly.");
1196     if (guardWord != 0x04DED)
1197         errln("Test string initialization overwrote guard word!");
1198 
1199     test->insert(8, "only ");
1200     test->remove(15, 6);
1201     if (*test != "This is only a test.")
1202         errln("Manipulation of test string failed to work right.");
1203     if (guardWord != 0x4DED)
1204         errln("Manipulation of test string overwrote guard word!");
1205 
1206     // we have to deinitialize and release the backing store by calling the destructor
1207     // explicitly, since we can't overload operator delete
1208     delete test;
1209 
1210     UChar workingBuffer[] = {
1211         0x4e, 0x6f, 0x77, 0x20, 0x69, 0x73, 0x20, 0x74, 0x68, 0x65, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x20,
1212         0x66, 0x6f, 0x72, 0x20, 0x61, 0x6c, 0x6c, 0x20, 0x6d, 0x65, 0x6e, 0x20, 0x74, 0x6f, 0x20,
1213         0x63, 0x6f, 0x6d, 0x65, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1214         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1215         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1216     UChar guardWord2 = 0x4DED;
1217 
1218     test = new UnicodeString(workingBuffer, 35, 100);
1219     if (*test != "Now is the time for all men to come")
1220         errln("Stack-allocated backing store failed to initialize correctly.");
1221     if (guardWord2 != 0x4DED)
1222         errln("Stack-allocated backing store overwrote guard word!");
1223 
1224     test->insert(24, "good ");
1225     if (*test != "Now is the time for all good men to come")
1226         errln("insert() on stack-allocated UnicodeString didn't work right");
1227     if (guardWord2 != 0x4DED)
1228         errln("insert() on stack-allocated UnicodeString overwrote guard word!");
1229 
1230     if (workingBuffer[24] != 0x67)
1231         errln("insert() on stack-allocated UnicodeString didn't affect backing store");
1232 
1233     *test += " to the aid of their country.";
1234     if (*test != "Now is the time for all good men to come to the aid of their country.")
1235         errln("Stack-allocated UnicodeString overflow didn't work");
1236     if (guardWord2 != 0x4DED)
1237         errln("Stack-allocated UnicodeString overflow overwrote guard word!");
1238 
1239     *test = "ha!";
1240     if (*test != "ha!")
1241         errln("Assignment to stack-allocated UnicodeString didn't work");
1242     if (workingBuffer[0] != 0x4e)
1243         errln("Change to UnicodeString after overflow are still affecting original buffer");
1244     if (guardWord2 != 0x4DED)
1245         errln("Change to UnicodeString after overflow overwrote guard word!");
1246 
1247     // test read-only aliasing with setTo()
1248     workingBuffer[0] = 0x20ac;
1249     workingBuffer[1] = 0x125;
1250     workingBuffer[2] = 0;
1251     test->setTo(TRUE, workingBuffer, 2);
1252     if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x125) {
1253         errln("UnicodeString.setTo(readonly alias) does not alias correctly");
1254     }
1255 
1256     UnicodeString *c=(UnicodeString *)test->clone();
1257 
1258     workingBuffer[1] = 0x109;
1259     if(test->charAt(1) != 0x109) {
1260         errln("UnicodeString.setTo(readonly alias) made a copy: did not see change in buffer");
1261     }
1262 
1263     if(c->length() != 2 || c->charAt(1) != 0x125) {
1264         errln("clone(alias) did not copy the buffer");
1265     }
1266     delete c;
1267 
1268     test->setTo(TRUE, workingBuffer, -1);
1269     if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x109) {
1270         errln("UnicodeString.setTo(readonly alias, length -1) does not alias correctly");
1271     }
1272 
1273     test->setTo(FALSE, workingBuffer, -1);
1274     if(!test->isBogus()) {
1275         errln("UnicodeString.setTo(unterminated readonly alias, length -1) does not result in isBogus()");
1276     }
1277 
1278     delete test;
1279 
1280     test=new UnicodeString();
1281     UChar buffer[]={0x0061, 0x0062, 0x20ac, 0x0043, 0x0042, 0x0000};
1282     test->setTo(buffer, 4, 10);
1283     if(test->length() !=4 || test->charAt(0) != 0x0061 || test->charAt(1) != 0x0062 ||
1284         test->charAt(2) != 0x20ac || test->charAt(3) != 0x0043){
1285         errln((UnicodeString)"UnicodeString.setTo(UChar*, length, capacity) does not work correctly\n" + prettify(*test));
1286     }
1287     delete test;
1288 
1289 
1290     // test the UChar32 constructor
1291     UnicodeString c32Test((UChar32)0x10ff2a);
1292     if( c32Test.length() != U16_LENGTH(0x10ff2a) ||
1293         c32Test.char32At(c32Test.length() - 1) != 0x10ff2a
1294     ) {
1295         errln("The UnicodeString(UChar32) constructor does not work with a 0x10ff2a filler");
1296     }
1297 
1298     // test the (new) capacity constructor
1299     UnicodeString capTest(5, (UChar32)0x2a, 5);
1300     if( capTest.length() != 5 * U16_LENGTH(0x2a) ||
1301         capTest.char32At(0) != 0x2a ||
1302         capTest.char32At(4) != 0x2a
1303     ) {
1304         errln("The UnicodeString capacity constructor does not work with an ASCII filler");
1305     }
1306 
1307     capTest = UnicodeString(5, (UChar32)0x10ff2a, 5);
1308     if( capTest.length() != 5 * U16_LENGTH(0x10ff2a) ||
1309         capTest.char32At(0) != 0x10ff2a ||
1310         capTest.char32At(4) != 0x10ff2a
1311     ) {
1312         errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
1313     }
1314 
1315     capTest = UnicodeString(5, (UChar32)0, 0);
1316     if(capTest.length() != 0) {
1317         errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
1318     }
1319 }
1320 
1321 /**
1322  * Test the unescape() function.
1323  */
TestUnescape(void)1324 void UnicodeStringTest::TestUnescape(void) {
1325     UnicodeString IN("abc\\u4567 \\n\\r \\U00101234xyz\\x1\\x{5289}\\x1b", -1, US_INV);
1326     UnicodeString OUT("abc");
1327     OUT.append((UChar)0x4567);
1328     OUT.append(" ");
1329     OUT.append((UChar)0xA);
1330     OUT.append((UChar)0xD);
1331     OUT.append(" ");
1332     OUT.append((UChar32)0x00101234);
1333     OUT.append("xyz");
1334     OUT.append((UChar32)1).append((UChar32)0x5289).append((UChar)0x1b);
1335     UnicodeString result = IN.unescape();
1336     if (result != OUT) {
1337         errln("FAIL: " + prettify(IN) + ".unescape() -> " +
1338               prettify(result) + ", expected " +
1339               prettify(OUT));
1340     }
1341 
1342     // test that an empty string is returned in case of an error
1343     if (!UNICODE_STRING("wrong \\u sequence", 17).unescape().isEmpty()) {
1344         errln("FAIL: unescaping of a string with an illegal escape sequence did not return an empty string");
1345     }
1346 }
1347 
1348 /* test code point counting functions --------------------------------------- */
1349 
1350 /* reference implementation of UnicodeString::hasMoreChar32Than() */
1351 static int32_t
_refUnicodeStringHasMoreChar32Than(const UnicodeString & s,int32_t start,int32_t length,int32_t number)1352 _refUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) {
1353     int32_t count=s.countChar32(start, length);
1354     return count>number;
1355 }
1356 
1357 /* compare the real function against the reference */
1358 void
_testUnicodeStringHasMoreChar32Than(const UnicodeString & s,int32_t start,int32_t length,int32_t number)1359 UnicodeStringTest::_testUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) {
1360     if(s.hasMoreChar32Than(start, length, number)!=_refUnicodeStringHasMoreChar32Than(s, start, length, number)) {
1361         errln("hasMoreChar32Than(%d, %d, %d)=%hd is wrong\n",
1362                 start, length, number, s.hasMoreChar32Than(start, length, number));
1363     }
1364 }
1365 
1366 void
TestCountChar32(void)1367 UnicodeStringTest::TestCountChar32(void) {
1368     {
1369         UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
1370 
1371         // test countChar32()
1372         // note that this also calls and tests u_countChar32(length>=0)
1373         if(
1374             s.countChar32()!=4 ||
1375             s.countChar32(1)!=4 ||
1376             s.countChar32(2)!=3 ||
1377             s.countChar32(2, 3)!=2 ||
1378             s.countChar32(2, 0)!=0
1379         ) {
1380             errln("UnicodeString::countChar32() failed");
1381         }
1382 
1383         // NUL-terminate the string buffer and test u_countChar32(length=-1)
1384         const UChar *buffer=s.getTerminatedBuffer();
1385         if(
1386             u_countChar32(buffer, -1)!=4 ||
1387             u_countChar32(buffer+1, -1)!=4 ||
1388             u_countChar32(buffer+2, -1)!=3 ||
1389             u_countChar32(buffer+3, -1)!=3 ||
1390             u_countChar32(buffer+4, -1)!=2 ||
1391             u_countChar32(buffer+5, -1)!=1 ||
1392             u_countChar32(buffer+6, -1)!=0
1393         ) {
1394             errln("u_countChar32(length=-1) failed");
1395         }
1396 
1397         // test u_countChar32() with bad input
1398         if(u_countChar32(NULL, 5)!=0 || u_countChar32(buffer, -2)!=0) {
1399             errln("u_countChar32(bad input) failed (returned non-zero counts)");
1400         }
1401     }
1402 
1403     /* test data and variables for hasMoreChar32Than() */
1404     static const UChar str[]={
1405         0x61, 0x62, 0xd800, 0xdc00,
1406         0xd801, 0xdc01, 0x63, 0xd802,
1407         0x64, 0xdc03, 0x65, 0x66,
1408         0xd804, 0xdc04, 0xd805, 0xdc05,
1409         0x67
1410     };
1411     UnicodeString string(str, UPRV_LENGTHOF(str));
1412     int32_t start, length, number;
1413 
1414     /* test hasMoreChar32Than() */
1415     for(length=string.length(); length>=0; --length) {
1416         for(start=0; start<=length; ++start) {
1417             for(number=-1; number<=((length-start)+2); ++number) {
1418                 _testUnicodeStringHasMoreChar32Than(string, start, length-start, number);
1419             }
1420         }
1421     }
1422 
1423     /* test hasMoreChar32Than() with pinning */
1424     for(start=-1; start<=string.length()+1; ++start) {
1425         for(number=-1; number<=((string.length()-start)+2); ++number) {
1426             _testUnicodeStringHasMoreChar32Than(string, start, 0x7fffffff, number);
1427         }
1428     }
1429 
1430     /* test hasMoreChar32Than() with a bogus string */
1431     string.setToBogus();
1432     for(length=-1; length<=1; ++length) {
1433         for(start=-1; start<=length; ++start) {
1434             for(number=-1; number<=((length-start)+2); ++number) {
1435                 _testUnicodeStringHasMoreChar32Than(string, start, length-start, number);
1436             }
1437         }
1438     }
1439 }
1440 
1441 void
TestBogus()1442 UnicodeStringTest::TestBogus() {
1443     UnicodeString   test1("This is a test");
1444     UnicodeString   test2("This is a test");
1445     UnicodeString   test3("Me too!");
1446 
1447     // test isBogus() and setToBogus()
1448     if (test1.isBogus() || test2.isBogus() || test3.isBogus()) {
1449         errln("A string returned TRUE for isBogus()!");
1450     }
1451 
1452     // NULL pointers are treated like empty strings
1453     // use other illegal arguments to make a bogus string
1454     test3.setTo(FALSE, test1.getBuffer(), -2);
1455     if(!test3.isBogus()) {
1456         errln("A bogus string returned FALSE for isBogus()!");
1457     }
1458     if (test1.hashCode() != test2.hashCode() || test1.hashCode() == test3.hashCode()) {
1459         errln("hashCode() failed");
1460     }
1461     if(test3.getBuffer()!=0 || test3.getBuffer(20)!=0 || test3.getTerminatedBuffer()!=0) {
1462         errln("bogus.getBuffer()!=0");
1463     }
1464     if (test1.indexOf(test3) != -1) {
1465         errln("bogus.indexOf() != -1");
1466     }
1467     if (test1.lastIndexOf(test3) != -1) {
1468         errln("bogus.lastIndexOf() != -1");
1469     }
1470     if (test1.caseCompare(test3, U_FOLD_CASE_DEFAULT) != 1 || test3.caseCompare(test1, U_FOLD_CASE_DEFAULT) != -1) {
1471         errln("caseCompare() doesn't work with bogus strings");
1472     }
1473     if (test1.compareCodePointOrder(test3) != 1 || test3.compareCodePointOrder(test1) != -1) {
1474         errln("compareCodePointOrder() doesn't work with bogus strings");
1475     }
1476 
1477     // verify that non-assignment modifications fail and do not revive a bogus string
1478     test3.setToBogus();
1479     test3.append((UChar)0x61);
1480     if(!test3.isBogus() || test3.getBuffer()!=0) {
1481         errln("bogus.append('a') worked but must not");
1482     }
1483 
1484     test3.setToBogus();
1485     test3.findAndReplace(UnicodeString((UChar)0x61), test2);
1486     if(!test3.isBogus() || test3.getBuffer()!=0) {
1487         errln("bogus.findAndReplace() worked but must not");
1488     }
1489 
1490     test3.setToBogus();
1491     test3.trim();
1492     if(!test3.isBogus() || test3.getBuffer()!=0) {
1493         errln("bogus.trim() revived bogus but must not");
1494     }
1495 
1496     test3.setToBogus();
1497     test3.remove(1);
1498     if(!test3.isBogus() || test3.getBuffer()!=0) {
1499         errln("bogus.remove(1) revived bogus but must not");
1500     }
1501 
1502     test3.setToBogus();
1503     if(!test3.setCharAt(0, 0x62).isBogus() || !test3.isEmpty()) {
1504         errln("bogus.setCharAt(0, 'b') worked but must not");
1505     }
1506 
1507     test3.setToBogus();
1508     if(test3.truncate(1) || !test3.isBogus() || !test3.isEmpty()) {
1509         errln("bogus.truncate(1) revived bogus but must not");
1510     }
1511 
1512     // verify that assignments revive a bogus string
1513     test3.setToBogus();
1514     if(!test3.isBogus() || (test3=test1).isBogus() || test3!=test1) {
1515         errln("bogus.operator=() failed");
1516     }
1517 
1518     test3.setToBogus();
1519     if(!test3.isBogus() || test3.fastCopyFrom(test1).isBogus() || test3!=test1) {
1520         errln("bogus.fastCopyFrom() failed");
1521     }
1522 
1523     test3.setToBogus();
1524     if(!test3.isBogus() || test3.setTo(test1).isBogus() || test3!=test1) {
1525         errln("bogus.setTo(UniStr) failed");
1526     }
1527 
1528     test3.setToBogus();
1529     if(!test3.isBogus() || test3.setTo(test1, 0).isBogus() || test3!=test1) {
1530         errln("bogus.setTo(UniStr, 0) failed");
1531     }
1532 
1533     test3.setToBogus();
1534     if(!test3.isBogus() || test3.setTo(test1, 0, 0x7fffffff).isBogus() || test3!=test1) {
1535         errln("bogus.setTo(UniStr, 0, len) failed");
1536     }
1537 
1538     test3.setToBogus();
1539     if(!test3.isBogus() || test3.setTo(test1.getBuffer(), test1.length()).isBogus() || test3!=test1) {
1540         errln("bogus.setTo(const UChar *, len) failed");
1541     }
1542 
1543     test3.setToBogus();
1544     if(!test3.isBogus() || test3.setTo((UChar)0x2028).isBogus() || test3!=UnicodeString((UChar)0x2028)) {
1545         errln("bogus.setTo(UChar) failed");
1546     }
1547 
1548     test3.setToBogus();
1549     if(!test3.isBogus() || test3.setTo((UChar32)0x1d157).isBogus() || test3!=UnicodeString((UChar32)0x1d157)) {
1550         errln("bogus.setTo(UChar32) failed");
1551     }
1552 
1553     test3.setToBogus();
1554     if(!test3.isBogus() || test3.setTo(FALSE, test1.getBuffer(), test1.length()).isBogus() || test3!=test1) {
1555         errln("bogus.setTo(readonly alias) failed");
1556     }
1557 
1558     // writable alias to another string's buffer: very bad idea, just convenient for this test
1559     test3.setToBogus();
1560     if(!test3.isBogus() ||
1561             test3.setTo(const_cast<UChar *>(test1.getBuffer()),
1562                         test1.length(), test1.getCapacity()).isBogus() ||
1563             test3!=test1) {
1564         errln("bogus.setTo(writable alias) failed");
1565     }
1566 
1567     // verify simple, documented ways to turn a bogus string into an empty one
1568     test3.setToBogus();
1569     if(!test3.isBogus() || (test3=UnicodeString()).isBogus() || !test3.isEmpty()) {
1570         errln("bogus.operator=(UnicodeString()) failed");
1571     }
1572 
1573     test3.setToBogus();
1574     if(!test3.isBogus() || test3.setTo(UnicodeString()).isBogus() || !test3.isEmpty()) {
1575         errln("bogus.setTo(UnicodeString()) failed");
1576     }
1577 
1578     test3.setToBogus();
1579     if(test3.remove().isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) {
1580         errln("bogus.remove() failed");
1581     }
1582 
1583     test3.setToBogus();
1584     if(test3.remove(0, INT32_MAX).isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) {
1585         errln("bogus.remove(0, INT32_MAX) failed");
1586     }
1587 
1588     test3.setToBogus();
1589     if(test3.truncate(0) || test3.isBogus() || !test3.isEmpty()) {
1590         errln("bogus.truncate(0) failed");
1591     }
1592 
1593     test3.setToBogus();
1594     if(!test3.isBogus() || test3.setTo((UChar32)-1).isBogus() || !test3.isEmpty()) {
1595         errln("bogus.setTo((UChar32)-1) failed");
1596     }
1597 
1598     static const UChar nul=0;
1599 
1600     test3.setToBogus();
1601     if(!test3.isBogus() || test3.setTo(&nul, 0).isBogus() || !test3.isEmpty()) {
1602         errln("bogus.setTo(&nul, 0) failed");
1603     }
1604 
1605     test3.setToBogus();
1606     if(!test3.isBogus() || test3.getBuffer()!=0) {
1607         errln("setToBogus() failed to make a string bogus");
1608     }
1609 
1610     test3.setToBogus();
1611     if(test1.isBogus() || !(test1=test3).isBogus()) {
1612         errln("normal=bogus failed to make the left string bogus");
1613     }
1614 
1615     // test that NULL primitive input string values are treated like
1616     // empty strings, not errors (bogus)
1617     test2.setTo((UChar32)0x10005);
1618     if(test2.insert(1, nullptr, 1).length()!=2) {
1619         errln("UniStr.insert(...nullptr...) should not modify the string but does");
1620     }
1621 
1622     UErrorCode errorCode=U_ZERO_ERROR;
1623     UnicodeString
1624         test4((const UChar *)NULL),
1625         test5(TRUE, (const UChar *)NULL, 1),
1626         test6((UChar *)NULL, 5, 5),
1627         test7((const char *)NULL, 3, NULL, errorCode);
1628     if(test4.isBogus() || test5.isBogus() || test6.isBogus() || test7.isBogus()) {
1629         errln("a constructor set to bogus for a NULL input string, should be empty");
1630     }
1631 
1632     test4.setTo(NULL, 3);
1633     test5.setTo(TRUE, (const UChar *)NULL, 1);
1634     test6.setTo((UChar *)NULL, 5, 5);
1635     if(test4.isBogus() || test5.isBogus() || test6.isBogus()) {
1636         errln("a setTo() set to bogus for a NULL input string, should be empty");
1637     }
1638 
1639     // test that bogus==bogus<any
1640     if(test1!=test3 || test1.compare(test3)!=0) {
1641         errln("bogus==bogus failed");
1642     }
1643 
1644     test2.remove();
1645     if(test1>=test2 || !(test2>test1) || test1.compare(test2)>=0 || !(test2.compare(test1)>0)) {
1646         errln("bogus<empty failed");
1647     }
1648 }
1649 
1650 // StringEnumeration ------------------------------------------------------- ***
1651 // most of StringEnumeration is tested elsewhere
1652 // this test improves code coverage
1653 
// Fixed list of strings used as StringEnumeration test data;
// it mixes one-character entries with two longer ones.
static const char *const testEnumStrings[] = {
    "a", "b", "c",
    "this is a long string which helps us test some buffer limits",
    "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"
};
1662 
1663 class TestEnumeration : public StringEnumeration {
1664 public:
TestEnumeration()1665     TestEnumeration() : i(0) {}
1666 
count(UErrorCode &) const1667     virtual int32_t count(UErrorCode& /*status*/) const {
1668         return UPRV_LENGTHOF(testEnumStrings);
1669     }
1670 
snext(UErrorCode & status)1671     virtual const UnicodeString *snext(UErrorCode &status) {
1672         if(U_SUCCESS(status) && i<UPRV_LENGTHOF(testEnumStrings)) {
1673             unistr=UnicodeString(testEnumStrings[i++], "");
1674             return &unistr;
1675         }
1676 
1677         return NULL;
1678     }
1679 
reset(UErrorCode &)1680     virtual void reset(UErrorCode& /*status*/) {
1681         i=0;
1682     }
1683 
getStaticClassID()1684     static inline UClassID getStaticClassID() {
1685         return (UClassID)&fgClassID;
1686     }
getDynamicClassID() const1687     virtual UClassID getDynamicClassID() const {
1688         return getStaticClassID();
1689     }
1690 
1691 private:
1692     static const char fgClassID;
1693 
1694     int32_t i;
1695 };
1696 
1697 const char TestEnumeration::fgClassID=0;
1698 
1699 void
TestStringEnumeration()1700 UnicodeStringTest::TestStringEnumeration() {
1701     UnicodeString s;
1702     TestEnumeration ten;
1703     int32_t i, length;
1704     UErrorCode status;
1705 
1706     const UChar *pu;
1707     const char *pc;
1708 
1709     // test the next() default implementation and ensureCharsCapacity()
1710     for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1711         status=U_ZERO_ERROR;
1712         pc=ten.next(&length, status);
1713         s=UnicodeString(testEnumStrings[i], "");
1714         if(U_FAILURE(status) || pc==NULL || length!=s.length() || UnicodeString(pc, length, "")!=s) {
1715             errln("StringEnumeration.next(%d) failed", i);
1716         }
1717     }
1718     status=U_ZERO_ERROR;
1719     if(ten.next(&length, status)!=NULL) {
1720         errln("StringEnumeration.next(done)!=NULL");
1721     }
1722 
1723     // test the unext() default implementation
1724     ten.reset(status);
1725     for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1726         status=U_ZERO_ERROR;
1727         pu=ten.unext(&length, status);
1728         s=UnicodeString(testEnumStrings[i], "");
1729         if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(TRUE, pu, length)!=s) {
1730             errln("StringEnumeration.unext(%d) failed", i);
1731         }
1732     }
1733     status=U_ZERO_ERROR;
1734     if(ten.unext(&length, status)!=NULL) {
1735         errln("StringEnumeration.unext(done)!=NULL");
1736     }
1737 
1738     // test that the default clone() implementation works, and returns NULL
1739     if(ten.clone()!=NULL) {
1740         errln("StringEnumeration.clone()!=NULL");
1741     }
1742 
1743     // test that uenum_openFromStringEnumeration() works
1744     // Need a heap allocated string enumeration because it is adopted by the UEnumeration.
1745     StringEnumeration *newTen = new TestEnumeration;
1746     status=U_ZERO_ERROR;
1747     UEnumeration *uten = uenum_openFromStringEnumeration(newTen, &status);
1748     if (uten==NULL || U_FAILURE(status)) {
1749         errln("fail at file %s, line %d, UErrorCode is %s\n", __FILE__, __LINE__, u_errorName(status));
1750         return;
1751     }
1752 
1753     // test  uenum_next()
1754     for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1755         status=U_ZERO_ERROR;
1756         pc=uenum_next(uten, &length, &status);
1757         if(U_FAILURE(status) || pc==NULL || strcmp(pc, testEnumStrings[i]) != 0) {
1758             errln("File %s, line %d, StringEnumeration.next(%d) failed", __FILE__, __LINE__, i);
1759         }
1760     }
1761     status=U_ZERO_ERROR;
1762     if(uenum_next(uten, &length, &status)!=NULL) {
1763         errln("File %s, line %d, uenum_next(done)!=NULL");
1764     }
1765 
1766     // test the uenum_unext()
1767     uenum_reset(uten, &status);
1768     for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1769         status=U_ZERO_ERROR;
1770         pu=uenum_unext(uten, &length, &status);
1771         s=UnicodeString(testEnumStrings[i], "");
1772         if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(TRUE, pu, length)!=s) {
1773             errln("File %s, Line %d, uenum_unext(%d) failed", __FILE__, __LINE__, i);
1774         }
1775     }
1776     status=U_ZERO_ERROR;
1777     if(uenum_unext(uten, &length, &status)!=NULL) {
1778         errln("File %s, Line %d, uenum_unext(done)!=NULL" __FILE__, __LINE__);
1779     }
1780 
1781     uenum_close(uten);
1782 }
1783 
1784 /*
1785  * Namespace test, to make sure that macros like UNICODE_STRING include the
1786  * namespace qualifier.
1787  *
1788  * Define a (bogus) UnicodeString class in another namespace and check for ambiguity.
1789  */
1790 namespace bogus {
1791     class UnicodeString {
1792     public:
1793         enum EInvariant { kInvariant };
UnicodeString()1794         UnicodeString() : i(1) {}
UnicodeString(UBool,const UChar *,int32_t textLength)1795         UnicodeString(UBool /*isTerminated*/, const UChar * /*text*/, int32_t textLength) : i(textLength) {(void)i;}
UnicodeString(const char *,int32_t length,enum EInvariant)1796         UnicodeString(const char * /*src*/, int32_t length, enum EInvariant /*inv*/
1797 ) : i(length) {}
1798     private:
1799         int32_t i;
1800     };
1801 }
1802 
1803 void
TestNameSpace()1804 UnicodeStringTest::TestNameSpace() {
1805     // Provoke name collision unless the UnicodeString macros properly
1806     // qualify the icu::UnicodeString class.
1807     using namespace bogus;
1808 
1809     // Use all UnicodeString macros from unistr.h.
1810     icu::UnicodeString s1=icu::UnicodeString("abc", 3, US_INV);
1811     icu::UnicodeString s2=UNICODE_STRING("def", 3);
1812     icu::UnicodeString s3=UNICODE_STRING_SIMPLE("ghi");
1813 
1814     // Make sure the compiler does not optimize away instantiation of s1, s2, s3.
1815     icu::UnicodeString s4=s1+s2+s3;
1816     if(s4.length()!=9) {
1817         errln("Something wrong with UnicodeString::operator+().");
1818     }
1819 }
1820 
1821 void
TestUTF32()1822 UnicodeStringTest::TestUTF32() {
1823     // Input string length US_STACKBUF_SIZE to cause overflow of the
1824     // initially chosen fStackBuffer due to supplementary characters.
1825     static const UChar32 utf32[] = {
1826         0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a,
1827         0x10000, 0x20000, 0xe0000, 0x10ffff
1828     };
1829     static const UChar expected_utf16[] = {
1830         0x41, 0xfffd, 0x61, 0xfffd, 0xfffd, 0xfffd, 0x5a, 0xd900, 0xdc00, 0x7a,
1831         0xd800, 0xdc00, 0xd840, 0xdc00, 0xdb40, 0xdc00, 0xdbff, 0xdfff
1832     };
1833     UnicodeString from32 = UnicodeString::fromUTF32(utf32, UPRV_LENGTHOF(utf32));
1834     UnicodeString expected(FALSE, expected_utf16, UPRV_LENGTHOF(expected_utf16));
1835     if(from32 != expected) {
1836         errln("UnicodeString::fromUTF32() did not create the expected string.");
1837     }
1838 
1839     static const UChar utf16[] = {
1840         0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
1841     };
1842     static const UChar32 expected_utf32[] = {
1843         0x41, 0xfffd, 0x61, 0xfffd, 0x5a, 0x50000, 0x7a, 0x10000, 0x10ffff
1844     };
1845     UChar32 result32[16];
1846     UErrorCode errorCode = U_ZERO_ERROR;
1847     int32_t length32 =
1848         UnicodeString(FALSE, utf16, UPRV_LENGTHOF(utf16)).
1849         toUTF32(result32, UPRV_LENGTHOF(result32), errorCode);
1850     if( length32 != UPRV_LENGTHOF(expected_utf32) ||
1851         0 != uprv_memcmp(result32, expected_utf32, length32*4) ||
1852         result32[length32] != 0
1853     ) {
1854         errln("UnicodeString::toUTF32() did not create the expected string.");
1855     }
1856 }
1857 
1858 class TestCheckedArrayByteSink : public CheckedArrayByteSink {
1859 public:
TestCheckedArrayByteSink(char * outbuf,int32_t capacity)1860     TestCheckedArrayByteSink(char* outbuf, int32_t capacity)
1861             : CheckedArrayByteSink(outbuf, capacity), calledFlush(FALSE) {}
Flush()1862     virtual void Flush() { calledFlush = TRUE; }
1863     UBool calledFlush;
1864 };
1865 
1866 void
TestUTF8()1867 UnicodeStringTest::TestUTF8() {
1868     static const uint8_t utf8[] = {
1869         // Code points:
1870         // 0x41, 0xd900,
1871         // 0x61, 0xdc00,
1872         // 0x110000, 0x5a,
1873         // 0x50000, 0x7a,
1874         // 0x10000, 0x20000,
1875         // 0xe0000, 0x10ffff
1876         0x41, 0xed, 0xa4, 0x80,
1877         0x61, 0xed, 0xb0, 0x80,
1878         0xf4, 0x90, 0x80, 0x80, 0x5a,
1879         0xf1, 0x90, 0x80, 0x80, 0x7a,
1880         0xf0, 0x90, 0x80, 0x80, 0xf0, 0xa0, 0x80, 0x80,
1881         0xf3, 0xa0, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
1882     };
1883     static const UChar expected_utf16[] = {
1884         0x41, 0xfffd, 0xfffd, 0xfffd,
1885         0x61, 0xfffd, 0xfffd, 0xfffd,
1886         0xfffd,  0xfffd, 0xfffd, 0xfffd,0x5a,
1887         0xd900, 0xdc00, 0x7a,
1888         0xd800, 0xdc00, 0xd840, 0xdc00,
1889         0xdb40, 0xdc00, 0xdbff, 0xdfff
1890     };
1891     UnicodeString from8 = UnicodeString::fromUTF8(StringPiece((const char *)utf8, (int32_t)sizeof(utf8)));
1892     UnicodeString expected(FALSE, expected_utf16, UPRV_LENGTHOF(expected_utf16));
1893 
1894     if(from8 != expected) {
1895         errln("UnicodeString::fromUTF8(StringPiece) did not create the expected string.");
1896     }
1897     std::string utf8_string((const char *)utf8, sizeof(utf8));
1898     UnicodeString from8b = UnicodeString::fromUTF8(utf8_string);
1899     if(from8b != expected) {
1900         errln("UnicodeString::fromUTF8(std::string) did not create the expected string.");
1901     }
1902 
1903     static const UChar utf16[] = {
1904         0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
1905     };
1906     static const uint8_t expected_utf8[] = {
1907         0x41, 0xef, 0xbf, 0xbd, 0x61, 0xef, 0xbf, 0xbd, 0x5a, 0xf1, 0x90, 0x80, 0x80, 0x7a,
1908         0xf0, 0x90, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
1909     };
1910     UnicodeString us(FALSE, utf16, UPRV_LENGTHOF(utf16));
1911 
1912     char buffer[64];
1913     TestCheckedArrayByteSink sink(buffer, (int32_t)sizeof(buffer));
1914     us.toUTF8(sink);
1915     if( sink.NumberOfBytesWritten() != (int32_t)sizeof(expected_utf8) ||
1916         0 != uprv_memcmp(buffer, expected_utf8, sizeof(expected_utf8))
1917     ) {
1918         errln("UnicodeString::toUTF8() did not create the expected string.");
1919     }
1920     if(!sink.calledFlush) {
1921         errln("UnicodeString::toUTF8(sink) did not sink.Flush().");
1922     }
1923     // Initial contents for testing that toUTF8String() appends.
1924     std::string result8 = "-->";
1925     std::string expected8 = "-->" + std::string((const char *)expected_utf8, sizeof(expected_utf8));
1926     // Use the return value just for testing.
1927     std::string &result8r = us.toUTF8String(result8);
1928     if(result8r != expected8 || &result8r != &result8) {
1929         errln("UnicodeString::toUTF8String() did not create the expected string.");
1930     }
1931 }
1932 
1933 // Test if this compiler supports Return Value Optimization of unnamed temporary objects.
wrapUChars(const UChar * uchars)1934 static UnicodeString wrapUChars(const UChar *uchars) {
1935     return UnicodeString(TRUE, uchars, -1);
1936 }
1937 
1938 void
TestReadOnlyAlias()1939 UnicodeStringTest::TestReadOnlyAlias() {
1940     UChar uchars[]={ 0x61, 0x62, 0 };
1941     UnicodeString alias(TRUE, uchars, 2);
1942     if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
1943         errln("UnicodeString read-only-aliasing constructor does not behave as expected.");
1944         return;
1945     }
1946     alias.truncate(1);
1947     if(alias.length()!=1 || alias.getBuffer()!=uchars) {
1948         errln("UnicodeString(read-only-alias).truncate() did not preserve aliasing as expected.");
1949     }
1950     if(alias.getTerminatedBuffer()==uchars) {
1951         errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1952               "did not allocate and copy as expected.");
1953     }
1954     if(uchars[1]!=0x62) {
1955         errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1956               "modified the original buffer.");
1957     }
1958     if(1!=u_strlen(alias.getTerminatedBuffer())) {
1959         errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1960               "does not return a buffer terminated at the proper length.");
1961     }
1962 
1963     alias.setTo(TRUE, uchars, 2);
1964     if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
1965         errln("UnicodeString read-only-aliasing setTo() does not behave as expected.");
1966         return;
1967     }
1968     alias.remove();
1969     if(alias.length()!=0) {
1970         errln("UnicodeString(read-only-alias).remove() did not work.");
1971     }
1972     if(alias.getTerminatedBuffer()==uchars) {
1973         errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
1974               "did not un-alias as expected.");
1975     }
1976     if(uchars[0]!=0x61) {
1977         errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
1978               "modified the original buffer.");
1979     }
1980     if(0!=u_strlen(alias.getTerminatedBuffer())) {
1981         errln("UnicodeString.setTo(read-only-alias).remove().getTerminatedBuffer() "
1982               "does not return a buffer terminated at length 0.");
1983     }
1984 
1985     UnicodeString longString=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789");
1986     alias.setTo(FALSE, longString.getBuffer(), longString.length());
1987     alias.remove(0, 10);
1988     if(longString.compare(10, INT32_MAX, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+10) {
1989         errln("UnicodeString.setTo(read-only-alias).remove(0, 10) did not preserve aliasing as expected.");
1990     }
1991     alias.setTo(FALSE, longString.getBuffer(), longString.length());
1992     alias.remove(27, 99);
1993     if(longString.compare(0, 27, alias)!=0 || alias.getBuffer()!=longString.getBuffer()) {
1994         errln("UnicodeString.setTo(read-only-alias).remove(27, 99) did not preserve aliasing as expected.");
1995     }
1996     alias.setTo(FALSE, longString.getBuffer(), longString.length());
1997     alias.retainBetween(6, 30);
1998     if(longString.compare(6, 24, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+6) {
1999         errln("UnicodeString.setTo(read-only-alias).retainBetween(6, 30) did not preserve aliasing as expected.");
2000     }
2001 
2002     UChar abc[]={ 0x61, 0x62, 0x63, 0 };
2003     UBool hasRVO= wrapUChars(abc).getBuffer()==abc;
2004 
2005     UnicodeString temp;
2006     temp.fastCopyFrom(longString.tempSubString());
2007     if(temp!=longString || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
2008         errln("UnicodeString.tempSubString() failed");
2009     }
2010     temp.fastCopyFrom(longString.tempSubString(-3, 5));
2011     if(longString.compare(0, 5, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
2012         errln("UnicodeString.tempSubString(-3, 5) failed");
2013     }
2014     temp.fastCopyFrom(longString.tempSubString(17));
2015     if(longString.compare(17, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+17)) {
2016         errln("UnicodeString.tempSubString(17) failed");
2017     }
2018     temp.fastCopyFrom(longString.tempSubString(99));
2019     if(!temp.isEmpty()) {
2020         errln("UnicodeString.tempSubString(99) failed");
2021     }
2022     temp.fastCopyFrom(longString.tempSubStringBetween(6));
2023     if(longString.compare(6, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+6)) {
2024         errln("UnicodeString.tempSubStringBetween(6) failed");
2025     }
2026     temp.fastCopyFrom(longString.tempSubStringBetween(8, 18));
2027     if(longString.compare(8, 10, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+8)) {
2028         errln("UnicodeString.tempSubStringBetween(8, 18) failed");
2029     }
2030     UnicodeString bogusString;
2031     bogusString.setToBogus();
2032     temp.fastCopyFrom(bogusString.tempSubStringBetween(8, 18));
2033     if(!temp.isBogus()) {
2034         errln("UnicodeString.setToBogus().tempSubStringBetween(8, 18) failed");
2035     }
2036 }
2037 
2038 void
doTestAppendable(UnicodeString & dest,Appendable & app)2039 UnicodeStringTest::doTestAppendable(UnicodeString &dest, Appendable &app) {
2040     static const UChar cde[3]={ 0x63, 0x64, 0x65 };
2041     static const UChar fg[3]={ 0x66, 0x67, 0 };
2042     if(!app.reserveAppendCapacity(12)) {
2043         errln("Appendable.reserve(12) failed");
2044     }
2045     app.appendCodeUnit(0x61);
2046     app.appendCodePoint(0x62);
2047     app.appendCodePoint(0x50000);
2048     app.appendString(cde, 3);
2049     app.appendString(fg, -1);
2050     UChar scratch[3];
2051     int32_t capacity=-1;
2052     UChar *buffer=app.getAppendBuffer(3, 3, scratch, 3, &capacity);
2053     if(capacity<3) {
2054         errln("Appendable.getAppendBuffer(min=3) returned capacity=%d<3", (int)capacity);
2055         return;
2056     }
2057     static const UChar hij[3]={ 0x68, 0x69, 0x6a };
2058     u_memcpy(buffer, hij, 3);
2059     app.appendString(buffer, 3);
2060     if(dest!=UNICODE_STRING_SIMPLE("ab\\U00050000cdefghij").unescape()) {
2061         errln("Appendable.append(...) failed");
2062     }
2063     buffer=app.getAppendBuffer(0, 3, scratch, 3, &capacity);
2064     if(buffer!=NULL || capacity!=0) {
2065         errln("Appendable.getAppendBuffer(min=0) failed");
2066     }
2067     capacity=1;
2068     buffer=app.getAppendBuffer(3, 3, scratch, 2, &capacity);
2069     if(buffer!=NULL || capacity!=0) {
2070         errln("Appendable.getAppendBuffer(scratch<min) failed");
2071     }
2072 }
2073 
2074 class SimpleAppendable : public Appendable {
2075 public:
SimpleAppendable(UnicodeString & dest)2076     explicit SimpleAppendable(UnicodeString &dest) : str(dest) {}
appendCodeUnit(UChar c)2077     virtual UBool appendCodeUnit(UChar c) { str.append(c); return TRUE; }
reset()2078     SimpleAppendable &reset() { str.remove(); return *this; }
2079 private:
2080     UnicodeString &str;
2081 };
2082 
2083 void
TestAppendable()2084 UnicodeStringTest::TestAppendable() {
2085     UnicodeString dest;
2086     SimpleAppendable app(dest);
2087     doTestAppendable(dest, app);
2088 }
2089 
2090 void
TestUnicodeStringImplementsAppendable()2091 UnicodeStringTest::TestUnicodeStringImplementsAppendable() {
2092     UnicodeString dest;
2093     UnicodeStringAppendable app(dest);
2094     doTestAppendable(dest, app);
2095 }
2096 
2097 void
TestSizeofUnicodeString()2098 UnicodeStringTest::TestSizeofUnicodeString() {
2099     // See the comments in unistr.h near the declaration of UnicodeString's fields.
2100     // See the API comments for UNISTR_OBJECT_SIZE.
2101     size_t sizeofUniStr=sizeof(UnicodeString);
2102     size_t expected=UNISTR_OBJECT_SIZE;
2103     if(expected!=sizeofUniStr) {
2104         // Possible cause: UNISTR_OBJECT_SIZE may not be a multiple of sizeof(pointer),
2105         // of the compiler might add more internal padding than expected.
2106         errln("sizeof(UnicodeString)=%d, expected UNISTR_OBJECT_SIZE=%d",
2107               (int)sizeofUniStr, (int)expected);
2108     }
2109     if(sizeofUniStr<32) {
2110         errln("sizeof(UnicodeString)=%d < 32, probably too small", (int)sizeofUniStr);
2111     }
2112     // We assume that the entire UnicodeString object,
2113     // minus the vtable pointer and 2 bytes for flags and short length,
2114     // is available for internal storage of UChars.
2115     int32_t expectedStackBufferLength=((int32_t)UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR;
2116     UnicodeString s;
2117     const UChar *emptyBuffer=s.getBuffer();
2118     for(int32_t i=0; i<expectedStackBufferLength; ++i) {
2119         s.append((UChar)0x2e);
2120     }
2121     const UChar *fullBuffer=s.getBuffer();
2122     if(fullBuffer!=emptyBuffer) {
2123         errln("unexpected reallocation when filling with assumed stack buffer size of %d",
2124               expectedStackBufferLength);
2125     }
2126     const UChar *terminatedBuffer=s.getTerminatedBuffer();
2127     if(terminatedBuffer==emptyBuffer) {
2128         errln("unexpected keeping stack buffer when overfilling assumed stack buffer size of %d",
2129               expectedStackBufferLength);
2130     }
2131 }
2132 
2133 void
TestMoveSwap()2134 UnicodeStringTest::TestMoveSwap() {
2135     static const UChar abc[3] = { 0x61, 0x62, 0x63 };  // "abc"
2136     UnicodeString s1(FALSE, abc, UPRV_LENGTHOF(abc));  // read-only alias
2137     UnicodeString s2(100, 0x7a, 100);  // 100 * 'z' should be on the heap
2138     UnicodeString s3("defg", 4, US_INV);  // in stack buffer
2139     const UChar *p = s2.getBuffer();
2140     s1.swap(s2);
2141     if(s1.getBuffer() != p || s1.length() != 100 || s2.getBuffer() != abc || s2.length() != 3) {
2142         errln("UnicodeString.swap() did not swap");
2143     }
2144     swap(s2, s3);
2145     if(s2 != UNICODE_STRING_SIMPLE("defg") || s3.getBuffer() != abc || s3.length() != 3) {
2146         errln("swap(UnicodeString) did not swap back");
2147     }
2148     UnicodeString s4;
2149     s4.moveFrom(s1);
2150     if(s4.getBuffer() != p || s4.length() != 100 || !s1.isBogus()) {
2151         errln("UnicodeString.moveFrom(heap) did not move");
2152     }
2153     UnicodeString s5;
2154     s5.moveFrom(s2);
2155     if(s5 != UNICODE_STRING_SIMPLE("defg")) {
2156         errln("UnicodeString.moveFrom(stack) did not move");
2157     }
2158     UnicodeString s6;
2159     s6.moveFrom(s3);
2160     if(s6.getBuffer() != abc || s6.length() != 3) {
2161         errln("UnicodeString.moveFrom(alias) did not move");
2162     }
2163     infoln("TestMoveSwap() with rvalue references");
2164     s1 = static_cast<UnicodeString &&>(s6);
2165     if(s1.getBuffer() != abc || s1.length() != 3) {
2166         errln("UnicodeString move assignment operator did not move");
2167     }
2168     UnicodeString s7(static_cast<UnicodeString &&>(s4));
2169     if(s7.getBuffer() != p || s7.length() != 100 || !s4.isBogus()) {
2170         errln("UnicodeString move constructor did not move");
2171     }
2172 
2173     // Move self assignment leaves the object valid but in an undefined state.
2174     // Do it to make sure there is no crash,
2175     // but do not check for any particular resulting value.
2176     s1.moveFrom(s1);
2177     s2.moveFrom(s2);
2178     s3.moveFrom(s3);
2179     s4.moveFrom(s4);
2180     s5.moveFrom(s5);
2181     s6.moveFrom(s6);
2182     s7.moveFrom(s7);
2183     // Simple copy assignment must work.
2184     UnicodeString simple = UNICODE_STRING_SIMPLE("simple");
2185     s1 = s6 = s4 = s7 = simple;
2186     if(s1 != simple || s4 != simple || s6 != simple || s7 != simple) {
2187         errln("UnicodeString copy after self-move did not work");
2188     }
2189 }
2190 
2191 void
TestUInt16Pointers()2192 UnicodeStringTest::TestUInt16Pointers() {
2193     static const uint16_t carr[] = { 0x61, 0x62, 0x63, 0 };
2194     uint16_t arr[4];
2195 
2196     UnicodeString expected(u"abc");
2197     assertEquals("abc from pointer", expected, UnicodeString(carr));
2198     assertEquals("abc from pointer+length", expected, UnicodeString(carr, 3));
2199     assertEquals("abc from read-only-alias pointer", expected, UnicodeString(TRUE, carr, 3));
2200 
2201     UnicodeString alias(arr, 0, 4);
2202     alias.append(u'a').append(u'b').append(u'c');
2203     assertEquals("abc from writable alias", expected, alias);
2204     assertEquals("buffer=abc from writable alias", expected, UnicodeString(arr, 3));
2205 
2206     UErrorCode errorCode = U_ZERO_ERROR;
2207     int32_t length = UnicodeString(u"def").extract(arr, 4, errorCode);
2208     TEST_ASSERT_STATUS(errorCode);
2209     assertEquals("def from extract()", UnicodeString(u"def"), UnicodeString(arr, length));
2210 }
2211 
2212 void
TestWCharPointers()2213 UnicodeStringTest::TestWCharPointers() {
2214 #if U_SIZEOF_WCHAR_T==2
2215     static const wchar_t carr[] = { 0x61, 0x62, 0x63, 0 };
2216     wchar_t arr[4];
2217 
2218     UnicodeString expected(u"abc");
2219     assertEquals("abc from pointer", expected, UnicodeString(carr));
2220     assertEquals("abc from pointer+length", expected, UnicodeString(carr, 3));
2221     assertEquals("abc from read-only-alias pointer", expected, UnicodeString(TRUE, carr, 3));
2222 
2223     UnicodeString alias(arr, 0, 4);
2224     alias.append(u'a').append(u'b').append(u'c');
2225     assertEquals("abc from writable alias", expected, alias);
2226     assertEquals("buffer=abc from writable alias", expected, UnicodeString(arr, 3));
2227 
2228     UErrorCode errorCode = U_ZERO_ERROR;
2229     int32_t length = UnicodeString(u"def").extract(arr, 4, errorCode);
2230     TEST_ASSERT_STATUS(errorCode);
2231     assertEquals("def from extract()", UnicodeString(u"def"), UnicodeString(arr, length));
2232 #endif
2233 }
2234 
2235 void
TestNullPointers()2236 UnicodeStringTest::TestNullPointers() {
2237     assertTrue("empty from nullptr", UnicodeString(nullptr).isEmpty());
2238     assertTrue("empty from nullptr+length", UnicodeString(nullptr, 2).isEmpty());
2239     assertTrue("empty from read-only-alias nullptr", UnicodeString(TRUE, nullptr, 3).isEmpty());
2240 
2241     UnicodeString alias(nullptr, 4, 4);  // empty, no alias
2242     assertTrue("empty from writable alias", alias.isEmpty());
2243     alias.append(u'a').append(u'b').append(u'c');
2244     UnicodeString expected(u"abc");
2245     assertEquals("abc from writable alias", expected, alias);
2246 
2247     UErrorCode errorCode = U_ZERO_ERROR;
2248     UnicodeString(u"def").extract(nullptr, 0, errorCode);
2249     assertEquals("buffer overflow extracting to nullptr", U_BUFFER_OVERFLOW_ERROR, errorCode);
2250 }
2251