1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8
9 #include <utility>
10
11 #include "ustrtest.h"
12 #include "unicode/appendable.h"
13 #include "unicode/std_string.h"
14 #include "unicode/unistr.h"
15 #include "unicode/uchar.h"
16 #include "unicode/ustring.h"
17 #include "unicode/locid.h"
18 #include "unicode/strenum.h"
19 #include "unicode/ucnv.h"
20 #include "unicode/uenum.h"
21 #include "unicode/utf16.h"
22 #include "cmemory.h"
23 #include "charstr.h"
24
25 #if 0
26 #include "unicode/ustream.h"
27
28 #include <iostream>
29 using namespace std;
30
31 #endif
32
~UnicodeStringTest()33 UnicodeStringTest::~UnicodeStringTest() {}
34
35 extern IntlTest *createStringCaseTest();
36
runIndexedTest(int32_t index,UBool exec,const char * & name,char * par)37 void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char *par)
38 {
39 if (exec) logln("TestSuite UnicodeStringTest: ");
40 TESTCASE_AUTO_BEGIN;
41 TESTCASE_AUTO_CREATE_CLASS(StringCaseTest);
42 TESTCASE_AUTO(TestBasicManipulation);
43 TESTCASE_AUTO(TestCompare);
44 TESTCASE_AUTO(TestExtract);
45 TESTCASE_AUTO(TestRemoveReplace);
46 TESTCASE_AUTO(TestSearching);
47 TESTCASE_AUTO(TestSpacePadding);
48 TESTCASE_AUTO(TestPrefixAndSuffix);
49 TESTCASE_AUTO(TestFindAndReplace);
50 TESTCASE_AUTO(TestBogus);
51 TESTCASE_AUTO(TestReverse);
52 TESTCASE_AUTO(TestMiscellaneous);
53 TESTCASE_AUTO(TestStackAllocation);
54 TESTCASE_AUTO(TestUnescape);
55 TESTCASE_AUTO(TestCountChar32);
56 TESTCASE_AUTO(TestStringEnumeration);
57 TESTCASE_AUTO(TestNameSpace);
58 TESTCASE_AUTO(TestUTF32);
59 TESTCASE_AUTO(TestUTF8);
60 TESTCASE_AUTO(TestReadOnlyAlias);
61 TESTCASE_AUTO(TestAppendable);
62 TESTCASE_AUTO(TestUnicodeStringImplementsAppendable);
63 TESTCASE_AUTO(TestSizeofUnicodeString);
64 TESTCASE_AUTO(TestStartsWithAndEndsWithNulTerminated);
65 TESTCASE_AUTO(TestMoveSwap);
66 TESTCASE_AUTO(TestUInt16Pointers);
67 TESTCASE_AUTO(TestWCharPointers);
68 TESTCASE_AUTO(TestNullPointers);
69 TESTCASE_AUTO(TestUnicodeStringInsertAppendToSelf);
70 TESTCASE_AUTO(TestLargeAppend);
71 TESTCASE_AUTO_END;
72 }
73
74 void
TestBasicManipulation()75 UnicodeStringTest::TestBasicManipulation()
76 {
77 UnicodeString test1("Now is the time for all men to come swiftly to the aid of the party.\n");
78 UnicodeString expectedValue;
79 UnicodeString *c;
80
81 c=test1.clone();
82 test1.insert(24, "good ");
83 expectedValue = "Now is the time for all good men to come swiftly to the aid of the party.\n";
84 if (test1 != expectedValue)
85 errln("insert() failed: expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
86
87 c->insert(24, "good ");
88 if(*c != expectedValue) {
89 errln("clone()->insert() failed: expected \"" + expectedValue + "\"\n,got \"" + *c + "\"");
90 }
91 delete c;
92
93 test1.remove(41, 8);
94 expectedValue = "Now is the time for all good men to come to the aid of the party.\n";
95 if (test1 != expectedValue)
96 errln("remove() failed: expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
97
98 test1.replace(58, 6, "ir country");
99 expectedValue = "Now is the time for all good men to come to the aid of their country.\n";
100 if (test1 != expectedValue)
101 errln("replace() failed: expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
102
103 UChar temp[80];
104 test1.extract(0, 15, temp);
105
106 UnicodeString test2(temp, 15);
107
108 expectedValue = "Now is the time";
109 if (test2 != expectedValue)
110 errln("extract() failed: expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
111
112 test2 += " for me to go!\n";
113 expectedValue = "Now is the time for me to go!\n";
114 if (test2 != expectedValue)
115 errln("operator+=() failed: expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
116
117 if (test1.length() != 70)
118 errln(UnicodeString("length() failed: expected 70, got ") + test1.length());
119 if (test2.length() != 30)
120 errln(UnicodeString("length() failed: expected 30, got ") + test2.length());
121
122 UnicodeString test3;
123 test3.append((UChar32)0x20402);
124 if(test3 != CharsToUnicodeString("\\uD841\\uDC02")){
125 errln((UnicodeString)"append failed for UChar32, expected \"\\\\ud841\\\\udc02\", got " + prettify(test3));
126 }
127 if(test3.length() != 2){
128 errln(UnicodeString("append or length failed for UChar32, expected 2, got ") + test3.length());
129 }
130 test3.append((UChar32)0x0074);
131 if(test3 != CharsToUnicodeString("\\uD841\\uDC02t")){
132 errln((UnicodeString)"append failed for UChar32, expected \"\\\\uD841\\\\uDC02t\", got " + prettify(test3));
133 }
134 if(test3.length() != 3){
135 errln((UnicodeString)"append or length failed for UChar32, expected 2, got " + test3.length());
136 }
137
138 // test some UChar32 overloads
139 if( test3.setTo((UChar32)0x10330).length() != 2 ||
140 test3.insert(0, (UChar32)0x20100).length() != 4 ||
141 test3.replace(2, 2, (UChar32)0xe0061).length() != 4 ||
142 (test3 = (UChar32)0x14001).length() != 2
143 ) {
144 errln((UnicodeString)"simple UChar32 overloads for replace, insert, setTo or = failed");
145 }
146
147 {
148 // test moveIndex32()
149 UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
150
151 if(
152 s.moveIndex32(2, -1)!=0 ||
153 s.moveIndex32(2, 1)!=4 ||
154 s.moveIndex32(2, 2)!=5 ||
155 s.moveIndex32(5, -2)!=2 ||
156 s.moveIndex32(0, -1)!=0 ||
157 s.moveIndex32(6, 1)!=6
158 ) {
159 errln("UnicodeString::moveIndex32() failed");
160 }
161
162 if(s.getChar32Start(1)!=0 || s.getChar32Start(2)!=2) {
163 errln("UnicodeString::getChar32Start() failed");
164 }
165
166 if(s.getChar32Limit(1)!=2 || s.getChar32Limit(2)!=2) {
167 errln("UnicodeString::getChar32Limit() failed");
168 }
169 }
170
171 {
172 // test new 2.2 constructors and setTo function that parallel Java's substring function.
173 UnicodeString src("Hello folks how are you?");
174 UnicodeString target1("how are you?");
175 if (target1 != UnicodeString(src, 12)) {
176 errln("UnicodeString(const UnicodeString&, int32_t) failed");
177 }
178 UnicodeString target2("folks");
179 if (target2 != UnicodeString(src, 6, 5)) {
180 errln("UnicodeString(const UnicodeString&, int32_t, int32_t) failed");
181 }
182 if (target1 != target2.setTo(src, 12)) {
183 errln("UnicodeString::setTo(const UnicodeString&, int32_t) failed");
184 }
185 }
186
187 {
188 // op+ is new in ICU 2.8
189 UnicodeString s=UnicodeString("abc", "")+UnicodeString("def", "")+UnicodeString("ghi", "");
190 if(s!=UnicodeString("abcdefghi", "")) {
191 errln("operator+(UniStr, UniStr) failed");
192 }
193 }
194
195 {
196 // tests for Jitterbug 2360
197 // verify that APIs with source pointer + length accept length == -1
198 // mostly test only where modified, only few functions did not already do this
199 if(UnicodeString("abc", -1, "")!=UnicodeString("abc", "")) {
200 errln("UnicodeString(codepageData, dataLength, codepage) does not work with dataLength==-1");
201 }
202
203 UChar buffer[10]={ 0x61, 0x62, 0x20ac, 0xd900, 0xdc05, 0, 0x62, 0xffff, 0xdbff, 0xdfff };
204 UnicodeString s, t(buffer, -1, UPRV_LENGTHOF(buffer));
205
206 if(s.setTo(buffer, -1, UPRV_LENGTHOF(buffer)).length()!=u_strlen(buffer)) {
207 errln("UnicodeString.setTo(buffer, length, capacity) does not work with length==-1");
208 }
209 if(t.length()!=u_strlen(buffer)) {
210 errln("UnicodeString(buffer, length, capacity) does not work with length==-1");
211 }
212
213 if(0!=s.caseCompare(buffer, -1, U_FOLD_CASE_DEFAULT)) {
214 errln("UnicodeString.caseCompare(const UChar *, length, options) does not work with length==-1");
215 }
216 if(0!=s.caseCompare(0, s.length(), buffer, U_FOLD_CASE_DEFAULT)) {
217 errln("UnicodeString.caseCompare(start, _length, const UChar *, options) does not work");
218 }
219
220 buffer[u_strlen(buffer)]=0xe4;
221 UnicodeString u(buffer, -1, UPRV_LENGTHOF(buffer));
222 if(s.setTo(buffer, -1, UPRV_LENGTHOF(buffer)).length()!=UPRV_LENGTHOF(buffer)) {
223 errln("UnicodeString.setTo(buffer without NUL, length, capacity) does not work with length==-1");
224 }
225 if(u.length()!=UPRV_LENGTHOF(buffer)) {
226 errln("UnicodeString(buffer without NUL, length, capacity) does not work with length==-1");
227 }
228
229 static const char cs[]={ 0x61, (char)0xe4, (char)0x85, 0 };
230 UConverter *cnv;
231 UErrorCode errorCode=U_ZERO_ERROR;
232
233 cnv=ucnv_open("ISO-8859-1", &errorCode);
234 UnicodeString v(cs, -1, cnv, errorCode);
235 ucnv_close(cnv);
236 if(v!=CharsToUnicodeString("a\\xe4\\x85")) {
237 errln("UnicodeString(const char *, length, cnv, errorCode) does not work with length==-1");
238 }
239 }
240
241 #if U_CHARSET_IS_UTF8
242 {
243 // Test the hardcoded-UTF-8 UnicodeString optimizations.
244 static const uint8_t utf8[]={ 0x61, 0xC3, 0xA4, 0xC3, 0x9F, 0xE4, 0xB8, 0x80, 0 };
245 static const UChar utf16[]={ 0x61, 0xE4, 0xDF, 0x4E00 };
246 UnicodeString from8a = UnicodeString((const char *)utf8);
247 UnicodeString from8b = UnicodeString((const char *)utf8, (int32_t)sizeof(utf8)-1);
248 UnicodeString from16(false, utf16, UPRV_LENGTHOF(utf16));
249 if(from8a != from16 || from8b != from16) {
250 errln("UnicodeString(const char * U_CHARSET_IS_UTF8) failed");
251 }
252 char buffer[16];
253 int32_t length8=from16.extract(0, 0x7fffffff, buffer, (uint32_t)sizeof(buffer));
254 if(length8!=((int32_t)sizeof(utf8)-1) || 0!=uprv_memcmp(buffer, utf8, sizeof(utf8))) {
255 errln("UnicodeString::extract(char * U_CHARSET_IS_UTF8) failed");
256 }
257 length8=from16.extract(1, 2, buffer, (uint32_t)sizeof(buffer));
258 if(length8!=4 || buffer[length8]!=0 || 0!=uprv_memcmp(buffer, utf8+1, length8)) {
259 errln("UnicodeString::extract(substring to char * U_CHARSET_IS_UTF8) failed");
260 }
261 }
262 #endif
263 }
264
265 void
TestCompare()266 UnicodeStringTest::TestCompare()
267 {
268 UnicodeString test1("this is a test");
269 UnicodeString test2("this is a test");
270 UnicodeString test3("this is a test of the emergency broadcast system");
271 UnicodeString test4("never say, \"this is a test\"!!");
272
273 UnicodeString test5((UChar)0x5000);
274 UnicodeString test6((UChar)0x5100);
275
276 UChar uniChars[] = { 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73,
277 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0 };
278 char chars[] = "this is a test";
279
280 // test operator== and operator!=
281 if (test1 != test2 || test1 == test3 || test1 == test4)
282 errln("operator== or operator!= failed");
283
284 // test operator> and operator<
285 if (test1 > test2 || test1 < test2 || !(test1 < test3) || !(test1 > test4) ||
286 !(test5 < test6)
287 ) {
288 errln("operator> or operator< failed");
289 }
290
291 // test operator>= and operator<=
292 if (!(test1 >= test2) || !(test1 <= test2) || !(test1 <= test3) || !(test1 >= test4))
293 errln("operator>= or operator<= failed");
294
295 // test compare(UnicodeString)
296 if (test1.compare(test2) != 0 || test1.compare(test3) >= 0 || test1.compare(test4) <= 0)
297 errln("compare(UnicodeString) failed");
298
299 //test compare(offset, length, UnicodeString)
300 if(test1.compare(0, 14, test2) != 0 ||
301 test3.compare(0, 14, test2) != 0 ||
302 test4.compare(12, 14, test2) != 0 ||
303 test3.compare(0, 18, test1) <=0 )
304 errln("compare(offset, length, UnicodeString) fails");
305
306 // test compare(UChar*)
307 if (test2.compare(uniChars) != 0 || test3.compare(uniChars) <= 0 || test4.compare(uniChars) >= 0)
308 errln("compare(UChar*) failed");
309
310 // test compare(char*)
311 if (test2.compare(chars) != 0 || test3.compare(chars) <= 0 || test4.compare(chars) >= 0)
312 errln("compare(char*) failed");
313
314 // test compare(UChar*, length)
315 if (test1.compare(uniChars, 4) <= 0 || test1.compare(uniChars, 4) <= 0)
316 errln("compare(UChar*, length) failed");
317
318 // test compare(thisOffset, thisLength, that, thatOffset, thatLength)
319 if (test1.compare(0, 14, test2, 0, 14) != 0
320 || test1.compare(0, 14, test3, 0, 14) != 0
321 || test1.compare(0, 14, test4, 12, 14) != 0)
322 errln("1. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
323
324 if (test1.compare(10, 4, test2, 0, 4) >= 0
325 || test1.compare(10, 4, test3, 22, 9) <= 0
326 || test1.compare(10, 4, test4, 22, 4) != 0)
327 errln("2. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
328
329 // test compareBetween
330 if (test1.compareBetween(0, 14, test2, 0, 14) != 0 || test1.compareBetween(0, 14, test3, 0, 14) != 0
331 || test1.compareBetween(0, 14, test4, 12, 26) != 0)
332 errln("compareBetween failed");
333
334 if (test1.compareBetween(10, 14, test2, 0, 4) >= 0 || test1.compareBetween(10, 14, test3, 22, 31) <= 0
335 || test1.compareBetween(10, 14, test4, 22, 26) != 0)
336 errln("compareBetween failed");
337
338 // test compare() etc. with strings that share a buffer but are not equal
339 test2=test1; // share the buffer, length() too large for the stackBuffer
340 test2.truncate(1); // change only the length, not the buffer
341 if( test1==test2 || test1<=test2 ||
342 test1.compare(test2)<=0 ||
343 test1.compareCodePointOrder(test2)<=0 ||
344 test1.compareCodePointOrder(0, INT32_MAX, test2)<=0 ||
345 test1.compareCodePointOrder(0, INT32_MAX, test2, 0, INT32_MAX)<=0 ||
346 test1.compareCodePointOrderBetween(0, INT32_MAX, test2, 0, INT32_MAX)<=0 ||
347 test1.caseCompare(test2, U_FOLD_CASE_DEFAULT)<=0
348 ) {
349 errln("UnicodeStrings that share a buffer but have different lengths compare as equal");
350 }
351
352 /* test compareCodePointOrder() */
353 {
354 /* these strings are in ascending order */
355 static const UChar strings[][4]={
356 { 0x61, 0 }, /* U+0061 */
357 { 0x20ac, 0xd801, 0 }, /* U+20ac U+d801 */
358 { 0x20ac, 0xd800, 0xdc00, 0 }, /* U+20ac U+10000 */
359 { 0xd800, 0 }, /* U+d800 */
360 { 0xd800, 0xff61, 0 }, /* U+d800 U+ff61 */
361 { 0xdfff, 0 }, /* U+dfff */
362 { 0xff61, 0xdfff, 0 }, /* U+ff61 U+dfff */
363 { 0xff61, 0xd800, 0xdc02, 0 }, /* U+ff61 U+10002 */
364 { 0xd800, 0xdc02, 0 }, /* U+10002 */
365 { 0xd84d, 0xdc56, 0 } /* U+23456 */
366 };
367 UnicodeString u[20]; // must be at least as long as strings[]
368 int32_t i;
369
370 for(i=0; i<UPRV_LENGTHOF(strings); ++i) {
371 u[i]=UnicodeString(true, strings[i], -1);
372 }
373
374 for(i=0; i<UPRV_LENGTHOF(strings)-1; ++i) {
375 if(u[i].compareCodePointOrder(u[i+1])>=0 || u[i].compareCodePointOrder(0, INT32_MAX, u[i+1].getBuffer())>=0) {
376 errln("error: UnicodeString::compareCodePointOrder() fails for string %d and the following one\n", i);
377 }
378 }
379 }
380
381 /* test caseCompare() */
382 {
383 static const UChar
384 _mixed[]= { 0x61, 0x42, 0x131, 0x3a3, 0xdf, 0x130, 0x49, 0xfb03, 0xd93f, 0xdfff, 0 },
385 _otherDefault[]= { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x69, 0x307, 0x69, 0x46, 0x66, 0x49, 0xd93f, 0xdfff, 0 },
386 _otherExcludeSpecialI[]={ 0x41, 0x62, 0x131, 0x3c3, 0x53, 0x73, 0x69, 0x131, 0x66, 0x46, 0x69, 0xd93f, 0xdfff, 0 },
387 _different[]= { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x130, 0x49, 0x46, 0x66, 0x49, 0xd93f, 0xdffd, 0 };
388
389 UnicodeString
390 mixed(true, _mixed, -1),
391 otherDefault(true, _otherDefault, -1),
392 otherExcludeSpecialI(true, _otherExcludeSpecialI, -1),
393 different(true, _different, -1);
394
395 int8_t result;
396
397 /* test caseCompare() */
398 result=mixed.caseCompare(otherDefault, U_FOLD_CASE_DEFAULT);
399 if(result!=0 || 0!=mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_DEFAULT)) {
400 errln("error: mixed.caseCompare(other, default)=%ld instead of 0\n", result);
401 }
402 result=mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
403 if(result!=0) {
404 errln("error: mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=%ld instead of 0\n", result);
405 }
406 result=mixed.caseCompare(otherDefault, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
407 if(result==0 || 0==mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_EXCLUDE_SPECIAL_I)) {
408 errln("error: mixed.caseCompare(other, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=0 instead of !=0\n");
409 }
410
411 /* test caseCompare() */
412 result=mixed.caseCompare(different, U_FOLD_CASE_DEFAULT);
413 if(result<=0) {
414 errln("error: mixed.caseCompare(different, default)=%ld instead of positive\n", result);
415 }
416
417 /* test caseCompare() - include the folded sharp s (U+00df) with different lengths */
418 result=mixed.caseCompare(1, 4, different, 1, 5, U_FOLD_CASE_DEFAULT);
419 if(result!=0 || 0!=mixed.caseCompareBetween(1, 5, different, 1, 6, U_FOLD_CASE_DEFAULT)) {
420 errln("error: mixed.caseCompare(mixed, 1, 4, different, 1, 5, default)=%ld instead of 0\n", result);
421 }
422
423 /* test caseCompare() - stop in the middle of the sharp s (U+00df) */
424 result=mixed.caseCompare(1, 4, different, 1, 4, U_FOLD_CASE_DEFAULT);
425 if(result<=0) {
426 errln("error: mixed.caseCompare(1, 4, different, 1, 4, default)=%ld instead of positive\n", result);
427 }
428 }
429
430 // test that srcLength=-1 is handled in functions that
431 // take input const UChar */int32_t srcLength (j785)
432 {
433 static const UChar u[]={ 0x61, 0x308, 0x62, 0 };
434 UnicodeString s=UNICODE_STRING("a\\u0308b", 8).unescape();
435
436 if(s.compare(u, -1)!=0 || s.compare(0, 999, u, 0, -1)!=0) {
437 errln("error UnicodeString::compare(..., const UChar *, srcLength=-1) does not work");
438 }
439
440 if(s.compareCodePointOrder(u, -1)!=0 || s.compareCodePointOrder(0, 999, u, 0, -1)!=0) {
441 errln("error UnicodeString::compareCodePointOrder(..., const UChar *, srcLength=-1, ...) does not work");
442 }
443
444 if(s.caseCompare(u, -1, U_FOLD_CASE_DEFAULT)!=0 || s.caseCompare(0, 999, u, 0, -1, U_FOLD_CASE_DEFAULT)!=0) {
445 errln("error UnicodeString::caseCompare(..., const UChar *, srcLength=-1, ...) does not work");
446 }
447
448 if(s.indexOf(u, 1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0)!=1) {
449 errln("error UnicodeString::indexOf(const UChar *, srcLength=-1, ...) does not work");
450 }
451
452 if(s.lastIndexOf(u, 1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0)!=1) {
453 errln("error UnicodeString::lastIndexOf(const UChar *, srcLength=-1, ...) does not work");
454 }
455
456 UnicodeString s2, s3;
457 s2.replace(0, 0, u+1, -1);
458 s3.replace(0, 0, u, 1, -1);
459 if(s.compare(1, 999, s2)!=0 || s2!=s3) {
460 errln("error UnicodeString::replace(..., const UChar *, srcLength=-1, ...) does not work");
461 }
462 }
463 }
464
465 void
TestExtract()466 UnicodeStringTest::TestExtract()
467 {
468 UnicodeString test1("Now is the time for all good men to come to the aid of their country.", "");
469 UnicodeString test2;
470 UChar test3[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
471 char test4[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
472 UnicodeString test5;
473 char test6[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
474
475 test1.extract(11, 12, test2);
476 test1.extract(11, 12, test3);
477 if (test1.extract(11, 12, test4) != 12 || test4[12] != 0) {
478 errln("UnicodeString.extract(char *) failed to return the correct size of destination buffer.");
479 }
480
481 // test proper pinning in extractBetween()
482 test1.extractBetween(-3, 7, test5);
483 if(test5!=UNICODE_STRING("Now is ", 7)) {
484 errln("UnicodeString.extractBetween(-3, 7) did not pin properly.");
485 }
486
487 test1.extractBetween(11, 23, test5);
488 if (test1.extract(60, 71, test6) != 9) {
489 errln("UnicodeString.extract() failed to return the correct size of destination buffer for end of buffer.");
490 }
491 if (test1.extract(11, 12, test6) != 12) {
492 errln("UnicodeString.extract() failed to return the correct size of destination buffer.");
493 }
494
495 // convert test4 back to Unicode for comparison
496 UnicodeString test4b(test4, 12);
497
498 if (test1.extract(11, 12, (char *)NULL) != 12) {
499 errln("UnicodeString.extract(NULL) failed to return the correct size of destination buffer.");
500 }
501 if (test1.extract(11, -1, test6) != 0) {
502 errln("UnicodeString.extract(-1) failed to stop reading the string.");
503 }
504
505 for (int32_t i = 0; i < 12; i++) {
506 if (test1.charAt((int32_t)(11 + i)) != test2.charAt(i)) {
507 errln(UnicodeString("extracting into a UnicodeString failed at position ") + i);
508 break;
509 }
510 if (test1.charAt((int32_t)(11 + i)) != test3[i]) {
511 errln(UnicodeString("extracting into an array of UChar failed at position ") + i);
512 break;
513 }
514 if (((char)test1.charAt((int32_t)(11 + i))) != test4b.charAt(i)) {
515 errln(UnicodeString("extracting into an array of char failed at position ") + i);
516 break;
517 }
518 if (test1.charAt((int32_t)(11 + i)) != test5.charAt(i)) {
519 errln(UnicodeString("extracting with extractBetween failed at position ") + i);
520 break;
521 }
522 }
523
524 // test preflighting and overflows with invariant conversion
525 if (test1.extract(0, 10, (char *)NULL, "") != 10) {
526 errln("UnicodeString.extract(0, 10, (char *)NULL, \"\") != 10");
527 }
528
529 test4[2] = (char)0xff;
530 if (test1.extract(0, 10, test4, 2, "") != 10) {
531 errln("UnicodeString.extract(0, 10, test4, 2, \"\") != 10");
532 }
533 if (test4[2] != (char)0xff) {
534 errln("UnicodeString.extract(0, 10, test4, 2, \"\") overwrote test4[2]");
535 }
536
537 {
538 // test new, NUL-terminating extract() function
539 UnicodeString s("terminate", "");
540 UChar dest[20]={
541 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5,
542 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5
543 };
544 UErrorCode errorCode;
545 int32_t length;
546
547 errorCode=U_ZERO_ERROR;
548 length=s.extract((UChar *)NULL, 0, errorCode);
549 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) {
550 errln("UnicodeString.extract(NULL, 0)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)", length, s.length(), u_errorName(errorCode));
551 }
552
553 errorCode=U_ZERO_ERROR;
554 length=s.extract(dest, s.length()-1, errorCode);
555 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) {
556 errln("UnicodeString.extract(dest too short)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)",
557 length, u_errorName(errorCode), s.length());
558 }
559
560 errorCode=U_ZERO_ERROR;
561 length=s.extract(dest, s.length(), errorCode);
562 if(errorCode!=U_STRING_NOT_TERMINATED_WARNING || length!=s.length()) {
563 errln("UnicodeString.extract(dest just right without NUL)==%d (%s) expected %d (U_STRING_NOT_TERMINATED_WARNING)",
564 length, u_errorName(errorCode), s.length());
565 }
566 if(dest[length-1]!=s[length-1] || dest[length]!=0xa5) {
567 errln("UnicodeString.extract(dest just right without NUL) did not extract the string correctly");
568 }
569
570 errorCode=U_ZERO_ERROR;
571 length=s.extract(dest, s.length()+1, errorCode);
572 if(errorCode!=U_ZERO_ERROR || length!=s.length()) {
573 errln("UnicodeString.extract(dest large enough)==%d (%s) expected %d (U_ZERO_ERROR)",
574 length, u_errorName(errorCode), s.length());
575 }
576 if(dest[length-1]!=s[length-1] || dest[length]!=0 || dest[length+1]!=0xa5) {
577 errln("UnicodeString.extract(dest large enough) did not extract the string correctly");
578 }
579 }
580
581 {
582 // test new UConverter extract() and constructor
583 UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
584 char buffer[32];
585 static const char expect[]={
586 (char)0xf0, (char)0xaf, (char)0xa6, (char)0x99,
587 (char)0xf0, (char)0x9d, (char)0x85, (char)0x9f,
588 (char)0xc3, (char)0x84,
589 (char)0xe1, (char)0xbb, (char)0x90
590 };
591 UErrorCode errorCode=U_ZERO_ERROR;
592 UConverter *cnv=ucnv_open("UTF-8", &errorCode);
593 int32_t length;
594
595 if(U_SUCCESS(errorCode)) {
596 // test preflighting
597 if( (length=s.extract(NULL, 0, cnv, errorCode))!=13 ||
598 errorCode!=U_BUFFER_OVERFLOW_ERROR
599 ) {
600 errln("UnicodeString::extract(NULL, UConverter) preflighting failed (length=%ld, %s)",
601 length, u_errorName(errorCode));
602 }
603 errorCode=U_ZERO_ERROR;
604 if( (length=s.extract(buffer, 2, cnv, errorCode))!=13 ||
605 errorCode!=U_BUFFER_OVERFLOW_ERROR
606 ) {
607 errln("UnicodeString::extract(too small, UConverter) preflighting failed (length=%ld, %s)",
608 length, u_errorName(errorCode));
609 }
610
611 // try error cases
612 errorCode=U_ZERO_ERROR;
613 if( s.extract(NULL, 2, cnv, errorCode)==13 || U_SUCCESS(errorCode)) {
614 errln("UnicodeString::extract(UConverter) succeeded with an illegal destination");
615 }
616 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
617 if( s.extract(NULL, 0, cnv, errorCode)==13 || U_SUCCESS(errorCode)) {
618 errln("UnicodeString::extract(UConverter) succeeded with a previous error code");
619 }
620 errorCode=U_ZERO_ERROR;
621
622 // extract for real
623 if( (length=s.extract(buffer, sizeof(buffer), cnv, errorCode))!=13 ||
624 uprv_memcmp(buffer, expect, 13)!=0 ||
625 buffer[13]!=0 ||
626 U_FAILURE(errorCode)
627 ) {
628 errln("UnicodeString::extract(UConverter) conversion failed (length=%ld, %s)",
629 length, u_errorName(errorCode));
630 }
631 // Test again with just the converter name.
632 if( (length=s.extract(0, s.length(), buffer, sizeof(buffer), "UTF-8"))!=13 ||
633 uprv_memcmp(buffer, expect, 13)!=0 ||
634 buffer[13]!=0 ||
635 U_FAILURE(errorCode)
636 ) {
637 errln("UnicodeString::extract(\"UTF-8\") conversion failed (length=%ld, %s)",
638 length, u_errorName(errorCode));
639 }
640
641 // try the constructor
642 UnicodeString t(expect, sizeof(expect), cnv, errorCode);
643 if(U_FAILURE(errorCode) || s!=t) {
644 errln("UnicodeString(UConverter) conversion failed (%s)",
645 u_errorName(errorCode));
646 }
647
648 ucnv_close(cnv);
649 }
650 }
651 }
652
653 void
TestRemoveReplace()654 UnicodeStringTest::TestRemoveReplace()
655 {
656 UnicodeString test1("The rain in Spain stays mainly on the plain");
657 UnicodeString test2("eat SPAMburgers!");
658 UChar test3[] = { 0x53, 0x50, 0x41, 0x4d, 0x4d, 0 };
659 char test4[] = "SPAM";
660 UnicodeString& test5 = test1;
661
662 test1.replace(4, 4, test2, 4, 4);
663 test1.replace(12, 5, test3, 4);
664 test3[4] = 0;
665 test1.replace(17, 4, test3);
666 test1.replace(23, 4, test4);
667 test1.replaceBetween(37, 42, test2, 4, 8);
668
669 if (test1 != "The SPAM in SPAM SPAMs SPAMly on the SPAM")
670 errln("One of the replace methods failed:\n"
671 " expected \"The SPAM in SPAM SPAMs SPAMly on the SPAM\",\n"
672 " got \"" + test1 + "\"");
673
674 test1.remove(21, 1);
675 test1.removeBetween(26, 28);
676
677 if (test1 != "The SPAM in SPAM SPAM SPAM on the SPAM")
678 errln("One of the remove methods failed:\n"
679 " expected \"The SPAM in SPAM SPAM SPAM on the SPAM\",\n"
680 " got \"" + test1 + "\"");
681
682 for (int32_t i = 0; i < test1.length(); i++) {
683 if (test5[i] != 0x53 && test5[i] != 0x50 && test5[i] != 0x41 && test5[i] != 0x4d && test5[i] != 0x20) {
684 test1.setCharAt(i, 0x78);
685 }
686 }
687
688 if (test1 != "xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM")
689 errln("One of the remove methods failed:\n"
690 " expected \"xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM\",\n"
691 " got \"" + test1 + "\"");
692
693 test1.remove();
694 if (test1.length() != 0)
695 errln("Remove() failed: expected empty string, got \"" + test1 + "\"");
696 }
697
698 void
TestSearching()699 UnicodeStringTest::TestSearching()
700 {
701 UnicodeString test1("test test ttest tetest testesteststt");
702 UnicodeString test2("test");
703 UChar testChar = 0x74;
704
705 UChar32 testChar32 = 0x20402;
706 UChar testData[]={
707 // 0 1 2 3 4 5 6 7
708 0xd841, 0xdc02, 0x0071, 0xdc02, 0xd841, 0x0071, 0xd841, 0xdc02,
709
710 // 8 9 10 11 12 13 14 15
711 0x0071, 0x0072, 0xd841, 0xdc02, 0x0071, 0xd841, 0xdc02, 0x0071,
712
713 // 16 17 18 19
714 0xdc02, 0xd841, 0x0073, 0x0000
715 };
716 UnicodeString test3(testData);
717 UnicodeString test4(testChar32);
718
719 uint16_t occurrences = 0;
720 int32_t startPos = 0;
721 for ( ;
722 startPos != -1 && startPos < test1.length();
723 (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
724 ;
725 if (occurrences != 6)
726 errln(UnicodeString("indexOf failed: expected to find 6 occurrences, found ") + occurrences);
727
728 for ( occurrences = 0, startPos = 10;
729 startPos != -1 && startPos < test1.length();
730 (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
731 ;
732 if (occurrences != 4)
733 errln(UnicodeString("indexOf with starting offset failed: "
734 "expected to find 4 occurrences, found ") + occurrences);
735
736 int32_t endPos = 28;
737 for ( occurrences = 0, startPos = 5;
738 startPos != -1 && startPos < test1.length();
739 (startPos = test1.indexOf(test2, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
740 ;
741 if (occurrences != 4)
742 errln(UnicodeString("indexOf with starting and ending offsets failed: "
743 "expected to find 4 occurrences, found ") + occurrences);
744
745 //using UChar32 string
746 for ( startPos=0, occurrences=0;
747 startPos != -1 && startPos < test3.length();
748 (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
749 ;
750 if (occurrences != 4)
751 errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences);
752
753 for ( startPos=10, occurrences=0;
754 startPos != -1 && startPos < test3.length();
755 (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
756 ;
757 if (occurrences != 2)
758 errln(UnicodeString("indexOf failed: expected to find 2 occurrences, found ") + occurrences);
759 //---
760
761 for ( occurrences = 0, startPos = 0;
762 startPos != -1 && startPos < test1.length();
763 (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
764 ;
765 if (occurrences != 16)
766 errln(UnicodeString("indexOf with character failed: "
767 "expected to find 16 occurrences, found ") + occurrences);
768
769 for ( occurrences = 0, startPos = 10;
770 startPos != -1 && startPos < test1.length();
771 (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
772 ;
773 if (occurrences != 12)
774 errln(UnicodeString("indexOf with character & start offset failed: "
775 "expected to find 12 occurrences, found ") + occurrences);
776
777 for ( occurrences = 0, startPos = 5, endPos = 28;
778 startPos != -1 && startPos < test1.length();
779 (startPos = test1.indexOf(testChar, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
780 ;
781 if (occurrences != 10)
782 errln(UnicodeString("indexOf with character & start & end offsets failed: "
783 "expected to find 10 occurrences, found ") + occurrences);
784
785 //testing for UChar32
786 UnicodeString subString;
787 for( occurrences =0, startPos=0; startPos < test3.length(); startPos +=1){
788 subString.append(test3, startPos, test3.length());
789 if(subString.indexOf(testChar32) != -1 ){
790 ++occurrences;
791 }
792 subString.remove();
793 }
794 if (occurrences != 14)
795 errln((UnicodeString)"indexOf failed: expected to find 14 occurrences, found " + occurrences);
796
797 for ( occurrences = 0, startPos = 0;
798 startPos != -1 && startPos < test3.length();
799 (startPos = test3.indexOf(testChar32, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
800 ;
801 if (occurrences != 4)
802 errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences);
803
804 endPos=test3.length();
805 for ( occurrences = 0, startPos = 5;
806 startPos != -1 && startPos < test3.length();
807 (startPos = test3.indexOf(testChar32, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
808 ;
809 if (occurrences != 3)
810 errln((UnicodeString)"indexOf with character & start & end offsets failed: expected to find 2 occurrences, found " + occurrences);
811 //---
812
813 if(test1.lastIndexOf(test2)!=29) {
814 errln("test1.lastIndexOf(test2)!=29");
815 }
816
817 if(test1.lastIndexOf(test2, 15)!=29 || test1.lastIndexOf(test2, 29)!=29 || test1.lastIndexOf(test2, 30)!=-1) {
818 errln("test1.lastIndexOf(test2, start) failed");
819 }
820
821 for ( occurrences = 0, startPos = 32;
822 startPos != -1;
823 (startPos = test1.lastIndexOf(test2, 5, startPos - 5)) != -1 ? ++occurrences : 0)
824 ;
825 if (occurrences != 4)
826 errln(UnicodeString("lastIndexOf with starting and ending offsets failed: "
827 "expected to find 4 occurrences, found ") + occurrences);
828
829 for ( occurrences = 0, startPos = 32;
830 startPos != -1;
831 (startPos = test1.lastIndexOf(testChar, 5, startPos - 5)) != -1 ? ++occurrences : 0)
832 ;
833 if (occurrences != 11)
834 errln(UnicodeString("lastIndexOf with character & start & end offsets failed: "
835 "expected to find 11 occurrences, found ") + occurrences);
836
837 //testing UChar32
838 startPos=test3.length();
839 for ( occurrences = 0;
840 startPos != -1;
841 (startPos = test3.lastIndexOf(testChar32, 5, startPos - 5)) != -1 ? ++occurrences : 0)
842 ;
843 if (occurrences != 3)
844 errln((UnicodeString)"lastIndexOf with character & start & end offsets failed: expected to find 3 occurrences, found " + occurrences);
845
846
847 for ( occurrences = 0, endPos = test3.length(); endPos > 0; endPos -= 1){
848 subString.remove();
849 subString.append(test3, 0, endPos);
850 if(subString.lastIndexOf(testChar32) != -1 ){
851 ++occurrences;
852 }
853 }
854 if (occurrences != 18)
855 errln((UnicodeString)"indexOf failed: expected to find 18 occurrences, found " + occurrences);
856 //---
857
858 // test that indexOf(UChar32) and lastIndexOf(UChar32)
859 // do not find surrogate code points when they are part of matched pairs
860 // (= part of supplementary code points)
861 // Jitterbug 1542
862 if(test3.indexOf((UChar32)0xd841) != 4 || test3.indexOf((UChar32)0xdc02) != 3) {
863 errln("error: UnicodeString::indexOf(UChar32 surrogate) finds a partial supplementary code point");
864 }
865 if( UnicodeString(test3, 0, 17).lastIndexOf((UChar)0xd841, 0) != 4 ||
866 UnicodeString(test3, 0, 17).lastIndexOf((UChar32)0xd841, 2) != 4 ||
867 test3.lastIndexOf((UChar32)0xd841, 0, 17) != 4 || test3.lastIndexOf((UChar32)0xdc02, 0, 17) != 16
868 ) {
869 errln("error: UnicodeString::lastIndexOf(UChar32 surrogate) finds a partial supplementary code point");
870 }
871 }
872
873 void
TestSpacePadding()874 UnicodeStringTest::TestSpacePadding()
875 {
876 UnicodeString test1("hello");
877 UnicodeString test2(" there");
878 UnicodeString test3("Hi! How ya doin'? Beautiful day, isn't it?");
879 UnicodeString test4;
880 UBool returnVal;
881 UnicodeString expectedValue;
882
883 returnVal = test1.padLeading(15);
884 expectedValue = " hello";
885 if (returnVal == false || test1 != expectedValue)
886 errln("padLeading() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
887
888 returnVal = test2.padTrailing(15);
889 expectedValue = " there ";
890 if (returnVal == false || test2 != expectedValue)
891 errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
892
893 expectedValue = test3;
894 returnVal = test3.padTrailing(15);
895 if (returnVal == true || test3 != expectedValue)
896 errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
897
898 expectedValue = "hello";
899 test4.setTo(test1).trim();
900
901 if (test4 != expectedValue || test1 == expectedValue || test4 != expectedValue)
902 errln("trim(UnicodeString&) failed");
903
904 test1.trim();
905 if (test1 != expectedValue)
906 errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
907
908 test2.trim();
909 expectedValue = "there";
910 if (test2 != expectedValue)
911 errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
912
913 test3.trim();
914 expectedValue = "Hi! How ya doin'? Beautiful day, isn't it?";
915 if (test3 != expectedValue)
916 errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
917
918 returnVal = test1.truncate(15);
919 expectedValue = "hello";
920 if (returnVal == true || test1 != expectedValue)
921 errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
922
923 returnVal = test2.truncate(15);
924 expectedValue = "there";
925 if (returnVal == true || test2 != expectedValue)
926 errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
927
928 returnVal = test3.truncate(15);
929 expectedValue = "Hi! How ya doi";
930 if (returnVal == false || test3 != expectedValue)
931 errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
932 }
933
934 void
TestPrefixAndSuffix()935 UnicodeStringTest::TestPrefixAndSuffix()
936 {
937 UnicodeString test1("Now is the time for all good men to come to the aid of their country.");
938 UnicodeString test2("Now");
939 UnicodeString test3("country.");
940 UnicodeString test4("count");
941
942 if (!test1.startsWith(test2) || !test1.startsWith(test2, 0, test2.length())) {
943 errln("startsWith() failed: \"" + test2 + "\" should be a prefix of \"" + test1 + "\".");
944 }
945
946 if (test1.startsWith(test3) ||
947 test1.startsWith(test3.getBuffer(), test3.length()) ||
948 test1.startsWith(test3.getTerminatedBuffer(), 0, -1)
949 ) {
950 errln("startsWith() failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test1 + "\".");
951 }
952
953 if (test1.endsWith(test2)) {
954 errln("endsWith() failed: \"" + test2 + "\" shouldn't be a suffix of \"" + test1 + "\".");
955 }
956
957 if (!test1.endsWith(test3)) {
958 errln("endsWith(test3) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
959 }
960 if (!test1.endsWith(test3, 0, INT32_MAX)) {
961 errln("endsWith(test3, 0, INT32_MAX) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
962 }
963
964 if(!test1.endsWith(test3.getBuffer(), test3.length())) {
965 errln("endsWith(test3.getBuffer(), test3.length()) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
966 }
967 if(!test1.endsWith(test3.getTerminatedBuffer(), 0, -1)) {
968 errln("endsWith(test3.getTerminatedBuffer(), 0, -1) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
969 }
970
971 if (!test3.startsWith(test4)) {
972 errln("endsWith(test4) failed: \"" + test4 + "\" should be a prefix of \"" + test3 + "\".");
973 }
974
975 if (test4.startsWith(test3)) {
976 errln("startsWith(test3) failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test4 + "\".");
977 }
978 }
979
980 void
TestStartsWithAndEndsWithNulTerminated()981 UnicodeStringTest::TestStartsWithAndEndsWithNulTerminated() {
982 UnicodeString test("abcde");
983 const UChar ab[] = { 0x61, 0x62, 0 };
984 const UChar de[] = { 0x64, 0x65, 0 };
985 assertTrue("abcde.startsWith(ab, -1)", test.startsWith(ab, -1));
986 assertTrue("abcde.startsWith(ab, 0, -1)", test.startsWith(ab, 0, -1));
987 assertTrue("abcde.endsWith(de, -1)", test.endsWith(de, -1));
988 assertTrue("abcde.endsWith(de, 0, -1)", test.endsWith(de, 0, -1));
989 }
990
991 void
TestFindAndReplace()992 UnicodeStringTest::TestFindAndReplace()
993 {
994 UnicodeString test1("One potato, two potato, three potato, four\n");
995 UnicodeString test2("potato");
996 UnicodeString test3("MISSISSIPPI");
997
998 UnicodeString expectedValue;
999
1000 test1.findAndReplace(test2, test3);
1001 expectedValue = "One MISSISSIPPI, two MISSISSIPPI, three MISSISSIPPI, four\n";
1002 if (test1 != expectedValue)
1003 errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
1004 test1.findAndReplace(2, 32, test3, test2);
1005 expectedValue = "One potato, two potato, three MISSISSIPPI, four\n";
1006 if (test1 != expectedValue)
1007 errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
1008 }
1009
1010 void
TestReverse()1011 UnicodeStringTest::TestReverse()
1012 {
1013 UnicodeString test("backwards words say to used I");
1014
1015 test.reverse();
1016 test.reverse(2, 4);
1017 test.reverse(7, 2);
1018 test.reverse(10, 3);
1019 test.reverse(14, 5);
1020 test.reverse(20, 9);
1021
1022 if (test != "I used to say words backwards")
1023 errln("reverse() failed: Expected \"I used to say words backwards\",\n got \""
1024 + test + "\"");
1025
1026 test=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
1027 test.reverse();
1028 if(test.char32At(0)!=0x1ed0 || test.char32At(1)!=0xc4 || test.char32At(2)!=0x1d15f || test.char32At(4)!=0x2f999) {
1029 errln("reverse() failed with supplementary characters");
1030 }
1031
1032 // Test case for ticket #8091:
1033 // UnicodeString::reverse() failed to see a lead surrogate in the middle of
1034 // an odd-length string that contains no other lead surrogates.
1035 test=UNICODE_STRING_SIMPLE("ab\\U0001F4A9e").unescape();
1036 UnicodeString expected=UNICODE_STRING_SIMPLE("e\\U0001F4A9ba").unescape();
1037 test.reverse();
1038 if(test!=expected) {
1039 errln("reverse() failed with only lead surrogate in the middle");
1040 }
1041 }
1042
/**
 * Covers assorted UnicodeString buffer APIs: getBuffer(minCapacity) together
 * with releaseBuffer(), the const getBuffer(), getTerminatedBuffer(), aliasing
 * via setTo(), a few append/insert/replace overloads, hasMetaData(), and
 * getTerminatedBuffer() on strings that were assigned from one another.
 *
 * The checks are order-dependent: most of them reuse test1 in whatever state
 * the previous check left it.
 */
void
UnicodeStringTest::TestMiscellaneous()
{
    UnicodeString test1("This is a test");
    UnicodeString test2("This is a test");
    UnicodeString test3("Me too!");  // not referenced again in this function

    // test getBuffer(minCapacity) and releaseBuffer()
    test1=UnicodeString(); // make sure that it starts with its stackBuffer
    UChar *p=test1.getBuffer(20);
    if(test1.getCapacity()<20) {
        errln("UnicodeString::getBuffer(20).getCapacity()<20");
    }

    // Between getBuffer(20) and releaseBuffer() the test expects the string to
    // look empty, charAt() to return 0xffff, and further getBuffer() calls to
    // return 0; the three modifying calls below must have no visible effect.
    test1.append((UChar)7); // must not be able to modify the string here
    test1.setCharAt(3, 7);
    test1.reverse();
    if( test1.length()!=0 ||
        test1.charAt(0)!=0xffff || test1.charAt(3)!=0xffff ||
        test1.getBuffer(10)!=0 || test1.getBuffer()!=0
    ) {
        errln("UnicodeString::getBuffer(minCapacity) allows read or write access to the UnicodeString");
    }

    // Write through the raw pointer, then hand the buffer back with length 3.
    p[0]=1;
    p[1]=2;
    p[2]=3;
    test1.releaseBuffer(3);
    test1.append((UChar)4);

    if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) {
        errln("UnicodeString::releaseBuffer(newLength) does not properly reallow access to the UnicodeString");
    }

    // test releaseBuffer() without getBuffer(minCapacity) - must not have any effect
    test1.releaseBuffer(1);
    if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) {
        errln("UnicodeString::releaseBuffer(newLength) without getBuffer(minCapacity) changed the UnicodeString");
    }

    // test getBuffer(const)
    // Both pointers are fetched without an intervening releaseBuffer() and are
    // expected to expose the same four code units.
    const UChar *q=test1.getBuffer(), *r=test1.getBuffer();
    if( test1.length()!=4 ||
        q[0]!=1 || q[1]!=2 || q[2]!=3 || q[3]!=4 ||
        r[0]!=1 || r[1]!=2 || r[2]!=3 || r[3]!=4
    ) {
        errln("UnicodeString::getBuffer(const) does not return a usable buffer pointer");
    }

    // test releaseBuffer() with a NUL-terminated buffer
    // A NUL is written at index 2, so the expected new length is 2.
    test1.getBuffer(20)[2]=0;
    test1.releaseBuffer(); // implicit -1
    if(test1.length()!=2 || test1.charAt(0)!=1 || test1.charAt(1) !=2) {
        errln("UnicodeString::releaseBuffer(-1) does not properly set the length of the UnicodeString");
    }

    // test releaseBuffer() with a non-NUL-terminated buffer
    // With no NUL anywhere in the buffer the expected length is the full capacity.
    p=test1.getBuffer(256);
    for(int32_t i=0; i<test1.getCapacity(); ++i) {
        p[i]=(UChar)1; // fill the buffer with all non-NUL code units
    }
    test1.releaseBuffer(); // implicit -1
    if(test1.length()!=test1.getCapacity() || test1.charAt(1)!=1 || test1.charAt(100)!=1 || test1.charAt(test1.getCapacity()-1)!=1) {
        errln("UnicodeString::releaseBuffer(-1 but no NUL) does not properly set the length of the UnicodeString");
    }

    // test getTerminatedBuffer()
    // The returned buffer must carry a NUL at index length(), and fetching it
    // must not change the string's contents (test1 must still equal test2).
    test1=UnicodeString("This is another test.", "");
    test2=UnicodeString("This is another test.", "");
    q=test1.getTerminatedBuffer();
    if(q[test1.length()]!=0 || test1!=test2 || test2.compare(q, -1)!=0) {
        errln("getTerminatedBuffer()[length]!=0");
    }

    // setTo(false, u, 3) covers only the first three units of u; u[3] is 8,
    // not NUL, so the test expects getTerminatedBuffer() to return a pointer
    // other than u, holding 5, 6, 7 followed by a NUL.
    const UChar u[]={ 5, 6, 7, 8, 0 };
    test1.setTo(false, u, 3);
    q=test1.getTerminatedBuffer();
    if(q==u || q[0]!=5 || q[1]!=6 || q[2]!=7 || q[3]!=0) {
        errln("UnicodeString(u[3]).getTerminatedBuffer() returns a bad buffer");
    }

    // setTo(true, u, -1) covers all four units up to the NUL in u; here the
    // test expects getTerminatedBuffer() to return u itself.
    test1.setTo(true, u, -1);
    q=test1.getTerminatedBuffer();
    if(q!=u || test1.length()!=4 || q[3]!=8 || q[4]!=0) {
        errln("UnicodeString(u[-1]).getTerminatedBuffer() returns a bad buffer");
    }

    // NOTE: Some compilers will optimize u"la" to point to the same static memory
    // as u" lila", offset by 3 code units
    test1=UnicodeString(true, u"la", 2);
    test1.append(UnicodeString(true, u" lila", 5).getTerminatedBuffer(), 0, -1);
    assertEquals("UnicodeString::append(const UChar *, start, length) failed",
        u"la lila", test1);

    test1.insert(3, UnicodeString(true, u"dudum ", 6), 0, INT32_MAX);
    assertEquals("UnicodeString::insert(start, const UniStr &, start, length) failed",
        u"la dudum lila", test1);

    // ucs spells "hm " followed by a NUL; it is inserted here with length -1
    // and reused further down for the ticket #9740 check.
    static const UChar ucs[]={ 0x68, 0x6d, 0x20, 0 };
    test1.insert(9, ucs, -1);
    assertEquals("UnicodeString::insert(start, const UChar *, length) failed",
        u"la dudum hm lila", test1);

    // Replace the two units "hm" with a single '+'.
    test1.replace(9, 2, (UChar)0x2b);
    assertEquals("UnicodeString::replace(start, length, UChar) failed",
        u"la dudum + lila", test1);

    if(test1.hasMetaData() || UnicodeString().hasMetaData()) {
        errln("UnicodeString::hasMetaData() returns true");
    }

    // test getTerminatedBuffer() on a truncated, shared, heap-allocated string
    // Modifying test1 and terminating its buffer must leave the copy in test2
    // with its original 36 code units.
    test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
    test1.truncate(36);  // ensure length()<getCapacity()
    test2=test1;  // share the buffer
    test1.truncate(5);
    if(test1.length()!=5 || test1.getTerminatedBuffer()[5]!=0) {
        errln("UnicodeString(shared buffer).truncate() failed");
    }
    if(test2.length()!=36 || test2[5]!=0x66 || u_strlen(test2.getTerminatedBuffer())!=36) {
        errln("UnicodeString(shared buffer).truncate().getTerminatedBuffer() "
              "modified another copy of the string!");
    }
    // Same scenario, but emptying test1 with remove() instead of truncate(5).
    test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
    test1.truncate(36);  // ensure length()<getCapacity()
    test2=test1;  // share the buffer
    test1.remove();
    if(test1.length()!=0 || test1.getTerminatedBuffer()[0]!=0) {
        errln("UnicodeString(shared buffer).remove() failed");
    }
    if(test2.length()!=36 || test2[0]!=0x61 || u_strlen(test2.getTerminatedBuffer())!=36) {
        errln("UnicodeString(shared buffer).remove().getTerminatedBuffer() "
              "modified another copy of the string!");
    }

    // ticket #9740
    // trim() on a read-only alias of "hm " must shorten it to 2, and the
    // terminated buffer fetched afterwards must also be 2 units long.
    test1.setTo(true, ucs, 3);
    assertEquals("length of read-only alias", 3, test1.length());
    test1.trim();
    assertEquals("length of read-only alias after trim()", 2, test1.length());
    assertEquals("length of terminated buffer of read-only alias + trim()",
        2, u_strlen(test1.getTerminatedBuffer()));
}
1186
1187 void
TestStackAllocation()1188 UnicodeStringTest::TestStackAllocation()
1189 {
1190 UChar testString[] ={
1191 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x63, 0x72, 0x61, 0x7a, 0x79, 0x20, 0x74, 0x65, 0x73, 0x74, 0x2e, 0 };
1192 UChar guardWord = 0x4DED;
1193 UnicodeString* test = 0;
1194
1195 test = new UnicodeString(testString);
1196 if (*test != "This is a crazy test.")
1197 errln("Test string failed to initialize properly.");
1198 if (guardWord != 0x04DED)
1199 errln("Test string initialization overwrote guard word!");
1200
1201 test->insert(8, "only ");
1202 test->remove(15, 6);
1203 if (*test != "This is only a test.")
1204 errln("Manipulation of test string failed to work right.");
1205 if (guardWord != 0x4DED)
1206 errln("Manipulation of test string overwrote guard word!");
1207
1208 // we have to deinitialize and release the backing store by calling the destructor
1209 // explicitly, since we can't overload operator delete
1210 delete test;
1211
1212 UChar workingBuffer[] = {
1213 0x4e, 0x6f, 0x77, 0x20, 0x69, 0x73, 0x20, 0x74, 0x68, 0x65, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x20,
1214 0x66, 0x6f, 0x72, 0x20, 0x61, 0x6c, 0x6c, 0x20, 0x6d, 0x65, 0x6e, 0x20, 0x74, 0x6f, 0x20,
1215 0x63, 0x6f, 0x6d, 0x65, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1216 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1217 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1218 UChar guardWord2 = 0x4DED;
1219
1220 test = new UnicodeString(workingBuffer, 35, 100);
1221 if (*test != "Now is the time for all men to come")
1222 errln("Stack-allocated backing store failed to initialize correctly.");
1223 if (guardWord2 != 0x4DED)
1224 errln("Stack-allocated backing store overwrote guard word!");
1225
1226 test->insert(24, "good ");
1227 if (*test != "Now is the time for all good men to come")
1228 errln("insert() on stack-allocated UnicodeString didn't work right");
1229 if (guardWord2 != 0x4DED)
1230 errln("insert() on stack-allocated UnicodeString overwrote guard word!");
1231
1232 if (workingBuffer[24] != 0x67)
1233 errln("insert() on stack-allocated UnicodeString didn't affect backing store");
1234
1235 *test += " to the aid of their country.";
1236 if (*test != "Now is the time for all good men to come to the aid of their country.")
1237 errln("Stack-allocated UnicodeString overflow didn't work");
1238 if (guardWord2 != 0x4DED)
1239 errln("Stack-allocated UnicodeString overflow overwrote guard word!");
1240
1241 *test = "ha!";
1242 if (*test != "ha!")
1243 errln("Assignment to stack-allocated UnicodeString didn't work");
1244 if (workingBuffer[0] != 0x4e)
1245 errln("Change to UnicodeString after overflow are still affecting original buffer");
1246 if (guardWord2 != 0x4DED)
1247 errln("Change to UnicodeString after overflow overwrote guard word!");
1248
1249 // test read-only aliasing with setTo()
1250 workingBuffer[0] = 0x20ac;
1251 workingBuffer[1] = 0x125;
1252 workingBuffer[2] = 0;
1253 test->setTo(true, workingBuffer, 2);
1254 if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x125) {
1255 errln("UnicodeString.setTo(readonly alias) does not alias correctly");
1256 }
1257
1258 UnicodeString *c=test->clone();
1259
1260 workingBuffer[1] = 0x109;
1261 if(test->charAt(1) != 0x109) {
1262 errln("UnicodeString.setTo(readonly alias) made a copy: did not see change in buffer");
1263 }
1264
1265 if(c->length() != 2 || c->charAt(1) != 0x125) {
1266 errln("clone(alias) did not copy the buffer");
1267 }
1268 delete c;
1269
1270 test->setTo(true, workingBuffer, -1);
1271 if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x109) {
1272 errln("UnicodeString.setTo(readonly alias, length -1) does not alias correctly");
1273 }
1274
1275 test->setTo(false, workingBuffer, -1);
1276 if(!test->isBogus()) {
1277 errln("UnicodeString.setTo(unterminated readonly alias, length -1) does not result in isBogus()");
1278 }
1279
1280 delete test;
1281
1282 test=new UnicodeString();
1283 UChar buffer[]={0x0061, 0x0062, 0x20ac, 0x0043, 0x0042, 0x0000};
1284 test->setTo(buffer, 4, 10);
1285 if(test->length() !=4 || test->charAt(0) != 0x0061 || test->charAt(1) != 0x0062 ||
1286 test->charAt(2) != 0x20ac || test->charAt(3) != 0x0043){
1287 errln((UnicodeString)"UnicodeString.setTo(UChar*, length, capacity) does not work correctly\n" + prettify(*test));
1288 }
1289 delete test;
1290
1291
1292 // test the UChar32 constructor
1293 UnicodeString c32Test((UChar32)0x10ff2a);
1294 if( c32Test.length() != U16_LENGTH(0x10ff2a) ||
1295 c32Test.char32At(c32Test.length() - 1) != 0x10ff2a
1296 ) {
1297 errln("The UnicodeString(UChar32) constructor does not work with a 0x10ff2a filler");
1298 }
1299
1300 // test the (new) capacity constructor
1301 UnicodeString capTest(5, (UChar32)0x2a, 5);
1302 if( capTest.length() != 5 * U16_LENGTH(0x2a) ||
1303 capTest.char32At(0) != 0x2a ||
1304 capTest.char32At(4) != 0x2a
1305 ) {
1306 errln("The UnicodeString capacity constructor does not work with an ASCII filler");
1307 }
1308
1309 capTest = UnicodeString(5, (UChar32)0x10ff2a, 5);
1310 if( capTest.length() != 5 * U16_LENGTH(0x10ff2a) ||
1311 capTest.char32At(0) != 0x10ff2a ||
1312 capTest.char32At(4) != 0x10ff2a
1313 ) {
1314 errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
1315 }
1316
1317 capTest = UnicodeString(5, (UChar32)0, 0);
1318 if(capTest.length() != 0) {
1319 errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
1320 }
1321 }
1322
1323 /**
1324 * Test the unescape() function.
1325 */
TestUnescape(void)1326 void UnicodeStringTest::TestUnescape(void) {
1327 UnicodeString IN("abc\\u4567 \\n\\r \\U00101234xyz\\x1\\x{5289}\\x1b", -1, US_INV);
1328 UnicodeString OUT("abc");
1329 OUT.append((UChar)0x4567);
1330 OUT.append(" ");
1331 OUT.append((UChar)0xA);
1332 OUT.append((UChar)0xD);
1333 OUT.append(" ");
1334 OUT.append((UChar32)0x00101234);
1335 OUT.append("xyz");
1336 OUT.append((UChar32)1).append((UChar32)0x5289).append((UChar)0x1b);
1337 UnicodeString result = IN.unescape();
1338 if (result != OUT) {
1339 errln("FAIL: " + prettify(IN) + ".unescape() -> " +
1340 prettify(result) + ", expected " +
1341 prettify(OUT));
1342 }
1343
1344 // test that an empty string is returned in case of an error
1345 if (!UNICODE_STRING("wrong \\u sequence", 17).unescape().isEmpty()) {
1346 errln("FAIL: unescaping of a string with an illegal escape sequence did not return an empty string");
1347 }
1348
1349 // ICU-21648 limit backslash-uhhhh escapes to ASCII hex digits
1350 UnicodeString euro = UnicodeString(u"\\u20aC").unescape();
1351 assertEquals("ASCII Euro", u"€", euro);
1352 UnicodeString nonASCIIEuro = UnicodeString(u"\\u୨෦aC").unescape();
1353 assertTrue("unescape() accepted non-ASCII digits", nonASCIIEuro.isEmpty());
1354 }
1355
1356 /* test code point counting functions --------------------------------------- */
1357
1358 /* reference implementation of UnicodeString::hasMoreChar32Than() */
1359 static int32_t
_refUnicodeStringHasMoreChar32Than(const UnicodeString & s,int32_t start,int32_t length,int32_t number)1360 _refUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) {
1361 int32_t count=s.countChar32(start, length);
1362 return count>number;
1363 }
1364
1365 /* compare the real function against the reference */
1366 void
_testUnicodeStringHasMoreChar32Than(const UnicodeString & s,int32_t start,int32_t length,int32_t number)1367 UnicodeStringTest::_testUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) {
1368 if(s.hasMoreChar32Than(start, length, number)!=_refUnicodeStringHasMoreChar32Than(s, start, length, number)) {
1369 errln("hasMoreChar32Than(%d, %d, %d)=%hd is wrong\n",
1370 start, length, number, s.hasMoreChar32Than(start, length, number));
1371 }
1372 }
1373
1374 void
TestCountChar32(void)1375 UnicodeStringTest::TestCountChar32(void) {
1376 {
1377 UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
1378
1379 // test countChar32()
1380 // note that this also calls and tests u_countChar32(length>=0)
1381 if(
1382 s.countChar32()!=4 ||
1383 s.countChar32(1)!=4 ||
1384 s.countChar32(2)!=3 ||
1385 s.countChar32(2, 3)!=2 ||
1386 s.countChar32(2, 0)!=0
1387 ) {
1388 errln("UnicodeString::countChar32() failed");
1389 }
1390
1391 // NUL-terminate the string buffer and test u_countChar32(length=-1)
1392 const UChar *buffer=s.getTerminatedBuffer();
1393 if(
1394 u_countChar32(buffer, -1)!=4 ||
1395 u_countChar32(buffer+1, -1)!=4 ||
1396 u_countChar32(buffer+2, -1)!=3 ||
1397 u_countChar32(buffer+3, -1)!=3 ||
1398 u_countChar32(buffer+4, -1)!=2 ||
1399 u_countChar32(buffer+5, -1)!=1 ||
1400 u_countChar32(buffer+6, -1)!=0
1401 ) {
1402 errln("u_countChar32(length=-1) failed");
1403 }
1404
1405 // test u_countChar32() with bad input
1406 if(u_countChar32(NULL, 5)!=0 || u_countChar32(buffer, -2)!=0) {
1407 errln("u_countChar32(bad input) failed (returned non-zero counts)");
1408 }
1409 }
1410
1411 /* test data and variables for hasMoreChar32Than() */
1412 static const UChar str[]={
1413 0x61, 0x62, 0xd800, 0xdc00,
1414 0xd801, 0xdc01, 0x63, 0xd802,
1415 0x64, 0xdc03, 0x65, 0x66,
1416 0xd804, 0xdc04, 0xd805, 0xdc05,
1417 0x67
1418 };
1419 UnicodeString string(str, UPRV_LENGTHOF(str));
1420 int32_t start, length, number;
1421
1422 /* test hasMoreChar32Than() */
1423 for(length=string.length(); length>=0; --length) {
1424 for(start=0; start<=length; ++start) {
1425 for(number=-1; number<=((length-start)+2); ++number) {
1426 _testUnicodeStringHasMoreChar32Than(string, start, length-start, number);
1427 }
1428 }
1429 }
1430
1431 /* test hasMoreChar32Than() with pinning */
1432 for(start=-1; start<=string.length()+1; ++start) {
1433 for(number=-1; number<=((string.length()-start)+2); ++number) {
1434 _testUnicodeStringHasMoreChar32Than(string, start, 0x7fffffff, number);
1435 }
1436 }
1437
1438 /* test hasMoreChar32Than() with a bogus string */
1439 string.setToBogus();
1440 for(length=-1; length<=1; ++length) {
1441 for(start=-1; start<=length; ++start) {
1442 for(number=-1; number<=((length-start)+2); ++number) {
1443 _testUnicodeStringHasMoreChar32Than(string, start, length-start, number);
1444 }
1445 }
1446 }
1447 }
1448
/**
 * Checks the "bogus" state of UnicodeString: how a string becomes bogus, which
 * operations must leave it bogus, which ones are expected to turn it back into
 * a normal (possibly empty) string, how bogus strings compare, and that NULL
 * input pointers yield empty rather than bogus strings.
 *
 * The checks build on each other: test3 is reset with setToBogus() before most
 * of them, and test1 is deliberately made bogus part-way through and used in
 * that state by the comparison checks near the end.
 */
void
UnicodeStringTest::TestBogus() {
    UnicodeString   test1("This is a test");
    UnicodeString   test2("This is a test");
    UnicodeString   test3("Me too!");

    // test isBogus() and setToBogus()
    if (test1.isBogus() || test2.isBogus() || test3.isBogus()) {
        errln("A string returned true for isBogus()!");
    }

    // NULL pointers are treated like empty strings
    // use other illegal arguments to make a bogus string
    test3.setTo(false, test1.getBuffer(), -2);
    if(!test3.isBogus()) {
        errln("A bogus string returned false for isBogus()!");
    }
    // Equal strings must hash equally; the bogus string's hash must differ from test1's.
    if (test1.hashCode() != test2.hashCode() || test1.hashCode() == test3.hashCode()) {
        errln("hashCode() failed");
    }
    // All three buffer accessors are expected to return 0 for a bogus string.
    if(test3.getBuffer()!=0 || test3.getBuffer(20)!=0 || test3.getTerminatedBuffer()!=0) {
        errln("bogus.getBuffer()!=0");
    }
    // Searching for a bogus string must not find anything.
    if (test1.indexOf(test3) != -1) {
        errln("bogus.indexOf() != -1");
    }
    if (test1.lastIndexOf(test3) != -1) {
        errln("bogus.lastIndexOf() != -1");
    }
    // In both comparison flavors the normal string must order after the bogus one.
    if (test1.caseCompare(test3, U_FOLD_CASE_DEFAULT) != 1 || test3.caseCompare(test1, U_FOLD_CASE_DEFAULT) != -1) {
        errln("caseCompare() doesn't work with bogus strings");
    }
    if (test1.compareCodePointOrder(test3) != 1 || test3.compareCodePointOrder(test1) != -1) {
        errln("compareCodePointOrder() doesn't work with bogus strings");
    }

    // verify that non-assignment modifications fail and do not revive a bogus string
    test3.setToBogus();
    test3.append((UChar)0x61);
    if(!test3.isBogus() || test3.getBuffer()!=0) {
        errln("bogus.append('a') worked but must not");
    }

    test3.setToBogus();
    test3.findAndReplace(UnicodeString((UChar)0x61), test2);
    if(!test3.isBogus() || test3.getBuffer()!=0) {
        errln("bogus.findAndReplace() worked but must not");
    }

    test3.setToBogus();
    test3.trim();
    if(!test3.isBogus() || test3.getBuffer()!=0) {
        errln("bogus.trim() revived bogus but must not");
    }

    test3.setToBogus();
    test3.remove(1);
    if(!test3.isBogus() || test3.getBuffer()!=0) {
        errln("bogus.remove(1) revived bogus but must not");
    }

    test3.setToBogus();
    if(!test3.setCharAt(0, 0x62).isBogus() || !test3.isEmpty()) {
        errln("bogus.setCharAt(0, 'b') worked but must not");
    }

    test3.setToBogus();
    if(test3.truncate(1) || !test3.isBogus() || !test3.isEmpty()) {
        errln("bogus.truncate(1) revived bogus but must not");
    }

    // verify that assignments revive a bogus string
    // Each check confirms test3 is bogus first, then that the assignment's
    // result is not bogus and that test3 now equals the assigned value.
    test3.setToBogus();
    if(!test3.isBogus() || (test3=test1).isBogus() || test3!=test1) {
        errln("bogus.operator=() failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.fastCopyFrom(test1).isBogus() || test3!=test1) {
        errln("bogus.fastCopyFrom() failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(test1).isBogus() || test3!=test1) {
        errln("bogus.setTo(UniStr) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(test1, 0).isBogus() || test3!=test1) {
        errln("bogus.setTo(UniStr, 0) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(test1, 0, 0x7fffffff).isBogus() || test3!=test1) {
        errln("bogus.setTo(UniStr, 0, len) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(test1.getBuffer(), test1.length()).isBogus() || test3!=test1) {
        errln("bogus.setTo(const UChar *, len) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo((UChar)0x2028).isBogus() || test3!=UnicodeString((UChar)0x2028)) {
        errln("bogus.setTo(UChar) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo((UChar32)0x1d157).isBogus() || test3!=UnicodeString((UChar32)0x1d157)) {
        errln("bogus.setTo(UChar32) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(false, test1.getBuffer(), test1.length()).isBogus() || test3!=test1) {
        errln("bogus.setTo(readonly alias) failed");
    }

    // writable alias to another string's buffer: very bad idea, just convenient for this test
    test3.setToBogus();
    if(!test3.isBogus() ||
            test3.setTo(const_cast<UChar *>(test1.getBuffer()),
                        test1.length(), test1.getCapacity()).isBogus() ||
            test3!=test1) {
        errln("bogus.setTo(writable alias) failed");
    }

    // verify simple, documented ways to turn a bogus string into an empty one
    test3.setToBogus();
    if(!test3.isBogus() || (test3=UnicodeString()).isBogus() || !test3.isEmpty()) {
        errln("bogus.operator=(UnicodeString()) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(UnicodeString()).isBogus() || !test3.isEmpty()) {
        errln("bogus.setTo(UnicodeString()) failed");
    }

    test3.setToBogus();
    if(test3.remove().isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) {
        errln("bogus.remove() failed");
    }

    test3.setToBogus();
    if(test3.remove(0, INT32_MAX).isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) {
        errln("bogus.remove(0, INT32_MAX) failed");
    }

    // truncate(0) is expected to return false here and still leave an empty, non-bogus string
    test3.setToBogus();
    if(test3.truncate(0) || test3.isBogus() || !test3.isEmpty()) {
        errln("bogus.truncate(0) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo((UChar32)-1).isBogus() || !test3.isEmpty()) {
        errln("bogus.setTo((UChar32)-1) failed");
    }

    static const UChar nul=0;

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(&nul, 0).isBogus() || !test3.isEmpty()) {
        errln("bogus.setTo(&nul, 0) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.getBuffer()!=0) {
        errln("setToBogus() failed to make a string bogus");
    }

    // Assigning a bogus string must make the target bogus as well.
    // From here on test1 stays bogus; the comparison checks below rely on that.
    test3.setToBogus();
    if(test1.isBogus() || !(test1=test3).isBogus()) {
        errln("normal=bogus failed to make the left string bogus");
    }

    // test that NULL primitive input string values are treated like
    // empty strings, not errors (bogus)
    // test2 holds one supplementary code point, so its length must stay at 2.
    test2.setTo((UChar32)0x10005);
    if(test2.insert(1, nullptr, 1).length()!=2) {
        errln("UniStr.insert(...nullptr...) should not modify the string but does");
    }

    UErrorCode errorCode=U_ZERO_ERROR;
    UnicodeString
        test4((const UChar *)NULL),
        test5(true, (const UChar *)NULL, 1),
        test6((UChar *)NULL, 5, 5),
        test7((const char *)NULL, 3, NULL, errorCode);
    if(test4.isBogus() || test5.isBogus() || test6.isBogus() || test7.isBogus()) {
        errln("a constructor set to bogus for a NULL input string, should be empty");
    }

    test4.setTo(NULL, 3);
    test5.setTo(true, (const UChar *)NULL, 1);
    test6.setTo((UChar *)NULL, 5, 5);
    if(test4.isBogus() || test5.isBogus() || test6.isBogus()) {
        errln("a setTo() set to bogus for a NULL input string, should be empty");
    }

    // test that bogus==bogus<any
    // (test1 and test3 are both bogus at this point)
    if(test1!=test3 || test1.compare(test3)!=0) {
        errln("bogus==bogus failed");
    }

    // A bogus string must order before an empty one.
    test2.remove();
    if(test1>=test2 || !(test2>test1) || test1.compare(test2)>=0 || !(test2.compare(test1)>0)) {
        errln("bogus<empty failed");
    }

    // test that copy constructor of bogus is bogus & clone of bogus is nullptr
    {
        test3.setToBogus();
        UnicodeString test3Copy(test3);
        UnicodeString *test3Clone = test3.clone();
        assertTrue(WHERE, test3.isBogus());
        assertTrue(WHERE, test3Copy.isBogus());
        assertTrue(WHERE, test3Clone == nullptr);
    }
}
1667
1668 // StringEnumeration ------------------------------------------------------- ***
1669 // most of StringEnumeration is tested elsewhere
1670 // this test improves code coverage
1671
// Fixture strings handed out, in order, by TestEnumeration::snext().
// The last two entries are intentionally long ("helps us test some buffer limits").
static const char *const testEnumStrings[] = {
    "a", "b", "c",
    "this is a long string which helps us test some buffer limits",
    "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"
};
1680
1681 class TestEnumeration : public StringEnumeration {
1682 public:
TestEnumeration()1683 TestEnumeration() : i(0) {}
1684
count(UErrorCode &) const1685 virtual int32_t count(UErrorCode& /*status*/) const override {
1686 return UPRV_LENGTHOF(testEnumStrings);
1687 }
1688
snext(UErrorCode & status)1689 virtual const UnicodeString *snext(UErrorCode &status) override {
1690 if(U_SUCCESS(status) && i<UPRV_LENGTHOF(testEnumStrings)) {
1691 unistr=UnicodeString(testEnumStrings[i++], "");
1692 return &unistr;
1693 }
1694
1695 return NULL;
1696 }
1697
reset(UErrorCode &)1698 virtual void reset(UErrorCode& /*status*/) override {
1699 i=0;
1700 }
1701
getStaticClassID()1702 static inline UClassID getStaticClassID() {
1703 return (UClassID)&fgClassID;
1704 }
getDynamicClassID() const1705 virtual UClassID getDynamicClassID() const override {
1706 return getStaticClassID();
1707 }
1708
1709 private:
1710 static const char fgClassID;
1711
1712 int32_t i;
1713 };
1714
1715 const char TestEnumeration::fgClassID=0;
1716
1717 void
TestStringEnumeration()1718 UnicodeStringTest::TestStringEnumeration() {
1719 UnicodeString s;
1720 TestEnumeration ten;
1721 int32_t i, length;
1722 UErrorCode status;
1723
1724 const UChar *pu;
1725 const char *pc;
1726
1727 // test the next() default implementation and ensureCharsCapacity()
1728 for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1729 status=U_ZERO_ERROR;
1730 pc=ten.next(&length, status);
1731 s=UnicodeString(testEnumStrings[i], "");
1732 if(U_FAILURE(status) || pc==NULL || length!=s.length() || UnicodeString(pc, length, "")!=s) {
1733 errln("StringEnumeration.next(%d) failed", i);
1734 }
1735 }
1736 status=U_ZERO_ERROR;
1737 if(ten.next(&length, status)!=NULL) {
1738 errln("StringEnumeration.next(done)!=NULL");
1739 }
1740
1741 // test the unext() default implementation
1742 ten.reset(status);
1743 for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1744 status=U_ZERO_ERROR;
1745 pu=ten.unext(&length, status);
1746 s=UnicodeString(testEnumStrings[i], "");
1747 if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(true, pu, length)!=s) {
1748 errln("StringEnumeration.unext(%d) failed", i);
1749 }
1750 }
1751 status=U_ZERO_ERROR;
1752 if(ten.unext(&length, status)!=NULL) {
1753 errln("StringEnumeration.unext(done)!=NULL");
1754 }
1755
1756 // test that the default clone() implementation works, and returns NULL
1757 if(ten.clone()!=NULL) {
1758 errln("StringEnumeration.clone()!=NULL");
1759 }
1760
1761 // test that uenum_openFromStringEnumeration() works
1762 // Need a heap allocated string enumeration because it is adopted by the UEnumeration.
1763 StringEnumeration *newTen = new TestEnumeration;
1764 status=U_ZERO_ERROR;
1765 UEnumeration *uten = uenum_openFromStringEnumeration(newTen, &status);
1766 if (uten==NULL || U_FAILURE(status)) {
1767 errln("fail at file %s, line %d, UErrorCode is %s\n", __FILE__, __LINE__, u_errorName(status));
1768 return;
1769 }
1770
1771 // test uenum_next()
1772 for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1773 status=U_ZERO_ERROR;
1774 pc=uenum_next(uten, &length, &status);
1775 if(U_FAILURE(status) || pc==NULL || strcmp(pc, testEnumStrings[i]) != 0) {
1776 errln("File %s, line %d, StringEnumeration.next(%d) failed", __FILE__, __LINE__, i);
1777 }
1778 }
1779 status=U_ZERO_ERROR;
1780 if(uenum_next(uten, &length, &status)!=NULL) {
1781 errln("File %s, line %d, uenum_next(done)!=NULL");
1782 }
1783
1784 // test the uenum_unext()
1785 uenum_reset(uten, &status);
1786 for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1787 status=U_ZERO_ERROR;
1788 pu=uenum_unext(uten, &length, &status);
1789 s=UnicodeString(testEnumStrings[i], "");
1790 if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(true, pu, length)!=s) {
1791 errln("File %s, Line %d, uenum_unext(%d) failed", __FILE__, __LINE__, i);
1792 }
1793 }
1794 status=U_ZERO_ERROR;
1795 if(uenum_unext(uten, &length, &status)!=NULL) {
1796 errln("File %s, Line %d, uenum_unext(done)!=NULL" __FILE__, __LINE__);
1797 }
1798
1799 uenum_close(uten);
1800 }
1801
1802 /*
1803 * Namespace test, to make sure that macros like UNICODE_STRING include the
1804 * namespace qualifier.
1805 *
1806 * Define a (bogus) UnicodeString class in another namespace and check for ambiguity.
1807 */
1808 namespace bogus {
1809 class UnicodeString {
1810 public:
1811 enum EInvariant { kInvariant };
UnicodeString()1812 UnicodeString() : i(1) {}
UnicodeString(UBool,const UChar *,int32_t textLength)1813 UnicodeString(UBool /*isTerminated*/, const UChar * /*text*/, int32_t textLength) : i(textLength) {(void)i;}
UnicodeString(const char *,int32_t length,enum EInvariant)1814 UnicodeString(const char * /*src*/, int32_t length, enum EInvariant /*inv*/
1815 ) : i(length) {}
1816 private:
1817 int32_t i;
1818 };
1819 }
1820
1821 void
TestNameSpace()1822 UnicodeStringTest::TestNameSpace() {
1823 // Provoke name collision unless the UnicodeString macros properly
1824 // qualify the icu::UnicodeString class.
1825 using namespace bogus;
1826
1827 // Use all UnicodeString macros from unistr.h.
1828 icu::UnicodeString s1=icu::UnicodeString("abc", 3, US_INV);
1829 icu::UnicodeString s2=UNICODE_STRING("def", 3);
1830 icu::UnicodeString s3=UNICODE_STRING_SIMPLE("ghi");
1831
1832 // Make sure the compiler does not optimize away instantiation of s1, s2, s3.
1833 icu::UnicodeString s4=s1+s2+s3;
1834 if(s4.length()!=9) {
1835 errln("Something wrong with UnicodeString::operator+().");
1836 }
1837 }
1838
1839 void
TestUTF32()1840 UnicodeStringTest::TestUTF32() {
1841 // Input string length US_STACKBUF_SIZE to cause overflow of the
1842 // initially chosen fStackBuffer due to supplementary characters.
1843 static const UChar32 utf32[] = {
1844 0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a,
1845 0x10000, 0x20000, 0xe0000, 0x10ffff
1846 };
1847 static const UChar expected_utf16[] = {
1848 0x41, 0xfffd, 0x61, 0xfffd, 0xfffd, 0xfffd, 0x5a, 0xd900, 0xdc00, 0x7a,
1849 0xd800, 0xdc00, 0xd840, 0xdc00, 0xdb40, 0xdc00, 0xdbff, 0xdfff
1850 };
1851 UnicodeString from32 = UnicodeString::fromUTF32(utf32, UPRV_LENGTHOF(utf32));
1852 UnicodeString expected(false, expected_utf16, UPRV_LENGTHOF(expected_utf16));
1853 if(from32 != expected) {
1854 errln("UnicodeString::fromUTF32() did not create the expected string.");
1855 }
1856
1857 static const UChar utf16[] = {
1858 0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
1859 };
1860 static const UChar32 expected_utf32[] = {
1861 0x41, 0xfffd, 0x61, 0xfffd, 0x5a, 0x50000, 0x7a, 0x10000, 0x10ffff
1862 };
1863 UChar32 result32[16];
1864 UErrorCode errorCode = U_ZERO_ERROR;
1865 int32_t length32 =
1866 UnicodeString(false, utf16, UPRV_LENGTHOF(utf16)).
1867 toUTF32(result32, UPRV_LENGTHOF(result32), errorCode);
1868 if( length32 != UPRV_LENGTHOF(expected_utf32) ||
1869 0 != uprv_memcmp(result32, expected_utf32, length32*4) ||
1870 result32[length32] != 0
1871 ) {
1872 errln("UnicodeString::toUTF32() did not create the expected string.");
1873 }
1874 }
1875
1876 class TestCheckedArrayByteSink : public CheckedArrayByteSink {
1877 public:
TestCheckedArrayByteSink(char * outbuf,int32_t capacity)1878 TestCheckedArrayByteSink(char* outbuf, int32_t capacity)
1879 : CheckedArrayByteSink(outbuf, capacity), calledFlush(false) {}
Flush()1880 virtual void Flush() override { calledFlush = true; }
1881 UBool calledFlush;
1882 };
1883
1884 void
TestUTF8()1885 UnicodeStringTest::TestUTF8() {
1886 static const uint8_t utf8[] = {
1887 // Code points:
1888 // 0x41, 0xd900,
1889 // 0x61, 0xdc00,
1890 // 0x110000, 0x5a,
1891 // 0x50000, 0x7a,
1892 // 0x10000, 0x20000,
1893 // 0xe0000, 0x10ffff
1894 0x41, 0xed, 0xa4, 0x80,
1895 0x61, 0xed, 0xb0, 0x80,
1896 0xf4, 0x90, 0x80, 0x80, 0x5a,
1897 0xf1, 0x90, 0x80, 0x80, 0x7a,
1898 0xf0, 0x90, 0x80, 0x80, 0xf0, 0xa0, 0x80, 0x80,
1899 0xf3, 0xa0, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
1900 };
1901 static const UChar expected_utf16[] = {
1902 0x41, 0xfffd, 0xfffd, 0xfffd,
1903 0x61, 0xfffd, 0xfffd, 0xfffd,
1904 0xfffd, 0xfffd, 0xfffd, 0xfffd,0x5a,
1905 0xd900, 0xdc00, 0x7a,
1906 0xd800, 0xdc00, 0xd840, 0xdc00,
1907 0xdb40, 0xdc00, 0xdbff, 0xdfff
1908 };
1909 UnicodeString from8 = UnicodeString::fromUTF8(StringPiece((const char *)utf8, (int32_t)sizeof(utf8)));
1910 UnicodeString expected(false, expected_utf16, UPRV_LENGTHOF(expected_utf16));
1911
1912 if(from8 != expected) {
1913 errln("UnicodeString::fromUTF8(StringPiece) did not create the expected string.");
1914 }
1915 std::string utf8_string((const char *)utf8, sizeof(utf8));
1916 UnicodeString from8b = UnicodeString::fromUTF8(utf8_string);
1917 if(from8b != expected) {
1918 errln("UnicodeString::fromUTF8(std::string) did not create the expected string.");
1919 }
1920
1921 static const UChar utf16[] = {
1922 0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
1923 };
1924 static const uint8_t expected_utf8[] = {
1925 0x41, 0xef, 0xbf, 0xbd, 0x61, 0xef, 0xbf, 0xbd, 0x5a, 0xf1, 0x90, 0x80, 0x80, 0x7a,
1926 0xf0, 0x90, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
1927 };
1928 UnicodeString us(false, utf16, UPRV_LENGTHOF(utf16));
1929
1930 char buffer[64];
1931 TestCheckedArrayByteSink sink(buffer, (int32_t)sizeof(buffer));
1932 us.toUTF8(sink);
1933 if( sink.NumberOfBytesWritten() != (int32_t)sizeof(expected_utf8) ||
1934 0 != uprv_memcmp(buffer, expected_utf8, sizeof(expected_utf8))
1935 ) {
1936 errln("UnicodeString::toUTF8() did not create the expected string.");
1937 }
1938 if(!sink.calledFlush) {
1939 errln("UnicodeString::toUTF8(sink) did not sink.Flush().");
1940 }
1941 // Initial contents for testing that toUTF8String() appends.
1942 std::string result8 = "-->";
1943 std::string expected8 = "-->" + std::string((const char *)expected_utf8, sizeof(expected_utf8));
1944 // Use the return value just for testing.
1945 std::string &result8r = us.toUTF8String(result8);
1946 if(result8r != expected8 || &result8r != &result8) {
1947 errln("UnicodeString::toUTF8String() did not create the expected string.");
1948 }
1949 }
1950
1951 // Test if this compiler supports Return Value Optimization of unnamed temporary objects.
wrapUChars(const UChar * uchars)1952 static UnicodeString wrapUChars(const UChar *uchars) {
1953 return UnicodeString(true, uchars, -1);
1954 }
1955
1956 void
TestReadOnlyAlias()1957 UnicodeStringTest::TestReadOnlyAlias() {
1958 UChar uchars[]={ 0x61, 0x62, 0 };
1959 UnicodeString alias(true, uchars, 2);
1960 if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
1961 errln("UnicodeString read-only-aliasing constructor does not behave as expected.");
1962 return;
1963 }
1964 alias.truncate(1);
1965 if(alias.length()!=1 || alias.getBuffer()!=uchars) {
1966 errln("UnicodeString(read-only-alias).truncate() did not preserve aliasing as expected.");
1967 }
1968 if(alias.getTerminatedBuffer()==uchars) {
1969 errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1970 "did not allocate and copy as expected.");
1971 }
1972 if(uchars[1]!=0x62) {
1973 errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1974 "modified the original buffer.");
1975 }
1976 if(1!=u_strlen(alias.getTerminatedBuffer())) {
1977 errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1978 "does not return a buffer terminated at the proper length.");
1979 }
1980
1981 alias.setTo(true, uchars, 2);
1982 if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
1983 errln("UnicodeString read-only-aliasing setTo() does not behave as expected.");
1984 return;
1985 }
1986 alias.remove();
1987 if(alias.length()!=0) {
1988 errln("UnicodeString(read-only-alias).remove() did not work.");
1989 }
1990 if(alias.getTerminatedBuffer()==uchars) {
1991 errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
1992 "did not un-alias as expected.");
1993 }
1994 if(uchars[0]!=0x61) {
1995 errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
1996 "modified the original buffer.");
1997 }
1998 if(0!=u_strlen(alias.getTerminatedBuffer())) {
1999 errln("UnicodeString.setTo(read-only-alias).remove().getTerminatedBuffer() "
2000 "does not return a buffer terminated at length 0.");
2001 }
2002
2003 UnicodeString longString=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789");
2004 alias.setTo(false, longString.getBuffer(), longString.length());
2005 alias.remove(0, 10);
2006 if(longString.compare(10, INT32_MAX, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+10) {
2007 errln("UnicodeString.setTo(read-only-alias).remove(0, 10) did not preserve aliasing as expected.");
2008 }
2009 alias.setTo(false, longString.getBuffer(), longString.length());
2010 alias.remove(27, 99);
2011 if(longString.compare(0, 27, alias)!=0 || alias.getBuffer()!=longString.getBuffer()) {
2012 errln("UnicodeString.setTo(read-only-alias).remove(27, 99) did not preserve aliasing as expected.");
2013 }
2014 alias.setTo(false, longString.getBuffer(), longString.length());
2015 alias.retainBetween(6, 30);
2016 if(longString.compare(6, 24, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+6) {
2017 errln("UnicodeString.setTo(read-only-alias).retainBetween(6, 30) did not preserve aliasing as expected.");
2018 }
2019
2020 UChar abc[]={ 0x61, 0x62, 0x63, 0 };
2021 UBool hasRVO= wrapUChars(abc).getBuffer()==abc;
2022
2023 UnicodeString temp;
2024 temp.fastCopyFrom(longString.tempSubString());
2025 if(temp!=longString || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
2026 errln("UnicodeString.tempSubString() failed");
2027 }
2028 temp.fastCopyFrom(longString.tempSubString(-3, 5));
2029 if(longString.compare(0, 5, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
2030 errln("UnicodeString.tempSubString(-3, 5) failed");
2031 }
2032 temp.fastCopyFrom(longString.tempSubString(17));
2033 if(longString.compare(17, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+17)) {
2034 errln("UnicodeString.tempSubString(17) failed");
2035 }
2036 temp.fastCopyFrom(longString.tempSubString(99));
2037 if(!temp.isEmpty()) {
2038 errln("UnicodeString.tempSubString(99) failed");
2039 }
2040 temp.fastCopyFrom(longString.tempSubStringBetween(6));
2041 if(longString.compare(6, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+6)) {
2042 errln("UnicodeString.tempSubStringBetween(6) failed");
2043 }
2044 temp.fastCopyFrom(longString.tempSubStringBetween(8, 18));
2045 if(longString.compare(8, 10, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+8)) {
2046 errln("UnicodeString.tempSubStringBetween(8, 18) failed");
2047 }
2048 UnicodeString bogusString;
2049 bogusString.setToBogus();
2050 temp.fastCopyFrom(bogusString.tempSubStringBetween(8, 18));
2051 if(!temp.isBogus()) {
2052 errln("UnicodeString.setToBogus().tempSubStringBetween(8, 18) failed");
2053 }
2054 }
2055
2056 void
doTestAppendable(UnicodeString & dest,Appendable & app)2057 UnicodeStringTest::doTestAppendable(UnicodeString &dest, Appendable &app) {
2058 static const UChar cde[3]={ 0x63, 0x64, 0x65 };
2059 static const UChar fg[3]={ 0x66, 0x67, 0 };
2060 if(!app.reserveAppendCapacity(12)) {
2061 errln("Appendable.reserve(12) failed");
2062 }
2063 app.appendCodeUnit(0x61);
2064 app.appendCodePoint(0x62);
2065 app.appendCodePoint(0x50000);
2066 app.appendString(cde, 3);
2067 app.appendString(fg, -1);
2068 UChar scratch[3];
2069 int32_t capacity=-1;
2070 UChar *buffer=app.getAppendBuffer(3, 3, scratch, 3, &capacity);
2071 if(capacity<3) {
2072 errln("Appendable.getAppendBuffer(min=3) returned capacity=%d<3", (int)capacity);
2073 return;
2074 }
2075 static const UChar hij[3]={ 0x68, 0x69, 0x6a };
2076 u_memcpy(buffer, hij, 3);
2077 app.appendString(buffer, 3);
2078 if(dest!=UNICODE_STRING_SIMPLE("ab\\U00050000cdefghij").unescape()) {
2079 errln("Appendable.append(...) failed");
2080 }
2081 buffer=app.getAppendBuffer(0, 3, scratch, 3, &capacity);
2082 if(buffer!=NULL || capacity!=0) {
2083 errln("Appendable.getAppendBuffer(min=0) failed");
2084 }
2085 capacity=1;
2086 buffer=app.getAppendBuffer(3, 3, scratch, 2, &capacity);
2087 if(buffer!=NULL || capacity!=0) {
2088 errln("Appendable.getAppendBuffer(scratch<min) failed");
2089 }
2090 }
2091
2092 class SimpleAppendable : public Appendable {
2093 public:
SimpleAppendable(UnicodeString & dest)2094 explicit SimpleAppendable(UnicodeString &dest) : str(dest) {}
appendCodeUnit(UChar c)2095 virtual UBool appendCodeUnit(UChar c) override { str.append(c); return true; }
reset()2096 SimpleAppendable &reset() { str.remove(); return *this; }
2097 private:
2098 UnicodeString &str;
2099 };
2100
2101 void
TestAppendable()2102 UnicodeStringTest::TestAppendable() {
2103 UnicodeString dest;
2104 SimpleAppendable app(dest);
2105 doTestAppendable(dest, app);
2106 }
2107
2108 void
TestUnicodeStringImplementsAppendable()2109 UnicodeStringTest::TestUnicodeStringImplementsAppendable() {
2110 UnicodeString dest;
2111 UnicodeStringAppendable app(dest);
2112 doTestAppendable(dest, app);
2113 }
2114
2115 void
TestSizeofUnicodeString()2116 UnicodeStringTest::TestSizeofUnicodeString() {
2117 // See the comments in unistr.h near the declaration of UnicodeString's fields.
2118 // See the API comments for UNISTR_OBJECT_SIZE.
2119 size_t sizeofUniStr=sizeof(UnicodeString);
2120 size_t expected=UNISTR_OBJECT_SIZE;
2121 if(expected!=sizeofUniStr) {
2122 // Possible cause: UNISTR_OBJECT_SIZE may not be a multiple of sizeof(pointer),
2123 // of the compiler might add more internal padding than expected.
2124 errln("sizeof(UnicodeString)=%d, expected UNISTR_OBJECT_SIZE=%d",
2125 (int)sizeofUniStr, (int)expected);
2126 }
2127 if(sizeofUniStr<32) {
2128 errln("sizeof(UnicodeString)=%d < 32, probably too small", (int)sizeofUniStr);
2129 }
2130 // We assume that the entire UnicodeString object,
2131 // minus the vtable pointer and 2 bytes for flags and short length,
2132 // is available for internal storage of UChars.
2133 int32_t expectedStackBufferLength=((int32_t)UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR;
2134 UnicodeString s;
2135 const UChar *emptyBuffer=s.getBuffer();
2136 for(int32_t i=0; i<expectedStackBufferLength; ++i) {
2137 s.append((UChar)0x2e);
2138 }
2139 const UChar *fullBuffer=s.getBuffer();
2140 if(fullBuffer!=emptyBuffer) {
2141 errln("unexpected reallocation when filling with assumed stack buffer size of %d",
2142 expectedStackBufferLength);
2143 }
2144 const UChar *terminatedBuffer=s.getTerminatedBuffer();
2145 if(terminatedBuffer==emptyBuffer) {
2146 errln("unexpected keeping stack buffer when overfilling assumed stack buffer size of %d",
2147 expectedStackBufferLength);
2148 }
2149 }
2150
2151 // Try to avoid clang -Wself-move warnings from s1 = std::move(s1);
moveFrom(UnicodeString & dest,UnicodeString & src)2152 void moveFrom(UnicodeString &dest, UnicodeString &src) {
2153 dest = std::move(src);
2154 }
2155
2156 void
TestMoveSwap()2157 UnicodeStringTest::TestMoveSwap() {
2158 static const UChar abc[3] = { 0x61, 0x62, 0x63 }; // "abc"
2159 UnicodeString s1(false, abc, UPRV_LENGTHOF(abc)); // read-only alias
2160 UnicodeString s2(100, 0x7a, 100); // 100 * 'z' should be on the heap
2161 UnicodeString s3("defg", 4, US_INV); // in stack buffer
2162 const UChar *p = s2.getBuffer();
2163 s1.swap(s2);
2164 if(s1.getBuffer() != p || s1.length() != 100 || s2.getBuffer() != abc || s2.length() != 3) {
2165 errln("UnicodeString.swap() did not swap");
2166 }
2167 swap(s2, s3);
2168 if(s2 != UNICODE_STRING_SIMPLE("defg") || s3.getBuffer() != abc || s3.length() != 3) {
2169 errln("swap(UnicodeString) did not swap back");
2170 }
2171 UnicodeString s4;
2172 s4 = std::move(s1);
2173 if(s4.getBuffer() != p || s4.length() != 100 || !s1.isBogus()) {
2174 errln("UnicodeString = std::move(heap) did not move");
2175 }
2176 UnicodeString s5;
2177 s5 = std::move(s2);
2178 if(s5 != UNICODE_STRING_SIMPLE("defg")) {
2179 errln("UnicodeString = std::move(stack) did not move");
2180 }
2181 UnicodeString s6;
2182 s6 = std::move(s3);
2183 if(s6.getBuffer() != abc || s6.length() != 3) {
2184 errln("UnicodeString = std::move(alias) did not move");
2185 }
2186 infoln("TestMoveSwap() with rvalue references");
2187 s1 = static_cast<UnicodeString &&>(s6);
2188 if(s1.getBuffer() != abc || s1.length() != 3) {
2189 errln("UnicodeString move assignment operator did not move");
2190 }
2191 UnicodeString s7(static_cast<UnicodeString &&>(s4));
2192 if(s7.getBuffer() != p || s7.length() != 100 || !s4.isBogus()) {
2193 errln("UnicodeString move constructor did not move");
2194 }
2195
2196 // Move self assignment leaves the object valid but in an undefined state.
2197 // Do it to make sure there is no crash,
2198 // but do not check for any particular resulting value.
2199 moveFrom(s1, s1);
2200 moveFrom(s2, s2);
2201 moveFrom(s3, s3);
2202 moveFrom(s4, s4);
2203 moveFrom(s5, s5);
2204 moveFrom(s6, s6);
2205 moveFrom(s7, s7);
2206 // Simple copy assignment must work.
2207 UnicodeString simple = UNICODE_STRING_SIMPLE("simple");
2208 s1 = s6 = s4 = s7 = simple;
2209 if(s1 != simple || s4 != simple || s6 != simple || s7 != simple) {
2210 errln("UnicodeString copy after self-move did not work");
2211 }
2212 }
2213
2214 void
TestUInt16Pointers()2215 UnicodeStringTest::TestUInt16Pointers() {
2216 static const uint16_t carr[] = { 0x61, 0x62, 0x63, 0 };
2217 uint16_t arr[4];
2218
2219 UnicodeString expected(u"abc");
2220 assertEquals("abc from pointer", expected, UnicodeString(carr));
2221 assertEquals("abc from pointer+length", expected, UnicodeString(carr, 3));
2222 assertEquals("abc from read-only-alias pointer", expected, UnicodeString(true, carr, 3));
2223
2224 UnicodeString alias(arr, 0, 4);
2225 alias.append(u'a').append(u'b').append(u'c');
2226 assertEquals("abc from writable alias", expected, alias);
2227 assertEquals("buffer=abc from writable alias", expected, UnicodeString(arr, 3));
2228
2229 UErrorCode errorCode = U_ZERO_ERROR;
2230 int32_t length = UnicodeString(u"def").extract(arr, 4, errorCode);
2231 assertSuccess(WHERE, errorCode);
2232 assertEquals("def from extract()", UnicodeString(u"def"), UnicodeString(arr, length));
2233 }
2234
2235 void
TestWCharPointers()2236 UnicodeStringTest::TestWCharPointers() {
2237 #if U_SIZEOF_WCHAR_T==2
2238 static const wchar_t carr[] = { 0x61, 0x62, 0x63, 0 };
2239 wchar_t arr[4];
2240
2241 UnicodeString expected(u"abc");
2242 assertEquals("abc from pointer", expected, UnicodeString(carr));
2243 assertEquals("abc from pointer+length", expected, UnicodeString(carr, 3));
2244 assertEquals("abc from read-only-alias pointer", expected, UnicodeString(true, carr, 3));
2245
2246 UnicodeString alias(arr, 0, 4);
2247 alias.append(u'a').append(u'b').append(u'c');
2248 assertEquals("abc from writable alias", expected, alias);
2249 assertEquals("buffer=abc from writable alias", expected, UnicodeString(arr, 3));
2250
2251 UErrorCode errorCode = U_ZERO_ERROR;
2252 int32_t length = UnicodeString(u"def").extract(arr, 4, errorCode);
2253 assertSuccess(WHERE, errorCode);
2254 assertEquals("def from extract()", UnicodeString(u"def"), UnicodeString(arr, length));
2255 #endif
2256 }
2257
2258 void
TestNullPointers()2259 UnicodeStringTest::TestNullPointers() {
2260 assertTrue("empty from nullptr", UnicodeString(nullptr).isEmpty());
2261 assertTrue("empty from nullptr+length", UnicodeString(nullptr, 2).isEmpty());
2262 assertTrue("empty from read-only-alias nullptr", UnicodeString(true, nullptr, 3).isEmpty());
2263
2264 UnicodeString alias(nullptr, 4, 4); // empty, no alias
2265 assertTrue("empty from writable alias", alias.isEmpty());
2266 alias.append(u'a').append(u'b').append(u'c');
2267 UnicodeString expected(u"abc");
2268 assertEquals("abc from writable alias", expected, alias);
2269
2270 UErrorCode errorCode = U_ZERO_ERROR;
2271 UnicodeString(u"def").extract(nullptr, 0, errorCode);
2272 assertEquals("buffer overflow extracting to nullptr", U_BUFFER_OVERFLOW_ERROR, errorCode);
2273 }
2274
TestUnicodeStringInsertAppendToSelf()2275 void UnicodeStringTest::TestUnicodeStringInsertAppendToSelf() {
2276 IcuTestErrorCode status(*this, "TestUnicodeStringAppendToSelf");
2277
2278 // Test append operation
2279 UnicodeString str(u"foo ");
2280 str.append(str);
2281 str.append(str);
2282 str.append(str);
2283 assertEquals("", u"foo foo foo foo foo foo foo foo ", str);
2284
2285 // Test append operation with readonly alias to start
2286 str = UnicodeString(true, u"foo ", 4);
2287 str.append(str);
2288 str.append(str);
2289 str.append(str);
2290 assertEquals("", u"foo foo foo foo foo foo foo foo ", str);
2291
2292 // Test append operation with aliased substring
2293 str = u"abcde";
2294 UnicodeString sub = str.tempSubString(1, 2);
2295 str.append(sub);
2296 assertEquals("", u"abcdebc", str);
2297
2298 // Test append operation with double-aliased substring
2299 str = UnicodeString(true, u"abcde", 5);
2300 sub = str.tempSubString(1, 2);
2301 str.append(sub);
2302 assertEquals("", u"abcdebc", str);
2303
2304 // Test insert operation
2305 str = u"a-*b";
2306 str.insert(2, str);
2307 str.insert(4, str);
2308 str.insert(8, str);
2309 assertEquals("", u"a-a-a-a-a-a-a-a-*b*b*b*b*b*b*b*b", str);
2310
2311 // Test insert operation with readonly alias to start
2312 str = UnicodeString(true, u"a-*b", 4);
2313 str.insert(2, str);
2314 str.insert(4, str);
2315 str.insert(8, str);
2316 assertEquals("", u"a-a-a-a-a-a-a-a-*b*b*b*b*b*b*b*b", str);
2317
2318 // Test insert operation with aliased substring
2319 str = u"abcde";
2320 sub = str.tempSubString(1, 3);
2321 str.insert(2, sub);
2322 assertEquals("", u"abbcdcde", str);
2323
2324 // Test insert operation with double-aliased substring
2325 str = UnicodeString(true, u"abcde", 5);
2326 sub = str.tempSubString(1, 3);
2327 str.insert(2, sub);
2328 assertEquals("", u"abbcdcde", str);
2329 }
2330
TestLargeAppend()2331 void UnicodeStringTest::TestLargeAppend() {
2332 if(quick) return;
2333
2334 IcuTestErrorCode status(*this, "TestLargeAppend");
2335 // Make a large UnicodeString
2336 int32_t len = 0xAFFFFFF;
2337 UnicodeString str;
2338 char16_t *buf = str.getBuffer(len);
2339 // A fast way to set buffer to valid Unicode.
2340 // 4E4E is a valid unicode character
2341 uprv_memset(buf, 0x4e, len * 2);
2342 str.releaseBuffer(len);
2343 UnicodeString dest;
2344 // Append it 16 times
2345 // 0xAFFFFFF times 16 is 0xA4FFFFF1,
2346 // which is greater than INT32_MAX, which is 0x7FFFFFFF.
2347 int64_t total = 0;
2348 for (int32_t i = 0; i < 16; i++) {
2349 dest.append(str);
2350 total += len;
2351 if (total <= INT32_MAX) {
2352 assertFalse("dest is not bogus", dest.isBogus());
2353 } else {
2354 assertTrue("dest should be bogus", dest.isBogus());
2355 }
2356 }
2357 dest.remove();
2358 total = 0;
2359 for (int32_t i = 0; i < 16; i++) {
2360 dest.append(str);
2361 total += len;
2362 if (total + len <= INT32_MAX) {
2363 assertFalse("dest is not bogus", dest.isBogus());
2364 } else if (total <= INT32_MAX) {
2365 // Check that a string of exactly the maximum size works
2366 UnicodeString str2;
2367 int32_t remain = static_cast<int32_t>(INT32_MAX - total);
2368 char16_t *buf2 = str2.getBuffer(remain);
2369 if (buf2 == nullptr) {
2370 // if somehow memory allocation fail, return the test
2371 return;
2372 }
2373 uprv_memset(buf2, 0x4e, remain * 2);
2374 str2.releaseBuffer(remain);
2375 dest.append(str2);
2376 total += remain;
2377 assertEquals("When a string of exactly the maximum size works", (int64_t)INT32_MAX, total);
2378 assertEquals("When a string of exactly the maximum size works", INT32_MAX, dest.length());
2379 assertFalse("dest is not bogus", dest.isBogus());
2380
2381 // Check that a string size+1 goes bogus
2382 str2.truncate(1);
2383 dest.append(str2);
2384 total++;
2385 assertTrue("dest should be bogus", dest.isBogus());
2386 } else {
2387 assertTrue("dest should be bogus", dest.isBogus());
2388 }
2389 }
2390 }
2391