1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8
9 #include <utility>
10
11 #include "ustrtest.h"
12 #include "unicode/appendable.h"
13 #include "unicode/std_string.h"
14 #include "unicode/unistr.h"
15 #include "unicode/uchar.h"
16 #include "unicode/ustring.h"
17 #include "unicode/locid.h"
18 #include "unicode/strenum.h"
19 #include "unicode/ucnv.h"
20 #include "unicode/uenum.h"
21 #include "unicode/utf16.h"
22 #include "cmemory.h"
23 #include "charstr.h"
24
25 #if 0
26 #include "unicode/ustream.h"
27
28 #include <iostream>
29 using namespace std;
30
31 #endif
32
~UnicodeStringTest()33 UnicodeStringTest::~UnicodeStringTest() {}
34
35 extern IntlTest *createStringCaseTest();
36
runIndexedTest(int32_t index,UBool exec,const char * & name,char * par)37 void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char *par)
38 {
39 if (exec) logln("TestSuite UnicodeStringTest: ");
40 TESTCASE_AUTO_BEGIN;
41 TESTCASE_AUTO_CREATE_CLASS(StringCaseTest);
42 TESTCASE_AUTO(TestBasicManipulation);
43 TESTCASE_AUTO(TestCompare);
44 TESTCASE_AUTO(TestExtract);
45 TESTCASE_AUTO(TestRemoveReplace);
46 TESTCASE_AUTO(TestSearching);
47 TESTCASE_AUTO(TestSpacePadding);
48 TESTCASE_AUTO(TestPrefixAndSuffix);
49 TESTCASE_AUTO(TestFindAndReplace);
50 TESTCASE_AUTO(TestBogus);
51 TESTCASE_AUTO(TestReverse);
52 TESTCASE_AUTO(TestMiscellaneous);
53 TESTCASE_AUTO(TestStackAllocation);
54 TESTCASE_AUTO(TestUnescape);
55 TESTCASE_AUTO(TestCountChar32);
56 TESTCASE_AUTO(TestStringEnumeration);
57 TESTCASE_AUTO(TestNameSpace);
58 TESTCASE_AUTO(TestUTF32);
59 TESTCASE_AUTO(TestUTF8);
60 TESTCASE_AUTO(TestReadOnlyAlias);
61 TESTCASE_AUTO(TestAppendable);
62 TESTCASE_AUTO(TestUnicodeStringImplementsAppendable);
63 TESTCASE_AUTO(TestSizeofUnicodeString);
64 TESTCASE_AUTO(TestStartsWithAndEndsWithNulTerminated);
65 TESTCASE_AUTO(TestMoveSwap);
66 TESTCASE_AUTO(TestUInt16Pointers);
67 TESTCASE_AUTO(TestWCharPointers);
68 TESTCASE_AUTO(TestNullPointers);
69 TESTCASE_AUTO(TestUnicodeStringInsertAppendToSelf);
70 TESTCASE_AUTO(TestLargeAppend);
71 /* <issue: https://github.com/unicode-org/icu/pull/3416> 20250417 begin */
72 TESTCASE_AUTO(TestLargeMemory);
73 /* <issue: https://github.com/unicode-org/icu/pull/3416> 20250417 end */
74 TESTCASE_AUTO_END;
75 }
76
77 void
TestBasicManipulation()78 UnicodeStringTest::TestBasicManipulation()
79 {
80 UnicodeString test1("Now is the time for all men to come swiftly to the aid of the party.\n");
81 UnicodeString expectedValue;
82 UnicodeString *c;
83
84 c=test1.clone();
85 test1.insert(24, "good ");
86 expectedValue = "Now is the time for all good men to come swiftly to the aid of the party.\n";
87 if (test1 != expectedValue)
88 errln("insert() failed: expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
89
90 c->insert(24, "good ");
91 if(*c != expectedValue) {
92 errln("clone()->insert() failed: expected \"" + expectedValue + "\"\n,got \"" + *c + "\"");
93 }
94 delete c;
95
96 test1.remove(41, 8);
97 expectedValue = "Now is the time for all good men to come to the aid of the party.\n";
98 if (test1 != expectedValue)
99 errln("remove() failed: expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
100
101 test1.replace(58, 6, "ir country");
102 expectedValue = "Now is the time for all good men to come to the aid of their country.\n";
103 if (test1 != expectedValue)
104 errln("replace() failed: expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
105
106 UChar temp[80];
107 test1.extract(0, 15, temp);
108
109 UnicodeString test2(temp, 15);
110
111 expectedValue = "Now is the time";
112 if (test2 != expectedValue)
113 errln("extract() failed: expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
114
115 test2 += " for me to go!\n";
116 expectedValue = "Now is the time for me to go!\n";
117 if (test2 != expectedValue)
118 errln("operator+=() failed: expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
119
120 if (test1.length() != 70)
121 errln(UnicodeString("length() failed: expected 70, got ") + test1.length());
122 if (test2.length() != 30)
123 errln(UnicodeString("length() failed: expected 30, got ") + test2.length());
124
125 UnicodeString test3;
126 test3.append((UChar32)0x20402);
127 if(test3 != CharsToUnicodeString("\\uD841\\uDC02")){
128 errln((UnicodeString)"append failed for UChar32, expected \"\\\\ud841\\\\udc02\", got " + prettify(test3));
129 }
130 if(test3.length() != 2){
131 errln(UnicodeString("append or length failed for UChar32, expected 2, got ") + test3.length());
132 }
133 test3.append((UChar32)0x0074);
134 if(test3 != CharsToUnicodeString("\\uD841\\uDC02t")){
135 errln((UnicodeString)"append failed for UChar32, expected \"\\\\uD841\\\\uDC02t\", got " + prettify(test3));
136 }
137 if(test3.length() != 3){
138 errln((UnicodeString)"append or length failed for UChar32, expected 2, got " + test3.length());
139 }
140
141 // test some UChar32 overloads
142 if( test3.setTo((UChar32)0x10330).length() != 2 ||
143 test3.insert(0, (UChar32)0x20100).length() != 4 ||
144 test3.replace(2, 2, (UChar32)0xe0061).length() != 4 ||
145 (test3 = (UChar32)0x14001).length() != 2
146 ) {
147 errln((UnicodeString)"simple UChar32 overloads for replace, insert, setTo or = failed");
148 }
149
150 {
151 // test moveIndex32()
152 UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
153
154 if(
155 s.moveIndex32(2, -1)!=0 ||
156 s.moveIndex32(2, 1)!=4 ||
157 s.moveIndex32(2, 2)!=5 ||
158 s.moveIndex32(5, -2)!=2 ||
159 s.moveIndex32(0, -1)!=0 ||
160 s.moveIndex32(6, 1)!=6
161 ) {
162 errln("UnicodeString::moveIndex32() failed");
163 }
164
165 if(s.getChar32Start(1)!=0 || s.getChar32Start(2)!=2) {
166 errln("UnicodeString::getChar32Start() failed");
167 }
168
169 if(s.getChar32Limit(1)!=2 || s.getChar32Limit(2)!=2) {
170 errln("UnicodeString::getChar32Limit() failed");
171 }
172 }
173
174 {
175 // test new 2.2 constructors and setTo function that parallel Java's substring function.
176 UnicodeString src("Hello folks how are you?");
177 UnicodeString target1("how are you?");
178 if (target1 != UnicodeString(src, 12)) {
179 errln("UnicodeString(const UnicodeString&, int32_t) failed");
180 }
181 UnicodeString target2("folks");
182 if (target2 != UnicodeString(src, 6, 5)) {
183 errln("UnicodeString(const UnicodeString&, int32_t, int32_t) failed");
184 }
185 if (target1 != target2.setTo(src, 12)) {
186 errln("UnicodeString::setTo(const UnicodeString&, int32_t) failed");
187 }
188 }
189
190 {
191 // op+ is new in ICU 2.8
192 UnicodeString s=UnicodeString("abc", "")+UnicodeString("def", "")+UnicodeString("ghi", "");
193 if(s!=UnicodeString("abcdefghi", "")) {
194 errln("operator+(UniStr, UniStr) failed");
195 }
196 }
197
198 {
199 // tests for Jitterbug 2360
200 // verify that APIs with source pointer + length accept length == -1
201 // mostly test only where modified, only few functions did not already do this
202 if(UnicodeString("abc", -1, "")!=UnicodeString("abc", "")) {
203 errln("UnicodeString(codepageData, dataLength, codepage) does not work with dataLength==-1");
204 }
205
206 UChar buffer[10]={ 0x61, 0x62, 0x20ac, 0xd900, 0xdc05, 0, 0x62, 0xffff, 0xdbff, 0xdfff };
207 UnicodeString s, t(buffer, -1, UPRV_LENGTHOF(buffer));
208
209 if(s.setTo(buffer, -1, UPRV_LENGTHOF(buffer)).length()!=u_strlen(buffer)) {
210 errln("UnicodeString.setTo(buffer, length, capacity) does not work with length==-1");
211 }
212 if(t.length()!=u_strlen(buffer)) {
213 errln("UnicodeString(buffer, length, capacity) does not work with length==-1");
214 }
215
216 if(0!=s.caseCompare(buffer, -1, U_FOLD_CASE_DEFAULT)) {
217 errln("UnicodeString.caseCompare(const UChar *, length, options) does not work with length==-1");
218 }
219 if(0!=s.caseCompare(0, s.length(), buffer, U_FOLD_CASE_DEFAULT)) {
220 errln("UnicodeString.caseCompare(start, _length, const UChar *, options) does not work");
221 }
222
223 buffer[u_strlen(buffer)]=0xe4;
224 UnicodeString u(buffer, -1, UPRV_LENGTHOF(buffer));
225 if(s.setTo(buffer, -1, UPRV_LENGTHOF(buffer)).length()!=UPRV_LENGTHOF(buffer)) {
226 errln("UnicodeString.setTo(buffer without NUL, length, capacity) does not work with length==-1");
227 }
228 if(u.length()!=UPRV_LENGTHOF(buffer)) {
229 errln("UnicodeString(buffer without NUL, length, capacity) does not work with length==-1");
230 }
231
232 static const char cs[]={ 0x61, (char)0xe4, (char)0x85, 0 };
233 UConverter *cnv;
234 UErrorCode errorCode=U_ZERO_ERROR;
235
236 cnv=ucnv_open("ISO-8859-1", &errorCode);
237 UnicodeString v(cs, -1, cnv, errorCode);
238 ucnv_close(cnv);
239 if(v!=CharsToUnicodeString("a\\xe4\\x85")) {
240 errln("UnicodeString(const char *, length, cnv, errorCode) does not work with length==-1");
241 }
242 }
243
244 #if U_CHARSET_IS_UTF8
245 {
246 // Test the hardcoded-UTF-8 UnicodeString optimizations.
247 static const uint8_t utf8[]={ 0x61, 0xC3, 0xA4, 0xC3, 0x9F, 0xE4, 0xB8, 0x80, 0 };
248 static const UChar utf16[]={ 0x61, 0xE4, 0xDF, 0x4E00 };
249 UnicodeString from8a = UnicodeString((const char *)utf8);
250 UnicodeString from8b = UnicodeString((const char *)utf8, (int32_t)sizeof(utf8)-1);
251 UnicodeString from16(false, utf16, UPRV_LENGTHOF(utf16));
252 if(from8a != from16 || from8b != from16) {
253 errln("UnicodeString(const char * U_CHARSET_IS_UTF8) failed");
254 }
255 char buffer[16];
256 int32_t length8=from16.extract(0, 0x7fffffff, buffer, (uint32_t)sizeof(buffer));
257 if(length8!=((int32_t)sizeof(utf8)-1) || 0!=uprv_memcmp(buffer, utf8, sizeof(utf8))) {
258 errln("UnicodeString::extract(char * U_CHARSET_IS_UTF8) failed");
259 }
260 length8=from16.extract(1, 2, buffer, (uint32_t)sizeof(buffer));
261 if(length8!=4 || buffer[length8]!=0 || 0!=uprv_memcmp(buffer, utf8+1, length8)) {
262 errln("UnicodeString::extract(substring to char * U_CHARSET_IS_UTF8) failed");
263 }
264 }
265 #endif
266 }
267
268 void
TestCompare()269 UnicodeStringTest::TestCompare()
270 {
271 UnicodeString test1("this is a test");
272 UnicodeString test2("this is a test");
273 UnicodeString test3("this is a test of the emergency broadcast system");
274 UnicodeString test4("never say, \"this is a test\"!!");
275
276 UnicodeString test5((UChar)0x5000);
277 UnicodeString test6((UChar)0x5100);
278
279 UChar uniChars[] = { 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73,
280 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0 };
281 char chars[] = "this is a test";
282
283 // test operator== and operator!=
284 if (test1 != test2 || test1 == test3 || test1 == test4)
285 errln("operator== or operator!= failed");
286
287 // test operator> and operator<
288 if (test1 > test2 || test1 < test2 || !(test1 < test3) || !(test1 > test4) ||
289 !(test5 < test6)
290 ) {
291 errln("operator> or operator< failed");
292 }
293
294 // test operator>= and operator<=
295 if (!(test1 >= test2) || !(test1 <= test2) || !(test1 <= test3) || !(test1 >= test4))
296 errln("operator>= or operator<= failed");
297
298 // test compare(UnicodeString)
299 if (test1.compare(test2) != 0 || test1.compare(test3) >= 0 || test1.compare(test4) <= 0)
300 errln("compare(UnicodeString) failed");
301
302 //test compare(offset, length, UnicodeString)
303 if(test1.compare(0, 14, test2) != 0 ||
304 test3.compare(0, 14, test2) != 0 ||
305 test4.compare(12, 14, test2) != 0 ||
306 test3.compare(0, 18, test1) <=0 )
307 errln("compare(offset, length, UnicodeString) fails");
308
309 // test compare(UChar*)
310 if (test2.compare(uniChars) != 0 || test3.compare(uniChars) <= 0 || test4.compare(uniChars) >= 0)
311 errln("compare(UChar*) failed");
312
313 // test compare(char*)
314 if (test2.compare(chars) != 0 || test3.compare(chars) <= 0 || test4.compare(chars) >= 0)
315 errln("compare(char*) failed");
316
317 // test compare(UChar*, length)
318 if (test1.compare(uniChars, 4) <= 0 || test1.compare(uniChars, 4) <= 0)
319 errln("compare(UChar*, length) failed");
320
321 // test compare(thisOffset, thisLength, that, thatOffset, thatLength)
322 if (test1.compare(0, 14, test2, 0, 14) != 0
323 || test1.compare(0, 14, test3, 0, 14) != 0
324 || test1.compare(0, 14, test4, 12, 14) != 0)
325 errln("1. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
326
327 if (test1.compare(10, 4, test2, 0, 4) >= 0
328 || test1.compare(10, 4, test3, 22, 9) <= 0
329 || test1.compare(10, 4, test4, 22, 4) != 0)
330 errln("2. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
331
332 // test compareBetween
333 if (test1.compareBetween(0, 14, test2, 0, 14) != 0 || test1.compareBetween(0, 14, test3, 0, 14) != 0
334 || test1.compareBetween(0, 14, test4, 12, 26) != 0)
335 errln("compareBetween failed");
336
337 if (test1.compareBetween(10, 14, test2, 0, 4) >= 0 || test1.compareBetween(10, 14, test3, 22, 31) <= 0
338 || test1.compareBetween(10, 14, test4, 22, 26) != 0)
339 errln("compareBetween failed");
340
341 // test compare() etc. with strings that share a buffer but are not equal
342 test2=test1; // share the buffer, length() too large for the stackBuffer
343 test2.truncate(1); // change only the length, not the buffer
344 if( test1==test2 || test1<=test2 ||
345 test1.compare(test2)<=0 ||
346 test1.compareCodePointOrder(test2)<=0 ||
347 test1.compareCodePointOrder(0, INT32_MAX, test2)<=0 ||
348 test1.compareCodePointOrder(0, INT32_MAX, test2, 0, INT32_MAX)<=0 ||
349 test1.compareCodePointOrderBetween(0, INT32_MAX, test2, 0, INT32_MAX)<=0 ||
350 test1.caseCompare(test2, U_FOLD_CASE_DEFAULT)<=0
351 ) {
352 errln("UnicodeStrings that share a buffer but have different lengths compare as equal");
353 }
354
355 /* test compareCodePointOrder() */
356 {
357 /* these strings are in ascending order */
358 static const UChar strings[][4]={
359 { 0x61, 0 }, /* U+0061 */
360 { 0x20ac, 0xd801, 0 }, /* U+20ac U+d801 */
361 { 0x20ac, 0xd800, 0xdc00, 0 }, /* U+20ac U+10000 */
362 { 0xd800, 0 }, /* U+d800 */
363 { 0xd800, 0xff61, 0 }, /* U+d800 U+ff61 */
364 { 0xdfff, 0 }, /* U+dfff */
365 { 0xff61, 0xdfff, 0 }, /* U+ff61 U+dfff */
366 { 0xff61, 0xd800, 0xdc02, 0 }, /* U+ff61 U+10002 */
367 { 0xd800, 0xdc02, 0 }, /* U+10002 */
368 { 0xd84d, 0xdc56, 0 } /* U+23456 */
369 };
370 UnicodeString u[20]; // must be at least as long as strings[]
371 int32_t i;
372
373 for(i=0; i<UPRV_LENGTHOF(strings); ++i) {
374 u[i]=UnicodeString(true, strings[i], -1);
375 }
376
377 for(i=0; i<UPRV_LENGTHOF(strings)-1; ++i) {
378 if(u[i].compareCodePointOrder(u[i+1])>=0 || u[i].compareCodePointOrder(0, INT32_MAX, u[i+1].getBuffer())>=0) {
379 errln("error: UnicodeString::compareCodePointOrder() fails for string %d and the following one\n", i);
380 }
381 }
382 }
383
384 /* test caseCompare() */
385 {
386 static const UChar
387 _mixed[]= { 0x61, 0x42, 0x131, 0x3a3, 0xdf, 0x130, 0x49, 0xfb03, 0xd93f, 0xdfff, 0 },
388 _otherDefault[]= { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x69, 0x307, 0x69, 0x46, 0x66, 0x49, 0xd93f, 0xdfff, 0 },
389 _otherExcludeSpecialI[]={ 0x41, 0x62, 0x131, 0x3c3, 0x53, 0x73, 0x69, 0x131, 0x66, 0x46, 0x69, 0xd93f, 0xdfff, 0 },
390 _different[]= { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x130, 0x49, 0x46, 0x66, 0x49, 0xd93f, 0xdffd, 0 };
391
392 UnicodeString
393 mixed(true, _mixed, -1),
394 otherDefault(true, _otherDefault, -1),
395 otherExcludeSpecialI(true, _otherExcludeSpecialI, -1),
396 different(true, _different, -1);
397
398 int8_t result;
399
400 /* test caseCompare() */
401 result=mixed.caseCompare(otherDefault, U_FOLD_CASE_DEFAULT);
402 if(result!=0 || 0!=mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_DEFAULT)) {
403 errln("error: mixed.caseCompare(other, default)=%ld instead of 0\n", result);
404 }
405 result=mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
406 if(result!=0) {
407 errln("error: mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=%ld instead of 0\n", result);
408 }
409 result=mixed.caseCompare(otherDefault, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
410 if(result==0 || 0==mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_EXCLUDE_SPECIAL_I)) {
411 errln("error: mixed.caseCompare(other, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=0 instead of !=0\n");
412 }
413
414 /* test caseCompare() */
415 result=mixed.caseCompare(different, U_FOLD_CASE_DEFAULT);
416 if(result<=0) {
417 errln("error: mixed.caseCompare(different, default)=%ld instead of positive\n", result);
418 }
419
420 /* test caseCompare() - include the folded sharp s (U+00df) with different lengths */
421 result=mixed.caseCompare(1, 4, different, 1, 5, U_FOLD_CASE_DEFAULT);
422 if(result!=0 || 0!=mixed.caseCompareBetween(1, 5, different, 1, 6, U_FOLD_CASE_DEFAULT)) {
423 errln("error: mixed.caseCompare(mixed, 1, 4, different, 1, 5, default)=%ld instead of 0\n", result);
424 }
425
426 /* test caseCompare() - stop in the middle of the sharp s (U+00df) */
427 result=mixed.caseCompare(1, 4, different, 1, 4, U_FOLD_CASE_DEFAULT);
428 if(result<=0) {
429 errln("error: mixed.caseCompare(1, 4, different, 1, 4, default)=%ld instead of positive\n", result);
430 }
431 }
432
433 // test that srcLength=-1 is handled in functions that
434 // take input const UChar */int32_t srcLength (j785)
435 {
436 static const UChar u[]={ 0x61, 0x308, 0x62, 0 };
437 UnicodeString s=UNICODE_STRING("a\\u0308b", 8).unescape();
438
439 if(s.compare(u, -1)!=0 || s.compare(0, 999, u, 0, -1)!=0) {
440 errln("error UnicodeString::compare(..., const UChar *, srcLength=-1) does not work");
441 }
442
443 if(s.compareCodePointOrder(u, -1)!=0 || s.compareCodePointOrder(0, 999, u, 0, -1)!=0) {
444 errln("error UnicodeString::compareCodePointOrder(..., const UChar *, srcLength=-1, ...) does not work");
445 }
446
447 if(s.caseCompare(u, -1, U_FOLD_CASE_DEFAULT)!=0 || s.caseCompare(0, 999, u, 0, -1, U_FOLD_CASE_DEFAULT)!=0) {
448 errln("error UnicodeString::caseCompare(..., const UChar *, srcLength=-1, ...) does not work");
449 }
450
451 if(s.indexOf(u, 1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0)!=1) {
452 errln("error UnicodeString::indexOf(const UChar *, srcLength=-1, ...) does not work");
453 }
454
455 if(s.lastIndexOf(u, 1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0)!=1) {
456 errln("error UnicodeString::lastIndexOf(const UChar *, srcLength=-1, ...) does not work");
457 }
458
459 UnicodeString s2, s3;
460 s2.replace(0, 0, u+1, -1);
461 s3.replace(0, 0, u, 1, -1);
462 if(s.compare(1, 999, s2)!=0 || s2!=s3) {
463 errln("error UnicodeString::replace(..., const UChar *, srcLength=-1, ...) does not work");
464 }
465 }
466 }
467
468 void
TestExtract()469 UnicodeStringTest::TestExtract()
470 {
471 UnicodeString test1("Now is the time for all good men to come to the aid of their country.", "");
472 UnicodeString test2;
473 UChar test3[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
474 char test4[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
475 UnicodeString test5;
476 char test6[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
477
478 test1.extract(11, 12, test2);
479 test1.extract(11, 12, test3);
480 if (test1.extract(11, 12, test4) != 12 || test4[12] != 0) {
481 errln("UnicodeString.extract(char *) failed to return the correct size of destination buffer.");
482 }
483
484 // test proper pinning in extractBetween()
485 test1.extractBetween(-3, 7, test5);
486 if(test5!=UNICODE_STRING("Now is ", 7)) {
487 errln("UnicodeString.extractBetween(-3, 7) did not pin properly.");
488 }
489
490 test1.extractBetween(11, 23, test5);
491 if (test1.extract(60, 71, test6) != 9) {
492 errln("UnicodeString.extract() failed to return the correct size of destination buffer for end of buffer.");
493 }
494 if (test1.extract(11, 12, test6) != 12) {
495 errln("UnicodeString.extract() failed to return the correct size of destination buffer.");
496 }
497
498 // convert test4 back to Unicode for comparison
499 UnicodeString test4b(test4, 12);
500
501 if (test1.extract(11, 12, (char *)NULL) != 12) {
502 errln("UnicodeString.extract(NULL) failed to return the correct size of destination buffer.");
503 }
504 if (test1.extract(11, -1, test6) != 0) {
505 errln("UnicodeString.extract(-1) failed to stop reading the string.");
506 }
507
508 for (int32_t i = 0; i < 12; i++) {
509 if (test1.charAt((int32_t)(11 + i)) != test2.charAt(i)) {
510 errln(UnicodeString("extracting into a UnicodeString failed at position ") + i);
511 break;
512 }
513 if (test1.charAt((int32_t)(11 + i)) != test3[i]) {
514 errln(UnicodeString("extracting into an array of UChar failed at position ") + i);
515 break;
516 }
517 if (((char)test1.charAt((int32_t)(11 + i))) != test4b.charAt(i)) {
518 errln(UnicodeString("extracting into an array of char failed at position ") + i);
519 break;
520 }
521 if (test1.charAt((int32_t)(11 + i)) != test5.charAt(i)) {
522 errln(UnicodeString("extracting with extractBetween failed at position ") + i);
523 break;
524 }
525 }
526
527 // test preflighting and overflows with invariant conversion
528 if (test1.extract(0, 10, (char *)NULL, "") != 10) {
529 errln("UnicodeString.extract(0, 10, (char *)NULL, \"\") != 10");
530 }
531
532 test4[2] = (char)0xff;
533 if (test1.extract(0, 10, test4, 2, "") != 10) {
534 errln("UnicodeString.extract(0, 10, test4, 2, \"\") != 10");
535 }
536 if (test4[2] != (char)0xff) {
537 errln("UnicodeString.extract(0, 10, test4, 2, \"\") overwrote test4[2]");
538 }
539
540 {
541 // test new, NUL-terminating extract() function
542 UnicodeString s("terminate", "");
543 UChar dest[20]={
544 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5,
545 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5
546 };
547 UErrorCode errorCode;
548 int32_t length;
549
550 errorCode=U_ZERO_ERROR;
551 length=s.extract((UChar *)NULL, 0, errorCode);
552 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) {
553 errln("UnicodeString.extract(NULL, 0)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)", length, s.length(), u_errorName(errorCode));
554 }
555
556 errorCode=U_ZERO_ERROR;
557 length=s.extract(dest, s.length()-1, errorCode);
558 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) {
559 errln("UnicodeString.extract(dest too short)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)",
560 length, u_errorName(errorCode), s.length());
561 }
562
563 errorCode=U_ZERO_ERROR;
564 length=s.extract(dest, s.length(), errorCode);
565 if(errorCode!=U_STRING_NOT_TERMINATED_WARNING || length!=s.length()) {
566 errln("UnicodeString.extract(dest just right without NUL)==%d (%s) expected %d (U_STRING_NOT_TERMINATED_WARNING)",
567 length, u_errorName(errorCode), s.length());
568 }
569 if(dest[length-1]!=s[length-1] || dest[length]!=0xa5) {
570 errln("UnicodeString.extract(dest just right without NUL) did not extract the string correctly");
571 }
572
573 errorCode=U_ZERO_ERROR;
574 length=s.extract(dest, s.length()+1, errorCode);
575 if(errorCode!=U_ZERO_ERROR || length!=s.length()) {
576 errln("UnicodeString.extract(dest large enough)==%d (%s) expected %d (U_ZERO_ERROR)",
577 length, u_errorName(errorCode), s.length());
578 }
579 if(dest[length-1]!=s[length-1] || dest[length]!=0 || dest[length+1]!=0xa5) {
580 errln("UnicodeString.extract(dest large enough) did not extract the string correctly");
581 }
582 }
583
584 {
585 // test new UConverter extract() and constructor
586 UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
587 char buffer[32];
588 static const char expect[]={
589 (char)0xf0, (char)0xaf, (char)0xa6, (char)0x99,
590 (char)0xf0, (char)0x9d, (char)0x85, (char)0x9f,
591 (char)0xc3, (char)0x84,
592 (char)0xe1, (char)0xbb, (char)0x90
593 };
594 UErrorCode errorCode=U_ZERO_ERROR;
595 UConverter *cnv=ucnv_open("UTF-8", &errorCode);
596 int32_t length;
597
598 if(U_SUCCESS(errorCode)) {
599 // test preflighting
600 if( (length=s.extract(NULL, 0, cnv, errorCode))!=13 ||
601 errorCode!=U_BUFFER_OVERFLOW_ERROR
602 ) {
603 errln("UnicodeString::extract(NULL, UConverter) preflighting failed (length=%ld, %s)",
604 length, u_errorName(errorCode));
605 }
606 errorCode=U_ZERO_ERROR;
607 if( (length=s.extract(buffer, 2, cnv, errorCode))!=13 ||
608 errorCode!=U_BUFFER_OVERFLOW_ERROR
609 ) {
610 errln("UnicodeString::extract(too small, UConverter) preflighting failed (length=%ld, %s)",
611 length, u_errorName(errorCode));
612 }
613
614 // try error cases
615 errorCode=U_ZERO_ERROR;
616 if( s.extract(NULL, 2, cnv, errorCode)==13 || U_SUCCESS(errorCode)) {
617 errln("UnicodeString::extract(UConverter) succeeded with an illegal destination");
618 }
619 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
620 if( s.extract(NULL, 0, cnv, errorCode)==13 || U_SUCCESS(errorCode)) {
621 errln("UnicodeString::extract(UConverter) succeeded with a previous error code");
622 }
623 errorCode=U_ZERO_ERROR;
624
625 // extract for real
626 if( (length=s.extract(buffer, sizeof(buffer), cnv, errorCode))!=13 ||
627 uprv_memcmp(buffer, expect, 13)!=0 ||
628 buffer[13]!=0 ||
629 U_FAILURE(errorCode)
630 ) {
631 errln("UnicodeString::extract(UConverter) conversion failed (length=%ld, %s)",
632 length, u_errorName(errorCode));
633 }
634 // Test again with just the converter name.
635 if( (length=s.extract(0, s.length(), buffer, sizeof(buffer), "UTF-8"))!=13 ||
636 uprv_memcmp(buffer, expect, 13)!=0 ||
637 buffer[13]!=0 ||
638 U_FAILURE(errorCode)
639 ) {
640 errln("UnicodeString::extract(\"UTF-8\") conversion failed (length=%ld, %s)",
641 length, u_errorName(errorCode));
642 }
643
644 // try the constructor
645 UnicodeString t(expect, sizeof(expect), cnv, errorCode);
646 if(U_FAILURE(errorCode) || s!=t) {
647 errln("UnicodeString(UConverter) conversion failed (%s)",
648 u_errorName(errorCode));
649 }
650
651 ucnv_close(cnv);
652 }
653 }
654 }
655
656 void
TestRemoveReplace()657 UnicodeStringTest::TestRemoveReplace()
658 {
659 UnicodeString test1("The rain in Spain stays mainly on the plain");
660 UnicodeString test2("eat SPAMburgers!");
661 UChar test3[] = { 0x53, 0x50, 0x41, 0x4d, 0x4d, 0 };
662 char test4[] = "SPAM";
663 UnicodeString& test5 = test1;
664
665 test1.replace(4, 4, test2, 4, 4);
666 test1.replace(12, 5, test3, 4);
667 test3[4] = 0;
668 test1.replace(17, 4, test3);
669 test1.replace(23, 4, test4);
670 test1.replaceBetween(37, 42, test2, 4, 8);
671
672 if (test1 != "The SPAM in SPAM SPAMs SPAMly on the SPAM")
673 errln("One of the replace methods failed:\n"
674 " expected \"The SPAM in SPAM SPAMs SPAMly on the SPAM\",\n"
675 " got \"" + test1 + "\"");
676
677 test1.remove(21, 1);
678 test1.removeBetween(26, 28);
679
680 if (test1 != "The SPAM in SPAM SPAM SPAM on the SPAM")
681 errln("One of the remove methods failed:\n"
682 " expected \"The SPAM in SPAM SPAM SPAM on the SPAM\",\n"
683 " got \"" + test1 + "\"");
684
685 for (int32_t i = 0; i < test1.length(); i++) {
686 if (test5[i] != 0x53 && test5[i] != 0x50 && test5[i] != 0x41 && test5[i] != 0x4d && test5[i] != 0x20) {
687 test1.setCharAt(i, 0x78);
688 }
689 }
690
691 if (test1 != "xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM")
692 errln("One of the remove methods failed:\n"
693 " expected \"xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM\",\n"
694 " got \"" + test1 + "\"");
695
696 test1.remove();
697 if (test1.length() != 0)
698 errln("Remove() failed: expected empty string, got \"" + test1 + "\"");
699 }
700
701 void
TestSearching()702 UnicodeStringTest::TestSearching()
703 {
704 UnicodeString test1("test test ttest tetest testesteststt");
705 UnicodeString test2("test");
706 UChar testChar = 0x74;
707
708 UChar32 testChar32 = 0x20402;
709 UChar testData[]={
710 // 0 1 2 3 4 5 6 7
711 0xd841, 0xdc02, 0x0071, 0xdc02, 0xd841, 0x0071, 0xd841, 0xdc02,
712
713 // 8 9 10 11 12 13 14 15
714 0x0071, 0x0072, 0xd841, 0xdc02, 0x0071, 0xd841, 0xdc02, 0x0071,
715
716 // 16 17 18 19
717 0xdc02, 0xd841, 0x0073, 0x0000
718 };
719 UnicodeString test3(testData);
720 UnicodeString test4(testChar32);
721
722 uint16_t occurrences = 0;
723 int32_t startPos = 0;
724 for ( ;
725 startPos != -1 && startPos < test1.length();
726 (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
727 ;
728 if (occurrences != 6)
729 errln(UnicodeString("indexOf failed: expected to find 6 occurrences, found ") + occurrences);
730
731 for ( occurrences = 0, startPos = 10;
732 startPos != -1 && startPos < test1.length();
733 (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
734 ;
735 if (occurrences != 4)
736 errln(UnicodeString("indexOf with starting offset failed: "
737 "expected to find 4 occurrences, found ") + occurrences);
738
739 int32_t endPos = 28;
740 for ( occurrences = 0, startPos = 5;
741 startPos != -1 && startPos < test1.length();
742 (startPos = test1.indexOf(test2, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
743 ;
744 if (occurrences != 4)
745 errln(UnicodeString("indexOf with starting and ending offsets failed: "
746 "expected to find 4 occurrences, found ") + occurrences);
747
748 //using UChar32 string
749 for ( startPos=0, occurrences=0;
750 startPos != -1 && startPos < test3.length();
751 (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
752 ;
753 if (occurrences != 4)
754 errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences);
755
756 for ( startPos=10, occurrences=0;
757 startPos != -1 && startPos < test3.length();
758 (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
759 ;
760 if (occurrences != 2)
761 errln(UnicodeString("indexOf failed: expected to find 2 occurrences, found ") + occurrences);
762 //---
763
764 for ( occurrences = 0, startPos = 0;
765 startPos != -1 && startPos < test1.length();
766 (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
767 ;
768 if (occurrences != 16)
769 errln(UnicodeString("indexOf with character failed: "
770 "expected to find 16 occurrences, found ") + occurrences);
771
772 for ( occurrences = 0, startPos = 10;
773 startPos != -1 && startPos < test1.length();
774 (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
775 ;
776 if (occurrences != 12)
777 errln(UnicodeString("indexOf with character & start offset failed: "
778 "expected to find 12 occurrences, found ") + occurrences);
779
780 for ( occurrences = 0, startPos = 5, endPos = 28;
781 startPos != -1 && startPos < test1.length();
782 (startPos = test1.indexOf(testChar, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
783 ;
784 if (occurrences != 10)
785 errln(UnicodeString("indexOf with character & start & end offsets failed: "
786 "expected to find 10 occurrences, found ") + occurrences);
787
788 //testing for UChar32
789 UnicodeString subString;
790 for( occurrences =0, startPos=0; startPos < test3.length(); startPos +=1){
791 subString.append(test3, startPos, test3.length());
792 if(subString.indexOf(testChar32) != -1 ){
793 ++occurrences;
794 }
795 subString.remove();
796 }
797 if (occurrences != 14)
798 errln((UnicodeString)"indexOf failed: expected to find 14 occurrences, found " + occurrences);
799
800 for ( occurrences = 0, startPos = 0;
801 startPos != -1 && startPos < test3.length();
802 (startPos = test3.indexOf(testChar32, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
803 ;
804 if (occurrences != 4)
805 errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences);
806
807 endPos=test3.length();
808 for ( occurrences = 0, startPos = 5;
809 startPos != -1 && startPos < test3.length();
810 (startPos = test3.indexOf(testChar32, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
811 ;
812 if (occurrences != 3)
813 errln((UnicodeString)"indexOf with character & start & end offsets failed: expected to find 2 occurrences, found " + occurrences);
814 //---
815
816 if(test1.lastIndexOf(test2)!=29) {
817 errln("test1.lastIndexOf(test2)!=29");
818 }
819
820 if(test1.lastIndexOf(test2, 15)!=29 || test1.lastIndexOf(test2, 29)!=29 || test1.lastIndexOf(test2, 30)!=-1) {
821 errln("test1.lastIndexOf(test2, start) failed");
822 }
823
824 for ( occurrences = 0, startPos = 32;
825 startPos != -1;
826 (startPos = test1.lastIndexOf(test2, 5, startPos - 5)) != -1 ? ++occurrences : 0)
827 ;
828 if (occurrences != 4)
829 errln(UnicodeString("lastIndexOf with starting and ending offsets failed: "
830 "expected to find 4 occurrences, found ") + occurrences);
831
832 for ( occurrences = 0, startPos = 32;
833 startPos != -1;
834 (startPos = test1.lastIndexOf(testChar, 5, startPos - 5)) != -1 ? ++occurrences : 0)
835 ;
836 if (occurrences != 11)
837 errln(UnicodeString("lastIndexOf with character & start & end offsets failed: "
838 "expected to find 11 occurrences, found ") + occurrences);
839
840 //testing UChar32
841 startPos=test3.length();
842 for ( occurrences = 0;
843 startPos != -1;
844 (startPos = test3.lastIndexOf(testChar32, 5, startPos - 5)) != -1 ? ++occurrences : 0)
845 ;
846 if (occurrences != 3)
847 errln((UnicodeString)"lastIndexOf with character & start & end offsets failed: expected to find 3 occurrences, found " + occurrences);
848
849
850 for ( occurrences = 0, endPos = test3.length(); endPos > 0; endPos -= 1){
851 subString.remove();
852 subString.append(test3, 0, endPos);
853 if(subString.lastIndexOf(testChar32) != -1 ){
854 ++occurrences;
855 }
856 }
857 if (occurrences != 18)
858 errln((UnicodeString)"indexOf failed: expected to find 18 occurrences, found " + occurrences);
859 //---
860
861 // test that indexOf(UChar32) and lastIndexOf(UChar32)
862 // do not find surrogate code points when they are part of matched pairs
863 // (= part of supplementary code points)
864 // Jitterbug 1542
865 if(test3.indexOf((UChar32)0xd841) != 4 || test3.indexOf((UChar32)0xdc02) != 3) {
866 errln("error: UnicodeString::indexOf(UChar32 surrogate) finds a partial supplementary code point");
867 }
868 if( UnicodeString(test3, 0, 17).lastIndexOf((UChar)0xd841, 0) != 4 ||
869 UnicodeString(test3, 0, 17).lastIndexOf((UChar32)0xd841, 2) != 4 ||
870 test3.lastIndexOf((UChar32)0xd841, 0, 17) != 4 || test3.lastIndexOf((UChar32)0xdc02, 0, 17) != 16
871 ) {
872 errln("error: UnicodeString::lastIndexOf(UChar32 surrogate) finds a partial supplementary code point");
873 }
874 }
875
876 void
TestSpacePadding()877 UnicodeStringTest::TestSpacePadding()
878 {
879 UnicodeString test1("hello");
880 UnicodeString test2(" there");
881 UnicodeString test3("Hi! How ya doin'? Beautiful day, isn't it?");
882 UnicodeString test4;
883 UBool returnVal;
884 UnicodeString expectedValue;
885
886 returnVal = test1.padLeading(15);
887 expectedValue = " hello";
888 if (returnVal == false || test1 != expectedValue)
889 errln("padLeading() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
890
891 returnVal = test2.padTrailing(15);
892 expectedValue = " there ";
893 if (returnVal == false || test2 != expectedValue)
894 errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
895
896 expectedValue = test3;
897 returnVal = test3.padTrailing(15);
898 if (returnVal == true || test3 != expectedValue)
899 errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
900
901 expectedValue = "hello";
902 test4.setTo(test1).trim();
903
904 if (test4 != expectedValue || test1 == expectedValue || test4 != expectedValue)
905 errln("trim(UnicodeString&) failed");
906
907 test1.trim();
908 if (test1 != expectedValue)
909 errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
910
911 test2.trim();
912 expectedValue = "there";
913 if (test2 != expectedValue)
914 errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
915
916 test3.trim();
917 expectedValue = "Hi! How ya doin'? Beautiful day, isn't it?";
918 if (test3 != expectedValue)
919 errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
920
921 returnVal = test1.truncate(15);
922 expectedValue = "hello";
923 if (returnVal == true || test1 != expectedValue)
924 errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
925
926 returnVal = test2.truncate(15);
927 expectedValue = "there";
928 if (returnVal == true || test2 != expectedValue)
929 errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
930
931 returnVal = test3.truncate(15);
932 expectedValue = "Hi! How ya doi";
933 if (returnVal == false || test3 != expectedValue)
934 errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
935 }
936
937 void
TestPrefixAndSuffix()938 UnicodeStringTest::TestPrefixAndSuffix()
939 {
940 UnicodeString test1("Now is the time for all good men to come to the aid of their country.");
941 UnicodeString test2("Now");
942 UnicodeString test3("country.");
943 UnicodeString test4("count");
944
945 if (!test1.startsWith(test2) || !test1.startsWith(test2, 0, test2.length())) {
946 errln("startsWith() failed: \"" + test2 + "\" should be a prefix of \"" + test1 + "\".");
947 }
948
949 if (test1.startsWith(test3) ||
950 test1.startsWith(test3.getBuffer(), test3.length()) ||
951 test1.startsWith(test3.getTerminatedBuffer(), 0, -1)
952 ) {
953 errln("startsWith() failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test1 + "\".");
954 }
955
956 if (test1.endsWith(test2)) {
957 errln("endsWith() failed: \"" + test2 + "\" shouldn't be a suffix of \"" + test1 + "\".");
958 }
959
960 if (!test1.endsWith(test3)) {
961 errln("endsWith(test3) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
962 }
963 if (!test1.endsWith(test3, 0, INT32_MAX)) {
964 errln("endsWith(test3, 0, INT32_MAX) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
965 }
966
967 if(!test1.endsWith(test3.getBuffer(), test3.length())) {
968 errln("endsWith(test3.getBuffer(), test3.length()) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
969 }
970 if(!test1.endsWith(test3.getTerminatedBuffer(), 0, -1)) {
971 errln("endsWith(test3.getTerminatedBuffer(), 0, -1) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
972 }
973
974 if (!test3.startsWith(test4)) {
975 errln("endsWith(test4) failed: \"" + test4 + "\" should be a prefix of \"" + test3 + "\".");
976 }
977
978 if (test4.startsWith(test3)) {
979 errln("startsWith(test3) failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test4 + "\".");
980 }
981 }
982
983 void
TestStartsWithAndEndsWithNulTerminated()984 UnicodeStringTest::TestStartsWithAndEndsWithNulTerminated() {
985 UnicodeString test("abcde");
986 const UChar ab[] = { 0x61, 0x62, 0 };
987 const UChar de[] = { 0x64, 0x65, 0 };
988 assertTrue("abcde.startsWith(ab, -1)", test.startsWith(ab, -1));
989 assertTrue("abcde.startsWith(ab, 0, -1)", test.startsWith(ab, 0, -1));
990 assertTrue("abcde.endsWith(de, -1)", test.endsWith(de, -1));
991 assertTrue("abcde.endsWith(de, 0, -1)", test.endsWith(de, 0, -1));
992 }
993
994 void
TestFindAndReplace()995 UnicodeStringTest::TestFindAndReplace()
996 {
997 UnicodeString test1("One potato, two potato, three potato, four\n");
998 UnicodeString test2("potato");
999 UnicodeString test3("MISSISSIPPI");
1000
1001 UnicodeString expectedValue;
1002
1003 test1.findAndReplace(test2, test3);
1004 expectedValue = "One MISSISSIPPI, two MISSISSIPPI, three MISSISSIPPI, four\n";
1005 if (test1 != expectedValue)
1006 errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
1007 test1.findAndReplace(2, 32, test3, test2);
1008 expectedValue = "One potato, two potato, three MISSISSIPPI, four\n";
1009 if (test1 != expectedValue)
1010 errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
1011 }
1012
1013 void
TestReverse()1014 UnicodeStringTest::TestReverse()
1015 {
1016 UnicodeString test("backwards words say to used I");
1017
1018 test.reverse();
1019 test.reverse(2, 4);
1020 test.reverse(7, 2);
1021 test.reverse(10, 3);
1022 test.reverse(14, 5);
1023 test.reverse(20, 9);
1024
1025 if (test != "I used to say words backwards")
1026 errln("reverse() failed: Expected \"I used to say words backwards\",\n got \""
1027 + test + "\"");
1028
1029 test=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
1030 test.reverse();
1031 if(test.char32At(0)!=0x1ed0 || test.char32At(1)!=0xc4 || test.char32At(2)!=0x1d15f || test.char32At(4)!=0x2f999) {
1032 errln("reverse() failed with supplementary characters");
1033 }
1034
1035 // Test case for ticket #8091:
1036 // UnicodeString::reverse() failed to see a lead surrogate in the middle of
1037 // an odd-length string that contains no other lead surrogates.
1038 test=UNICODE_STRING_SIMPLE("ab\\U0001F4A9e").unescape();
1039 UnicodeString expected=UNICODE_STRING_SIMPLE("e\\U0001F4A9ba").unescape();
1040 test.reverse();
1041 if(test!=expected) {
1042 errln("reverse() failed with only lead surrogate in the middle");
1043 }
1044 }
1045
1046 void
TestMiscellaneous()1047 UnicodeStringTest::TestMiscellaneous()
1048 {
1049 UnicodeString test1("This is a test");
1050 UnicodeString test2("This is a test");
1051 UnicodeString test3("Me too!");
1052
1053 // test getBuffer(minCapacity) and releaseBuffer()
1054 test1=UnicodeString(); // make sure that it starts with its stackBuffer
1055 UChar *p=test1.getBuffer(20);
1056 if(test1.getCapacity()<20) {
1057 errln("UnicodeString::getBuffer(20).getCapacity()<20");
1058 }
1059
1060 test1.append((UChar)7); // must not be able to modify the string here
1061 test1.setCharAt(3, 7);
1062 test1.reverse();
1063 if( test1.length()!=0 ||
1064 test1.charAt(0)!=0xffff || test1.charAt(3)!=0xffff ||
1065 test1.getBuffer(10)!=0 || test1.getBuffer()!=0
1066 ) {
1067 errln("UnicodeString::getBuffer(minCapacity) allows read or write access to the UnicodeString");
1068 }
1069
1070 p[0]=1;
1071 p[1]=2;
1072 p[2]=3;
1073 test1.releaseBuffer(3);
1074 test1.append((UChar)4);
1075
1076 if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) {
1077 errln("UnicodeString::releaseBuffer(newLength) does not properly reallow access to the UnicodeString");
1078 }
1079
1080 // test releaseBuffer() without getBuffer(minCapacity) - must not have any effect
1081 test1.releaseBuffer(1);
1082 if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) {
1083 errln("UnicodeString::releaseBuffer(newLength) without getBuffer(minCapacity) changed the UnicodeString");
1084 }
1085
1086 // test getBuffer(const)
1087 const UChar *q=test1.getBuffer(), *r=test1.getBuffer();
1088 if( test1.length()!=4 ||
1089 q[0]!=1 || q[1]!=2 || q[2]!=3 || q[3]!=4 ||
1090 r[0]!=1 || r[1]!=2 || r[2]!=3 || r[3]!=4
1091 ) {
1092 errln("UnicodeString::getBuffer(const) does not return a usable buffer pointer");
1093 }
1094
1095 // test releaseBuffer() with a NUL-terminated buffer
1096 test1.getBuffer(20)[2]=0;
1097 test1.releaseBuffer(); // implicit -1
1098 if(test1.length()!=2 || test1.charAt(0)!=1 || test1.charAt(1) !=2) {
1099 errln("UnicodeString::releaseBuffer(-1) does not properly set the length of the UnicodeString");
1100 }
1101
1102 // test releaseBuffer() with a non-NUL-terminated buffer
1103 p=test1.getBuffer(256);
1104 for(int32_t i=0; i<test1.getCapacity(); ++i) {
1105 p[i]=(UChar)1; // fill the buffer with all non-NUL code units
1106 }
1107 test1.releaseBuffer(); // implicit -1
1108 if(test1.length()!=test1.getCapacity() || test1.charAt(1)!=1 || test1.charAt(100)!=1 || test1.charAt(test1.getCapacity()-1)!=1) {
1109 errln("UnicodeString::releaseBuffer(-1 but no NUL) does not properly set the length of the UnicodeString");
1110 }
1111
1112 // test getTerminatedBuffer()
1113 test1=UnicodeString("This is another test.", "");
1114 test2=UnicodeString("This is another test.", "");
1115 q=test1.getTerminatedBuffer();
1116 if(q[test1.length()]!=0 || test1!=test2 || test2.compare(q, -1)!=0) {
1117 errln("getTerminatedBuffer()[length]!=0");
1118 }
1119
1120 const UChar u[]={ 5, 6, 7, 8, 0 };
1121 test1.setTo(false, u, 3);
1122 q=test1.getTerminatedBuffer();
1123 if(q==u || q[0]!=5 || q[1]!=6 || q[2]!=7 || q[3]!=0) {
1124 errln("UnicodeString(u[3]).getTerminatedBuffer() returns a bad buffer");
1125 }
1126
1127 test1.setTo(true, u, -1);
1128 q=test1.getTerminatedBuffer();
1129 if(q!=u || test1.length()!=4 || q[3]!=8 || q[4]!=0) {
1130 errln("UnicodeString(u[-1]).getTerminatedBuffer() returns a bad buffer");
1131 }
1132
1133 // NOTE: Some compilers will optimize u"la" to point to the same static memory
1134 // as u" lila", offset by 3 code units
1135 test1=UnicodeString(true, u"la", 2);
1136 test1.append(UnicodeString(true, u" lila", 5).getTerminatedBuffer(), 0, -1);
1137 assertEquals("UnicodeString::append(const UChar *, start, length) failed",
1138 u"la lila", test1);
1139
1140 test1.insert(3, UnicodeString(true, u"dudum ", 6), 0, INT32_MAX);
1141 assertEquals("UnicodeString::insert(start, const UniStr &, start, length) failed",
1142 u"la dudum lila", test1);
1143
1144 static const UChar ucs[]={ 0x68, 0x6d, 0x20, 0 };
1145 test1.insert(9, ucs, -1);
1146 assertEquals("UnicodeString::insert(start, const UChar *, length) failed",
1147 u"la dudum hm lila", test1);
1148
1149 test1.replace(9, 2, (UChar)0x2b);
1150 assertEquals("UnicodeString::replace(start, length, UChar) failed",
1151 u"la dudum + lila", test1);
1152
1153 if(test1.hasMetaData() || UnicodeString().hasMetaData()) {
1154 errln("UnicodeString::hasMetaData() returns true");
1155 }
1156
1157 // test getTerminatedBuffer() on a truncated, shared, heap-allocated string
1158 test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
1159 test1.truncate(36); // ensure length()<getCapacity()
1160 test2=test1; // share the buffer
1161 test1.truncate(5);
1162 if(test1.length()!=5 || test1.getTerminatedBuffer()[5]!=0) {
1163 errln("UnicodeString(shared buffer).truncate() failed");
1164 }
1165 if(test2.length()!=36 || test2[5]!=0x66 || u_strlen(test2.getTerminatedBuffer())!=36) {
1166 errln("UnicodeString(shared buffer).truncate().getTerminatedBuffer() "
1167 "modified another copy of the string!");
1168 }
1169 test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
1170 test1.truncate(36); // ensure length()<getCapacity()
1171 test2=test1; // share the buffer
1172 test1.remove();
1173 if(test1.length()!=0 || test1.getTerminatedBuffer()[0]!=0) {
1174 errln("UnicodeString(shared buffer).remove() failed");
1175 }
1176 if(test2.length()!=36 || test2[0]!=0x61 || u_strlen(test2.getTerminatedBuffer())!=36) {
1177 errln("UnicodeString(shared buffer).remove().getTerminatedBuffer() "
1178 "modified another copy of the string!");
1179 }
1180
1181 // ticket #9740
1182 test1.setTo(true, ucs, 3);
1183 assertEquals("length of read-only alias", 3, test1.length());
1184 test1.trim();
1185 assertEquals("length of read-only alias after trim()", 2, test1.length());
1186 assertEquals("length of terminated buffer of read-only alias + trim()",
1187 2, u_strlen(test1.getTerminatedBuffer()));
1188 }
1189
1190 void
TestStackAllocation()1191 UnicodeStringTest::TestStackAllocation()
1192 {
1193 UChar testString[] ={
1194 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x63, 0x72, 0x61, 0x7a, 0x79, 0x20, 0x74, 0x65, 0x73, 0x74, 0x2e, 0 };
1195 UChar guardWord = 0x4DED;
1196 UnicodeString* test = 0;
1197
1198 test = new UnicodeString(testString);
1199 if (*test != "This is a crazy test.")
1200 errln("Test string failed to initialize properly.");
1201 if (guardWord != 0x04DED)
1202 errln("Test string initialization overwrote guard word!");
1203
1204 test->insert(8, "only ");
1205 test->remove(15, 6);
1206 if (*test != "This is only a test.")
1207 errln("Manipulation of test string failed to work right.");
1208 if (guardWord != 0x4DED)
1209 errln("Manipulation of test string overwrote guard word!");
1210
1211 // we have to deinitialize and release the backing store by calling the destructor
1212 // explicitly, since we can't overload operator delete
1213 delete test;
1214
1215 UChar workingBuffer[] = {
1216 0x4e, 0x6f, 0x77, 0x20, 0x69, 0x73, 0x20, 0x74, 0x68, 0x65, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x20,
1217 0x66, 0x6f, 0x72, 0x20, 0x61, 0x6c, 0x6c, 0x20, 0x6d, 0x65, 0x6e, 0x20, 0x74, 0x6f, 0x20,
1218 0x63, 0x6f, 0x6d, 0x65, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1219 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1220 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1221 UChar guardWord2 = 0x4DED;
1222
1223 test = new UnicodeString(workingBuffer, 35, 100);
1224 if (*test != "Now is the time for all men to come")
1225 errln("Stack-allocated backing store failed to initialize correctly.");
1226 if (guardWord2 != 0x4DED)
1227 errln("Stack-allocated backing store overwrote guard word!");
1228
1229 test->insert(24, "good ");
1230 if (*test != "Now is the time for all good men to come")
1231 errln("insert() on stack-allocated UnicodeString didn't work right");
1232 if (guardWord2 != 0x4DED)
1233 errln("insert() on stack-allocated UnicodeString overwrote guard word!");
1234
1235 if (workingBuffer[24] != 0x67)
1236 errln("insert() on stack-allocated UnicodeString didn't affect backing store");
1237
1238 *test += " to the aid of their country.";
1239 if (*test != "Now is the time for all good men to come to the aid of their country.")
1240 errln("Stack-allocated UnicodeString overflow didn't work");
1241 if (guardWord2 != 0x4DED)
1242 errln("Stack-allocated UnicodeString overflow overwrote guard word!");
1243
1244 *test = "ha!";
1245 if (*test != "ha!")
1246 errln("Assignment to stack-allocated UnicodeString didn't work");
1247 if (workingBuffer[0] != 0x4e)
1248 errln("Change to UnicodeString after overflow are still affecting original buffer");
1249 if (guardWord2 != 0x4DED)
1250 errln("Change to UnicodeString after overflow overwrote guard word!");
1251
1252 // test read-only aliasing with setTo()
1253 workingBuffer[0] = 0x20ac;
1254 workingBuffer[1] = 0x125;
1255 workingBuffer[2] = 0;
1256 test->setTo(true, workingBuffer, 2);
1257 if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x125) {
1258 errln("UnicodeString.setTo(readonly alias) does not alias correctly");
1259 }
1260
1261 UnicodeString *c=test->clone();
1262
1263 workingBuffer[1] = 0x109;
1264 if(test->charAt(1) != 0x109) {
1265 errln("UnicodeString.setTo(readonly alias) made a copy: did not see change in buffer");
1266 }
1267
1268 if(c->length() != 2 || c->charAt(1) != 0x125) {
1269 errln("clone(alias) did not copy the buffer");
1270 }
1271 delete c;
1272
1273 test->setTo(true, workingBuffer, -1);
1274 if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x109) {
1275 errln("UnicodeString.setTo(readonly alias, length -1) does not alias correctly");
1276 }
1277
1278 test->setTo(false, workingBuffer, -1);
1279 if(!test->isBogus()) {
1280 errln("UnicodeString.setTo(unterminated readonly alias, length -1) does not result in isBogus()");
1281 }
1282
1283 delete test;
1284
1285 test=new UnicodeString();
1286 UChar buffer[]={0x0061, 0x0062, 0x20ac, 0x0043, 0x0042, 0x0000};
1287 test->setTo(buffer, 4, 10);
1288 if(test->length() !=4 || test->charAt(0) != 0x0061 || test->charAt(1) != 0x0062 ||
1289 test->charAt(2) != 0x20ac || test->charAt(3) != 0x0043){
1290 errln((UnicodeString)"UnicodeString.setTo(UChar*, length, capacity) does not work correctly\n" + prettify(*test));
1291 }
1292 delete test;
1293
1294
1295 // test the UChar32 constructor
1296 UnicodeString c32Test((UChar32)0x10ff2a);
1297 if( c32Test.length() != U16_LENGTH(0x10ff2a) ||
1298 c32Test.char32At(c32Test.length() - 1) != 0x10ff2a
1299 ) {
1300 errln("The UnicodeString(UChar32) constructor does not work with a 0x10ff2a filler");
1301 }
1302
1303 // test the (new) capacity constructor
1304 UnicodeString capTest(5, (UChar32)0x2a, 5);
1305 if( capTest.length() != 5 * U16_LENGTH(0x2a) ||
1306 capTest.char32At(0) != 0x2a ||
1307 capTest.char32At(4) != 0x2a
1308 ) {
1309 errln("The UnicodeString capacity constructor does not work with an ASCII filler");
1310 }
1311
1312 capTest = UnicodeString(5, (UChar32)0x10ff2a, 5);
1313 if( capTest.length() != 5 * U16_LENGTH(0x10ff2a) ||
1314 capTest.char32At(0) != 0x10ff2a ||
1315 capTest.char32At(4) != 0x10ff2a
1316 ) {
1317 errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
1318 }
1319
1320 capTest = UnicodeString(5, (UChar32)0, 0);
1321 if(capTest.length() != 0) {
1322 errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
1323 }
1324 }
1325
1326 /**
1327 * Test the unescape() function.
1328 */
TestUnescape(void)1329 void UnicodeStringTest::TestUnescape(void) {
1330 UnicodeString IN("abc\\u4567 \\n\\r \\U00101234xyz\\x1\\x{5289}\\x1b", -1, US_INV);
1331 UnicodeString OUT("abc");
1332 OUT.append((UChar)0x4567);
1333 OUT.append(" ");
1334 OUT.append((UChar)0xA);
1335 OUT.append((UChar)0xD);
1336 OUT.append(" ");
1337 OUT.append((UChar32)0x00101234);
1338 OUT.append("xyz");
1339 OUT.append((UChar32)1).append((UChar32)0x5289).append((UChar)0x1b);
1340 UnicodeString result = IN.unescape();
1341 if (result != OUT) {
1342 errln("FAIL: " + prettify(IN) + ".unescape() -> " +
1343 prettify(result) + ", expected " +
1344 prettify(OUT));
1345 }
1346
1347 // test that an empty string is returned in case of an error
1348 if (!UNICODE_STRING("wrong \\u sequence", 17).unescape().isEmpty()) {
1349 errln("FAIL: unescaping of a string with an illegal escape sequence did not return an empty string");
1350 }
1351
1352 // ICU-21648 limit backslash-uhhhh escapes to ASCII hex digits
1353 UnicodeString euro = UnicodeString(u"\\u20aC").unescape();
1354 assertEquals("ASCII Euro", u"€", euro);
1355 UnicodeString nonASCIIEuro = UnicodeString(u"\\u୨෦aC").unescape();
1356 assertTrue("unescape() accepted non-ASCII digits", nonASCIIEuro.isEmpty());
1357 }
1358
1359 /* test code point counting functions --------------------------------------- */
1360
1361 /* reference implementation of UnicodeString::hasMoreChar32Than() */
1362 static int32_t
_refUnicodeStringHasMoreChar32Than(const UnicodeString & s,int32_t start,int32_t length,int32_t number)1363 _refUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) {
1364 int32_t count=s.countChar32(start, length);
1365 return count>number;
1366 }
1367
1368 /* compare the real function against the reference */
1369 void
_testUnicodeStringHasMoreChar32Than(const UnicodeString & s,int32_t start,int32_t length,int32_t number)1370 UnicodeStringTest::_testUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) {
1371 if(s.hasMoreChar32Than(start, length, number)!=_refUnicodeStringHasMoreChar32Than(s, start, length, number)) {
1372 errln("hasMoreChar32Than(%d, %d, %d)=%hd is wrong\n",
1373 start, length, number, s.hasMoreChar32Than(start, length, number));
1374 }
1375 }
1376
1377 void
TestCountChar32(void)1378 UnicodeStringTest::TestCountChar32(void) {
1379 {
1380 UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
1381
1382 // test countChar32()
1383 // note that this also calls and tests u_countChar32(length>=0)
1384 if(
1385 s.countChar32()!=4 ||
1386 s.countChar32(1)!=4 ||
1387 s.countChar32(2)!=3 ||
1388 s.countChar32(2, 3)!=2 ||
1389 s.countChar32(2, 0)!=0
1390 ) {
1391 errln("UnicodeString::countChar32() failed");
1392 }
1393
1394 // NUL-terminate the string buffer and test u_countChar32(length=-1)
1395 const UChar *buffer=s.getTerminatedBuffer();
1396 if(
1397 u_countChar32(buffer, -1)!=4 ||
1398 u_countChar32(buffer+1, -1)!=4 ||
1399 u_countChar32(buffer+2, -1)!=3 ||
1400 u_countChar32(buffer+3, -1)!=3 ||
1401 u_countChar32(buffer+4, -1)!=2 ||
1402 u_countChar32(buffer+5, -1)!=1 ||
1403 u_countChar32(buffer+6, -1)!=0
1404 ) {
1405 errln("u_countChar32(length=-1) failed");
1406 }
1407
1408 // test u_countChar32() with bad input
1409 if(u_countChar32(NULL, 5)!=0 || u_countChar32(buffer, -2)!=0) {
1410 errln("u_countChar32(bad input) failed (returned non-zero counts)");
1411 }
1412 }
1413
1414 /* test data and variables for hasMoreChar32Than() */
1415 static const UChar str[]={
1416 0x61, 0x62, 0xd800, 0xdc00,
1417 0xd801, 0xdc01, 0x63, 0xd802,
1418 0x64, 0xdc03, 0x65, 0x66,
1419 0xd804, 0xdc04, 0xd805, 0xdc05,
1420 0x67
1421 };
1422 UnicodeString string(str, UPRV_LENGTHOF(str));
1423 int32_t start, length, number;
1424
1425 /* test hasMoreChar32Than() */
1426 for(length=string.length(); length>=0; --length) {
1427 for(start=0; start<=length; ++start) {
1428 for(number=-1; number<=((length-start)+2); ++number) {
1429 _testUnicodeStringHasMoreChar32Than(string, start, length-start, number);
1430 }
1431 }
1432 }
1433
1434 /* test hasMoreChar32Than() with pinning */
1435 for(start=-1; start<=string.length()+1; ++start) {
1436 for(number=-1; number<=((string.length()-start)+2); ++number) {
1437 _testUnicodeStringHasMoreChar32Than(string, start, 0x7fffffff, number);
1438 }
1439 }
1440
1441 /* test hasMoreChar32Than() with a bogus string */
1442 string.setToBogus();
1443 for(length=-1; length<=1; ++length) {
1444 for(start=-1; start<=length; ++start) {
1445 for(number=-1; number<=((length-start)+2); ++number) {
1446 _testUnicodeStringHasMoreChar32Than(string, start, length-start, number);
1447 }
1448 }
1449 }
1450 }
1451
1452 void
TestBogus()1453 UnicodeStringTest::TestBogus() {
1454 UnicodeString test1("This is a test");
1455 UnicodeString test2("This is a test");
1456 UnicodeString test3("Me too!");
1457
1458 // test isBogus() and setToBogus()
1459 if (test1.isBogus() || test2.isBogus() || test3.isBogus()) {
1460 errln("A string returned true for isBogus()!");
1461 }
1462
1463 // NULL pointers are treated like empty strings
1464 // use other illegal arguments to make a bogus string
1465 test3.setTo(false, test1.getBuffer(), -2);
1466 if(!test3.isBogus()) {
1467 errln("A bogus string returned false for isBogus()!");
1468 }
1469 if (test1.hashCode() != test2.hashCode() || test1.hashCode() == test3.hashCode()) {
1470 errln("hashCode() failed");
1471 }
1472 if(test3.getBuffer()!=0 || test3.getBuffer(20)!=0 || test3.getTerminatedBuffer()!=0) {
1473 errln("bogus.getBuffer()!=0");
1474 }
1475 if (test1.indexOf(test3) != -1) {
1476 errln("bogus.indexOf() != -1");
1477 }
1478 if (test1.lastIndexOf(test3) != -1) {
1479 errln("bogus.lastIndexOf() != -1");
1480 }
1481 if (test1.caseCompare(test3, U_FOLD_CASE_DEFAULT) != 1 || test3.caseCompare(test1, U_FOLD_CASE_DEFAULT) != -1) {
1482 errln("caseCompare() doesn't work with bogus strings");
1483 }
1484 if (test1.compareCodePointOrder(test3) != 1 || test3.compareCodePointOrder(test1) != -1) {
1485 errln("compareCodePointOrder() doesn't work with bogus strings");
1486 }
1487
1488 // verify that non-assignment modifications fail and do not revive a bogus string
1489 test3.setToBogus();
1490 test3.append((UChar)0x61);
1491 if(!test3.isBogus() || test3.getBuffer()!=0) {
1492 errln("bogus.append('a') worked but must not");
1493 }
1494
1495 test3.setToBogus();
1496 test3.findAndReplace(UnicodeString((UChar)0x61), test2);
1497 if(!test3.isBogus() || test3.getBuffer()!=0) {
1498 errln("bogus.findAndReplace() worked but must not");
1499 }
1500
1501 test3.setToBogus();
1502 test3.trim();
1503 if(!test3.isBogus() || test3.getBuffer()!=0) {
1504 errln("bogus.trim() revived bogus but must not");
1505 }
1506
1507 test3.setToBogus();
1508 test3.remove(1);
1509 if(!test3.isBogus() || test3.getBuffer()!=0) {
1510 errln("bogus.remove(1) revived bogus but must not");
1511 }
1512
1513 test3.setToBogus();
1514 if(!test3.setCharAt(0, 0x62).isBogus() || !test3.isEmpty()) {
1515 errln("bogus.setCharAt(0, 'b') worked but must not");
1516 }
1517
1518 test3.setToBogus();
1519 if(test3.truncate(1) || !test3.isBogus() || !test3.isEmpty()) {
1520 errln("bogus.truncate(1) revived bogus but must not");
1521 }
1522
1523 // verify that assignments revive a bogus string
1524 test3.setToBogus();
1525 if(!test3.isBogus() || (test3=test1).isBogus() || test3!=test1) {
1526 errln("bogus.operator=() failed");
1527 }
1528
1529 test3.setToBogus();
1530 if(!test3.isBogus() || test3.fastCopyFrom(test1).isBogus() || test3!=test1) {
1531 errln("bogus.fastCopyFrom() failed");
1532 }
1533
1534 test3.setToBogus();
1535 if(!test3.isBogus() || test3.setTo(test1).isBogus() || test3!=test1) {
1536 errln("bogus.setTo(UniStr) failed");
1537 }
1538
1539 test3.setToBogus();
1540 if(!test3.isBogus() || test3.setTo(test1, 0).isBogus() || test3!=test1) {
1541 errln("bogus.setTo(UniStr, 0) failed");
1542 }
1543
1544 test3.setToBogus();
1545 if(!test3.isBogus() || test3.setTo(test1, 0, 0x7fffffff).isBogus() || test3!=test1) {
1546 errln("bogus.setTo(UniStr, 0, len) failed");
1547 }
1548
1549 test3.setToBogus();
1550 if(!test3.isBogus() || test3.setTo(test1.getBuffer(), test1.length()).isBogus() || test3!=test1) {
1551 errln("bogus.setTo(const UChar *, len) failed");
1552 }
1553
1554 test3.setToBogus();
1555 if(!test3.isBogus() || test3.setTo((UChar)0x2028).isBogus() || test3!=UnicodeString((UChar)0x2028)) {
1556 errln("bogus.setTo(UChar) failed");
1557 }
1558
1559 test3.setToBogus();
1560 if(!test3.isBogus() || test3.setTo((UChar32)0x1d157).isBogus() || test3!=UnicodeString((UChar32)0x1d157)) {
1561 errln("bogus.setTo(UChar32) failed");
1562 }
1563
1564 test3.setToBogus();
1565 if(!test3.isBogus() || test3.setTo(false, test1.getBuffer(), test1.length()).isBogus() || test3!=test1) {
1566 errln("bogus.setTo(readonly alias) failed");
1567 }
1568
1569 // writable alias to another string's buffer: very bad idea, just convenient for this test
1570 test3.setToBogus();
1571 if(!test3.isBogus() ||
1572 test3.setTo(const_cast<UChar *>(test1.getBuffer()),
1573 test1.length(), test1.getCapacity()).isBogus() ||
1574 test3!=test1) {
1575 errln("bogus.setTo(writable alias) failed");
1576 }
1577
1578 // verify simple, documented ways to turn a bogus string into an empty one
1579 test3.setToBogus();
1580 if(!test3.isBogus() || (test3=UnicodeString()).isBogus() || !test3.isEmpty()) {
1581 errln("bogus.operator=(UnicodeString()) failed");
1582 }
1583
1584 test3.setToBogus();
1585 if(!test3.isBogus() || test3.setTo(UnicodeString()).isBogus() || !test3.isEmpty()) {
1586 errln("bogus.setTo(UnicodeString()) failed");
1587 }
1588
1589 test3.setToBogus();
1590 if(test3.remove().isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) {
1591 errln("bogus.remove() failed");
1592 }
1593
1594 test3.setToBogus();
1595 if(test3.remove(0, INT32_MAX).isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) {
1596 errln("bogus.remove(0, INT32_MAX) failed");
1597 }
1598
1599 test3.setToBogus();
1600 if(test3.truncate(0) || test3.isBogus() || !test3.isEmpty()) {
1601 errln("bogus.truncate(0) failed");
1602 }
1603
1604 test3.setToBogus();
1605 if(!test3.isBogus() || test3.setTo((UChar32)-1).isBogus() || !test3.isEmpty()) {
1606 errln("bogus.setTo((UChar32)-1) failed");
1607 }
1608
1609 static const UChar nul=0;
1610
1611 test3.setToBogus();
1612 if(!test3.isBogus() || test3.setTo(&nul, 0).isBogus() || !test3.isEmpty()) {
1613 errln("bogus.setTo(&nul, 0) failed");
1614 }
1615
1616 test3.setToBogus();
1617 if(!test3.isBogus() || test3.getBuffer()!=0) {
1618 errln("setToBogus() failed to make a string bogus");
1619 }
1620
1621 test3.setToBogus();
1622 if(test1.isBogus() || !(test1=test3).isBogus()) {
1623 errln("normal=bogus failed to make the left string bogus");
1624 }
1625
1626 // test that NULL primitive input string values are treated like
1627 // empty strings, not errors (bogus)
1628 test2.setTo((UChar32)0x10005);
1629 if(test2.insert(1, nullptr, 1).length()!=2) {
1630 errln("UniStr.insert(...nullptr...) should not modify the string but does");
1631 }
1632
1633 UErrorCode errorCode=U_ZERO_ERROR;
1634 UnicodeString
1635 test4((const UChar *)NULL),
1636 test5(true, (const UChar *)NULL, 1),
1637 test6((UChar *)NULL, 5, 5),
1638 test7((const char *)NULL, 3, NULL, errorCode);
1639 if(test4.isBogus() || test5.isBogus() || test6.isBogus() || test7.isBogus()) {
1640 errln("a constructor set to bogus for a NULL input string, should be empty");
1641 }
1642
1643 test4.setTo(NULL, 3);
1644 test5.setTo(true, (const UChar *)NULL, 1);
1645 test6.setTo((UChar *)NULL, 5, 5);
1646 if(test4.isBogus() || test5.isBogus() || test6.isBogus()) {
1647 errln("a setTo() set to bogus for a NULL input string, should be empty");
1648 }
1649
1650 // test that bogus==bogus<any
1651 if(test1!=test3 || test1.compare(test3)!=0) {
1652 errln("bogus==bogus failed");
1653 }
1654
1655 test2.remove();
1656 if(test1>=test2 || !(test2>test1) || test1.compare(test2)>=0 || !(test2.compare(test1)>0)) {
1657 errln("bogus<empty failed");
1658 }
1659
1660 // test that copy constructor of bogus is bogus & clone of bogus is nullptr
1661 {
1662 test3.setToBogus();
1663 UnicodeString test3Copy(test3);
1664 UnicodeString *test3Clone = test3.clone();
1665 assertTrue(WHERE, test3.isBogus());
1666 assertTrue(WHERE, test3Copy.isBogus());
1667 assertTrue(WHERE, test3Clone == nullptr);
1668 }
1669 }
1670
1671 // StringEnumeration ------------------------------------------------------- ***
1672 // most of StringEnumeration is tested elsewhere
1673 // this test improves code coverage
1674
// Fixture strings handed out by TestEnumeration: three one-character entries
// followed by two long entries.
static const char *const testEnumStrings[] = {
    "a",
    "b",
    "c",
    "this is a long string which helps us test some buffer limits",
    "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"
};
1683
1684 class TestEnumeration : public StringEnumeration {
1685 public:
TestEnumeration()1686 TestEnumeration() : i(0) {}
1687
count(UErrorCode &) const1688 virtual int32_t count(UErrorCode& /*status*/) const override {
1689 return UPRV_LENGTHOF(testEnumStrings);
1690 }
1691
snext(UErrorCode & status)1692 virtual const UnicodeString *snext(UErrorCode &status) override {
1693 if(U_SUCCESS(status) && i<UPRV_LENGTHOF(testEnumStrings)) {
1694 unistr=UnicodeString(testEnumStrings[i++], "");
1695 return &unistr;
1696 }
1697
1698 return NULL;
1699 }
1700
reset(UErrorCode &)1701 virtual void reset(UErrorCode& /*status*/) override {
1702 i=0;
1703 }
1704
getStaticClassID()1705 static inline UClassID getStaticClassID() {
1706 return (UClassID)&fgClassID;
1707 }
getDynamicClassID() const1708 virtual UClassID getDynamicClassID() const override {
1709 return getStaticClassID();
1710 }
1711
1712 private:
1713 static const char fgClassID;
1714
1715 int32_t i;
1716 };
1717
1718 const char TestEnumeration::fgClassID=0;
1719
1720 void
TestStringEnumeration()1721 UnicodeStringTest::TestStringEnumeration() {
1722 UnicodeString s;
1723 TestEnumeration ten;
1724 int32_t i, length;
1725 UErrorCode status;
1726
1727 const UChar *pu;
1728 const char *pc;
1729
1730 // test the next() default implementation and ensureCharsCapacity()
1731 for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1732 status=U_ZERO_ERROR;
1733 pc=ten.next(&length, status);
1734 s=UnicodeString(testEnumStrings[i], "");
1735 if(U_FAILURE(status) || pc==NULL || length!=s.length() || UnicodeString(pc, length, "")!=s) {
1736 errln("StringEnumeration.next(%d) failed", i);
1737 }
1738 }
1739 status=U_ZERO_ERROR;
1740 if(ten.next(&length, status)!=NULL) {
1741 errln("StringEnumeration.next(done)!=NULL");
1742 }
1743
1744 // test the unext() default implementation
1745 ten.reset(status);
1746 for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1747 status=U_ZERO_ERROR;
1748 pu=ten.unext(&length, status);
1749 s=UnicodeString(testEnumStrings[i], "");
1750 if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(true, pu, length)!=s) {
1751 errln("StringEnumeration.unext(%d) failed", i);
1752 }
1753 }
1754 status=U_ZERO_ERROR;
1755 if(ten.unext(&length, status)!=NULL) {
1756 errln("StringEnumeration.unext(done)!=NULL");
1757 }
1758
1759 // test that the default clone() implementation works, and returns NULL
1760 if(ten.clone()!=NULL) {
1761 errln("StringEnumeration.clone()!=NULL");
1762 }
1763
1764 // test that uenum_openFromStringEnumeration() works
1765 // Need a heap allocated string enumeration because it is adopted by the UEnumeration.
1766 StringEnumeration *newTen = new TestEnumeration;
1767 status=U_ZERO_ERROR;
1768 UEnumeration *uten = uenum_openFromStringEnumeration(newTen, &status);
1769 if (uten==NULL || U_FAILURE(status)) {
1770 errln("fail at file %s, line %d, UErrorCode is %s\n", __FILE__, __LINE__, u_errorName(status));
1771 return;
1772 }
1773
1774 // test uenum_next()
1775 for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1776 status=U_ZERO_ERROR;
1777 pc=uenum_next(uten, &length, &status);
1778 if(U_FAILURE(status) || pc==NULL || strcmp(pc, testEnumStrings[i]) != 0) {
1779 errln("File %s, line %d, StringEnumeration.next(%d) failed", __FILE__, __LINE__, i);
1780 }
1781 }
1782 status=U_ZERO_ERROR;
1783 if(uenum_next(uten, &length, &status)!=NULL) {
1784 errln("File %s, line %d, uenum_next(done)!=NULL");
1785 }
1786
1787 // test the uenum_unext()
1788 uenum_reset(uten, &status);
1789 for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1790 status=U_ZERO_ERROR;
1791 pu=uenum_unext(uten, &length, &status);
1792 s=UnicodeString(testEnumStrings[i], "");
1793 if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(true, pu, length)!=s) {
1794 errln("File %s, Line %d, uenum_unext(%d) failed", __FILE__, __LINE__, i);
1795 }
1796 }
1797 status=U_ZERO_ERROR;
1798 if(uenum_unext(uten, &length, &status)!=NULL) {
1799 errln("File %s, Line %d, uenum_unext(done)!=NULL" __FILE__, __LINE__);
1800 }
1801
1802 uenum_close(uten);
1803 }
1804
1805 /*
1806 * Namespace test, to make sure that macros like UNICODE_STRING include the
1807 * namespace qualifier.
1808 *
1809 * Define a (bogus) UnicodeString class in another namespace and check for ambiguity.
1810 */
1811 namespace bogus {
1812 class UnicodeString {
1813 public:
1814 enum EInvariant { kInvariant };
UnicodeString()1815 UnicodeString() : i(1) {}
UnicodeString(UBool,const UChar *,int32_t textLength)1816 UnicodeString(UBool /*isTerminated*/, const UChar * /*text*/, int32_t textLength) : i(textLength) {(void)i;}
UnicodeString(const char *,int32_t length,enum EInvariant)1817 UnicodeString(const char * /*src*/, int32_t length, enum EInvariant /*inv*/
1818 ) : i(length) {}
1819 private:
1820 int32_t i;
1821 };
1822 }
1823
1824 void
TestNameSpace()1825 UnicodeStringTest::TestNameSpace() {
1826 // Provoke name collision unless the UnicodeString macros properly
1827 // qualify the icu::UnicodeString class.
1828 using namespace bogus;
1829
1830 // Use all UnicodeString macros from unistr.h.
1831 icu::UnicodeString s1=icu::UnicodeString("abc", 3, US_INV);
1832 icu::UnicodeString s2=UNICODE_STRING("def", 3);
1833 icu::UnicodeString s3=UNICODE_STRING_SIMPLE("ghi");
1834
1835 // Make sure the compiler does not optimize away instantiation of s1, s2, s3.
1836 icu::UnicodeString s4=s1+s2+s3;
1837 if(s4.length()!=9) {
1838 errln("Something wrong with UnicodeString::operator+().");
1839 }
1840 }
1841
1842 void
TestUTF32()1843 UnicodeStringTest::TestUTF32() {
1844 // Input string length US_STACKBUF_SIZE to cause overflow of the
1845 // initially chosen fStackBuffer due to supplementary characters.
1846 static const UChar32 utf32[] = {
1847 0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a,
1848 0x10000, 0x20000, 0xe0000, 0x10ffff
1849 };
1850 static const UChar expected_utf16[] = {
1851 0x41, 0xfffd, 0x61, 0xfffd, 0xfffd, 0xfffd, 0x5a, 0xd900, 0xdc00, 0x7a,
1852 0xd800, 0xdc00, 0xd840, 0xdc00, 0xdb40, 0xdc00, 0xdbff, 0xdfff
1853 };
1854 UnicodeString from32 = UnicodeString::fromUTF32(utf32, UPRV_LENGTHOF(utf32));
1855 UnicodeString expected(false, expected_utf16, UPRV_LENGTHOF(expected_utf16));
1856 if(from32 != expected) {
1857 errln("UnicodeString::fromUTF32() did not create the expected string.");
1858 }
1859
1860 static const UChar utf16[] = {
1861 0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
1862 };
1863 static const UChar32 expected_utf32[] = {
1864 0x41, 0xfffd, 0x61, 0xfffd, 0x5a, 0x50000, 0x7a, 0x10000, 0x10ffff
1865 };
1866 UChar32 result32[16];
1867 UErrorCode errorCode = U_ZERO_ERROR;
1868 int32_t length32 =
1869 UnicodeString(false, utf16, UPRV_LENGTHOF(utf16)).
1870 toUTF32(result32, UPRV_LENGTHOF(result32), errorCode);
1871 if( length32 != UPRV_LENGTHOF(expected_utf32) ||
1872 0 != uprv_memcmp(result32, expected_utf32, length32*4) ||
1873 result32[length32] != 0
1874 ) {
1875 errln("UnicodeString::toUTF32() did not create the expected string.");
1876 }
1877 }
1878
1879 class TestCheckedArrayByteSink : public CheckedArrayByteSink {
1880 public:
TestCheckedArrayByteSink(char * outbuf,int32_t capacity)1881 TestCheckedArrayByteSink(char* outbuf, int32_t capacity)
1882 : CheckedArrayByteSink(outbuf, capacity), calledFlush(false) {}
Flush()1883 virtual void Flush() override { calledFlush = true; }
1884 UBool calledFlush;
1885 };
1886
1887 void
TestUTF8()1888 UnicodeStringTest::TestUTF8() {
1889 static const uint8_t utf8[] = {
1890 // Code points:
1891 // 0x41, 0xd900,
1892 // 0x61, 0xdc00,
1893 // 0x110000, 0x5a,
1894 // 0x50000, 0x7a,
1895 // 0x10000, 0x20000,
1896 // 0xe0000, 0x10ffff
1897 0x41, 0xed, 0xa4, 0x80,
1898 0x61, 0xed, 0xb0, 0x80,
1899 0xf4, 0x90, 0x80, 0x80, 0x5a,
1900 0xf1, 0x90, 0x80, 0x80, 0x7a,
1901 0xf0, 0x90, 0x80, 0x80, 0xf0, 0xa0, 0x80, 0x80,
1902 0xf3, 0xa0, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
1903 };
1904 static const UChar expected_utf16[] = {
1905 0x41, 0xfffd, 0xfffd, 0xfffd,
1906 0x61, 0xfffd, 0xfffd, 0xfffd,
1907 0xfffd, 0xfffd, 0xfffd, 0xfffd,0x5a,
1908 0xd900, 0xdc00, 0x7a,
1909 0xd800, 0xdc00, 0xd840, 0xdc00,
1910 0xdb40, 0xdc00, 0xdbff, 0xdfff
1911 };
1912 UnicodeString from8 = UnicodeString::fromUTF8(StringPiece((const char *)utf8, (int32_t)sizeof(utf8)));
1913 UnicodeString expected(false, expected_utf16, UPRV_LENGTHOF(expected_utf16));
1914
1915 if(from8 != expected) {
1916 errln("UnicodeString::fromUTF8(StringPiece) did not create the expected string.");
1917 }
1918 std::string utf8_string((const char *)utf8, sizeof(utf8));
1919 UnicodeString from8b = UnicodeString::fromUTF8(utf8_string);
1920 if(from8b != expected) {
1921 errln("UnicodeString::fromUTF8(std::string) did not create the expected string.");
1922 }
1923
1924 static const UChar utf16[] = {
1925 0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
1926 };
1927 static const uint8_t expected_utf8[] = {
1928 0x41, 0xef, 0xbf, 0xbd, 0x61, 0xef, 0xbf, 0xbd, 0x5a, 0xf1, 0x90, 0x80, 0x80, 0x7a,
1929 0xf0, 0x90, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
1930 };
1931 UnicodeString us(false, utf16, UPRV_LENGTHOF(utf16));
1932
1933 char buffer[64];
1934 TestCheckedArrayByteSink sink(buffer, (int32_t)sizeof(buffer));
1935 us.toUTF8(sink);
1936 if( sink.NumberOfBytesWritten() != (int32_t)sizeof(expected_utf8) ||
1937 0 != uprv_memcmp(buffer, expected_utf8, sizeof(expected_utf8))
1938 ) {
1939 errln("UnicodeString::toUTF8() did not create the expected string.");
1940 }
1941 if(!sink.calledFlush) {
1942 errln("UnicodeString::toUTF8(sink) did not sink.Flush().");
1943 }
1944 // Initial contents for testing that toUTF8String() appends.
1945 std::string result8 = "-->";
1946 std::string expected8 = "-->" + std::string((const char *)expected_utf8, sizeof(expected_utf8));
1947 // Use the return value just for testing.
1948 std::string &result8r = us.toUTF8String(result8);
1949 if(result8r != expected8 || &result8r != &result8) {
1950 errln("UnicodeString::toUTF8String() did not create the expected string.");
1951 }
1952 }
1953
1954 // Test if this compiler supports Return Value Optimization of unnamed temporary objects.
wrapUChars(const UChar * uchars)1955 static UnicodeString wrapUChars(const UChar *uchars) {
1956 return UnicodeString(true, uchars, -1);
1957 }
1958
1959 void
TestReadOnlyAlias()1960 UnicodeStringTest::TestReadOnlyAlias() {
1961 UChar uchars[]={ 0x61, 0x62, 0 };
1962 UnicodeString alias(true, uchars, 2);
1963 if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
1964 errln("UnicodeString read-only-aliasing constructor does not behave as expected.");
1965 return;
1966 }
1967 alias.truncate(1);
1968 if(alias.length()!=1 || alias.getBuffer()!=uchars) {
1969 errln("UnicodeString(read-only-alias).truncate() did not preserve aliasing as expected.");
1970 }
1971 if(alias.getTerminatedBuffer()==uchars) {
1972 errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1973 "did not allocate and copy as expected.");
1974 }
1975 if(uchars[1]!=0x62) {
1976 errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1977 "modified the original buffer.");
1978 }
1979 if(1!=u_strlen(alias.getTerminatedBuffer())) {
1980 errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1981 "does not return a buffer terminated at the proper length.");
1982 }
1983
1984 alias.setTo(true, uchars, 2);
1985 if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
1986 errln("UnicodeString read-only-aliasing setTo() does not behave as expected.");
1987 return;
1988 }
1989 alias.remove();
1990 if(alias.length()!=0) {
1991 errln("UnicodeString(read-only-alias).remove() did not work.");
1992 }
1993 if(alias.getTerminatedBuffer()==uchars) {
1994 errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
1995 "did not un-alias as expected.");
1996 }
1997 if(uchars[0]!=0x61) {
1998 errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
1999 "modified the original buffer.");
2000 }
2001 if(0!=u_strlen(alias.getTerminatedBuffer())) {
2002 errln("UnicodeString.setTo(read-only-alias).remove().getTerminatedBuffer() "
2003 "does not return a buffer terminated at length 0.");
2004 }
2005
2006 UnicodeString longString=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789");
2007 alias.setTo(false, longString.getBuffer(), longString.length());
2008 alias.remove(0, 10);
2009 if(longString.compare(10, INT32_MAX, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+10) {
2010 errln("UnicodeString.setTo(read-only-alias).remove(0, 10) did not preserve aliasing as expected.");
2011 }
2012 alias.setTo(false, longString.getBuffer(), longString.length());
2013 alias.remove(27, 99);
2014 if(longString.compare(0, 27, alias)!=0 || alias.getBuffer()!=longString.getBuffer()) {
2015 errln("UnicodeString.setTo(read-only-alias).remove(27, 99) did not preserve aliasing as expected.");
2016 }
2017 alias.setTo(false, longString.getBuffer(), longString.length());
2018 alias.retainBetween(6, 30);
2019 if(longString.compare(6, 24, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+6) {
2020 errln("UnicodeString.setTo(read-only-alias).retainBetween(6, 30) did not preserve aliasing as expected.");
2021 }
2022
2023 UChar abc[]={ 0x61, 0x62, 0x63, 0 };
2024 UBool hasRVO= wrapUChars(abc).getBuffer()==abc;
2025
2026 UnicodeString temp;
2027 temp.fastCopyFrom(longString.tempSubString());
2028 if(temp!=longString || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
2029 errln("UnicodeString.tempSubString() failed");
2030 }
2031 temp.fastCopyFrom(longString.tempSubString(-3, 5));
2032 if(longString.compare(0, 5, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
2033 errln("UnicodeString.tempSubString(-3, 5) failed");
2034 }
2035 temp.fastCopyFrom(longString.tempSubString(17));
2036 if(longString.compare(17, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+17)) {
2037 errln("UnicodeString.tempSubString(17) failed");
2038 }
2039 temp.fastCopyFrom(longString.tempSubString(99));
2040 if(!temp.isEmpty()) {
2041 errln("UnicodeString.tempSubString(99) failed");
2042 }
2043 temp.fastCopyFrom(longString.tempSubStringBetween(6));
2044 if(longString.compare(6, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+6)) {
2045 errln("UnicodeString.tempSubStringBetween(6) failed");
2046 }
2047 temp.fastCopyFrom(longString.tempSubStringBetween(8, 18));
2048 if(longString.compare(8, 10, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+8)) {
2049 errln("UnicodeString.tempSubStringBetween(8, 18) failed");
2050 }
2051 UnicodeString bogusString;
2052 bogusString.setToBogus();
2053 temp.fastCopyFrom(bogusString.tempSubStringBetween(8, 18));
2054 if(!temp.isBogus()) {
2055 errln("UnicodeString.setToBogus().tempSubStringBetween(8, 18) failed");
2056 }
2057 }
2058
2059 void
doTestAppendable(UnicodeString & dest,Appendable & app)2060 UnicodeStringTest::doTestAppendable(UnicodeString &dest, Appendable &app) {
2061 static const UChar cde[3]={ 0x63, 0x64, 0x65 };
2062 static const UChar fg[3]={ 0x66, 0x67, 0 };
2063 if(!app.reserveAppendCapacity(12)) {
2064 errln("Appendable.reserve(12) failed");
2065 }
2066 app.appendCodeUnit(0x61);
2067 app.appendCodePoint(0x62);
2068 app.appendCodePoint(0x50000);
2069 app.appendString(cde, 3);
2070 app.appendString(fg, -1);
2071 UChar scratch[3];
2072 int32_t capacity=-1;
2073 UChar *buffer=app.getAppendBuffer(3, 3, scratch, 3, &capacity);
2074 if(capacity<3) {
2075 errln("Appendable.getAppendBuffer(min=3) returned capacity=%d<3", (int)capacity);
2076 return;
2077 }
2078 static const UChar hij[3]={ 0x68, 0x69, 0x6a };
2079 u_memcpy(buffer, hij, 3);
2080 app.appendString(buffer, 3);
2081 if(dest!=UNICODE_STRING_SIMPLE("ab\\U00050000cdefghij").unescape()) {
2082 errln("Appendable.append(...) failed");
2083 }
2084 buffer=app.getAppendBuffer(0, 3, scratch, 3, &capacity);
2085 if(buffer!=NULL || capacity!=0) {
2086 errln("Appendable.getAppendBuffer(min=0) failed");
2087 }
2088 capacity=1;
2089 buffer=app.getAppendBuffer(3, 3, scratch, 2, &capacity);
2090 if(buffer!=NULL || capacity!=0) {
2091 errln("Appendable.getAppendBuffer(scratch<min) failed");
2092 }
2093 }
2094
2095 class SimpleAppendable : public Appendable {
2096 public:
SimpleAppendable(UnicodeString & dest)2097 explicit SimpleAppendable(UnicodeString &dest) : str(dest) {}
appendCodeUnit(UChar c)2098 virtual UBool appendCodeUnit(UChar c) override { str.append(c); return true; }
reset()2099 SimpleAppendable &reset() { str.remove(); return *this; }
2100 private:
2101 UnicodeString &str;
2102 };
2103
2104 void
TestAppendable()2105 UnicodeStringTest::TestAppendable() {
2106 UnicodeString dest;
2107 SimpleAppendable app(dest);
2108 doTestAppendable(dest, app);
2109 }
2110
2111 void
TestUnicodeStringImplementsAppendable()2112 UnicodeStringTest::TestUnicodeStringImplementsAppendable() {
2113 UnicodeString dest;
2114 UnicodeStringAppendable app(dest);
2115 doTestAppendable(dest, app);
2116 }
2117
2118 void
TestSizeofUnicodeString()2119 UnicodeStringTest::TestSizeofUnicodeString() {
2120 // See the comments in unistr.h near the declaration of UnicodeString's fields.
2121 // See the API comments for UNISTR_OBJECT_SIZE.
2122 size_t sizeofUniStr=sizeof(UnicodeString);
2123 size_t expected=UNISTR_OBJECT_SIZE;
2124 if(expected!=sizeofUniStr) {
2125 // Possible cause: UNISTR_OBJECT_SIZE may not be a multiple of sizeof(pointer),
2126 // of the compiler might add more internal padding than expected.
2127 errln("sizeof(UnicodeString)=%d, expected UNISTR_OBJECT_SIZE=%d",
2128 (int)sizeofUniStr, (int)expected);
2129 }
2130 if(sizeofUniStr<32) {
2131 errln("sizeof(UnicodeString)=%d < 32, probably too small", (int)sizeofUniStr);
2132 }
2133 // We assume that the entire UnicodeString object,
2134 // minus the vtable pointer and 2 bytes for flags and short length,
2135 // is available for internal storage of UChars.
2136 int32_t expectedStackBufferLength=((int32_t)UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR;
2137 UnicodeString s;
2138 const UChar *emptyBuffer=s.getBuffer();
2139 for(int32_t i=0; i<expectedStackBufferLength; ++i) {
2140 s.append((UChar)0x2e);
2141 }
2142 const UChar *fullBuffer=s.getBuffer();
2143 if(fullBuffer!=emptyBuffer) {
2144 errln("unexpected reallocation when filling with assumed stack buffer size of %d",
2145 expectedStackBufferLength);
2146 }
2147 const UChar *terminatedBuffer=s.getTerminatedBuffer();
2148 if(terminatedBuffer==emptyBuffer) {
2149 errln("unexpected keeping stack buffer when overfilling assumed stack buffer size of %d",
2150 expectedStackBufferLength);
2151 }
2152 }
2153
2154 // Try to avoid clang -Wself-move warnings from s1 = std::move(s1);
moveFrom(UnicodeString & dest,UnicodeString & src)2155 void moveFrom(UnicodeString &dest, UnicodeString &src) {
2156 dest = std::move(src);
2157 }
2158
2159 void
TestMoveSwap()2160 UnicodeStringTest::TestMoveSwap() {
2161 static const UChar abc[3] = { 0x61, 0x62, 0x63 }; // "abc"
2162 UnicodeString s1(false, abc, UPRV_LENGTHOF(abc)); // read-only alias
2163 UnicodeString s2(100, 0x7a, 100); // 100 * 'z' should be on the heap
2164 UnicodeString s3("defg", 4, US_INV); // in stack buffer
2165 const UChar *p = s2.getBuffer();
2166 s1.swap(s2);
2167 if(s1.getBuffer() != p || s1.length() != 100 || s2.getBuffer() != abc || s2.length() != 3) {
2168 errln("UnicodeString.swap() did not swap");
2169 }
2170 swap(s2, s3);
2171 if(s2 != UNICODE_STRING_SIMPLE("defg") || s3.getBuffer() != abc || s3.length() != 3) {
2172 errln("swap(UnicodeString) did not swap back");
2173 }
2174 UnicodeString s4;
2175 s4 = std::move(s1);
2176 if(s4.getBuffer() != p || s4.length() != 100 || !s1.isBogus()) {
2177 errln("UnicodeString = std::move(heap) did not move");
2178 }
2179 UnicodeString s5;
2180 s5 = std::move(s2);
2181 if(s5 != UNICODE_STRING_SIMPLE("defg")) {
2182 errln("UnicodeString = std::move(stack) did not move");
2183 }
2184 UnicodeString s6;
2185 s6 = std::move(s3);
2186 if(s6.getBuffer() != abc || s6.length() != 3) {
2187 errln("UnicodeString = std::move(alias) did not move");
2188 }
2189 infoln("TestMoveSwap() with rvalue references");
2190 s1 = static_cast<UnicodeString &&>(s6);
2191 if(s1.getBuffer() != abc || s1.length() != 3) {
2192 errln("UnicodeString move assignment operator did not move");
2193 }
2194 UnicodeString s7(static_cast<UnicodeString &&>(s4));
2195 if(s7.getBuffer() != p || s7.length() != 100 || !s4.isBogus()) {
2196 errln("UnicodeString move constructor did not move");
2197 }
2198
2199 // Move self assignment leaves the object valid but in an undefined state.
2200 // Do it to make sure there is no crash,
2201 // but do not check for any particular resulting value.
2202 moveFrom(s1, s1);
2203 moveFrom(s2, s2);
2204 moveFrom(s3, s3);
2205 moveFrom(s4, s4);
2206 moveFrom(s5, s5);
2207 moveFrom(s6, s6);
2208 moveFrom(s7, s7);
2209 // Simple copy assignment must work.
2210 UnicodeString simple = UNICODE_STRING_SIMPLE("simple");
2211 s1 = s6 = s4 = s7 = simple;
2212 if(s1 != simple || s4 != simple || s6 != simple || s7 != simple) {
2213 errln("UnicodeString copy after self-move did not work");
2214 }
2215 }
2216
2217 void
TestUInt16Pointers()2218 UnicodeStringTest::TestUInt16Pointers() {
2219 static const uint16_t carr[] = { 0x61, 0x62, 0x63, 0 };
2220 uint16_t arr[4];
2221
2222 UnicodeString expected(u"abc");
2223 assertEquals("abc from pointer", expected, UnicodeString(carr));
2224 assertEquals("abc from pointer+length", expected, UnicodeString(carr, 3));
2225 assertEquals("abc from read-only-alias pointer", expected, UnicodeString(true, carr, 3));
2226
2227 UnicodeString alias(arr, 0, 4);
2228 alias.append(u'a').append(u'b').append(u'c');
2229 assertEquals("abc from writable alias", expected, alias);
2230 assertEquals("buffer=abc from writable alias", expected, UnicodeString(arr, 3));
2231
2232 UErrorCode errorCode = U_ZERO_ERROR;
2233 int32_t length = UnicodeString(u"def").extract(arr, 4, errorCode);
2234 assertSuccess(WHERE, errorCode);
2235 assertEquals("def from extract()", UnicodeString(u"def"), UnicodeString(arr, length));
2236 }
2237
2238 void
TestWCharPointers()2239 UnicodeStringTest::TestWCharPointers() {
2240 #if U_SIZEOF_WCHAR_T==2
2241 static const wchar_t carr[] = { 0x61, 0x62, 0x63, 0 };
2242 wchar_t arr[4];
2243
2244 UnicodeString expected(u"abc");
2245 assertEquals("abc from pointer", expected, UnicodeString(carr));
2246 assertEquals("abc from pointer+length", expected, UnicodeString(carr, 3));
2247 assertEquals("abc from read-only-alias pointer", expected, UnicodeString(true, carr, 3));
2248
2249 UnicodeString alias(arr, 0, 4);
2250 alias.append(u'a').append(u'b').append(u'c');
2251 assertEquals("abc from writable alias", expected, alias);
2252 assertEquals("buffer=abc from writable alias", expected, UnicodeString(arr, 3));
2253
2254 UErrorCode errorCode = U_ZERO_ERROR;
2255 int32_t length = UnicodeString(u"def").extract(arr, 4, errorCode);
2256 assertSuccess(WHERE, errorCode);
2257 assertEquals("def from extract()", UnicodeString(u"def"), UnicodeString(arr, length));
2258 #endif
2259 }
2260
2261 void
TestNullPointers()2262 UnicodeStringTest::TestNullPointers() {
2263 assertTrue("empty from nullptr", UnicodeString(nullptr).isEmpty());
2264 assertTrue("empty from nullptr+length", UnicodeString(nullptr, 2).isEmpty());
2265 assertTrue("empty from read-only-alias nullptr", UnicodeString(true, nullptr, 3).isEmpty());
2266
2267 UnicodeString alias(nullptr, 4, 4); // empty, no alias
2268 assertTrue("empty from writable alias", alias.isEmpty());
2269 alias.append(u'a').append(u'b').append(u'c');
2270 UnicodeString expected(u"abc");
2271 assertEquals("abc from writable alias", expected, alias);
2272
2273 UErrorCode errorCode = U_ZERO_ERROR;
2274 UnicodeString(u"def").extract(nullptr, 0, errorCode);
2275 assertEquals("buffer overflow extracting to nullptr", U_BUFFER_OVERFLOW_ERROR, errorCode);
2276 }
2277
TestUnicodeStringInsertAppendToSelf()2278 void UnicodeStringTest::TestUnicodeStringInsertAppendToSelf() {
2279 IcuTestErrorCode status(*this, "TestUnicodeStringAppendToSelf");
2280
2281 // Test append operation
2282 UnicodeString str(u"foo ");
2283 str.append(str);
2284 str.append(str);
2285 str.append(str);
2286 assertEquals("", u"foo foo foo foo foo foo foo foo ", str);
2287
2288 // Test append operation with readonly alias to start
2289 str = UnicodeString(true, u"foo ", 4);
2290 str.append(str);
2291 str.append(str);
2292 str.append(str);
2293 assertEquals("", u"foo foo foo foo foo foo foo foo ", str);
2294
2295 // Test append operation with aliased substring
2296 str = u"abcde";
2297 UnicodeString sub = str.tempSubString(1, 2);
2298 str.append(sub);
2299 assertEquals("", u"abcdebc", str);
2300
2301 // Test append operation with double-aliased substring
2302 str = UnicodeString(true, u"abcde", 5);
2303 sub = str.tempSubString(1, 2);
2304 str.append(sub);
2305 assertEquals("", u"abcdebc", str);
2306
2307 // Test insert operation
2308 str = u"a-*b";
2309 str.insert(2, str);
2310 str.insert(4, str);
2311 str.insert(8, str);
2312 assertEquals("", u"a-a-a-a-a-a-a-a-*b*b*b*b*b*b*b*b", str);
2313
2314 // Test insert operation with readonly alias to start
2315 str = UnicodeString(true, u"a-*b", 4);
2316 str.insert(2, str);
2317 str.insert(4, str);
2318 str.insert(8, str);
2319 assertEquals("", u"a-a-a-a-a-a-a-a-*b*b*b*b*b*b*b*b", str);
2320
2321 // Test insert operation with aliased substring
2322 str = u"abcde";
2323 sub = str.tempSubString(1, 3);
2324 str.insert(2, sub);
2325 assertEquals("", u"abbcdcde", str);
2326
2327 // Test insert operation with double-aliased substring
2328 str = UnicodeString(true, u"abcde", 5);
2329 sub = str.tempSubString(1, 3);
2330 str.insert(2, sub);
2331 assertEquals("", u"abbcdcde", str);
2332 }
2333
2334 /* <issue: https://github.com/unicode-org/icu/pull/3416> 20250417 begin */
TestLargeMemory()2335 void UnicodeStringTest::TestLargeMemory() {
2336 #if U_PLATFORM_IS_LINUX_BASED || U_PLATFORM_IS_DARWIN_BASED
2337 if(quick) { return; }
2338 IcuTestErrorCode status(*this, "TestLargeMemory");
2339 constexpr uint32_t len = 2147483643;
2340 char16_t *buf = new char16_t[len];
2341 if (buf == nullptr) { return; }
2342 uprv_memset(buf, 0x4e, len * 2);
2343 icu::UnicodeString test(buf, len);
2344 delete [] buf;
2345 #endif
2346 }
2347 /* <issue: https://github.com/unicode-org/icu/pull/3416> 20250417 end */
2348
TestLargeAppend()2349 void UnicodeStringTest::TestLargeAppend() {
2350 if(quick) return;
2351
2352 IcuTestErrorCode status(*this, "TestLargeAppend");
2353 // Make a large UnicodeString
2354 int32_t len = 0xAFFFFFF;
2355 UnicodeString str;
2356 char16_t *buf = str.getBuffer(len);
2357 // A fast way to set buffer to valid Unicode.
2358 // 4E4E is a valid unicode character
2359 uprv_memset(buf, 0x4e, len * 2);
2360 str.releaseBuffer(len);
2361 UnicodeString dest;
2362 // Append it 16 times
2363 // 0xAFFFFFF times 16 is 0xA4FFFFF1,
2364 // which is greater than INT32_MAX, which is 0x7FFFFFFF.
2365 int64_t total = 0;
2366 for (int32_t i = 0; i < 16; i++) {
2367 dest.append(str);
2368 total += len;
2369 if (total <= INT32_MAX) {
2370 assertFalse("dest is not bogus", dest.isBogus());
2371 } else {
2372 assertTrue("dest should be bogus", dest.isBogus());
2373 }
2374 }
2375 dest.remove();
2376 total = 0;
2377 for (int32_t i = 0; i < 16; i++) {
2378 dest.append(str);
2379 total += len;
2380 if (total + len <= INT32_MAX) {
2381 assertFalse("dest is not bogus", dest.isBogus());
2382 } else if (total <= INT32_MAX) {
2383 // Check that a string of exactly the maximum size works
2384 UnicodeString str2;
2385 int32_t remain = static_cast<int32_t>(INT32_MAX - total);
2386 char16_t *buf2 = str2.getBuffer(remain);
2387 if (buf2 == nullptr) {
2388 // if somehow memory allocation fail, return the test
2389 return;
2390 }
2391 uprv_memset(buf2, 0x4e, remain * 2);
2392 str2.releaseBuffer(remain);
2393 dest.append(str2);
2394 total += remain;
2395 assertEquals("When a string of exactly the maximum size works", (int64_t)INT32_MAX, total);
2396 assertEquals("When a string of exactly the maximum size works", INT32_MAX, dest.length());
2397 assertFalse("dest is not bogus", dest.isBogus());
2398
2399 // Check that a string size+1 goes bogus
2400 str2.truncate(1);
2401 dest.append(str2);
2402 total++;
2403 assertTrue("dest should be bogus", dest.isBogus());
2404 } else {
2405 assertTrue("dest should be bogus", dest.isBogus());
2406 }
2407 }
2408 }
2409