1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8
9 #include <utility>
10
11 #include "ustrtest.h"
12 #include "unicode/appendable.h"
13 #include "unicode/std_string.h"
14 #include "unicode/unistr.h"
15 #include "unicode/uchar.h"
16 #include "unicode/ustring.h"
17 #include "unicode/locid.h"
18 #include "unicode/strenum.h"
19 #include "unicode/ucnv.h"
20 #include "unicode/uenum.h"
21 #include "unicode/utf16.h"
22 #include "cmemory.h"
23 #include "charstr.h"
24
25 #if 0
26 #include "unicode/ustream.h"
27
28 #include <iostream>
29 using namespace std;
30
31 #endif
32
~UnicodeStringTest()33 UnicodeStringTest::~UnicodeStringTest() {}
34
35 extern IntlTest *createStringCaseTest();
36
runIndexedTest(int32_t index,UBool exec,const char * & name,char * par)37 void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char *par)
38 {
39 if (exec) logln("TestSuite UnicodeStringTest: ");
40 TESTCASE_AUTO_BEGIN;
41 TESTCASE_AUTO_CREATE_CLASS(StringCaseTest);
42 TESTCASE_AUTO(TestBasicManipulation);
43 TESTCASE_AUTO(TestCompare);
44 TESTCASE_AUTO(TestExtract);
45 TESTCASE_AUTO(TestRemoveReplace);
46 TESTCASE_AUTO(TestSearching);
47 TESTCASE_AUTO(TestSpacePadding);
48 TESTCASE_AUTO(TestPrefixAndSuffix);
49 TESTCASE_AUTO(TestFindAndReplace);
50 TESTCASE_AUTO(TestBogus);
51 TESTCASE_AUTO(TestReverse);
52 TESTCASE_AUTO(TestMiscellaneous);
53 TESTCASE_AUTO(TestStackAllocation);
54 TESTCASE_AUTO(TestUnescape);
55 TESTCASE_AUTO(TestCountChar32);
56 TESTCASE_AUTO(TestStringEnumeration);
57 TESTCASE_AUTO(TestNameSpace);
58 TESTCASE_AUTO(TestUTF32);
59 TESTCASE_AUTO(TestUTF8);
60 TESTCASE_AUTO(TestReadOnlyAlias);
61 TESTCASE_AUTO(TestAppendable);
62 TESTCASE_AUTO(TestUnicodeStringImplementsAppendable);
63 TESTCASE_AUTO(TestSizeofUnicodeString);
64 TESTCASE_AUTO(TestStartsWithAndEndsWithNulTerminated);
65 TESTCASE_AUTO(TestMoveSwap);
66 TESTCASE_AUTO(TestUInt16Pointers);
67 TESTCASE_AUTO(TestWCharPointers);
68 TESTCASE_AUTO(TestNullPointers);
69 TESTCASE_AUTO(TestUnicodeStringInsertAppendToSelf);
70 TESTCASE_AUTO(TestLargeAppend);
71 TESTCASE_AUTO_END;
72 }
73
74 void
TestBasicManipulation()75 UnicodeStringTest::TestBasicManipulation()
76 {
77 UnicodeString test1("Now is the time for all men to come swiftly to the aid of the party.\n");
78 UnicodeString expectedValue;
79 UnicodeString *c;
80
81 c=test1.clone();
82 test1.insert(24, "good ");
83 expectedValue = "Now is the time for all good men to come swiftly to the aid of the party.\n";
84 if (test1 != expectedValue)
85 errln("insert() failed: expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
86
87 c->insert(24, "good ");
88 if(*c != expectedValue) {
89 errln("clone()->insert() failed: expected \"" + expectedValue + "\"\n,got \"" + *c + "\"");
90 }
91 delete c;
92
93 test1.remove(41, 8);
94 expectedValue = "Now is the time for all good men to come to the aid of the party.\n";
95 if (test1 != expectedValue)
96 errln("remove() failed: expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
97
98 test1.replace(58, 6, "ir country");
99 expectedValue = "Now is the time for all good men to come to the aid of their country.\n";
100 if (test1 != expectedValue)
101 errln("replace() failed: expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
102
103 UChar temp[80];
104 test1.extract(0, 15, temp);
105
106 UnicodeString test2(temp, 15);
107
108 expectedValue = "Now is the time";
109 if (test2 != expectedValue)
110 errln("extract() failed: expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
111
112 test2 += " for me to go!\n";
113 expectedValue = "Now is the time for me to go!\n";
114 if (test2 != expectedValue)
115 errln("operator+=() failed: expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
116
117 if (test1.length() != 70)
118 errln(UnicodeString("length() failed: expected 70, got ") + test1.length());
119 if (test2.length() != 30)
120 errln(UnicodeString("length() failed: expected 30, got ") + test2.length());
121
122 UnicodeString test3;
123 test3.append((UChar32)0x20402);
124 if(test3 != CharsToUnicodeString("\\uD841\\uDC02")){
125 errln((UnicodeString)"append failed for UChar32, expected \"\\\\ud841\\\\udc02\", got " + prettify(test3));
126 }
127 if(test3.length() != 2){
128 errln(UnicodeString("append or length failed for UChar32, expected 2, got ") + test3.length());
129 }
130 test3.append((UChar32)0x0074);
131 if(test3 != CharsToUnicodeString("\\uD841\\uDC02t")){
132 errln((UnicodeString)"append failed for UChar32, expected \"\\\\uD841\\\\uDC02t\", got " + prettify(test3));
133 }
134 if(test3.length() != 3){
135 errln((UnicodeString)"append or length failed for UChar32, expected 2, got " + test3.length());
136 }
137
138 // test some UChar32 overloads
139 if( test3.setTo((UChar32)0x10330).length() != 2 ||
140 test3.insert(0, (UChar32)0x20100).length() != 4 ||
141 test3.replace(2, 2, (UChar32)0xe0061).length() != 4 ||
142 (test3 = (UChar32)0x14001).length() != 2
143 ) {
144 errln((UnicodeString)"simple UChar32 overloads for replace, insert, setTo or = failed");
145 }
146
147 {
148 // test moveIndex32()
149 UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
150
151 if(
152 s.moveIndex32(2, -1)!=0 ||
153 s.moveIndex32(2, 1)!=4 ||
154 s.moveIndex32(2, 2)!=5 ||
155 s.moveIndex32(5, -2)!=2 ||
156 s.moveIndex32(0, -1)!=0 ||
157 s.moveIndex32(6, 1)!=6
158 ) {
159 errln("UnicodeString::moveIndex32() failed");
160 }
161
162 if(s.getChar32Start(1)!=0 || s.getChar32Start(2)!=2) {
163 errln("UnicodeString::getChar32Start() failed");
164 }
165
166 if(s.getChar32Limit(1)!=2 || s.getChar32Limit(2)!=2) {
167 errln("UnicodeString::getChar32Limit() failed");
168 }
169 }
170
171 {
172 // test new 2.2 constructors and setTo function that parallel Java's substring function.
173 UnicodeString src("Hello folks how are you?");
174 UnicodeString target1("how are you?");
175 if (target1 != UnicodeString(src, 12)) {
176 errln("UnicodeString(const UnicodeString&, int32_t) failed");
177 }
178 UnicodeString target2("folks");
179 if (target2 != UnicodeString(src, 6, 5)) {
180 errln("UnicodeString(const UnicodeString&, int32_t, int32_t) failed");
181 }
182 if (target1 != target2.setTo(src, 12)) {
183 errln("UnicodeString::setTo(const UnicodeString&, int32_t) failed");
184 }
185 }
186
187 {
188 // op+ is new in ICU 2.8
189 UnicodeString s=UnicodeString("abc", "")+UnicodeString("def", "")+UnicodeString("ghi", "");
190 if(s!=UnicodeString("abcdefghi", "")) {
191 errln("operator+(UniStr, UniStr) failed");
192 }
193 }
194
195 {
196 // tests for Jitterbug 2360
197 // verify that APIs with source pointer + length accept length == -1
198 // mostly test only where modified, only few functions did not already do this
199 if(UnicodeString("abc", -1, "")!=UnicodeString("abc", "")) {
200 errln("UnicodeString(codepageData, dataLength, codepage) does not work with dataLength==-1");
201 }
202
203 UChar buffer[10]={ 0x61, 0x62, 0x20ac, 0xd900, 0xdc05, 0, 0x62, 0xffff, 0xdbff, 0xdfff };
204 UnicodeString s, t(buffer, -1, UPRV_LENGTHOF(buffer));
205
206 if(s.setTo(buffer, -1, UPRV_LENGTHOF(buffer)).length()!=u_strlen(buffer)) {
207 errln("UnicodeString.setTo(buffer, length, capacity) does not work with length==-1");
208 }
209 if(t.length()!=u_strlen(buffer)) {
210 errln("UnicodeString(buffer, length, capacity) does not work with length==-1");
211 }
212
213 if(0!=s.caseCompare(buffer, -1, U_FOLD_CASE_DEFAULT)) {
214 errln("UnicodeString.caseCompare(const UChar *, length, options) does not work with length==-1");
215 }
216 if(0!=s.caseCompare(0, s.length(), buffer, U_FOLD_CASE_DEFAULT)) {
217 errln("UnicodeString.caseCompare(start, _length, const UChar *, options) does not work");
218 }
219
220 buffer[u_strlen(buffer)]=0xe4;
221 UnicodeString u(buffer, -1, UPRV_LENGTHOF(buffer));
222 if(s.setTo(buffer, -1, UPRV_LENGTHOF(buffer)).length()!=UPRV_LENGTHOF(buffer)) {
223 errln("UnicodeString.setTo(buffer without NUL, length, capacity) does not work with length==-1");
224 }
225 if(u.length()!=UPRV_LENGTHOF(buffer)) {
226 errln("UnicodeString(buffer without NUL, length, capacity) does not work with length==-1");
227 }
228
229 static const char cs[]={ 0x61, (char)0xe4, (char)0x85, 0 };
230 UConverter *cnv;
231 UErrorCode errorCode=U_ZERO_ERROR;
232
233 cnv=ucnv_open("ISO-8859-1", &errorCode);
234 UnicodeString v(cs, -1, cnv, errorCode);
235 ucnv_close(cnv);
236 if(v!=CharsToUnicodeString("a\\xe4\\x85")) {
237 errln("UnicodeString(const char *, length, cnv, errorCode) does not work with length==-1");
238 }
239 }
240
241 #if U_CHARSET_IS_UTF8
242 {
243 // Test the hardcoded-UTF-8 UnicodeString optimizations.
244 static const uint8_t utf8[]={ 0x61, 0xC3, 0xA4, 0xC3, 0x9F, 0xE4, 0xB8, 0x80, 0 };
245 static const UChar utf16[]={ 0x61, 0xE4, 0xDF, 0x4E00 };
246 UnicodeString from8a = UnicodeString((const char *)utf8);
247 UnicodeString from8b = UnicodeString((const char *)utf8, (int32_t)sizeof(utf8)-1);
248 UnicodeString from16(FALSE, utf16, UPRV_LENGTHOF(utf16));
249 if(from8a != from16 || from8b != from16) {
250 errln("UnicodeString(const char * U_CHARSET_IS_UTF8) failed");
251 }
252 char buffer[16];
253 int32_t length8=from16.extract(0, 0x7fffffff, buffer, (uint32_t)sizeof(buffer));
254 if(length8!=((int32_t)sizeof(utf8)-1) || 0!=uprv_memcmp(buffer, utf8, sizeof(utf8))) {
255 errln("UnicodeString::extract(char * U_CHARSET_IS_UTF8) failed");
256 }
257 length8=from16.extract(1, 2, buffer, (uint32_t)sizeof(buffer));
258 if(length8!=4 || buffer[length8]!=0 || 0!=uprv_memcmp(buffer, utf8+1, length8)) {
259 errln("UnicodeString::extract(substring to char * U_CHARSET_IS_UTF8) failed");
260 }
261 }
262 #endif
263 }
264
265 void
TestCompare()266 UnicodeStringTest::TestCompare()
267 {
268 UnicodeString test1("this is a test");
269 UnicodeString test2("this is a test");
270 UnicodeString test3("this is a test of the emergency broadcast system");
271 UnicodeString test4("never say, \"this is a test\"!!");
272
273 UnicodeString test5((UChar)0x5000);
274 UnicodeString test6((UChar)0x5100);
275
276 UChar uniChars[] = { 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73,
277 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0 };
278 char chars[] = "this is a test";
279
280 // test operator== and operator!=
281 if (test1 != test2 || test1 == test3 || test1 == test4)
282 errln("operator== or operator!= failed");
283
284 // test operator> and operator<
285 if (test1 > test2 || test1 < test2 || !(test1 < test3) || !(test1 > test4) ||
286 !(test5 < test6)
287 ) {
288 errln("operator> or operator< failed");
289 }
290
291 // test operator>= and operator<=
292 if (!(test1 >= test2) || !(test1 <= test2) || !(test1 <= test3) || !(test1 >= test4))
293 errln("operator>= or operator<= failed");
294
295 // test compare(UnicodeString)
296 if (test1.compare(test2) != 0 || test1.compare(test3) >= 0 || test1.compare(test4) <= 0)
297 errln("compare(UnicodeString) failed");
298
299 //test compare(offset, length, UnicodeString)
300 if(test1.compare(0, 14, test2) != 0 ||
301 test3.compare(0, 14, test2) != 0 ||
302 test4.compare(12, 14, test2) != 0 ||
303 test3.compare(0, 18, test1) <=0 )
304 errln("compare(offset, length, UnicodeString) fails");
305
306 // test compare(UChar*)
307 if (test2.compare(uniChars) != 0 || test3.compare(uniChars) <= 0 || test4.compare(uniChars) >= 0)
308 errln("compare(UChar*) failed");
309
310 // test compare(char*)
311 if (test2.compare(chars) != 0 || test3.compare(chars) <= 0 || test4.compare(chars) >= 0)
312 errln("compare(char*) failed");
313
314 // test compare(UChar*, length)
315 if (test1.compare(uniChars, 4) <= 0 || test1.compare(uniChars, 4) <= 0)
316 errln("compare(UChar*, length) failed");
317
318 // test compare(thisOffset, thisLength, that, thatOffset, thatLength)
319 if (test1.compare(0, 14, test2, 0, 14) != 0
320 || test1.compare(0, 14, test3, 0, 14) != 0
321 || test1.compare(0, 14, test4, 12, 14) != 0)
322 errln("1. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
323
324 if (test1.compare(10, 4, test2, 0, 4) >= 0
325 || test1.compare(10, 4, test3, 22, 9) <= 0
326 || test1.compare(10, 4, test4, 22, 4) != 0)
327 errln("2. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
328
329 // test compareBetween
330 if (test1.compareBetween(0, 14, test2, 0, 14) != 0 || test1.compareBetween(0, 14, test3, 0, 14) != 0
331 || test1.compareBetween(0, 14, test4, 12, 26) != 0)
332 errln("compareBetween failed");
333
334 if (test1.compareBetween(10, 14, test2, 0, 4) >= 0 || test1.compareBetween(10, 14, test3, 22, 31) <= 0
335 || test1.compareBetween(10, 14, test4, 22, 26) != 0)
336 errln("compareBetween failed");
337
338 // test compare() etc. with strings that share a buffer but are not equal
339 test2=test1; // share the buffer, length() too large for the stackBuffer
340 test2.truncate(1); // change only the length, not the buffer
341 if( test1==test2 || test1<=test2 ||
342 test1.compare(test2)<=0 ||
343 test1.compareCodePointOrder(test2)<=0 ||
344 test1.compareCodePointOrder(0, INT32_MAX, test2)<=0 ||
345 test1.compareCodePointOrder(0, INT32_MAX, test2, 0, INT32_MAX)<=0 ||
346 test1.compareCodePointOrderBetween(0, INT32_MAX, test2, 0, INT32_MAX)<=0 ||
347 test1.caseCompare(test2, U_FOLD_CASE_DEFAULT)<=0
348 ) {
349 errln("UnicodeStrings that share a buffer but have different lengths compare as equal");
350 }
351
352 /* test compareCodePointOrder() */
353 {
354 /* these strings are in ascending order */
355 static const UChar strings[][4]={
356 { 0x61, 0 }, /* U+0061 */
357 { 0x20ac, 0xd801, 0 }, /* U+20ac U+d801 */
358 { 0x20ac, 0xd800, 0xdc00, 0 }, /* U+20ac U+10000 */
359 { 0xd800, 0 }, /* U+d800 */
360 { 0xd800, 0xff61, 0 }, /* U+d800 U+ff61 */
361 { 0xdfff, 0 }, /* U+dfff */
362 { 0xff61, 0xdfff, 0 }, /* U+ff61 U+dfff */
363 { 0xff61, 0xd800, 0xdc02, 0 }, /* U+ff61 U+10002 */
364 { 0xd800, 0xdc02, 0 }, /* U+10002 */
365 { 0xd84d, 0xdc56, 0 } /* U+23456 */
366 };
367 UnicodeString u[20]; // must be at least as long as strings[]
368 int32_t i;
369
370 for(i=0; i<UPRV_LENGTHOF(strings); ++i) {
371 u[i]=UnicodeString(TRUE, strings[i], -1);
372 }
373
374 for(i=0; i<UPRV_LENGTHOF(strings)-1; ++i) {
375 if(u[i].compareCodePointOrder(u[i+1])>=0 || u[i].compareCodePointOrder(0, INT32_MAX, u[i+1].getBuffer())>=0) {
376 errln("error: UnicodeString::compareCodePointOrder() fails for string %d and the following one\n", i);
377 }
378 }
379 }
380
381 /* test caseCompare() */
382 {
383 static const UChar
384 _mixed[]= { 0x61, 0x42, 0x131, 0x3a3, 0xdf, 0x130, 0x49, 0xfb03, 0xd93f, 0xdfff, 0 },
385 _otherDefault[]= { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x69, 0x307, 0x69, 0x46, 0x66, 0x49, 0xd93f, 0xdfff, 0 },
386 _otherExcludeSpecialI[]={ 0x41, 0x62, 0x131, 0x3c3, 0x53, 0x73, 0x69, 0x131, 0x66, 0x46, 0x69, 0xd93f, 0xdfff, 0 },
387 _different[]= { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x130, 0x49, 0x46, 0x66, 0x49, 0xd93f, 0xdffd, 0 };
388
389 UnicodeString
390 mixed(TRUE, _mixed, -1),
391 otherDefault(TRUE, _otherDefault, -1),
392 otherExcludeSpecialI(TRUE, _otherExcludeSpecialI, -1),
393 different(TRUE, _different, -1);
394
395 int8_t result;
396
397 /* test caseCompare() */
398 result=mixed.caseCompare(otherDefault, U_FOLD_CASE_DEFAULT);
399 if(result!=0 || 0!=mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_DEFAULT)) {
400 errln("error: mixed.caseCompare(other, default)=%ld instead of 0\n", result);
401 }
402 result=mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
403 if(result!=0) {
404 errln("error: mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=%ld instead of 0\n", result);
405 }
406 result=mixed.caseCompare(otherDefault, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
407 if(result==0 || 0==mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_EXCLUDE_SPECIAL_I)) {
408 errln("error: mixed.caseCompare(other, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=0 instead of !=0\n");
409 }
410
411 /* test caseCompare() */
412 result=mixed.caseCompare(different, U_FOLD_CASE_DEFAULT);
413 if(result<=0) {
414 errln("error: mixed.caseCompare(different, default)=%ld instead of positive\n", result);
415 }
416
417 /* test caseCompare() - include the folded sharp s (U+00df) with different lengths */
418 result=mixed.caseCompare(1, 4, different, 1, 5, U_FOLD_CASE_DEFAULT);
419 if(result!=0 || 0!=mixed.caseCompareBetween(1, 5, different, 1, 6, U_FOLD_CASE_DEFAULT)) {
420 errln("error: mixed.caseCompare(mixed, 1, 4, different, 1, 5, default)=%ld instead of 0\n", result);
421 }
422
423 /* test caseCompare() - stop in the middle of the sharp s (U+00df) */
424 result=mixed.caseCompare(1, 4, different, 1, 4, U_FOLD_CASE_DEFAULT);
425 if(result<=0) {
426 errln("error: mixed.caseCompare(1, 4, different, 1, 4, default)=%ld instead of positive\n", result);
427 }
428 }
429
430 // test that srcLength=-1 is handled in functions that
431 // take input const UChar */int32_t srcLength (j785)
432 {
433 static const UChar u[]={ 0x61, 0x308, 0x62, 0 };
434 UnicodeString s=UNICODE_STRING("a\\u0308b", 8).unescape();
435
436 if(s.compare(u, -1)!=0 || s.compare(0, 999, u, 0, -1)!=0) {
437 errln("error UnicodeString::compare(..., const UChar *, srcLength=-1) does not work");
438 }
439
440 if(s.compareCodePointOrder(u, -1)!=0 || s.compareCodePointOrder(0, 999, u, 0, -1)!=0) {
441 errln("error UnicodeString::compareCodePointOrder(..., const UChar *, srcLength=-1, ...) does not work");
442 }
443
444 if(s.caseCompare(u, -1, U_FOLD_CASE_DEFAULT)!=0 || s.caseCompare(0, 999, u, 0, -1, U_FOLD_CASE_DEFAULT)!=0) {
445 errln("error UnicodeString::caseCompare(..., const UChar *, srcLength=-1, ...) does not work");
446 }
447
448 if(s.indexOf(u, 1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0)!=1) {
449 errln("error UnicodeString::indexOf(const UChar *, srcLength=-1, ...) does not work");
450 }
451
452 if(s.lastIndexOf(u, 1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0)!=1) {
453 errln("error UnicodeString::lastIndexOf(const UChar *, srcLength=-1, ...) does not work");
454 }
455
456 UnicodeString s2, s3;
457 s2.replace(0, 0, u+1, -1);
458 s3.replace(0, 0, u, 1, -1);
459 if(s.compare(1, 999, s2)!=0 || s2!=s3) {
460 errln("error UnicodeString::replace(..., const UChar *, srcLength=-1, ...) does not work");
461 }
462 }
463 }
464
465 void
TestExtract()466 UnicodeStringTest::TestExtract()
467 {
468 UnicodeString test1("Now is the time for all good men to come to the aid of their country.", "");
469 UnicodeString test2;
470 UChar test3[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
471 char test4[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
472 UnicodeString test5;
473 char test6[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
474
475 test1.extract(11, 12, test2);
476 test1.extract(11, 12, test3);
477 if (test1.extract(11, 12, test4) != 12 || test4[12] != 0) {
478 errln("UnicodeString.extract(char *) failed to return the correct size of destination buffer.");
479 }
480
481 // test proper pinning in extractBetween()
482 test1.extractBetween(-3, 7, test5);
483 if(test5!=UNICODE_STRING("Now is ", 7)) {
484 errln("UnicodeString.extractBetween(-3, 7) did not pin properly.");
485 }
486
487 test1.extractBetween(11, 23, test5);
488 if (test1.extract(60, 71, test6) != 9) {
489 errln("UnicodeString.extract() failed to return the correct size of destination buffer for end of buffer.");
490 }
491 if (test1.extract(11, 12, test6) != 12) {
492 errln("UnicodeString.extract() failed to return the correct size of destination buffer.");
493 }
494
495 // convert test4 back to Unicode for comparison
496 UnicodeString test4b(test4, 12);
497
498 if (test1.extract(11, 12, (char *)NULL) != 12) {
499 errln("UnicodeString.extract(NULL) failed to return the correct size of destination buffer.");
500 }
501 if (test1.extract(11, -1, test6) != 0) {
502 errln("UnicodeString.extract(-1) failed to stop reading the string.");
503 }
504
505 for (int32_t i = 0; i < 12; i++) {
506 if (test1.charAt((int32_t)(11 + i)) != test2.charAt(i)) {
507 errln(UnicodeString("extracting into a UnicodeString failed at position ") + i);
508 break;
509 }
510 if (test1.charAt((int32_t)(11 + i)) != test3[i]) {
511 errln(UnicodeString("extracting into an array of UChar failed at position ") + i);
512 break;
513 }
514 if (((char)test1.charAt((int32_t)(11 + i))) != test4b.charAt(i)) {
515 errln(UnicodeString("extracting into an array of char failed at position ") + i);
516 break;
517 }
518 if (test1.charAt((int32_t)(11 + i)) != test5.charAt(i)) {
519 errln(UnicodeString("extracting with extractBetween failed at position ") + i);
520 break;
521 }
522 }
523
524 // test preflighting and overflows with invariant conversion
525 if (test1.extract(0, 10, (char *)NULL, "") != 10) {
526 errln("UnicodeString.extract(0, 10, (char *)NULL, \"\") != 10");
527 }
528
529 test4[2] = (char)0xff;
530 if (test1.extract(0, 10, test4, 2, "") != 10) {
531 errln("UnicodeString.extract(0, 10, test4, 2, \"\") != 10");
532 }
533 if (test4[2] != (char)0xff) {
534 errln("UnicodeString.extract(0, 10, test4, 2, \"\") overwrote test4[2]");
535 }
536
537 {
538 // test new, NUL-terminating extract() function
539 UnicodeString s("terminate", "");
540 UChar dest[20]={
541 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5,
542 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5
543 };
544 UErrorCode errorCode;
545 int32_t length;
546
547 errorCode=U_ZERO_ERROR;
548 length=s.extract((UChar *)NULL, 0, errorCode);
549 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) {
550 errln("UnicodeString.extract(NULL, 0)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)", length, s.length(), u_errorName(errorCode));
551 }
552
553 errorCode=U_ZERO_ERROR;
554 length=s.extract(dest, s.length()-1, errorCode);
555 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) {
556 errln("UnicodeString.extract(dest too short)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)",
557 length, u_errorName(errorCode), s.length());
558 }
559
560 errorCode=U_ZERO_ERROR;
561 length=s.extract(dest, s.length(), errorCode);
562 if(errorCode!=U_STRING_NOT_TERMINATED_WARNING || length!=s.length()) {
563 errln("UnicodeString.extract(dest just right without NUL)==%d (%s) expected %d (U_STRING_NOT_TERMINATED_WARNING)",
564 length, u_errorName(errorCode), s.length());
565 }
566 if(dest[length-1]!=s[length-1] || dest[length]!=0xa5) {
567 errln("UnicodeString.extract(dest just right without NUL) did not extract the string correctly");
568 }
569
570 errorCode=U_ZERO_ERROR;
571 length=s.extract(dest, s.length()+1, errorCode);
572 if(errorCode!=U_ZERO_ERROR || length!=s.length()) {
573 errln("UnicodeString.extract(dest large enough)==%d (%s) expected %d (U_ZERO_ERROR)",
574 length, u_errorName(errorCode), s.length());
575 }
576 if(dest[length-1]!=s[length-1] || dest[length]!=0 || dest[length+1]!=0xa5) {
577 errln("UnicodeString.extract(dest large enough) did not extract the string correctly");
578 }
579 }
580
581 {
582 // test new UConverter extract() and constructor
583 UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
584 char buffer[32];
585 static const char expect[]={
586 (char)0xf0, (char)0xaf, (char)0xa6, (char)0x99,
587 (char)0xf0, (char)0x9d, (char)0x85, (char)0x9f,
588 (char)0xc3, (char)0x84,
589 (char)0xe1, (char)0xbb, (char)0x90
590 };
591 UErrorCode errorCode=U_ZERO_ERROR;
592 UConverter *cnv=ucnv_open("UTF-8", &errorCode);
593 int32_t length;
594
595 if(U_SUCCESS(errorCode)) {
596 // test preflighting
597 if( (length=s.extract(NULL, 0, cnv, errorCode))!=13 ||
598 errorCode!=U_BUFFER_OVERFLOW_ERROR
599 ) {
600 errln("UnicodeString::extract(NULL, UConverter) preflighting failed (length=%ld, %s)",
601 length, u_errorName(errorCode));
602 }
603 errorCode=U_ZERO_ERROR;
604 if( (length=s.extract(buffer, 2, cnv, errorCode))!=13 ||
605 errorCode!=U_BUFFER_OVERFLOW_ERROR
606 ) {
607 errln("UnicodeString::extract(too small, UConverter) preflighting failed (length=%ld, %s)",
608 length, u_errorName(errorCode));
609 }
610
611 // try error cases
612 errorCode=U_ZERO_ERROR;
613 if( s.extract(NULL, 2, cnv, errorCode)==13 || U_SUCCESS(errorCode)) {
614 errln("UnicodeString::extract(UConverter) succeeded with an illegal destination");
615 }
616 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
617 if( s.extract(NULL, 0, cnv, errorCode)==13 || U_SUCCESS(errorCode)) {
618 errln("UnicodeString::extract(UConverter) succeeded with a previous error code");
619 }
620 errorCode=U_ZERO_ERROR;
621
622 // extract for real
623 if( (length=s.extract(buffer, sizeof(buffer), cnv, errorCode))!=13 ||
624 uprv_memcmp(buffer, expect, 13)!=0 ||
625 buffer[13]!=0 ||
626 U_FAILURE(errorCode)
627 ) {
628 errln("UnicodeString::extract(UConverter) conversion failed (length=%ld, %s)",
629 length, u_errorName(errorCode));
630 }
631 // Test again with just the converter name.
632 if( (length=s.extract(0, s.length(), buffer, sizeof(buffer), "UTF-8"))!=13 ||
633 uprv_memcmp(buffer, expect, 13)!=0 ||
634 buffer[13]!=0 ||
635 U_FAILURE(errorCode)
636 ) {
637 errln("UnicodeString::extract(\"UTF-8\") conversion failed (length=%ld, %s)",
638 length, u_errorName(errorCode));
639 }
640
641 // try the constructor
642 UnicodeString t(expect, sizeof(expect), cnv, errorCode);
643 if(U_FAILURE(errorCode) || s!=t) {
644 errln("UnicodeString(UConverter) conversion failed (%s)",
645 u_errorName(errorCode));
646 }
647
648 ucnv_close(cnv);
649 }
650 }
651 }
652
653 void
TestRemoveReplace()654 UnicodeStringTest::TestRemoveReplace()
655 {
656 UnicodeString test1("The rain in Spain stays mainly on the plain");
657 UnicodeString test2("eat SPAMburgers!");
658 UChar test3[] = { 0x53, 0x50, 0x41, 0x4d, 0x4d, 0 };
659 char test4[] = "SPAM";
660 UnicodeString& test5 = test1;
661
662 test1.replace(4, 4, test2, 4, 4);
663 test1.replace(12, 5, test3, 4);
664 test3[4] = 0;
665 test1.replace(17, 4, test3);
666 test1.replace(23, 4, test4);
667 test1.replaceBetween(37, 42, test2, 4, 8);
668
669 if (test1 != "The SPAM in SPAM SPAMs SPAMly on the SPAM")
670 errln("One of the replace methods failed:\n"
671 " expected \"The SPAM in SPAM SPAMs SPAMly on the SPAM\",\n"
672 " got \"" + test1 + "\"");
673
674 test1.remove(21, 1);
675 test1.removeBetween(26, 28);
676
677 if (test1 != "The SPAM in SPAM SPAM SPAM on the SPAM")
678 errln("One of the remove methods failed:\n"
679 " expected \"The SPAM in SPAM SPAM SPAM on the SPAM\",\n"
680 " got \"" + test1 + "\"");
681
682 for (int32_t i = 0; i < test1.length(); i++) {
683 if (test5[i] != 0x53 && test5[i] != 0x50 && test5[i] != 0x41 && test5[i] != 0x4d && test5[i] != 0x20) {
684 test1.setCharAt(i, 0x78);
685 }
686 }
687
688 if (test1 != "xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM")
689 errln("One of the remove methods failed:\n"
690 " expected \"xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM\",\n"
691 " got \"" + test1 + "\"");
692
693 test1.remove();
694 if (test1.length() != 0)
695 errln("Remove() failed: expected empty string, got \"" + test1 + "\"");
696 }
697
698 void
TestSearching()699 UnicodeStringTest::TestSearching()
700 {
701 UnicodeString test1("test test ttest tetest testesteststt");
702 UnicodeString test2("test");
703 UChar testChar = 0x74;
704
705 UChar32 testChar32 = 0x20402;
706 UChar testData[]={
707 // 0 1 2 3 4 5 6 7
708 0xd841, 0xdc02, 0x0071, 0xdc02, 0xd841, 0x0071, 0xd841, 0xdc02,
709
710 // 8 9 10 11 12 13 14 15
711 0x0071, 0x0072, 0xd841, 0xdc02, 0x0071, 0xd841, 0xdc02, 0x0071,
712
713 // 16 17 18 19
714 0xdc02, 0xd841, 0x0073, 0x0000
715 };
716 UnicodeString test3(testData);
717 UnicodeString test4(testChar32);
718
719 uint16_t occurrences = 0;
720 int32_t startPos = 0;
721 for ( ;
722 startPos != -1 && startPos < test1.length();
723 (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
724 ;
725 if (occurrences != 6)
726 errln(UnicodeString("indexOf failed: expected to find 6 occurrences, found ") + occurrences);
727
728 for ( occurrences = 0, startPos = 10;
729 startPos != -1 && startPos < test1.length();
730 (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
731 ;
732 if (occurrences != 4)
733 errln(UnicodeString("indexOf with starting offset failed: "
734 "expected to find 4 occurrences, found ") + occurrences);
735
736 int32_t endPos = 28;
737 for ( occurrences = 0, startPos = 5;
738 startPos != -1 && startPos < test1.length();
739 (startPos = test1.indexOf(test2, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
740 ;
741 if (occurrences != 4)
742 errln(UnicodeString("indexOf with starting and ending offsets failed: "
743 "expected to find 4 occurrences, found ") + occurrences);
744
745 //using UChar32 string
746 for ( startPos=0, occurrences=0;
747 startPos != -1 && startPos < test3.length();
748 (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
749 ;
750 if (occurrences != 4)
751 errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences);
752
753 for ( startPos=10, occurrences=0;
754 startPos != -1 && startPos < test3.length();
755 (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
756 ;
757 if (occurrences != 2)
758 errln(UnicodeString("indexOf failed: expected to find 2 occurrences, found ") + occurrences);
759 //---
760
761 for ( occurrences = 0, startPos = 0;
762 startPos != -1 && startPos < test1.length();
763 (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
764 ;
765 if (occurrences != 16)
766 errln(UnicodeString("indexOf with character failed: "
767 "expected to find 16 occurrences, found ") + occurrences);
768
769 for ( occurrences = 0, startPos = 10;
770 startPos != -1 && startPos < test1.length();
771 (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
772 ;
773 if (occurrences != 12)
774 errln(UnicodeString("indexOf with character & start offset failed: "
775 "expected to find 12 occurrences, found ") + occurrences);
776
777 for ( occurrences = 0, startPos = 5, endPos = 28;
778 startPos != -1 && startPos < test1.length();
779 (startPos = test1.indexOf(testChar, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
780 ;
781 if (occurrences != 10)
782 errln(UnicodeString("indexOf with character & start & end offsets failed: "
783 "expected to find 10 occurrences, found ") + occurrences);
784
785 //testing for UChar32
786 UnicodeString subString;
787 for( occurrences =0, startPos=0; startPos < test3.length(); startPos +=1){
788 subString.append(test3, startPos, test3.length());
789 if(subString.indexOf(testChar32) != -1 ){
790 ++occurrences;
791 }
792 subString.remove();
793 }
794 if (occurrences != 14)
795 errln((UnicodeString)"indexOf failed: expected to find 14 occurrences, found " + occurrences);
796
797 for ( occurrences = 0, startPos = 0;
798 startPos != -1 && startPos < test3.length();
799 (startPos = test3.indexOf(testChar32, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
800 ;
801 if (occurrences != 4)
802 errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences);
803
804 endPos=test3.length();
805 for ( occurrences = 0, startPos = 5;
806 startPos != -1 && startPos < test3.length();
807 (startPos = test3.indexOf(testChar32, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
808 ;
809 if (occurrences != 3)
810 errln((UnicodeString)"indexOf with character & start & end offsets failed: expected to find 2 occurrences, found " + occurrences);
811 //---
812
813 if(test1.lastIndexOf(test2)!=29) {
814 errln("test1.lastIndexOf(test2)!=29");
815 }
816
817 if(test1.lastIndexOf(test2, 15)!=29 || test1.lastIndexOf(test2, 29)!=29 || test1.lastIndexOf(test2, 30)!=-1) {
818 errln("test1.lastIndexOf(test2, start) failed");
819 }
820
821 for ( occurrences = 0, startPos = 32;
822 startPos != -1;
823 (startPos = test1.lastIndexOf(test2, 5, startPos - 5)) != -1 ? ++occurrences : 0)
824 ;
825 if (occurrences != 4)
826 errln(UnicodeString("lastIndexOf with starting and ending offsets failed: "
827 "expected to find 4 occurrences, found ") + occurrences);
828
829 for ( occurrences = 0, startPos = 32;
830 startPos != -1;
831 (startPos = test1.lastIndexOf(testChar, 5, startPos - 5)) != -1 ? ++occurrences : 0)
832 ;
833 if (occurrences != 11)
834 errln(UnicodeString("lastIndexOf with character & start & end offsets failed: "
835 "expected to find 11 occurrences, found ") + occurrences);
836
837 //testing UChar32
838 startPos=test3.length();
839 for ( occurrences = 0;
840 startPos != -1;
841 (startPos = test3.lastIndexOf(testChar32, 5, startPos - 5)) != -1 ? ++occurrences : 0)
842 ;
843 if (occurrences != 3)
844 errln((UnicodeString)"lastIndexOf with character & start & end offsets failed: expected to find 3 occurrences, found " + occurrences);
845
846
847 for ( occurrences = 0, endPos = test3.length(); endPos > 0; endPos -= 1){
848 subString.remove();
849 subString.append(test3, 0, endPos);
850 if(subString.lastIndexOf(testChar32) != -1 ){
851 ++occurrences;
852 }
853 }
854 if (occurrences != 18)
855 errln((UnicodeString)"indexOf failed: expected to find 18 occurrences, found " + occurrences);
856 //---
857
858 // test that indexOf(UChar32) and lastIndexOf(UChar32)
859 // do not find surrogate code points when they are part of matched pairs
860 // (= part of supplementary code points)
861 // Jitterbug 1542
862 if(test3.indexOf((UChar32)0xd841) != 4 || test3.indexOf((UChar32)0xdc02) != 3) {
863 errln("error: UnicodeString::indexOf(UChar32 surrogate) finds a partial supplementary code point");
864 }
865 if( UnicodeString(test3, 0, 17).lastIndexOf((UChar)0xd841, 0) != 4 ||
866 UnicodeString(test3, 0, 17).lastIndexOf((UChar32)0xd841, 2) != 4 ||
867 test3.lastIndexOf((UChar32)0xd841, 0, 17) != 4 || test3.lastIndexOf((UChar32)0xdc02, 0, 17) != 16
868 ) {
869 errln("error: UnicodeString::lastIndexOf(UChar32 surrogate) finds a partial supplementary code point");
870 }
871 }
872
873 void
TestSpacePadding()874 UnicodeStringTest::TestSpacePadding()
875 {
876 UnicodeString test1("hello");
877 UnicodeString test2(" there");
878 UnicodeString test3("Hi! How ya doin'? Beautiful day, isn't it?");
879 UnicodeString test4;
880 UBool returnVal;
881 UnicodeString expectedValue;
882
883 returnVal = test1.padLeading(15);
884 expectedValue = " hello";
885 if (returnVal == FALSE || test1 != expectedValue)
886 errln("padLeading() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
887
888 returnVal = test2.padTrailing(15);
889 expectedValue = " there ";
890 if (returnVal == FALSE || test2 != expectedValue)
891 errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
892
893 expectedValue = test3;
894 returnVal = test3.padTrailing(15);
895 if (returnVal == TRUE || test3 != expectedValue)
896 errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
897
898 expectedValue = "hello";
899 test4.setTo(test1).trim();
900
901 if (test4 != expectedValue || test1 == expectedValue || test4 != expectedValue)
902 errln("trim(UnicodeString&) failed");
903
904 test1.trim();
905 if (test1 != expectedValue)
906 errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
907
908 test2.trim();
909 expectedValue = "there";
910 if (test2 != expectedValue)
911 errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
912
913 test3.trim();
914 expectedValue = "Hi! How ya doin'? Beautiful day, isn't it?";
915 if (test3 != expectedValue)
916 errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
917
918 returnVal = test1.truncate(15);
919 expectedValue = "hello";
920 if (returnVal == TRUE || test1 != expectedValue)
921 errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
922
923 returnVal = test2.truncate(15);
924 expectedValue = "there";
925 if (returnVal == TRUE || test2 != expectedValue)
926 errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
927
928 returnVal = test3.truncate(15);
929 expectedValue = "Hi! How ya doi";
930 if (returnVal == FALSE || test3 != expectedValue)
931 errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
932 }
933
934 void
TestPrefixAndSuffix()935 UnicodeStringTest::TestPrefixAndSuffix()
936 {
937 UnicodeString test1("Now is the time for all good men to come to the aid of their country.");
938 UnicodeString test2("Now");
939 UnicodeString test3("country.");
940 UnicodeString test4("count");
941
942 if (!test1.startsWith(test2) || !test1.startsWith(test2, 0, test2.length())) {
943 errln("startsWith() failed: \"" + test2 + "\" should be a prefix of \"" + test1 + "\".");
944 }
945
946 if (test1.startsWith(test3) ||
947 test1.startsWith(test3.getBuffer(), test3.length()) ||
948 test1.startsWith(test3.getTerminatedBuffer(), 0, -1)
949 ) {
950 errln("startsWith() failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test1 + "\".");
951 }
952
953 if (test1.endsWith(test2)) {
954 errln("endsWith() failed: \"" + test2 + "\" shouldn't be a suffix of \"" + test1 + "\".");
955 }
956
957 if (!test1.endsWith(test3)) {
958 errln("endsWith(test3) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
959 }
960 if (!test1.endsWith(test3, 0, INT32_MAX)) {
961 errln("endsWith(test3, 0, INT32_MAX) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
962 }
963
964 if(!test1.endsWith(test3.getBuffer(), test3.length())) {
965 errln("endsWith(test3.getBuffer(), test3.length()) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
966 }
967 if(!test1.endsWith(test3.getTerminatedBuffer(), 0, -1)) {
968 errln("endsWith(test3.getTerminatedBuffer(), 0, -1) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
969 }
970
971 if (!test3.startsWith(test4)) {
972 errln("endsWith(test4) failed: \"" + test4 + "\" should be a prefix of \"" + test3 + "\".");
973 }
974
975 if (test4.startsWith(test3)) {
976 errln("startsWith(test3) failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test4 + "\".");
977 }
978 }
979
980 void
TestStartsWithAndEndsWithNulTerminated()981 UnicodeStringTest::TestStartsWithAndEndsWithNulTerminated() {
982 UnicodeString test("abcde");
983 const UChar ab[] = { 0x61, 0x62, 0 };
984 const UChar de[] = { 0x64, 0x65, 0 };
985 assertTrue("abcde.startsWith(ab, -1)", test.startsWith(ab, -1));
986 assertTrue("abcde.startsWith(ab, 0, -1)", test.startsWith(ab, 0, -1));
987 assertTrue("abcde.endsWith(de, -1)", test.endsWith(de, -1));
988 assertTrue("abcde.endsWith(de, 0, -1)", test.endsWith(de, 0, -1));
989 }
990
991 void
TestFindAndReplace()992 UnicodeStringTest::TestFindAndReplace()
993 {
994 UnicodeString test1("One potato, two potato, three potato, four\n");
995 UnicodeString test2("potato");
996 UnicodeString test3("MISSISSIPPI");
997
998 UnicodeString expectedValue;
999
1000 test1.findAndReplace(test2, test3);
1001 expectedValue = "One MISSISSIPPI, two MISSISSIPPI, three MISSISSIPPI, four\n";
1002 if (test1 != expectedValue)
1003 errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
1004 test1.findAndReplace(2, 32, test3, test2);
1005 expectedValue = "One potato, two potato, three MISSISSIPPI, four\n";
1006 if (test1 != expectedValue)
1007 errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
1008 }
1009
1010 void
TestReverse()1011 UnicodeStringTest::TestReverse()
1012 {
1013 UnicodeString test("backwards words say to used I");
1014
1015 test.reverse();
1016 test.reverse(2, 4);
1017 test.reverse(7, 2);
1018 test.reverse(10, 3);
1019 test.reverse(14, 5);
1020 test.reverse(20, 9);
1021
1022 if (test != "I used to say words backwards")
1023 errln("reverse() failed: Expected \"I used to say words backwards\",\n got \""
1024 + test + "\"");
1025
1026 test=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
1027 test.reverse();
1028 if(test.char32At(0)!=0x1ed0 || test.char32At(1)!=0xc4 || test.char32At(2)!=0x1d15f || test.char32At(4)!=0x2f999) {
1029 errln("reverse() failed with supplementary characters");
1030 }
1031
1032 // Test case for ticket #8091:
1033 // UnicodeString::reverse() failed to see a lead surrogate in the middle of
1034 // an odd-length string that contains no other lead surrogates.
1035 test=UNICODE_STRING_SIMPLE("ab\\U0001F4A9e").unescape();
1036 UnicodeString expected=UNICODE_STRING_SIMPLE("e\\U0001F4A9ba").unescape();
1037 test.reverse();
1038 if(test!=expected) {
1039 errln("reverse() failed with only lead surrogate in the middle");
1040 }
1041 }
1042
1043 void
TestMiscellaneous()1044 UnicodeStringTest::TestMiscellaneous()
1045 {
1046 UnicodeString test1("This is a test");
1047 UnicodeString test2("This is a test");
1048 UnicodeString test3("Me too!");
1049
1050 // test getBuffer(minCapacity) and releaseBuffer()
1051 test1=UnicodeString(); // make sure that it starts with its stackBuffer
1052 UChar *p=test1.getBuffer(20);
1053 if(test1.getCapacity()<20) {
1054 errln("UnicodeString::getBuffer(20).getCapacity()<20");
1055 }
1056
1057 test1.append((UChar)7); // must not be able to modify the string here
1058 test1.setCharAt(3, 7);
1059 test1.reverse();
1060 if( test1.length()!=0 ||
1061 test1.charAt(0)!=0xffff || test1.charAt(3)!=0xffff ||
1062 test1.getBuffer(10)!=0 || test1.getBuffer()!=0
1063 ) {
1064 errln("UnicodeString::getBuffer(minCapacity) allows read or write access to the UnicodeString");
1065 }
1066
1067 p[0]=1;
1068 p[1]=2;
1069 p[2]=3;
1070 test1.releaseBuffer(3);
1071 test1.append((UChar)4);
1072
1073 if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) {
1074 errln("UnicodeString::releaseBuffer(newLength) does not properly reallow access to the UnicodeString");
1075 }
1076
1077 // test releaseBuffer() without getBuffer(minCapacity) - must not have any effect
1078 test1.releaseBuffer(1);
1079 if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) {
1080 errln("UnicodeString::releaseBuffer(newLength) without getBuffer(minCapacity) changed the UnicodeString");
1081 }
1082
1083 // test getBuffer(const)
1084 const UChar *q=test1.getBuffer(), *r=test1.getBuffer();
1085 if( test1.length()!=4 ||
1086 q[0]!=1 || q[1]!=2 || q[2]!=3 || q[3]!=4 ||
1087 r[0]!=1 || r[1]!=2 || r[2]!=3 || r[3]!=4
1088 ) {
1089 errln("UnicodeString::getBuffer(const) does not return a usable buffer pointer");
1090 }
1091
1092 // test releaseBuffer() with a NUL-terminated buffer
1093 test1.getBuffer(20)[2]=0;
1094 test1.releaseBuffer(); // implicit -1
1095 if(test1.length()!=2 || test1.charAt(0)!=1 || test1.charAt(1) !=2) {
1096 errln("UnicodeString::releaseBuffer(-1) does not properly set the length of the UnicodeString");
1097 }
1098
1099 // test releaseBuffer() with a non-NUL-terminated buffer
1100 p=test1.getBuffer(256);
1101 for(int32_t i=0; i<test1.getCapacity(); ++i) {
1102 p[i]=(UChar)1; // fill the buffer with all non-NUL code units
1103 }
1104 test1.releaseBuffer(); // implicit -1
1105 if(test1.length()!=test1.getCapacity() || test1.charAt(1)!=1 || test1.charAt(100)!=1 || test1.charAt(test1.getCapacity()-1)!=1) {
1106 errln("UnicodeString::releaseBuffer(-1 but no NUL) does not properly set the length of the UnicodeString");
1107 }
1108
1109 // test getTerminatedBuffer()
1110 test1=UnicodeString("This is another test.", "");
1111 test2=UnicodeString("This is another test.", "");
1112 q=test1.getTerminatedBuffer();
1113 if(q[test1.length()]!=0 || test1!=test2 || test2.compare(q, -1)!=0) {
1114 errln("getTerminatedBuffer()[length]!=0");
1115 }
1116
1117 const UChar u[]={ 5, 6, 7, 8, 0 };
1118 test1.setTo(FALSE, u, 3);
1119 q=test1.getTerminatedBuffer();
1120 if(q==u || q[0]!=5 || q[1]!=6 || q[2]!=7 || q[3]!=0) {
1121 errln("UnicodeString(u[3]).getTerminatedBuffer() returns a bad buffer");
1122 }
1123
1124 test1.setTo(TRUE, u, -1);
1125 q=test1.getTerminatedBuffer();
1126 if(q!=u || test1.length()!=4 || q[3]!=8 || q[4]!=0) {
1127 errln("UnicodeString(u[-1]).getTerminatedBuffer() returns a bad buffer");
1128 }
1129
1130 // NOTE: Some compilers will optimize u"la" to point to the same static memory
1131 // as u" lila", offset by 3 code units
1132 test1=UnicodeString(TRUE, u"la", 2);
1133 test1.append(UnicodeString(TRUE, u" lila", 5).getTerminatedBuffer(), 0, -1);
1134 assertEquals("UnicodeString::append(const UChar *, start, length) failed",
1135 u"la lila", test1);
1136
1137 test1.insert(3, UnicodeString(TRUE, u"dudum ", 6), 0, INT32_MAX);
1138 assertEquals("UnicodeString::insert(start, const UniStr &, start, length) failed",
1139 u"la dudum lila", test1);
1140
1141 static const UChar ucs[]={ 0x68, 0x6d, 0x20, 0 };
1142 test1.insert(9, ucs, -1);
1143 assertEquals("UnicodeString::insert(start, const UChar *, length) failed",
1144 u"la dudum hm lila", test1);
1145
1146 test1.replace(9, 2, (UChar)0x2b);
1147 assertEquals("UnicodeString::replace(start, length, UChar) failed",
1148 u"la dudum + lila", test1);
1149
1150 if(test1.hasMetaData() || UnicodeString().hasMetaData()) {
1151 errln("UnicodeString::hasMetaData() returns TRUE");
1152 }
1153
1154 // test getTerminatedBuffer() on a truncated, shared, heap-allocated string
1155 test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
1156 test1.truncate(36); // ensure length()<getCapacity()
1157 test2=test1; // share the buffer
1158 test1.truncate(5);
1159 if(test1.length()!=5 || test1.getTerminatedBuffer()[5]!=0) {
1160 errln("UnicodeString(shared buffer).truncate() failed");
1161 }
1162 if(test2.length()!=36 || test2[5]!=0x66 || u_strlen(test2.getTerminatedBuffer())!=36) {
1163 errln("UnicodeString(shared buffer).truncate().getTerminatedBuffer() "
1164 "modified another copy of the string!");
1165 }
1166 test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
1167 test1.truncate(36); // ensure length()<getCapacity()
1168 test2=test1; // share the buffer
1169 test1.remove();
1170 if(test1.length()!=0 || test1.getTerminatedBuffer()[0]!=0) {
1171 errln("UnicodeString(shared buffer).remove() failed");
1172 }
1173 if(test2.length()!=36 || test2[0]!=0x61 || u_strlen(test2.getTerminatedBuffer())!=36) {
1174 errln("UnicodeString(shared buffer).remove().getTerminatedBuffer() "
1175 "modified another copy of the string!");
1176 }
1177
1178 // ticket #9740
1179 test1.setTo(TRUE, ucs, 3);
1180 assertEquals("length of read-only alias", 3, test1.length());
1181 test1.trim();
1182 assertEquals("length of read-only alias after trim()", 2, test1.length());
1183 assertEquals("length of terminated buffer of read-only alias + trim()",
1184 2, u_strlen(test1.getTerminatedBuffer()));
1185 }
1186
1187 void
TestStackAllocation()1188 UnicodeStringTest::TestStackAllocation()
1189 {
1190 UChar testString[] ={
1191 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x63, 0x72, 0x61, 0x7a, 0x79, 0x20, 0x74, 0x65, 0x73, 0x74, 0x2e, 0 };
1192 UChar guardWord = 0x4DED;
1193 UnicodeString* test = 0;
1194
1195 test = new UnicodeString(testString);
1196 if (*test != "This is a crazy test.")
1197 errln("Test string failed to initialize properly.");
1198 if (guardWord != 0x04DED)
1199 errln("Test string initialization overwrote guard word!");
1200
1201 test->insert(8, "only ");
1202 test->remove(15, 6);
1203 if (*test != "This is only a test.")
1204 errln("Manipulation of test string failed to work right.");
1205 if (guardWord != 0x4DED)
1206 errln("Manipulation of test string overwrote guard word!");
1207
1208 // we have to deinitialize and release the backing store by calling the destructor
1209 // explicitly, since we can't overload operator delete
1210 delete test;
1211
1212 UChar workingBuffer[] = {
1213 0x4e, 0x6f, 0x77, 0x20, 0x69, 0x73, 0x20, 0x74, 0x68, 0x65, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x20,
1214 0x66, 0x6f, 0x72, 0x20, 0x61, 0x6c, 0x6c, 0x20, 0x6d, 0x65, 0x6e, 0x20, 0x74, 0x6f, 0x20,
1215 0x63, 0x6f, 0x6d, 0x65, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1216 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1217 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1218 UChar guardWord2 = 0x4DED;
1219
1220 test = new UnicodeString(workingBuffer, 35, 100);
1221 if (*test != "Now is the time for all men to come")
1222 errln("Stack-allocated backing store failed to initialize correctly.");
1223 if (guardWord2 != 0x4DED)
1224 errln("Stack-allocated backing store overwrote guard word!");
1225
1226 test->insert(24, "good ");
1227 if (*test != "Now is the time for all good men to come")
1228 errln("insert() on stack-allocated UnicodeString didn't work right");
1229 if (guardWord2 != 0x4DED)
1230 errln("insert() on stack-allocated UnicodeString overwrote guard word!");
1231
1232 if (workingBuffer[24] != 0x67)
1233 errln("insert() on stack-allocated UnicodeString didn't affect backing store");
1234
1235 *test += " to the aid of their country.";
1236 if (*test != "Now is the time for all good men to come to the aid of their country.")
1237 errln("Stack-allocated UnicodeString overflow didn't work");
1238 if (guardWord2 != 0x4DED)
1239 errln("Stack-allocated UnicodeString overflow overwrote guard word!");
1240
1241 *test = "ha!";
1242 if (*test != "ha!")
1243 errln("Assignment to stack-allocated UnicodeString didn't work");
1244 if (workingBuffer[0] != 0x4e)
1245 errln("Change to UnicodeString after overflow are still affecting original buffer");
1246 if (guardWord2 != 0x4DED)
1247 errln("Change to UnicodeString after overflow overwrote guard word!");
1248
1249 // test read-only aliasing with setTo()
1250 workingBuffer[0] = 0x20ac;
1251 workingBuffer[1] = 0x125;
1252 workingBuffer[2] = 0;
1253 test->setTo(TRUE, workingBuffer, 2);
1254 if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x125) {
1255 errln("UnicodeString.setTo(readonly alias) does not alias correctly");
1256 }
1257
1258 UnicodeString *c=test->clone();
1259
1260 workingBuffer[1] = 0x109;
1261 if(test->charAt(1) != 0x109) {
1262 errln("UnicodeString.setTo(readonly alias) made a copy: did not see change in buffer");
1263 }
1264
1265 if(c->length() != 2 || c->charAt(1) != 0x125) {
1266 errln("clone(alias) did not copy the buffer");
1267 }
1268 delete c;
1269
1270 test->setTo(TRUE, workingBuffer, -1);
1271 if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x109) {
1272 errln("UnicodeString.setTo(readonly alias, length -1) does not alias correctly");
1273 }
1274
1275 test->setTo(FALSE, workingBuffer, -1);
1276 if(!test->isBogus()) {
1277 errln("UnicodeString.setTo(unterminated readonly alias, length -1) does not result in isBogus()");
1278 }
1279
1280 delete test;
1281
1282 test=new UnicodeString();
1283 UChar buffer[]={0x0061, 0x0062, 0x20ac, 0x0043, 0x0042, 0x0000};
1284 test->setTo(buffer, 4, 10);
1285 if(test->length() !=4 || test->charAt(0) != 0x0061 || test->charAt(1) != 0x0062 ||
1286 test->charAt(2) != 0x20ac || test->charAt(3) != 0x0043){
1287 errln((UnicodeString)"UnicodeString.setTo(UChar*, length, capacity) does not work correctly\n" + prettify(*test));
1288 }
1289 delete test;
1290
1291
1292 // test the UChar32 constructor
1293 UnicodeString c32Test((UChar32)0x10ff2a);
1294 if( c32Test.length() != U16_LENGTH(0x10ff2a) ||
1295 c32Test.char32At(c32Test.length() - 1) != 0x10ff2a
1296 ) {
1297 errln("The UnicodeString(UChar32) constructor does not work with a 0x10ff2a filler");
1298 }
1299
1300 // test the (new) capacity constructor
1301 UnicodeString capTest(5, (UChar32)0x2a, 5);
1302 if( capTest.length() != 5 * U16_LENGTH(0x2a) ||
1303 capTest.char32At(0) != 0x2a ||
1304 capTest.char32At(4) != 0x2a
1305 ) {
1306 errln("The UnicodeString capacity constructor does not work with an ASCII filler");
1307 }
1308
1309 capTest = UnicodeString(5, (UChar32)0x10ff2a, 5);
1310 if( capTest.length() != 5 * U16_LENGTH(0x10ff2a) ||
1311 capTest.char32At(0) != 0x10ff2a ||
1312 capTest.char32At(4) != 0x10ff2a
1313 ) {
1314 errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
1315 }
1316
1317 capTest = UnicodeString(5, (UChar32)0, 0);
1318 if(capTest.length() != 0) {
1319 errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
1320 }
1321 }
1322
1323 /**
1324 * Test the unescape() function.
1325 */
TestUnescape(void)1326 void UnicodeStringTest::TestUnescape(void) {
1327 UnicodeString IN("abc\\u4567 \\n\\r \\U00101234xyz\\x1\\x{5289}\\x1b", -1, US_INV);
1328 UnicodeString OUT("abc");
1329 OUT.append((UChar)0x4567);
1330 OUT.append(" ");
1331 OUT.append((UChar)0xA);
1332 OUT.append((UChar)0xD);
1333 OUT.append(" ");
1334 OUT.append((UChar32)0x00101234);
1335 OUT.append("xyz");
1336 OUT.append((UChar32)1).append((UChar32)0x5289).append((UChar)0x1b);
1337 UnicodeString result = IN.unescape();
1338 if (result != OUT) {
1339 errln("FAIL: " + prettify(IN) + ".unescape() -> " +
1340 prettify(result) + ", expected " +
1341 prettify(OUT));
1342 }
1343
1344 // test that an empty string is returned in case of an error
1345 if (!UNICODE_STRING("wrong \\u sequence", 17).unescape().isEmpty()) {
1346 errln("FAIL: unescaping of a string with an illegal escape sequence did not return an empty string");
1347 }
1348
1349 // ICU-21648 limit backslash-uhhhh escapes to ASCII hex digits
1350 UnicodeString euro = UnicodeString(u"\\u20aC").unescape();
1351 assertEquals("ASCII Euro", u"€", euro);
1352 UnicodeString nonASCIIEuro = UnicodeString(u"\\u୨෦aC").unescape();
1353 assertTrue("unescape() accepted non-ASCII digits", nonASCIIEuro.isEmpty());
1354 }
1355
1356 /* test code point counting functions --------------------------------------- */
1357
1358 /* reference implementation of UnicodeString::hasMoreChar32Than() */
1359 static int32_t
_refUnicodeStringHasMoreChar32Than(const UnicodeString & s,int32_t start,int32_t length,int32_t number)1360 _refUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) {
1361 int32_t count=s.countChar32(start, length);
1362 return count>number;
1363 }
1364
1365 /* compare the real function against the reference */
1366 void
_testUnicodeStringHasMoreChar32Than(const UnicodeString & s,int32_t start,int32_t length,int32_t number)1367 UnicodeStringTest::_testUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) {
1368 if(s.hasMoreChar32Than(start, length, number)!=_refUnicodeStringHasMoreChar32Than(s, start, length, number)) {
1369 errln("hasMoreChar32Than(%d, %d, %d)=%hd is wrong\n",
1370 start, length, number, s.hasMoreChar32Than(start, length, number));
1371 }
1372 }
1373
1374 void
TestCountChar32(void)1375 UnicodeStringTest::TestCountChar32(void) {
1376 {
1377 UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
1378
1379 // test countChar32()
1380 // note that this also calls and tests u_countChar32(length>=0)
1381 if(
1382 s.countChar32()!=4 ||
1383 s.countChar32(1)!=4 ||
1384 s.countChar32(2)!=3 ||
1385 s.countChar32(2, 3)!=2 ||
1386 s.countChar32(2, 0)!=0
1387 ) {
1388 errln("UnicodeString::countChar32() failed");
1389 }
1390
1391 // NUL-terminate the string buffer and test u_countChar32(length=-1)
1392 const UChar *buffer=s.getTerminatedBuffer();
1393 if(
1394 u_countChar32(buffer, -1)!=4 ||
1395 u_countChar32(buffer+1, -1)!=4 ||
1396 u_countChar32(buffer+2, -1)!=3 ||
1397 u_countChar32(buffer+3, -1)!=3 ||
1398 u_countChar32(buffer+4, -1)!=2 ||
1399 u_countChar32(buffer+5, -1)!=1 ||
1400 u_countChar32(buffer+6, -1)!=0
1401 ) {
1402 errln("u_countChar32(length=-1) failed");
1403 }
1404
1405 // test u_countChar32() with bad input
1406 if(u_countChar32(NULL, 5)!=0 || u_countChar32(buffer, -2)!=0) {
1407 errln("u_countChar32(bad input) failed (returned non-zero counts)");
1408 }
1409 }
1410
1411 /* test data and variables for hasMoreChar32Than() */
1412 static const UChar str[]={
1413 0x61, 0x62, 0xd800, 0xdc00,
1414 0xd801, 0xdc01, 0x63, 0xd802,
1415 0x64, 0xdc03, 0x65, 0x66,
1416 0xd804, 0xdc04, 0xd805, 0xdc05,
1417 0x67
1418 };
1419 UnicodeString string(str, UPRV_LENGTHOF(str));
1420 int32_t start, length, number;
1421
1422 /* test hasMoreChar32Than() */
1423 for(length=string.length(); length>=0; --length) {
1424 for(start=0; start<=length; ++start) {
1425 for(number=-1; number<=((length-start)+2); ++number) {
1426 _testUnicodeStringHasMoreChar32Than(string, start, length-start, number);
1427 }
1428 }
1429 }
1430
1431 /* test hasMoreChar32Than() with pinning */
1432 for(start=-1; start<=string.length()+1; ++start) {
1433 for(number=-1; number<=((string.length()-start)+2); ++number) {
1434 _testUnicodeStringHasMoreChar32Than(string, start, 0x7fffffff, number);
1435 }
1436 }
1437
1438 /* test hasMoreChar32Than() with a bogus string */
1439 string.setToBogus();
1440 for(length=-1; length<=1; ++length) {
1441 for(start=-1; start<=length; ++start) {
1442 for(number=-1; number<=((length-start)+2); ++number) {
1443 _testUnicodeStringHasMoreChar32Than(string, start, length-start, number);
1444 }
1445 }
1446 }
1447 }
1448
1449 void
TestBogus()1450 UnicodeStringTest::TestBogus() {
1451 UnicodeString test1("This is a test");
1452 UnicodeString test2("This is a test");
1453 UnicodeString test3("Me too!");
1454
1455 // test isBogus() and setToBogus()
1456 if (test1.isBogus() || test2.isBogus() || test3.isBogus()) {
1457 errln("A string returned TRUE for isBogus()!");
1458 }
1459
1460 // NULL pointers are treated like empty strings
1461 // use other illegal arguments to make a bogus string
1462 test3.setTo(FALSE, test1.getBuffer(), -2);
1463 if(!test3.isBogus()) {
1464 errln("A bogus string returned FALSE for isBogus()!");
1465 }
1466 if (test1.hashCode() != test2.hashCode() || test1.hashCode() == test3.hashCode()) {
1467 errln("hashCode() failed");
1468 }
1469 if(test3.getBuffer()!=0 || test3.getBuffer(20)!=0 || test3.getTerminatedBuffer()!=0) {
1470 errln("bogus.getBuffer()!=0");
1471 }
1472 if (test1.indexOf(test3) != -1) {
1473 errln("bogus.indexOf() != -1");
1474 }
1475 if (test1.lastIndexOf(test3) != -1) {
1476 errln("bogus.lastIndexOf() != -1");
1477 }
1478 if (test1.caseCompare(test3, U_FOLD_CASE_DEFAULT) != 1 || test3.caseCompare(test1, U_FOLD_CASE_DEFAULT) != -1) {
1479 errln("caseCompare() doesn't work with bogus strings");
1480 }
1481 if (test1.compareCodePointOrder(test3) != 1 || test3.compareCodePointOrder(test1) != -1) {
1482 errln("compareCodePointOrder() doesn't work with bogus strings");
1483 }
1484
1485 // verify that non-assignment modifications fail and do not revive a bogus string
1486 test3.setToBogus();
1487 test3.append((UChar)0x61);
1488 if(!test3.isBogus() || test3.getBuffer()!=0) {
1489 errln("bogus.append('a') worked but must not");
1490 }
1491
1492 test3.setToBogus();
1493 test3.findAndReplace(UnicodeString((UChar)0x61), test2);
1494 if(!test3.isBogus() || test3.getBuffer()!=0) {
1495 errln("bogus.findAndReplace() worked but must not");
1496 }
1497
1498 test3.setToBogus();
1499 test3.trim();
1500 if(!test3.isBogus() || test3.getBuffer()!=0) {
1501 errln("bogus.trim() revived bogus but must not");
1502 }
1503
1504 test3.setToBogus();
1505 test3.remove(1);
1506 if(!test3.isBogus() || test3.getBuffer()!=0) {
1507 errln("bogus.remove(1) revived bogus but must not");
1508 }
1509
1510 test3.setToBogus();
1511 if(!test3.setCharAt(0, 0x62).isBogus() || !test3.isEmpty()) {
1512 errln("bogus.setCharAt(0, 'b') worked but must not");
1513 }
1514
1515 test3.setToBogus();
1516 if(test3.truncate(1) || !test3.isBogus() || !test3.isEmpty()) {
1517 errln("bogus.truncate(1) revived bogus but must not");
1518 }
1519
1520 // verify that assignments revive a bogus string
1521 test3.setToBogus();
1522 if(!test3.isBogus() || (test3=test1).isBogus() || test3!=test1) {
1523 errln("bogus.operator=() failed");
1524 }
1525
1526 test3.setToBogus();
1527 if(!test3.isBogus() || test3.fastCopyFrom(test1).isBogus() || test3!=test1) {
1528 errln("bogus.fastCopyFrom() failed");
1529 }
1530
1531 test3.setToBogus();
1532 if(!test3.isBogus() || test3.setTo(test1).isBogus() || test3!=test1) {
1533 errln("bogus.setTo(UniStr) failed");
1534 }
1535
1536 test3.setToBogus();
1537 if(!test3.isBogus() || test3.setTo(test1, 0).isBogus() || test3!=test1) {
1538 errln("bogus.setTo(UniStr, 0) failed");
1539 }
1540
1541 test3.setToBogus();
1542 if(!test3.isBogus() || test3.setTo(test1, 0, 0x7fffffff).isBogus() || test3!=test1) {
1543 errln("bogus.setTo(UniStr, 0, len) failed");
1544 }
1545
1546 test3.setToBogus();
1547 if(!test3.isBogus() || test3.setTo(test1.getBuffer(), test1.length()).isBogus() || test3!=test1) {
1548 errln("bogus.setTo(const UChar *, len) failed");
1549 }
1550
1551 test3.setToBogus();
1552 if(!test3.isBogus() || test3.setTo((UChar)0x2028).isBogus() || test3!=UnicodeString((UChar)0x2028)) {
1553 errln("bogus.setTo(UChar) failed");
1554 }
1555
1556 test3.setToBogus();
1557 if(!test3.isBogus() || test3.setTo((UChar32)0x1d157).isBogus() || test3!=UnicodeString((UChar32)0x1d157)) {
1558 errln("bogus.setTo(UChar32) failed");
1559 }
1560
1561 test3.setToBogus();
1562 if(!test3.isBogus() || test3.setTo(FALSE, test1.getBuffer(), test1.length()).isBogus() || test3!=test1) {
1563 errln("bogus.setTo(readonly alias) failed");
1564 }
1565
1566 // writable alias to another string's buffer: very bad idea, just convenient for this test
1567 test3.setToBogus();
1568 if(!test3.isBogus() ||
1569 test3.setTo(const_cast<UChar *>(test1.getBuffer()),
1570 test1.length(), test1.getCapacity()).isBogus() ||
1571 test3!=test1) {
1572 errln("bogus.setTo(writable alias) failed");
1573 }
1574
1575 // verify simple, documented ways to turn a bogus string into an empty one
1576 test3.setToBogus();
1577 if(!test3.isBogus() || (test3=UnicodeString()).isBogus() || !test3.isEmpty()) {
1578 errln("bogus.operator=(UnicodeString()) failed");
1579 }
1580
1581 test3.setToBogus();
1582 if(!test3.isBogus() || test3.setTo(UnicodeString()).isBogus() || !test3.isEmpty()) {
1583 errln("bogus.setTo(UnicodeString()) failed");
1584 }
1585
1586 test3.setToBogus();
1587 if(test3.remove().isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) {
1588 errln("bogus.remove() failed");
1589 }
1590
1591 test3.setToBogus();
1592 if(test3.remove(0, INT32_MAX).isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) {
1593 errln("bogus.remove(0, INT32_MAX) failed");
1594 }
1595
1596 test3.setToBogus();
1597 if(test3.truncate(0) || test3.isBogus() || !test3.isEmpty()) {
1598 errln("bogus.truncate(0) failed");
1599 }
1600
1601 test3.setToBogus();
1602 if(!test3.isBogus() || test3.setTo((UChar32)-1).isBogus() || !test3.isEmpty()) {
1603 errln("bogus.setTo((UChar32)-1) failed");
1604 }
1605
1606 static const UChar nul=0;
1607
1608 test3.setToBogus();
1609 if(!test3.isBogus() || test3.setTo(&nul, 0).isBogus() || !test3.isEmpty()) {
1610 errln("bogus.setTo(&nul, 0) failed");
1611 }
1612
1613 test3.setToBogus();
1614 if(!test3.isBogus() || test3.getBuffer()!=0) {
1615 errln("setToBogus() failed to make a string bogus");
1616 }
1617
1618 test3.setToBogus();
1619 if(test1.isBogus() || !(test1=test3).isBogus()) {
1620 errln("normal=bogus failed to make the left string bogus");
1621 }
1622
1623 // test that NULL primitive input string values are treated like
1624 // empty strings, not errors (bogus)
1625 test2.setTo((UChar32)0x10005);
1626 if(test2.insert(1, nullptr, 1).length()!=2) {
1627 errln("UniStr.insert(...nullptr...) should not modify the string but does");
1628 }
1629
1630 UErrorCode errorCode=U_ZERO_ERROR;
1631 UnicodeString
1632 test4((const UChar *)NULL),
1633 test5(TRUE, (const UChar *)NULL, 1),
1634 test6((UChar *)NULL, 5, 5),
1635 test7((const char *)NULL, 3, NULL, errorCode);
1636 if(test4.isBogus() || test5.isBogus() || test6.isBogus() || test7.isBogus()) {
1637 errln("a constructor set to bogus for a NULL input string, should be empty");
1638 }
1639
1640 test4.setTo(NULL, 3);
1641 test5.setTo(TRUE, (const UChar *)NULL, 1);
1642 test6.setTo((UChar *)NULL, 5, 5);
1643 if(test4.isBogus() || test5.isBogus() || test6.isBogus()) {
1644 errln("a setTo() set to bogus for a NULL input string, should be empty");
1645 }
1646
1647 // test that bogus==bogus<any
1648 if(test1!=test3 || test1.compare(test3)!=0) {
1649 errln("bogus==bogus failed");
1650 }
1651
1652 test2.remove();
1653 if(test1>=test2 || !(test2>test1) || test1.compare(test2)>=0 || !(test2.compare(test1)>0)) {
1654 errln("bogus<empty failed");
1655 }
1656 }
1657
1658 // StringEnumeration ------------------------------------------------------- ***
1659 // most of StringEnumeration is tested elsewhere
1660 // this test improves code coverage
1661
// Fixture strings handed out, in this order, by TestEnumeration below:
// three single-character entries followed by two long ones.
static const char *const testEnumStrings[] = {
    "a", "b", "c",
    "this is a long string which helps us test some buffer limits",
    "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"
};
1670
1671 class TestEnumeration : public StringEnumeration {
1672 public:
TestEnumeration()1673 TestEnumeration() : i(0) {}
1674
count(UErrorCode &) const1675 virtual int32_t count(UErrorCode& /*status*/) const override {
1676 return UPRV_LENGTHOF(testEnumStrings);
1677 }
1678
snext(UErrorCode & status)1679 virtual const UnicodeString *snext(UErrorCode &status) override {
1680 if(U_SUCCESS(status) && i<UPRV_LENGTHOF(testEnumStrings)) {
1681 unistr=UnicodeString(testEnumStrings[i++], "");
1682 return &unistr;
1683 }
1684
1685 return NULL;
1686 }
1687
reset(UErrorCode &)1688 virtual void reset(UErrorCode& /*status*/) override {
1689 i=0;
1690 }
1691
getStaticClassID()1692 static inline UClassID getStaticClassID() {
1693 return (UClassID)&fgClassID;
1694 }
getDynamicClassID() const1695 virtual UClassID getDynamicClassID() const override {
1696 return getStaticClassID();
1697 }
1698
1699 private:
1700 static const char fgClassID;
1701
1702 int32_t i;
1703 };
1704
1705 const char TestEnumeration::fgClassID=0;
1706
1707 void
TestStringEnumeration()1708 UnicodeStringTest::TestStringEnumeration() {
1709 UnicodeString s;
1710 TestEnumeration ten;
1711 int32_t i, length;
1712 UErrorCode status;
1713
1714 const UChar *pu;
1715 const char *pc;
1716
1717 // test the next() default implementation and ensureCharsCapacity()
1718 for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1719 status=U_ZERO_ERROR;
1720 pc=ten.next(&length, status);
1721 s=UnicodeString(testEnumStrings[i], "");
1722 if(U_FAILURE(status) || pc==NULL || length!=s.length() || UnicodeString(pc, length, "")!=s) {
1723 errln("StringEnumeration.next(%d) failed", i);
1724 }
1725 }
1726 status=U_ZERO_ERROR;
1727 if(ten.next(&length, status)!=NULL) {
1728 errln("StringEnumeration.next(done)!=NULL");
1729 }
1730
1731 // test the unext() default implementation
1732 ten.reset(status);
1733 for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1734 status=U_ZERO_ERROR;
1735 pu=ten.unext(&length, status);
1736 s=UnicodeString(testEnumStrings[i], "");
1737 if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(TRUE, pu, length)!=s) {
1738 errln("StringEnumeration.unext(%d) failed", i);
1739 }
1740 }
1741 status=U_ZERO_ERROR;
1742 if(ten.unext(&length, status)!=NULL) {
1743 errln("StringEnumeration.unext(done)!=NULL");
1744 }
1745
1746 // test that the default clone() implementation works, and returns NULL
1747 if(ten.clone()!=NULL) {
1748 errln("StringEnumeration.clone()!=NULL");
1749 }
1750
1751 // test that uenum_openFromStringEnumeration() works
1752 // Need a heap allocated string enumeration because it is adopted by the UEnumeration.
1753 StringEnumeration *newTen = new TestEnumeration;
1754 status=U_ZERO_ERROR;
1755 UEnumeration *uten = uenum_openFromStringEnumeration(newTen, &status);
1756 if (uten==NULL || U_FAILURE(status)) {
1757 errln("fail at file %s, line %d, UErrorCode is %s\n", __FILE__, __LINE__, u_errorName(status));
1758 return;
1759 }
1760
1761 // test uenum_next()
1762 for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1763 status=U_ZERO_ERROR;
1764 pc=uenum_next(uten, &length, &status);
1765 if(U_FAILURE(status) || pc==NULL || strcmp(pc, testEnumStrings[i]) != 0) {
1766 errln("File %s, line %d, StringEnumeration.next(%d) failed", __FILE__, __LINE__, i);
1767 }
1768 }
1769 status=U_ZERO_ERROR;
1770 if(uenum_next(uten, &length, &status)!=NULL) {
1771 errln("File %s, line %d, uenum_next(done)!=NULL");
1772 }
1773
1774 // test the uenum_unext()
1775 uenum_reset(uten, &status);
1776 for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1777 status=U_ZERO_ERROR;
1778 pu=uenum_unext(uten, &length, &status);
1779 s=UnicodeString(testEnumStrings[i], "");
1780 if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(TRUE, pu, length)!=s) {
1781 errln("File %s, Line %d, uenum_unext(%d) failed", __FILE__, __LINE__, i);
1782 }
1783 }
1784 status=U_ZERO_ERROR;
1785 if(uenum_unext(uten, &length, &status)!=NULL) {
1786 errln("File %s, Line %d, uenum_unext(done)!=NULL" __FILE__, __LINE__);
1787 }
1788
1789 uenum_close(uten);
1790 }
1791
1792 /*
1793 * Namespace test, to make sure that macros like UNICODE_STRING include the
1794 * namespace qualifier.
1795 *
1796 * Define a (bogus) UnicodeString class in another namespace and check for ambiguity.
1797 */
1798 namespace bogus {
1799 class UnicodeString {
1800 public:
1801 enum EInvariant { kInvariant };
UnicodeString()1802 UnicodeString() : i(1) {}
UnicodeString(UBool,const UChar *,int32_t textLength)1803 UnicodeString(UBool /*isTerminated*/, const UChar * /*text*/, int32_t textLength) : i(textLength) {(void)i;}
UnicodeString(const char *,int32_t length,enum EInvariant)1804 UnicodeString(const char * /*src*/, int32_t length, enum EInvariant /*inv*/
1805 ) : i(length) {}
1806 private:
1807 int32_t i;
1808 };
1809 }
1810
1811 void
TestNameSpace()1812 UnicodeStringTest::TestNameSpace() {
1813 // Provoke name collision unless the UnicodeString macros properly
1814 // qualify the icu::UnicodeString class.
1815 using namespace bogus;
1816
1817 // Use all UnicodeString macros from unistr.h.
1818 icu::UnicodeString s1=icu::UnicodeString("abc", 3, US_INV);
1819 icu::UnicodeString s2=UNICODE_STRING("def", 3);
1820 icu::UnicodeString s3=UNICODE_STRING_SIMPLE("ghi");
1821
1822 // Make sure the compiler does not optimize away instantiation of s1, s2, s3.
1823 icu::UnicodeString s4=s1+s2+s3;
1824 if(s4.length()!=9) {
1825 errln("Something wrong with UnicodeString::operator+().");
1826 }
1827 }
1828
1829 void
TestUTF32()1830 UnicodeStringTest::TestUTF32() {
1831 // Input string length US_STACKBUF_SIZE to cause overflow of the
1832 // initially chosen fStackBuffer due to supplementary characters.
1833 static const UChar32 utf32[] = {
1834 0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a,
1835 0x10000, 0x20000, 0xe0000, 0x10ffff
1836 };
1837 static const UChar expected_utf16[] = {
1838 0x41, 0xfffd, 0x61, 0xfffd, 0xfffd, 0xfffd, 0x5a, 0xd900, 0xdc00, 0x7a,
1839 0xd800, 0xdc00, 0xd840, 0xdc00, 0xdb40, 0xdc00, 0xdbff, 0xdfff
1840 };
1841 UnicodeString from32 = UnicodeString::fromUTF32(utf32, UPRV_LENGTHOF(utf32));
1842 UnicodeString expected(FALSE, expected_utf16, UPRV_LENGTHOF(expected_utf16));
1843 if(from32 != expected) {
1844 errln("UnicodeString::fromUTF32() did not create the expected string.");
1845 }
1846
1847 static const UChar utf16[] = {
1848 0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
1849 };
1850 static const UChar32 expected_utf32[] = {
1851 0x41, 0xfffd, 0x61, 0xfffd, 0x5a, 0x50000, 0x7a, 0x10000, 0x10ffff
1852 };
1853 UChar32 result32[16];
1854 UErrorCode errorCode = U_ZERO_ERROR;
1855 int32_t length32 =
1856 UnicodeString(FALSE, utf16, UPRV_LENGTHOF(utf16)).
1857 toUTF32(result32, UPRV_LENGTHOF(result32), errorCode);
1858 if( length32 != UPRV_LENGTHOF(expected_utf32) ||
1859 0 != uprv_memcmp(result32, expected_utf32, length32*4) ||
1860 result32[length32] != 0
1861 ) {
1862 errln("UnicodeString::toUTF32() did not create the expected string.");
1863 }
1864 }
1865
1866 class TestCheckedArrayByteSink : public CheckedArrayByteSink {
1867 public:
TestCheckedArrayByteSink(char * outbuf,int32_t capacity)1868 TestCheckedArrayByteSink(char* outbuf, int32_t capacity)
1869 : CheckedArrayByteSink(outbuf, capacity), calledFlush(FALSE) {}
Flush()1870 virtual void Flush() override { calledFlush = TRUE; }
1871 UBool calledFlush;
1872 };
1873
1874 void
TestUTF8()1875 UnicodeStringTest::TestUTF8() {
1876 static const uint8_t utf8[] = {
1877 // Code points:
1878 // 0x41, 0xd900,
1879 // 0x61, 0xdc00,
1880 // 0x110000, 0x5a,
1881 // 0x50000, 0x7a,
1882 // 0x10000, 0x20000,
1883 // 0xe0000, 0x10ffff
1884 0x41, 0xed, 0xa4, 0x80,
1885 0x61, 0xed, 0xb0, 0x80,
1886 0xf4, 0x90, 0x80, 0x80, 0x5a,
1887 0xf1, 0x90, 0x80, 0x80, 0x7a,
1888 0xf0, 0x90, 0x80, 0x80, 0xf0, 0xa0, 0x80, 0x80,
1889 0xf3, 0xa0, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
1890 };
1891 static const UChar expected_utf16[] = {
1892 0x41, 0xfffd, 0xfffd, 0xfffd,
1893 0x61, 0xfffd, 0xfffd, 0xfffd,
1894 0xfffd, 0xfffd, 0xfffd, 0xfffd,0x5a,
1895 0xd900, 0xdc00, 0x7a,
1896 0xd800, 0xdc00, 0xd840, 0xdc00,
1897 0xdb40, 0xdc00, 0xdbff, 0xdfff
1898 };
1899 UnicodeString from8 = UnicodeString::fromUTF8(StringPiece((const char *)utf8, (int32_t)sizeof(utf8)));
1900 UnicodeString expected(FALSE, expected_utf16, UPRV_LENGTHOF(expected_utf16));
1901
1902 if(from8 != expected) {
1903 errln("UnicodeString::fromUTF8(StringPiece) did not create the expected string.");
1904 }
1905 std::string utf8_string((const char *)utf8, sizeof(utf8));
1906 UnicodeString from8b = UnicodeString::fromUTF8(utf8_string);
1907 if(from8b != expected) {
1908 errln("UnicodeString::fromUTF8(std::string) did not create the expected string.");
1909 }
1910
1911 static const UChar utf16[] = {
1912 0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
1913 };
1914 static const uint8_t expected_utf8[] = {
1915 0x41, 0xef, 0xbf, 0xbd, 0x61, 0xef, 0xbf, 0xbd, 0x5a, 0xf1, 0x90, 0x80, 0x80, 0x7a,
1916 0xf0, 0x90, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
1917 };
1918 UnicodeString us(FALSE, utf16, UPRV_LENGTHOF(utf16));
1919
1920 char buffer[64];
1921 TestCheckedArrayByteSink sink(buffer, (int32_t)sizeof(buffer));
1922 us.toUTF8(sink);
1923 if( sink.NumberOfBytesWritten() != (int32_t)sizeof(expected_utf8) ||
1924 0 != uprv_memcmp(buffer, expected_utf8, sizeof(expected_utf8))
1925 ) {
1926 errln("UnicodeString::toUTF8() did not create the expected string.");
1927 }
1928 if(!sink.calledFlush) {
1929 errln("UnicodeString::toUTF8(sink) did not sink.Flush().");
1930 }
1931 // Initial contents for testing that toUTF8String() appends.
1932 std::string result8 = "-->";
1933 std::string expected8 = "-->" + std::string((const char *)expected_utf8, sizeof(expected_utf8));
1934 // Use the return value just for testing.
1935 std::string &result8r = us.toUTF8String(result8);
1936 if(result8r != expected8 || &result8r != &result8) {
1937 errln("UnicodeString::toUTF8String() did not create the expected string.");
1938 }
1939 }
1940
// Test if this compiler supports Return Value Optimization of unnamed temporary objects.
// Constructs a UnicodeString from uchars with the (TRUE, pointer, -1)
// constructor and returns that unnamed temporary by value.
// TestReadOnlyAlias() compares the returned string's getBuffer() with the
// pointer passed in, so the return expression must stay an unnamed temporary.
static UnicodeString wrapUChars(const UChar *uchars) {
    return UnicodeString(TRUE, uchars, -1);
}
1945
// Checks how a UnicodeString that aliases an external buffer behaves under
// truncate(), remove(), retainBetween(), getTerminatedBuffer() and the
// tempSubString()/tempSubStringBetween() helpers. The checks are order
// dependent: each one inspects the state left behind by the calls before it.
void
UnicodeStringTest::TestReadOnlyAlias() {
    // "ab" followed by a NUL, so an alias of length 2 is already terminated.
    UChar uchars[]={ 0x61, 0x62, 0 };
    UnicodeString alias(TRUE, uchars, 2);
    if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
        errln("UnicodeString read-only-aliasing constructor does not behave as expected.");
        return;
    }
    // Shortening must keep pointing at uchars ...
    alias.truncate(1);
    if(alias.length()!=1 || alias.getBuffer()!=uchars) {
        errln("UnicodeString(read-only-alias).truncate() did not preserve aliasing as expected.");
    }
    // ... but uchars[1] is not a NUL, so a terminated buffer is expected to
    // live somewhere else, and uchars itself must be left untouched.
    if(alias.getTerminatedBuffer()==uchars) {
        errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
              "did not allocate and copy as expected.");
    }
    if(uchars[1]!=0x62) {
        errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
              "modified the original buffer.");
    }
    if(1!=u_strlen(alias.getTerminatedBuffer())) {
        errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
              "does not return a buffer terminated at the proper length.");
    }

    // Same scenario, but aliasing via setTo() and emptying via remove().
    alias.setTo(TRUE, uchars, 2);
    if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
        errln("UnicodeString read-only-aliasing setTo() does not behave as expected.");
        return;
    }
    alias.remove();
    if(alias.length()!=0) {
        errln("UnicodeString(read-only-alias).remove() did not work.");
    }
    if(alias.getTerminatedBuffer()==uchars) {
        errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
              "did not un-alias as expected.");
    }
    if(uchars[0]!=0x61) {
        errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
              "modified the original buffer.");
    }
    if(0!=u_strlen(alias.getTerminatedBuffer())) {
        errln("UnicodeString.setTo(read-only-alias).remove().getTerminatedBuffer() "
              "does not return a buffer terminated at length 0.");
    }

    // Removing a prefix, a suffix, or both from an alias is expected to move
    // only the alias's start/length within longString's buffer.
    UnicodeString longString=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789");
    alias.setTo(FALSE, longString.getBuffer(), longString.length());
    alias.remove(0, 10);
    if(longString.compare(10, INT32_MAX, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+10) {
        errln("UnicodeString.setTo(read-only-alias).remove(0, 10) did not preserve aliasing as expected.");
    }
    alias.setTo(FALSE, longString.getBuffer(), longString.length());
    alias.remove(27, 99);
    if(longString.compare(0, 27, alias)!=0 || alias.getBuffer()!=longString.getBuffer()) {
        errln("UnicodeString.setTo(read-only-alias).remove(27, 99) did not preserve aliasing as expected.");
    }
    alias.setTo(FALSE, longString.getBuffer(), longString.length());
    alias.retainBetween(6, 30);
    if(longString.compare(6, 24, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+6) {
        errln("UnicodeString.setTo(read-only-alias).retainBetween(6, 30) did not preserve aliasing as expected.");
    }

    // hasRVO is true when the string returned by wrapUChars() still points at
    // abc; the buffer-pointer comparisons below are only enforced in that case.
    UChar abc[]={ 0x61, 0x62, 0x63, 0 };
    UBool hasRVO= wrapUChars(abc).getBuffer()==abc;

    // tempSubString()/tempSubStringBetween(): contents must match the
    // corresponding range of longString in every case.
    UnicodeString temp;
    temp.fastCopyFrom(longString.tempSubString());
    if(temp!=longString || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
        errln("UnicodeString.tempSubString() failed");
    }
    // A negative start is expected to behave like start 0.
    temp.fastCopyFrom(longString.tempSubString(-3, 5));
    if(longString.compare(0, 5, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
        errln("UnicodeString.tempSubString(-3, 5) failed");
    }
    temp.fastCopyFrom(longString.tempSubString(17));
    if(longString.compare(17, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+17)) {
        errln("UnicodeString.tempSubString(17) failed");
    }
    // A start beyond the end is expected to give an empty string.
    temp.fastCopyFrom(longString.tempSubString(99));
    if(!temp.isEmpty()) {
        errln("UnicodeString.tempSubString(99) failed");
    }
    temp.fastCopyFrom(longString.tempSubStringBetween(6));
    if(longString.compare(6, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+6)) {
        errln("UnicodeString.tempSubStringBetween(6) failed");
    }
    temp.fastCopyFrom(longString.tempSubStringBetween(8, 18));
    if(longString.compare(8, 10, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+8)) {
        errln("UnicodeString.tempSubStringBetween(8, 18) failed");
    }
    // A substring of a bogus string is expected to be bogus as well.
    UnicodeString bogusString;
    bogusString.setToBogus();
    temp.fastCopyFrom(bogusString.tempSubStringBetween(8, 18));
    if(!temp.isBogus()) {
        errln("UnicodeString.setToBogus().tempSubStringBetween(8, 18) failed");
    }
}
2045
2046 void
doTestAppendable(UnicodeString & dest,Appendable & app)2047 UnicodeStringTest::doTestAppendable(UnicodeString &dest, Appendable &app) {
2048 static const UChar cde[3]={ 0x63, 0x64, 0x65 };
2049 static const UChar fg[3]={ 0x66, 0x67, 0 };
2050 if(!app.reserveAppendCapacity(12)) {
2051 errln("Appendable.reserve(12) failed");
2052 }
2053 app.appendCodeUnit(0x61);
2054 app.appendCodePoint(0x62);
2055 app.appendCodePoint(0x50000);
2056 app.appendString(cde, 3);
2057 app.appendString(fg, -1);
2058 UChar scratch[3];
2059 int32_t capacity=-1;
2060 UChar *buffer=app.getAppendBuffer(3, 3, scratch, 3, &capacity);
2061 if(capacity<3) {
2062 errln("Appendable.getAppendBuffer(min=3) returned capacity=%d<3", (int)capacity);
2063 return;
2064 }
2065 static const UChar hij[3]={ 0x68, 0x69, 0x6a };
2066 u_memcpy(buffer, hij, 3);
2067 app.appendString(buffer, 3);
2068 if(dest!=UNICODE_STRING_SIMPLE("ab\\U00050000cdefghij").unescape()) {
2069 errln("Appendable.append(...) failed");
2070 }
2071 buffer=app.getAppendBuffer(0, 3, scratch, 3, &capacity);
2072 if(buffer!=NULL || capacity!=0) {
2073 errln("Appendable.getAppendBuffer(min=0) failed");
2074 }
2075 capacity=1;
2076 buffer=app.getAppendBuffer(3, 3, scratch, 2, &capacity);
2077 if(buffer!=NULL || capacity!=0) {
2078 errln("Appendable.getAppendBuffer(scratch<min) failed");
2079 }
2080 }
2081
2082 class SimpleAppendable : public Appendable {
2083 public:
SimpleAppendable(UnicodeString & dest)2084 explicit SimpleAppendable(UnicodeString &dest) : str(dest) {}
appendCodeUnit(UChar c)2085 virtual UBool appendCodeUnit(UChar c) override { str.append(c); return TRUE; }
reset()2086 SimpleAppendable &reset() { str.remove(); return *this; }
2087 private:
2088 UnicodeString &str;
2089 };
2090
2091 void
TestAppendable()2092 UnicodeStringTest::TestAppendable() {
2093 UnicodeString dest;
2094 SimpleAppendable app(dest);
2095 doTestAppendable(dest, app);
2096 }
2097
2098 void
TestUnicodeStringImplementsAppendable()2099 UnicodeStringTest::TestUnicodeStringImplementsAppendable() {
2100 UnicodeString dest;
2101 UnicodeStringAppendable app(dest);
2102 doTestAppendable(dest, app);
2103 }
2104
2105 void
TestSizeofUnicodeString()2106 UnicodeStringTest::TestSizeofUnicodeString() {
2107 // See the comments in unistr.h near the declaration of UnicodeString's fields.
2108 // See the API comments for UNISTR_OBJECT_SIZE.
2109 size_t sizeofUniStr=sizeof(UnicodeString);
2110 size_t expected=UNISTR_OBJECT_SIZE;
2111 if(expected!=sizeofUniStr) {
2112 // Possible cause: UNISTR_OBJECT_SIZE may not be a multiple of sizeof(pointer),
2113 // of the compiler might add more internal padding than expected.
2114 errln("sizeof(UnicodeString)=%d, expected UNISTR_OBJECT_SIZE=%d",
2115 (int)sizeofUniStr, (int)expected);
2116 }
2117 if(sizeofUniStr<32) {
2118 errln("sizeof(UnicodeString)=%d < 32, probably too small", (int)sizeofUniStr);
2119 }
2120 // We assume that the entire UnicodeString object,
2121 // minus the vtable pointer and 2 bytes for flags and short length,
2122 // is available for internal storage of UChars.
2123 int32_t expectedStackBufferLength=((int32_t)UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR;
2124 UnicodeString s;
2125 const UChar *emptyBuffer=s.getBuffer();
2126 for(int32_t i=0; i<expectedStackBufferLength; ++i) {
2127 s.append((UChar)0x2e);
2128 }
2129 const UChar *fullBuffer=s.getBuffer();
2130 if(fullBuffer!=emptyBuffer) {
2131 errln("unexpected reallocation when filling with assumed stack buffer size of %d",
2132 expectedStackBufferLength);
2133 }
2134 const UChar *terminatedBuffer=s.getTerminatedBuffer();
2135 if(terminatedBuffer==emptyBuffer) {
2136 errln("unexpected keeping stack buffer when overfilling assumed stack buffer size of %d",
2137 expectedStackBufferLength);
2138 }
2139 }
2140
2141 // Try to avoid clang -Wself-move warnings from s1 = std::move(s1);
moveFrom(UnicodeString & dest,UnicodeString & src)2142 void moveFrom(UnicodeString &dest, UnicodeString &src) {
2143 dest = std::move(src);
2144 }
2145
// Exercises UnicodeString::swap(), the non-member swap(), move assignment and
// move construction on three kinds of strings (read-only alias, heap buffer,
// stack buffer), then self-move followed by ordinary copy assignment.
// Every check depends on the state left by the previous statements.
void
UnicodeStringTest::TestMoveSwap() {
    static const UChar abc[3] = { 0x61, 0x62, 0x63 };  // "abc"
    UnicodeString s1(FALSE, abc, UPRV_LENGTHOF(abc));  // read-only alias
    UnicodeString s2(100, 0x7a, 100);  // 100 * 'z' should be on the heap
    UnicodeString s3("defg", 4, US_INV);  // in stack buffer
    // Remember the heap buffer so ownership can be tracked by pointer.
    const UChar *p = s2.getBuffer();
    // After this: s1 = heap 'z' string, s2 = alias of abc.
    s1.swap(s2);
    if(s1.getBuffer() != p || s1.length() != 100 || s2.getBuffer() != abc || s2.length() != 3) {
        errln("UnicodeString.swap() did not swap");
    }
    // After this: s2 = "defg", s3 = alias of abc.
    swap(s2, s3);
    if(s2 != UNICODE_STRING_SIMPLE("defg") || s3.getBuffer() != abc || s3.length() != 3) {
        errln("swap(UnicodeString) did not swap back");
    }
    // Moving the heap string must hand over the same buffer and leave the
    // source bogus.
    UnicodeString s4;
    s4 = std::move(s1);
    if(s4.getBuffer() != p || s4.length() != 100 || !s1.isBogus()) {
        errln("UnicodeString = std::move(heap) did not move");
    }
    // For the stack-buffer string only the contents are checked.
    UnicodeString s5;
    s5 = std::move(s2);
    if(s5 != UNICODE_STRING_SIMPLE("defg")) {
        errln("UnicodeString = std::move(stack) did not move");
    }
    // Moving the alias must keep pointing at abc.
    UnicodeString s6;
    s6 = std::move(s3);
    if(s6.getBuffer() != abc || s6.length() != 3) {
        errln("UnicodeString = std::move(alias) did not move");
    }
    // Same operations again, spelled with explicit rvalue-reference casts.
    infoln("TestMoveSwap() with rvalue references");
    s1 = static_cast<UnicodeString &&>(s6);
    if(s1.getBuffer() != abc || s1.length() != 3) {
        errln("UnicodeString move assignment operator did not move");
    }
    UnicodeString s7(static_cast<UnicodeString &&>(s4));
    if(s7.getBuffer() != p || s7.length() != 100 || !s4.isBogus()) {
        errln("UnicodeString move constructor did not move");
    }

    // Move self assignment leaves the object valid but in an undefined state.
    // Do it to make sure there is no crash,
    // but do not check for any particular resulting value.
    moveFrom(s1, s1);
    moveFrom(s2, s2);
    moveFrom(s3, s3);
    moveFrom(s4, s4);
    moveFrom(s5, s5);
    moveFrom(s6, s6);
    moveFrom(s7, s7);
    // Simple copy assignment must work.
    UnicodeString simple = UNICODE_STRING_SIMPLE("simple");
    s1 = s6 = s4 = s7 = simple;
    if(s1 != simple || s4 != simple || s6 != simple || s7 != simple) {
        errln("UnicodeString copy after self-move did not work");
    }
}
2203
2204 void
TestUInt16Pointers()2205 UnicodeStringTest::TestUInt16Pointers() {
2206 static const uint16_t carr[] = { 0x61, 0x62, 0x63, 0 };
2207 uint16_t arr[4];
2208
2209 UnicodeString expected(u"abc");
2210 assertEquals("abc from pointer", expected, UnicodeString(carr));
2211 assertEquals("abc from pointer+length", expected, UnicodeString(carr, 3));
2212 assertEquals("abc from read-only-alias pointer", expected, UnicodeString(TRUE, carr, 3));
2213
2214 UnicodeString alias(arr, 0, 4);
2215 alias.append(u'a').append(u'b').append(u'c');
2216 assertEquals("abc from writable alias", expected, alias);
2217 assertEquals("buffer=abc from writable alias", expected, UnicodeString(arr, 3));
2218
2219 UErrorCode errorCode = U_ZERO_ERROR;
2220 int32_t length = UnicodeString(u"def").extract(arr, 4, errorCode);
2221 assertSuccess(WHERE, errorCode);
2222 assertEquals("def from extract()", UnicodeString(u"def"), UnicodeString(arr, length));
2223 }
2224
2225 void
TestWCharPointers()2226 UnicodeStringTest::TestWCharPointers() {
2227 #if U_SIZEOF_WCHAR_T==2
2228 static const wchar_t carr[] = { 0x61, 0x62, 0x63, 0 };
2229 wchar_t arr[4];
2230
2231 UnicodeString expected(u"abc");
2232 assertEquals("abc from pointer", expected, UnicodeString(carr));
2233 assertEquals("abc from pointer+length", expected, UnicodeString(carr, 3));
2234 assertEquals("abc from read-only-alias pointer", expected, UnicodeString(TRUE, carr, 3));
2235
2236 UnicodeString alias(arr, 0, 4);
2237 alias.append(u'a').append(u'b').append(u'c');
2238 assertEquals("abc from writable alias", expected, alias);
2239 assertEquals("buffer=abc from writable alias", expected, UnicodeString(arr, 3));
2240
2241 UErrorCode errorCode = U_ZERO_ERROR;
2242 int32_t length = UnicodeString(u"def").extract(arr, 4, errorCode);
2243 assertSuccess(WHERE, errorCode);
2244 assertEquals("def from extract()", UnicodeString(u"def"), UnicodeString(arr, length));
2245 #endif
2246 }
2247
2248 void
TestNullPointers()2249 UnicodeStringTest::TestNullPointers() {
2250 assertTrue("empty from nullptr", UnicodeString(nullptr).isEmpty());
2251 assertTrue("empty from nullptr+length", UnicodeString(nullptr, 2).isEmpty());
2252 assertTrue("empty from read-only-alias nullptr", UnicodeString(TRUE, nullptr, 3).isEmpty());
2253
2254 UnicodeString alias(nullptr, 4, 4); // empty, no alias
2255 assertTrue("empty from writable alias", alias.isEmpty());
2256 alias.append(u'a').append(u'b').append(u'c');
2257 UnicodeString expected(u"abc");
2258 assertEquals("abc from writable alias", expected, alias);
2259
2260 UErrorCode errorCode = U_ZERO_ERROR;
2261 UnicodeString(u"def").extract(nullptr, 0, errorCode);
2262 assertEquals("buffer overflow extracting to nullptr", U_BUFFER_OVERFLOW_ERROR, errorCode);
2263 }
2264
TestUnicodeStringInsertAppendToSelf()2265 void UnicodeStringTest::TestUnicodeStringInsertAppendToSelf() {
2266 IcuTestErrorCode status(*this, "TestUnicodeStringAppendToSelf");
2267
2268 // Test append operation
2269 UnicodeString str(u"foo ");
2270 str.append(str);
2271 str.append(str);
2272 str.append(str);
2273 assertEquals("", u"foo foo foo foo foo foo foo foo ", str);
2274
2275 // Test append operation with readonly alias to start
2276 str = UnicodeString(TRUE, u"foo ", 4);
2277 str.append(str);
2278 str.append(str);
2279 str.append(str);
2280 assertEquals("", u"foo foo foo foo foo foo foo foo ", str);
2281
2282 // Test append operation with aliased substring
2283 str = u"abcde";
2284 UnicodeString sub = str.tempSubString(1, 2);
2285 str.append(sub);
2286 assertEquals("", u"abcdebc", str);
2287
2288 // Test append operation with double-aliased substring
2289 str = UnicodeString(TRUE, u"abcde", 5);
2290 sub = str.tempSubString(1, 2);
2291 str.append(sub);
2292 assertEquals("", u"abcdebc", str);
2293
2294 // Test insert operation
2295 str = u"a-*b";
2296 str.insert(2, str);
2297 str.insert(4, str);
2298 str.insert(8, str);
2299 assertEquals("", u"a-a-a-a-a-a-a-a-*b*b*b*b*b*b*b*b", str);
2300
2301 // Test insert operation with readonly alias to start
2302 str = UnicodeString(TRUE, u"a-*b", 4);
2303 str.insert(2, str);
2304 str.insert(4, str);
2305 str.insert(8, str);
2306 assertEquals("", u"a-a-a-a-a-a-a-a-*b*b*b*b*b*b*b*b", str);
2307
2308 // Test insert operation with aliased substring
2309 str = u"abcde";
2310 sub = str.tempSubString(1, 3);
2311 str.insert(2, sub);
2312 assertEquals("", u"abbcdcde", str);
2313
2314 // Test insert operation with double-aliased substring
2315 str = UnicodeString(TRUE, u"abcde", 5);
2316 sub = str.tempSubString(1, 3);
2317 str.insert(2, sub);
2318 assertEquals("", u"abbcdcde", str);
2319 }
2320
TestLargeAppend()2321 void UnicodeStringTest::TestLargeAppend() {
2322 if(quick) return;
2323
2324 IcuTestErrorCode status(*this, "TestLargeAppend");
2325 // Make a large UnicodeString
2326 int32_t len = 0xAFFFFFF;
2327 UnicodeString str;
2328 char16_t *buf = str.getBuffer(len);
2329 // A fast way to set buffer to valid Unicode.
2330 // 4E4E is a valid unicode character
2331 uprv_memset(buf, 0x4e, len * 2);
2332 str.releaseBuffer(len);
2333 UnicodeString dest;
2334 // Append it 16 times
2335 // 0xAFFFFFF times 16 is 0xA4FFFFF1,
2336 // which is greater than INT32_MAX, which is 0x7FFFFFFF.
2337 int64_t total = 0;
2338 for (int32_t i = 0; i < 16; i++) {
2339 dest.append(str);
2340 total += len;
2341 if (total <= INT32_MAX) {
2342 assertFalse("dest is not bogus", dest.isBogus());
2343 } else {
2344 assertTrue("dest should be bogus", dest.isBogus());
2345 }
2346 }
2347 dest.remove();
2348 total = 0;
2349 for (int32_t i = 0; i < 16; i++) {
2350 dest.append(str);
2351 total += len;
2352 if (total + len <= INT32_MAX) {
2353 assertFalse("dest is not bogus", dest.isBogus());
2354 } else if (total <= INT32_MAX) {
2355 // Check that a string of exactly the maximum size works
2356 UnicodeString str2;
2357 int32_t remain = INT32_MAX - total;
2358 char16_t *buf2 = str2.getBuffer(remain);
2359 if (buf2 == nullptr) {
2360 // if somehow memory allocation fail, return the test
2361 return;
2362 }
2363 uprv_memset(buf2, 0x4e, remain * 2);
2364 str2.releaseBuffer(remain);
2365 dest.append(str2);
2366 total += remain;
2367 assertEquals("When a string of exactly the maximum size works", (int64_t)INT32_MAX, total);
2368 assertEquals("When a string of exactly the maximum size works", INT32_MAX, dest.length());
2369 assertFalse("dest is not bogus", dest.isBogus());
2370
2371 // Check that a string size+1 goes bogus
2372 str2.truncate(1);
2373 dest.append(str2);
2374 total++;
2375 assertTrue("dest should be bogus", dest.isBogus());
2376 } else {
2377 assertTrue("dest should be bogus", dest.isBogus());
2378 }
2379 }
2380 }
2381