1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8
9 #include <utility>
10
11 #include "ustrtest.h"
12 #include "unicode/appendable.h"
13 #include "unicode/std_string.h"
14 #include "unicode/unistr.h"
15 #include "unicode/uchar.h"
16 #include "unicode/ustring.h"
17 #include "unicode/locid.h"
18 #include "unicode/strenum.h"
19 #include "unicode/ucnv.h"
20 #include "unicode/uenum.h"
21 #include "unicode/utf16.h"
22 #include "cmemory.h"
23 #include "charstr.h"
24
25 #if 0
26 #include "unicode/ustream.h"
27
28 #include <iostream>
29 using namespace std;
30
31 #endif
32
~UnicodeStringTest()33 UnicodeStringTest::~UnicodeStringTest() {}
34
35 extern IntlTest *createStringCaseTest();
36
runIndexedTest(int32_t index,UBool exec,const char * & name,char * par)37 void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char *par)
38 {
39 if (exec) logln("TestSuite UnicodeStringTest: ");
40 TESTCASE_AUTO_BEGIN;
41 TESTCASE_AUTO_CREATE_CLASS(StringCaseTest);
42 TESTCASE_AUTO(TestBasicManipulation);
43 TESTCASE_AUTO(TestCompare);
44 TESTCASE_AUTO(TestExtract);
45 TESTCASE_AUTO(TestRemoveReplace);
46 TESTCASE_AUTO(TestSearching);
47 TESTCASE_AUTO(TestSpacePadding);
48 TESTCASE_AUTO(TestPrefixAndSuffix);
49 TESTCASE_AUTO(TestFindAndReplace);
50 TESTCASE_AUTO(TestBogus);
51 TESTCASE_AUTO(TestReverse);
52 TESTCASE_AUTO(TestMiscellaneous);
53 TESTCASE_AUTO(TestStackAllocation);
54 TESTCASE_AUTO(TestUnescape);
55 TESTCASE_AUTO(TestCountChar32);
56 TESTCASE_AUTO(TestStringEnumeration);
57 TESTCASE_AUTO(TestNameSpace);
58 TESTCASE_AUTO(TestUTF32);
59 TESTCASE_AUTO(TestUTF8);
60 TESTCASE_AUTO(TestReadOnlyAlias);
61 TESTCASE_AUTO(TestAppendable);
62 TESTCASE_AUTO(TestUnicodeStringImplementsAppendable);
63 TESTCASE_AUTO(TestSizeofUnicodeString);
64 TESTCASE_AUTO(TestStartsWithAndEndsWithNulTerminated);
65 TESTCASE_AUTO(TestMoveSwap);
66 TESTCASE_AUTO(TestUInt16Pointers);
67 TESTCASE_AUTO(TestWCharPointers);
68 TESTCASE_AUTO(TestNullPointers);
69 TESTCASE_AUTO(TestUnicodeStringInsertAppendToSelf);
70 TESTCASE_AUTO(TestLargeAppend);
71 TESTCASE_AUTO_END;
72 }
73
74 void
TestBasicManipulation()75 UnicodeStringTest::TestBasicManipulation()
76 {
77 UnicodeString test1("Now is the time for all men to come swiftly to the aid of the party.\n");
78 UnicodeString expectedValue;
79 UnicodeString *c;
80
81 c=test1.clone();
82 test1.insert(24, "good ");
83 expectedValue = "Now is the time for all good men to come swiftly to the aid of the party.\n";
84 if (test1 != expectedValue)
85 errln("insert() failed: expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
86
87 c->insert(24, "good ");
88 if(*c != expectedValue) {
89 errln("clone()->insert() failed: expected \"" + expectedValue + "\"\n,got \"" + *c + "\"");
90 }
91 delete c;
92
93 test1.remove(41, 8);
94 expectedValue = "Now is the time for all good men to come to the aid of the party.\n";
95 if (test1 != expectedValue)
96 errln("remove() failed: expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
97
98 test1.replace(58, 6, "ir country");
99 expectedValue = "Now is the time for all good men to come to the aid of their country.\n";
100 if (test1 != expectedValue)
101 errln("replace() failed: expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
102
103 UChar temp[80];
104 test1.extract(0, 15, temp);
105
106 UnicodeString test2(temp, 15);
107
108 expectedValue = "Now is the time";
109 if (test2 != expectedValue)
110 errln("extract() failed: expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
111
112 test2 += " for me to go!\n";
113 expectedValue = "Now is the time for me to go!\n";
114 if (test2 != expectedValue)
115 errln("operator+=() failed: expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
116
117 if (test1.length() != 70)
118 errln(UnicodeString("length() failed: expected 70, got ") + test1.length());
119 if (test2.length() != 30)
120 errln(UnicodeString("length() failed: expected 30, got ") + test2.length());
121
122 UnicodeString test3;
123 test3.append((UChar32)0x20402);
124 if(test3 != CharsToUnicodeString("\\uD841\\uDC02")){
125 errln((UnicodeString)"append failed for UChar32, expected \"\\\\ud841\\\\udc02\", got " + prettify(test3));
126 }
127 if(test3.length() != 2){
128 errln(UnicodeString("append or length failed for UChar32, expected 2, got ") + test3.length());
129 }
130 test3.append((UChar32)0x0074);
131 if(test3 != CharsToUnicodeString("\\uD841\\uDC02t")){
132 errln((UnicodeString)"append failed for UChar32, expected \"\\\\uD841\\\\uDC02t\", got " + prettify(test3));
133 }
134 if(test3.length() != 3){
135 errln((UnicodeString)"append or length failed for UChar32, expected 2, got " + test3.length());
136 }
137
138 // test some UChar32 overloads
139 if( test3.setTo((UChar32)0x10330).length() != 2 ||
140 test3.insert(0, (UChar32)0x20100).length() != 4 ||
141 test3.replace(2, 2, (UChar32)0xe0061).length() != 4 ||
142 (test3 = (UChar32)0x14001).length() != 2
143 ) {
144 errln((UnicodeString)"simple UChar32 overloads for replace, insert, setTo or = failed");
145 }
146
147 {
148 // test moveIndex32()
149 UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
150
151 if(
152 s.moveIndex32(2, -1)!=0 ||
153 s.moveIndex32(2, 1)!=4 ||
154 s.moveIndex32(2, 2)!=5 ||
155 s.moveIndex32(5, -2)!=2 ||
156 s.moveIndex32(0, -1)!=0 ||
157 s.moveIndex32(6, 1)!=6
158 ) {
159 errln("UnicodeString::moveIndex32() failed");
160 }
161
162 if(s.getChar32Start(1)!=0 || s.getChar32Start(2)!=2) {
163 errln("UnicodeString::getChar32Start() failed");
164 }
165
166 if(s.getChar32Limit(1)!=2 || s.getChar32Limit(2)!=2) {
167 errln("UnicodeString::getChar32Limit() failed");
168 }
169 }
170
171 {
172 // test new 2.2 constructors and setTo function that parallel Java's substring function.
173 UnicodeString src("Hello folks how are you?");
174 UnicodeString target1("how are you?");
175 if (target1 != UnicodeString(src, 12)) {
176 errln("UnicodeString(const UnicodeString&, int32_t) failed");
177 }
178 UnicodeString target2("folks");
179 if (target2 != UnicodeString(src, 6, 5)) {
180 errln("UnicodeString(const UnicodeString&, int32_t, int32_t) failed");
181 }
182 if (target1 != target2.setTo(src, 12)) {
183 errln("UnicodeString::setTo(const UnicodeString&, int32_t) failed");
184 }
185 }
186
187 {
188 // op+ is new in ICU 2.8
189 UnicodeString s=UnicodeString("abc", "")+UnicodeString("def", "")+UnicodeString("ghi", "");
190 if(s!=UnicodeString("abcdefghi", "")) {
191 errln("operator+(UniStr, UniStr) failed");
192 }
193 }
194
195 {
196 // tests for Jitterbug 2360
197 // verify that APIs with source pointer + length accept length == -1
198 // mostly test only where modified, only few functions did not already do this
199 if(UnicodeString("abc", -1, "")!=UnicodeString("abc", "")) {
200 errln("UnicodeString(codepageData, dataLength, codepage) does not work with dataLength==-1");
201 }
202
203 UChar buffer[10]={ 0x61, 0x62, 0x20ac, 0xd900, 0xdc05, 0, 0x62, 0xffff, 0xdbff, 0xdfff };
204 UnicodeString s, t(buffer, -1, UPRV_LENGTHOF(buffer));
205
206 if(s.setTo(buffer, -1, UPRV_LENGTHOF(buffer)).length()!=u_strlen(buffer)) {
207 errln("UnicodeString.setTo(buffer, length, capacity) does not work with length==-1");
208 }
209 if(t.length()!=u_strlen(buffer)) {
210 errln("UnicodeString(buffer, length, capacity) does not work with length==-1");
211 }
212
213 if(0!=s.caseCompare(buffer, -1, U_FOLD_CASE_DEFAULT)) {
214 errln("UnicodeString.caseCompare(const UChar *, length, options) does not work with length==-1");
215 }
216 if(0!=s.caseCompare(0, s.length(), buffer, U_FOLD_CASE_DEFAULT)) {
217 errln("UnicodeString.caseCompare(start, _length, const UChar *, options) does not work");
218 }
219
220 buffer[u_strlen(buffer)]=0xe4;
221 UnicodeString u(buffer, -1, UPRV_LENGTHOF(buffer));
222 if(s.setTo(buffer, -1, UPRV_LENGTHOF(buffer)).length()!=UPRV_LENGTHOF(buffer)) {
223 errln("UnicodeString.setTo(buffer without NUL, length, capacity) does not work with length==-1");
224 }
225 if(u.length()!=UPRV_LENGTHOF(buffer)) {
226 errln("UnicodeString(buffer without NUL, length, capacity) does not work with length==-1");
227 }
228
229 static const char cs[]={ 0x61, (char)0xe4, (char)0x85, 0 };
230 UConverter *cnv;
231 UErrorCode errorCode=U_ZERO_ERROR;
232
233 cnv=ucnv_open("ISO-8859-1", &errorCode);
234 UnicodeString v(cs, -1, cnv, errorCode);
235 ucnv_close(cnv);
236 if(v!=CharsToUnicodeString("a\\xe4\\x85")) {
237 errln("UnicodeString(const char *, length, cnv, errorCode) does not work with length==-1");
238 }
239 }
240
241 #if U_CHARSET_IS_UTF8
242 {
243 // Test the hardcoded-UTF-8 UnicodeString optimizations.
244 static const uint8_t utf8[]={ 0x61, 0xC3, 0xA4, 0xC3, 0x9F, 0xE4, 0xB8, 0x80, 0 };
245 static const UChar utf16[]={ 0x61, 0xE4, 0xDF, 0x4E00 };
246 UnicodeString from8a = UnicodeString((const char *)utf8);
247 UnicodeString from8b = UnicodeString((const char *)utf8, (int32_t)sizeof(utf8)-1);
248 UnicodeString from16(FALSE, utf16, UPRV_LENGTHOF(utf16));
249 if(from8a != from16 || from8b != from16) {
250 errln("UnicodeString(const char * U_CHARSET_IS_UTF8) failed");
251 }
252 char buffer[16];
253 int32_t length8=from16.extract(0, 0x7fffffff, buffer, (uint32_t)sizeof(buffer));
254 if(length8!=((int32_t)sizeof(utf8)-1) || 0!=uprv_memcmp(buffer, utf8, sizeof(utf8))) {
255 errln("UnicodeString::extract(char * U_CHARSET_IS_UTF8) failed");
256 }
257 length8=from16.extract(1, 2, buffer, (uint32_t)sizeof(buffer));
258 if(length8!=4 || buffer[length8]!=0 || 0!=uprv_memcmp(buffer, utf8+1, length8)) {
259 errln("UnicodeString::extract(substring to char * U_CHARSET_IS_UTF8) failed");
260 }
261 }
262 #endif
263 }
264
265 void
TestCompare()266 UnicodeStringTest::TestCompare()
267 {
268 UnicodeString test1("this is a test");
269 UnicodeString test2("this is a test");
270 UnicodeString test3("this is a test of the emergency broadcast system");
271 UnicodeString test4("never say, \"this is a test\"!!");
272
273 UnicodeString test5((UChar)0x5000);
274 UnicodeString test6((UChar)0x5100);
275
276 UChar uniChars[] = { 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73,
277 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0 };
278 char chars[] = "this is a test";
279
280 // test operator== and operator!=
281 if (test1 != test2 || test1 == test3 || test1 == test4)
282 errln("operator== or operator!= failed");
283
284 // test operator> and operator<
285 if (test1 > test2 || test1 < test2 || !(test1 < test3) || !(test1 > test4) ||
286 !(test5 < test6)
287 ) {
288 errln("operator> or operator< failed");
289 }
290
291 // test operator>= and operator<=
292 if (!(test1 >= test2) || !(test1 <= test2) || !(test1 <= test3) || !(test1 >= test4))
293 errln("operator>= or operator<= failed");
294
295 // test compare(UnicodeString)
296 if (test1.compare(test2) != 0 || test1.compare(test3) >= 0 || test1.compare(test4) <= 0)
297 errln("compare(UnicodeString) failed");
298
299 //test compare(offset, length, UnicodeString)
300 if(test1.compare(0, 14, test2) != 0 ||
301 test3.compare(0, 14, test2) != 0 ||
302 test4.compare(12, 14, test2) != 0 ||
303 test3.compare(0, 18, test1) <=0 )
304 errln("compare(offset, length, UnicodeString) failes");
305
306 // test compare(UChar*)
307 if (test2.compare(uniChars) != 0 || test3.compare(uniChars) <= 0 || test4.compare(uniChars) >= 0)
308 errln("compare(UChar*) failed");
309
310 // test compare(char*)
311 if (test2.compare(chars) != 0 || test3.compare(chars) <= 0 || test4.compare(chars) >= 0)
312 errln("compare(char*) failed");
313
314 // test compare(UChar*, length)
315 if (test1.compare(uniChars, 4) <= 0 || test1.compare(uniChars, 4) <= 0)
316 errln("compare(UChar*, length) failed");
317
318 // test compare(thisOffset, thisLength, that, thatOffset, thatLength)
319 if (test1.compare(0, 14, test2, 0, 14) != 0
320 || test1.compare(0, 14, test3, 0, 14) != 0
321 || test1.compare(0, 14, test4, 12, 14) != 0)
322 errln("1. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
323
324 if (test1.compare(10, 4, test2, 0, 4) >= 0
325 || test1.compare(10, 4, test3, 22, 9) <= 0
326 || test1.compare(10, 4, test4, 22, 4) != 0)
327 errln("2. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
328
329 // test compareBetween
330 if (test1.compareBetween(0, 14, test2, 0, 14) != 0 || test1.compareBetween(0, 14, test3, 0, 14) != 0
331 || test1.compareBetween(0, 14, test4, 12, 26) != 0)
332 errln("compareBetween failed");
333
334 if (test1.compareBetween(10, 14, test2, 0, 4) >= 0 || test1.compareBetween(10, 14, test3, 22, 31) <= 0
335 || test1.compareBetween(10, 14, test4, 22, 26) != 0)
336 errln("compareBetween failed");
337
338 // test compare() etc. with strings that share a buffer but are not equal
339 test2=test1; // share the buffer, length() too large for the stackBuffer
340 test2.truncate(1); // change only the length, not the buffer
341 if( test1==test2 || test1<=test2 ||
342 test1.compare(test2)<=0 ||
343 test1.compareCodePointOrder(test2)<=0 ||
344 test1.compareCodePointOrder(0, INT32_MAX, test2)<=0 ||
345 test1.compareCodePointOrder(0, INT32_MAX, test2, 0, INT32_MAX)<=0 ||
346 test1.compareCodePointOrderBetween(0, INT32_MAX, test2, 0, INT32_MAX)<=0 ||
347 test1.caseCompare(test2, U_FOLD_CASE_DEFAULT)<=0
348 ) {
349 errln("UnicodeStrings that share a buffer but have different lengths compare as equal");
350 }
351
352 /* test compareCodePointOrder() */
353 {
354 /* these strings are in ascending order */
355 static const UChar strings[][4]={
356 { 0x61, 0 }, /* U+0061 */
357 { 0x20ac, 0xd801, 0 }, /* U+20ac U+d801 */
358 { 0x20ac, 0xd800, 0xdc00, 0 }, /* U+20ac U+10000 */
359 { 0xd800, 0 }, /* U+d800 */
360 { 0xd800, 0xff61, 0 }, /* U+d800 U+ff61 */
361 { 0xdfff, 0 }, /* U+dfff */
362 { 0xff61, 0xdfff, 0 }, /* U+ff61 U+dfff */
363 { 0xff61, 0xd800, 0xdc02, 0 }, /* U+ff61 U+10002 */
364 { 0xd800, 0xdc02, 0 }, /* U+10002 */
365 { 0xd84d, 0xdc56, 0 } /* U+23456 */
366 };
367 UnicodeString u[20]; // must be at least as long as strings[]
368 int32_t i;
369
370 for(i=0; i<UPRV_LENGTHOF(strings); ++i) {
371 u[i]=UnicodeString(TRUE, strings[i], -1);
372 }
373
374 for(i=0; i<UPRV_LENGTHOF(strings)-1; ++i) {
375 if(u[i].compareCodePointOrder(u[i+1])>=0 || u[i].compareCodePointOrder(0, INT32_MAX, u[i+1].getBuffer())>=0) {
376 errln("error: UnicodeString::compareCodePointOrder() fails for string %d and the following one\n", i);
377 }
378 }
379 }
380
381 /* test caseCompare() */
382 {
383 static const UChar
384 _mixed[]= { 0x61, 0x42, 0x131, 0x3a3, 0xdf, 0x130, 0x49, 0xfb03, 0xd93f, 0xdfff, 0 },
385 _otherDefault[]= { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x69, 0x307, 0x69, 0x46, 0x66, 0x49, 0xd93f, 0xdfff, 0 },
386 _otherExcludeSpecialI[]={ 0x41, 0x62, 0x131, 0x3c3, 0x53, 0x73, 0x69, 0x131, 0x66, 0x46, 0x69, 0xd93f, 0xdfff, 0 },
387 _different[]= { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x130, 0x49, 0x46, 0x66, 0x49, 0xd93f, 0xdffd, 0 };
388
389 UnicodeString
390 mixed(TRUE, _mixed, -1),
391 otherDefault(TRUE, _otherDefault, -1),
392 otherExcludeSpecialI(TRUE, _otherExcludeSpecialI, -1),
393 different(TRUE, _different, -1);
394
395 int8_t result;
396
397 /* test caseCompare() */
398 result=mixed.caseCompare(otherDefault, U_FOLD_CASE_DEFAULT);
399 if(result!=0 || 0!=mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_DEFAULT)) {
400 errln("error: mixed.caseCompare(other, default)=%ld instead of 0\n", result);
401 }
402 result=mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
403 if(result!=0) {
404 errln("error: mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=%ld instead of 0\n", result);
405 }
406 result=mixed.caseCompare(otherDefault, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
407 if(result==0 || 0==mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_EXCLUDE_SPECIAL_I)) {
408 errln("error: mixed.caseCompare(other, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=0 instead of !=0\n");
409 }
410
411 /* test caseCompare() */
412 result=mixed.caseCompare(different, U_FOLD_CASE_DEFAULT);
413 if(result<=0) {
414 errln("error: mixed.caseCompare(different, default)=%ld instead of positive\n", result);
415 }
416
417 /* test caseCompare() - include the folded sharp s (U+00df) with different lengths */
418 result=mixed.caseCompare(1, 4, different, 1, 5, U_FOLD_CASE_DEFAULT);
419 if(result!=0 || 0!=mixed.caseCompareBetween(1, 5, different, 1, 6, U_FOLD_CASE_DEFAULT)) {
420 errln("error: mixed.caseCompare(mixed, 1, 4, different, 1, 5, default)=%ld instead of 0\n", result);
421 }
422
423 /* test caseCompare() - stop in the middle of the sharp s (U+00df) */
424 result=mixed.caseCompare(1, 4, different, 1, 4, U_FOLD_CASE_DEFAULT);
425 if(result<=0) {
426 errln("error: mixed.caseCompare(1, 4, different, 1, 4, default)=%ld instead of positive\n", result);
427 }
428 }
429
430 // test that srcLength=-1 is handled in functions that
431 // take input const UChar */int32_t srcLength (j785)
432 {
433 static const UChar u[]={ 0x61, 0x308, 0x62, 0 };
434 UnicodeString s=UNICODE_STRING("a\\u0308b", 8).unescape();
435
436 if(s.compare(u, -1)!=0 || s.compare(0, 999, u, 0, -1)!=0) {
437 errln("error UnicodeString::compare(..., const UChar *, srcLength=-1) does not work");
438 }
439
440 if(s.compareCodePointOrder(u, -1)!=0 || s.compareCodePointOrder(0, 999, u, 0, -1)!=0) {
441 errln("error UnicodeString::compareCodePointOrder(..., const UChar *, srcLength=-1, ...) does not work");
442 }
443
444 if(s.caseCompare(u, -1, U_FOLD_CASE_DEFAULT)!=0 || s.caseCompare(0, 999, u, 0, -1, U_FOLD_CASE_DEFAULT)!=0) {
445 errln("error UnicodeString::caseCompare(..., const UChar *, srcLength=-1, ...) does not work");
446 }
447
448 if(s.indexOf(u, 1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0)!=1) {
449 errln("error UnicodeString::indexOf(const UChar *, srcLength=-1, ...) does not work");
450 }
451
452 if(s.lastIndexOf(u, 1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0)!=1) {
453 errln("error UnicodeString::lastIndexOf(const UChar *, srcLength=-1, ...) does not work");
454 }
455
456 UnicodeString s2, s3;
457 s2.replace(0, 0, u+1, -1);
458 s3.replace(0, 0, u, 1, -1);
459 if(s.compare(1, 999, s2)!=0 || s2!=s3) {
460 errln("error UnicodeString::replace(..., const UChar *, srcLength=-1, ...) does not work");
461 }
462 }
463 }
464
465 void
TestExtract()466 UnicodeStringTest::TestExtract()
467 {
468 UnicodeString test1("Now is the time for all good men to come to the aid of their country.", "");
469 UnicodeString test2;
470 UChar test3[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
471 char test4[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
472 UnicodeString test5;
473 char test6[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
474
475 test1.extract(11, 12, test2);
476 test1.extract(11, 12, test3);
477 if (test1.extract(11, 12, test4) != 12 || test4[12] != 0) {
478 errln("UnicodeString.extract(char *) failed to return the correct size of destination buffer.");
479 }
480
481 // test proper pinning in extractBetween()
482 test1.extractBetween(-3, 7, test5);
483 if(test5!=UNICODE_STRING("Now is ", 7)) {
484 errln("UnicodeString.extractBetween(-3, 7) did not pin properly.");
485 }
486
487 test1.extractBetween(11, 23, test5);
488 if (test1.extract(60, 71, test6) != 9) {
489 errln("UnicodeString.extract() failed to return the correct size of destination buffer for end of buffer.");
490 }
491 if (test1.extract(11, 12, test6) != 12) {
492 errln("UnicodeString.extract() failed to return the correct size of destination buffer.");
493 }
494
495 // convert test4 back to Unicode for comparison
496 UnicodeString test4b(test4, 12);
497
498 if (test1.extract(11, 12, (char *)NULL) != 12) {
499 errln("UnicodeString.extract(NULL) failed to return the correct size of destination buffer.");
500 }
501 if (test1.extract(11, -1, test6) != 0) {
502 errln("UnicodeString.extract(-1) failed to stop reading the string.");
503 }
504
505 for (int32_t i = 0; i < 12; i++) {
506 if (test1.charAt((int32_t)(11 + i)) != test2.charAt(i)) {
507 errln(UnicodeString("extracting into a UnicodeString failed at position ") + i);
508 break;
509 }
510 if (test1.charAt((int32_t)(11 + i)) != test3[i]) {
511 errln(UnicodeString("extracting into an array of UChar failed at position ") + i);
512 break;
513 }
514 if (((char)test1.charAt((int32_t)(11 + i))) != test4b.charAt(i)) {
515 errln(UnicodeString("extracting into an array of char failed at position ") + i);
516 break;
517 }
518 if (test1.charAt((int32_t)(11 + i)) != test5.charAt(i)) {
519 errln(UnicodeString("extracting with extractBetween failed at position ") + i);
520 break;
521 }
522 }
523
524 // test preflighting and overflows with invariant conversion
525 if (test1.extract(0, 10, (char *)NULL, "") != 10) {
526 errln("UnicodeString.extract(0, 10, (char *)NULL, \"\") != 10");
527 }
528
529 test4[2] = (char)0xff;
530 if (test1.extract(0, 10, test4, 2, "") != 10) {
531 errln("UnicodeString.extract(0, 10, test4, 2, \"\") != 10");
532 }
533 if (test4[2] != (char)0xff) {
534 errln("UnicodeString.extract(0, 10, test4, 2, \"\") overwrote test4[2]");
535 }
536
537 {
538 // test new, NUL-terminating extract() function
539 UnicodeString s("terminate", "");
540 UChar dest[20]={
541 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5,
542 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5
543 };
544 UErrorCode errorCode;
545 int32_t length;
546
547 errorCode=U_ZERO_ERROR;
548 length=s.extract((UChar *)NULL, 0, errorCode);
549 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) {
550 errln("UnicodeString.extract(NULL, 0)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)", length, s.length(), u_errorName(errorCode));
551 }
552
553 errorCode=U_ZERO_ERROR;
554 length=s.extract(dest, s.length()-1, errorCode);
555 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) {
556 errln("UnicodeString.extract(dest too short)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)",
557 length, u_errorName(errorCode), s.length());
558 }
559
560 errorCode=U_ZERO_ERROR;
561 length=s.extract(dest, s.length(), errorCode);
562 if(errorCode!=U_STRING_NOT_TERMINATED_WARNING || length!=s.length()) {
563 errln("UnicodeString.extract(dest just right without NUL)==%d (%s) expected %d (U_STRING_NOT_TERMINATED_WARNING)",
564 length, u_errorName(errorCode), s.length());
565 }
566 if(dest[length-1]!=s[length-1] || dest[length]!=0xa5) {
567 errln("UnicodeString.extract(dest just right without NUL) did not extract the string correctly");
568 }
569
570 errorCode=U_ZERO_ERROR;
571 length=s.extract(dest, s.length()+1, errorCode);
572 if(errorCode!=U_ZERO_ERROR || length!=s.length()) {
573 errln("UnicodeString.extract(dest large enough)==%d (%s) expected %d (U_ZERO_ERROR)",
574 length, u_errorName(errorCode), s.length());
575 }
576 if(dest[length-1]!=s[length-1] || dest[length]!=0 || dest[length+1]!=0xa5) {
577 errln("UnicodeString.extract(dest large enough) did not extract the string correctly");
578 }
579 }
580
581 {
582 // test new UConverter extract() and constructor
583 UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
584 char buffer[32];
585 static const char expect[]={
586 (char)0xf0, (char)0xaf, (char)0xa6, (char)0x99,
587 (char)0xf0, (char)0x9d, (char)0x85, (char)0x9f,
588 (char)0xc3, (char)0x84,
589 (char)0xe1, (char)0xbb, (char)0x90
590 };
591 UErrorCode errorCode=U_ZERO_ERROR;
592 UConverter *cnv=ucnv_open("UTF-8", &errorCode);
593 int32_t length;
594
595 if(U_SUCCESS(errorCode)) {
596 // test preflighting
597 if( (length=s.extract(NULL, 0, cnv, errorCode))!=13 ||
598 errorCode!=U_BUFFER_OVERFLOW_ERROR
599 ) {
600 errln("UnicodeString::extract(NULL, UConverter) preflighting failed (length=%ld, %s)",
601 length, u_errorName(errorCode));
602 }
603 errorCode=U_ZERO_ERROR;
604 if( (length=s.extract(buffer, 2, cnv, errorCode))!=13 ||
605 errorCode!=U_BUFFER_OVERFLOW_ERROR
606 ) {
607 errln("UnicodeString::extract(too small, UConverter) preflighting failed (length=%ld, %s)",
608 length, u_errorName(errorCode));
609 }
610
611 // try error cases
612 errorCode=U_ZERO_ERROR;
613 if( s.extract(NULL, 2, cnv, errorCode)==13 || U_SUCCESS(errorCode)) {
614 errln("UnicodeString::extract(UConverter) succeeded with an illegal destination");
615 }
616 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
617 if( s.extract(NULL, 0, cnv, errorCode)==13 || U_SUCCESS(errorCode)) {
618 errln("UnicodeString::extract(UConverter) succeeded with a previous error code");
619 }
620 errorCode=U_ZERO_ERROR;
621
622 // extract for real
623 if( (length=s.extract(buffer, sizeof(buffer), cnv, errorCode))!=13 ||
624 uprv_memcmp(buffer, expect, 13)!=0 ||
625 buffer[13]!=0 ||
626 U_FAILURE(errorCode)
627 ) {
628 errln("UnicodeString::extract(UConverter) conversion failed (length=%ld, %s)",
629 length, u_errorName(errorCode));
630 }
631 // Test again with just the converter name.
632 if( (length=s.extract(0, s.length(), buffer, sizeof(buffer), "UTF-8"))!=13 ||
633 uprv_memcmp(buffer, expect, 13)!=0 ||
634 buffer[13]!=0 ||
635 U_FAILURE(errorCode)
636 ) {
637 errln("UnicodeString::extract(\"UTF-8\") conversion failed (length=%ld, %s)",
638 length, u_errorName(errorCode));
639 }
640
641 // try the constructor
642 UnicodeString t(expect, sizeof(expect), cnv, errorCode);
643 if(U_FAILURE(errorCode) || s!=t) {
644 errln("UnicodeString(UConverter) conversion failed (%s)",
645 u_errorName(errorCode));
646 }
647
648 ucnv_close(cnv);
649 }
650 }
651 }
652
653 void
TestRemoveReplace()654 UnicodeStringTest::TestRemoveReplace()
655 {
656 UnicodeString test1("The rain in Spain stays mainly on the plain");
657 UnicodeString test2("eat SPAMburgers!");
658 UChar test3[] = { 0x53, 0x50, 0x41, 0x4d, 0x4d, 0 };
659 char test4[] = "SPAM";
660 UnicodeString& test5 = test1;
661
662 test1.replace(4, 4, test2, 4, 4);
663 test1.replace(12, 5, test3, 4);
664 test3[4] = 0;
665 test1.replace(17, 4, test3);
666 test1.replace(23, 4, test4);
667 test1.replaceBetween(37, 42, test2, 4, 8);
668
669 if (test1 != "The SPAM in SPAM SPAMs SPAMly on the SPAM")
670 errln("One of the replace methods failed:\n"
671 " expected \"The SPAM in SPAM SPAMs SPAMly on the SPAM\",\n"
672 " got \"" + test1 + "\"");
673
674 test1.remove(21, 1);
675 test1.removeBetween(26, 28);
676
677 if (test1 != "The SPAM in SPAM SPAM SPAM on the SPAM")
678 errln("One of the remove methods failed:\n"
679 " expected \"The SPAM in SPAM SPAM SPAM on the SPAM\",\n"
680 " got \"" + test1 + "\"");
681
682 for (int32_t i = 0; i < test1.length(); i++) {
683 if (test5[i] != 0x53 && test5[i] != 0x50 && test5[i] != 0x41 && test5[i] != 0x4d && test5[i] != 0x20) {
684 test1.setCharAt(i, 0x78);
685 }
686 }
687
688 if (test1 != "xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM")
689 errln("One of the remove methods failed:\n"
690 " expected \"xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM\",\n"
691 " got \"" + test1 + "\"");
692
693 test1.remove();
694 if (test1.length() != 0)
695 errln("Remove() failed: expected empty string, got \"" + test1 + "\"");
696 }
697
698 void
TestSearching()699 UnicodeStringTest::TestSearching()
700 {
701 UnicodeString test1("test test ttest tetest testesteststt");
702 UnicodeString test2("test");
703 UChar testChar = 0x74;
704
705 UChar32 testChar32 = 0x20402;
706 UChar testData[]={
707 // 0 1 2 3 4 5 6 7
708 0xd841, 0xdc02, 0x0071, 0xdc02, 0xd841, 0x0071, 0xd841, 0xdc02,
709
710 // 8 9 10 11 12 13 14 15
711 0x0071, 0x0072, 0xd841, 0xdc02, 0x0071, 0xd841, 0xdc02, 0x0071,
712
713 // 16 17 18 19
714 0xdc02, 0xd841, 0x0073, 0x0000
715 };
716 UnicodeString test3(testData);
717 UnicodeString test4(testChar32);
718
719 uint16_t occurrences = 0;
720 int32_t startPos = 0;
721 for ( ;
722 startPos != -1 && startPos < test1.length();
723 (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
724 ;
725 if (occurrences != 6)
726 errln(UnicodeString("indexOf failed: expected to find 6 occurrences, found ") + occurrences);
727
728 for ( occurrences = 0, startPos = 10;
729 startPos != -1 && startPos < test1.length();
730 (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
731 ;
732 if (occurrences != 4)
733 errln(UnicodeString("indexOf with starting offset failed: "
734 "expected to find 4 occurrences, found ") + occurrences);
735
736 int32_t endPos = 28;
737 for ( occurrences = 0, startPos = 5;
738 startPos != -1 && startPos < test1.length();
739 (startPos = test1.indexOf(test2, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
740 ;
741 if (occurrences != 4)
742 errln(UnicodeString("indexOf with starting and ending offsets failed: "
743 "expected to find 4 occurrences, found ") + occurrences);
744
745 //using UChar32 string
746 for ( startPos=0, occurrences=0;
747 startPos != -1 && startPos < test3.length();
748 (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
749 ;
750 if (occurrences != 4)
751 errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences);
752
753 for ( startPos=10, occurrences=0;
754 startPos != -1 && startPos < test3.length();
755 (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
756 ;
757 if (occurrences != 2)
758 errln(UnicodeString("indexOf failed: expected to find 2 occurrences, found ") + occurrences);
759 //---
760
761 for ( occurrences = 0, startPos = 0;
762 startPos != -1 && startPos < test1.length();
763 (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
764 ;
765 if (occurrences != 16)
766 errln(UnicodeString("indexOf with character failed: "
767 "expected to find 16 occurrences, found ") + occurrences);
768
769 for ( occurrences = 0, startPos = 10;
770 startPos != -1 && startPos < test1.length();
771 (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
772 ;
773 if (occurrences != 12)
774 errln(UnicodeString("indexOf with character & start offset failed: "
775 "expected to find 12 occurrences, found ") + occurrences);
776
777 for ( occurrences = 0, startPos = 5, endPos = 28;
778 startPos != -1 && startPos < test1.length();
779 (startPos = test1.indexOf(testChar, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
780 ;
781 if (occurrences != 10)
782 errln(UnicodeString("indexOf with character & start & end offsets failed: "
783 "expected to find 10 occurrences, found ") + occurrences);
784
785 //testing for UChar32
786 UnicodeString subString;
787 for( occurrences =0, startPos=0; startPos < test3.length(); startPos +=1){
788 subString.append(test3, startPos, test3.length());
789 if(subString.indexOf(testChar32) != -1 ){
790 ++occurrences;
791 }
792 subString.remove();
793 }
794 if (occurrences != 14)
795 errln((UnicodeString)"indexOf failed: expected to find 14 occurrences, found " + occurrences);
796
797 for ( occurrences = 0, startPos = 0;
798 startPos != -1 && startPos < test3.length();
799 (startPos = test3.indexOf(testChar32, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
800 ;
801 if (occurrences != 4)
802 errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences);
803
804 endPos=test3.length();
805 for ( occurrences = 0, startPos = 5;
806 startPos != -1 && startPos < test3.length();
807 (startPos = test3.indexOf(testChar32, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
808 ;
809 if (occurrences != 3)
810 errln((UnicodeString)"indexOf with character & start & end offsets failed: expected to find 2 occurrences, found " + occurrences);
811 //---
812
813 if(test1.lastIndexOf(test2)!=29) {
814 errln("test1.lastIndexOf(test2)!=29");
815 }
816
817 if(test1.lastIndexOf(test2, 15)!=29 || test1.lastIndexOf(test2, 29)!=29 || test1.lastIndexOf(test2, 30)!=-1) {
818 errln("test1.lastIndexOf(test2, start) failed");
819 }
820
821 for ( occurrences = 0, startPos = 32;
822 startPos != -1;
823 (startPos = test1.lastIndexOf(test2, 5, startPos - 5)) != -1 ? ++occurrences : 0)
824 ;
825 if (occurrences != 4)
826 errln(UnicodeString("lastIndexOf with starting and ending offsets failed: "
827 "expected to find 4 occurrences, found ") + occurrences);
828
829 for ( occurrences = 0, startPos = 32;
830 startPos != -1;
831 (startPos = test1.lastIndexOf(testChar, 5, startPos - 5)) != -1 ? ++occurrences : 0)
832 ;
833 if (occurrences != 11)
834 errln(UnicodeString("lastIndexOf with character & start & end offsets failed: "
835 "expected to find 11 occurrences, found ") + occurrences);
836
837 //testing UChar32
838 startPos=test3.length();
839 for ( occurrences = 0;
840 startPos != -1;
841 (startPos = test3.lastIndexOf(testChar32, 5, startPos - 5)) != -1 ? ++occurrences : 0)
842 ;
843 if (occurrences != 3)
844 errln((UnicodeString)"lastIndexOf with character & start & end offsets failed: expected to find 3 occurrences, found " + occurrences);
845
846
847 for ( occurrences = 0, endPos = test3.length(); endPos > 0; endPos -= 1){
848 subString.remove();
849 subString.append(test3, 0, endPos);
850 if(subString.lastIndexOf(testChar32) != -1 ){
851 ++occurrences;
852 }
853 }
854 if (occurrences != 18)
855 errln((UnicodeString)"indexOf failed: expected to find 18 occurrences, found " + occurrences);
856 //---
857
858 // test that indexOf(UChar32) and lastIndexOf(UChar32)
859 // do not find surrogate code points when they are part of matched pairs
860 // (= part of supplementary code points)
861 // Jitterbug 1542
862 if(test3.indexOf((UChar32)0xd841) != 4 || test3.indexOf((UChar32)0xdc02) != 3) {
863 errln("error: UnicodeString::indexOf(UChar32 surrogate) finds a partial supplementary code point");
864 }
865 if( UnicodeString(test3, 0, 17).lastIndexOf((UChar)0xd841, 0) != 4 ||
866 UnicodeString(test3, 0, 17).lastIndexOf((UChar32)0xd841, 2) != 4 ||
867 test3.lastIndexOf((UChar32)0xd841, 0, 17) != 4 || test3.lastIndexOf((UChar32)0xdc02, 0, 17) != 16
868 ) {
869 errln("error: UnicodeString::lastIndexOf(UChar32 surrogate) finds a partial supplementary code point");
870 }
871 }
872
873 void
TestSpacePadding()874 UnicodeStringTest::TestSpacePadding()
875 {
876 UnicodeString test1("hello");
877 UnicodeString test2(" there");
878 UnicodeString test3("Hi! How ya doin'? Beautiful day, isn't it?");
879 UnicodeString test4;
880 UBool returnVal;
881 UnicodeString expectedValue;
882
883 returnVal = test1.padLeading(15);
884 expectedValue = " hello";
885 if (returnVal == FALSE || test1 != expectedValue)
886 errln("padLeading() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
887
888 returnVal = test2.padTrailing(15);
889 expectedValue = " there ";
890 if (returnVal == FALSE || test2 != expectedValue)
891 errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
892
893 expectedValue = test3;
894 returnVal = test3.padTrailing(15);
895 if (returnVal == TRUE || test3 != expectedValue)
896 errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
897
898 expectedValue = "hello";
899 test4.setTo(test1).trim();
900
901 if (test4 != expectedValue || test1 == expectedValue || test4 != expectedValue)
902 errln("trim(UnicodeString&) failed");
903
904 test1.trim();
905 if (test1 != expectedValue)
906 errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
907
908 test2.trim();
909 expectedValue = "there";
910 if (test2 != expectedValue)
911 errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
912
913 test3.trim();
914 expectedValue = "Hi! How ya doin'? Beautiful day, isn't it?";
915 if (test3 != expectedValue)
916 errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
917
918 returnVal = test1.truncate(15);
919 expectedValue = "hello";
920 if (returnVal == TRUE || test1 != expectedValue)
921 errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
922
923 returnVal = test2.truncate(15);
924 expectedValue = "there";
925 if (returnVal == TRUE || test2 != expectedValue)
926 errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
927
928 returnVal = test3.truncate(15);
929 expectedValue = "Hi! How ya doi";
930 if (returnVal == FALSE || test3 != expectedValue)
931 errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
932 }
933
934 void
TestPrefixAndSuffix()935 UnicodeStringTest::TestPrefixAndSuffix()
936 {
937 UnicodeString test1("Now is the time for all good men to come to the aid of their country.");
938 UnicodeString test2("Now");
939 UnicodeString test3("country.");
940 UnicodeString test4("count");
941
942 if (!test1.startsWith(test2) || !test1.startsWith(test2, 0, test2.length())) {
943 errln("startsWith() failed: \"" + test2 + "\" should be a prefix of \"" + test1 + "\".");
944 }
945
946 if (test1.startsWith(test3) ||
947 test1.startsWith(test3.getBuffer(), test3.length()) ||
948 test1.startsWith(test3.getTerminatedBuffer(), 0, -1)
949 ) {
950 errln("startsWith() failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test1 + "\".");
951 }
952
953 if (test1.endsWith(test2)) {
954 errln("endsWith() failed: \"" + test2 + "\" shouldn't be a suffix of \"" + test1 + "\".");
955 }
956
957 if (!test1.endsWith(test3)) {
958 errln("endsWith(test3) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
959 }
960 if (!test1.endsWith(test3, 0, INT32_MAX)) {
961 errln("endsWith(test3, 0, INT32_MAX) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
962 }
963
964 if(!test1.endsWith(test3.getBuffer(), test3.length())) {
965 errln("endsWith(test3.getBuffer(), test3.length()) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
966 }
967 if(!test1.endsWith(test3.getTerminatedBuffer(), 0, -1)) {
968 errln("endsWith(test3.getTerminatedBuffer(), 0, -1) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
969 }
970
971 if (!test3.startsWith(test4)) {
972 errln("endsWith(test4) failed: \"" + test4 + "\" should be a prefix of \"" + test3 + "\".");
973 }
974
975 if (test4.startsWith(test3)) {
976 errln("startsWith(test3) failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test4 + "\".");
977 }
978 }
979
980 void
TestStartsWithAndEndsWithNulTerminated()981 UnicodeStringTest::TestStartsWithAndEndsWithNulTerminated() {
982 UnicodeString test("abcde");
983 const UChar ab[] = { 0x61, 0x62, 0 };
984 const UChar de[] = { 0x64, 0x65, 0 };
985 assertTrue("abcde.startsWith(ab, -1)", test.startsWith(ab, -1));
986 assertTrue("abcde.startsWith(ab, 0, -1)", test.startsWith(ab, 0, -1));
987 assertTrue("abcde.endsWith(de, -1)", test.endsWith(de, -1));
988 assertTrue("abcde.endsWith(de, 0, -1)", test.endsWith(de, 0, -1));
989 }
990
991 void
TestFindAndReplace()992 UnicodeStringTest::TestFindAndReplace()
993 {
994 UnicodeString test1("One potato, two potato, three potato, four\n");
995 UnicodeString test2("potato");
996 UnicodeString test3("MISSISSIPPI");
997
998 UnicodeString expectedValue;
999
1000 test1.findAndReplace(test2, test3);
1001 expectedValue = "One MISSISSIPPI, two MISSISSIPPI, three MISSISSIPPI, four\n";
1002 if (test1 != expectedValue)
1003 errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
1004 test1.findAndReplace(2, 32, test3, test2);
1005 expectedValue = "One potato, two potato, three MISSISSIPPI, four\n";
1006 if (test1 != expectedValue)
1007 errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
1008 }
1009
1010 void
TestReverse()1011 UnicodeStringTest::TestReverse()
1012 {
1013 UnicodeString test("backwards words say to used I");
1014
1015 test.reverse();
1016 test.reverse(2, 4);
1017 test.reverse(7, 2);
1018 test.reverse(10, 3);
1019 test.reverse(14, 5);
1020 test.reverse(20, 9);
1021
1022 if (test != "I used to say words backwards")
1023 errln("reverse() failed: Expected \"I used to say words backwards\",\n got \""
1024 + test + "\"");
1025
1026 test=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
1027 test.reverse();
1028 if(test.char32At(0)!=0x1ed0 || test.char32At(1)!=0xc4 || test.char32At(2)!=0x1d15f || test.char32At(4)!=0x2f999) {
1029 errln("reverse() failed with supplementary characters");
1030 }
1031
1032 // Test case for ticket #8091:
1033 // UnicodeString::reverse() failed to see a lead surrogate in the middle of
1034 // an odd-length string that contains no other lead surrogates.
1035 test=UNICODE_STRING_SIMPLE("ab\\U0001F4A9e").unescape();
1036 UnicodeString expected=UNICODE_STRING_SIMPLE("e\\U0001F4A9ba").unescape();
1037 test.reverse();
1038 if(test!=expected) {
1039 errln("reverse() failed with only lead surrogate in the middle");
1040 }
1041 }
1042
1043 void
TestMiscellaneous()1044 UnicodeStringTest::TestMiscellaneous()
1045 {
1046 UnicodeString test1("This is a test");
1047 UnicodeString test2("This is a test");
1048 UnicodeString test3("Me too!");
1049
1050 // test getBuffer(minCapacity) and releaseBuffer()
1051 test1=UnicodeString(); // make sure that it starts with its stackBuffer
1052 UChar *p=test1.getBuffer(20);
1053 if(test1.getCapacity()<20) {
1054 errln("UnicodeString::getBuffer(20).getCapacity()<20");
1055 }
1056
1057 test1.append((UChar)7); // must not be able to modify the string here
1058 test1.setCharAt(3, 7);
1059 test1.reverse();
1060 if( test1.length()!=0 ||
1061 test1.charAt(0)!=0xffff || test1.charAt(3)!=0xffff ||
1062 test1.getBuffer(10)!=0 || test1.getBuffer()!=0
1063 ) {
1064 errln("UnicodeString::getBuffer(minCapacity) allows read or write access to the UnicodeString");
1065 }
1066
1067 p[0]=1;
1068 p[1]=2;
1069 p[2]=3;
1070 test1.releaseBuffer(3);
1071 test1.append((UChar)4);
1072
1073 if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) {
1074 errln("UnicodeString::releaseBuffer(newLength) does not properly reallow access to the UnicodeString");
1075 }
1076
1077 // test releaseBuffer() without getBuffer(minCapacity) - must not have any effect
1078 test1.releaseBuffer(1);
1079 if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) {
1080 errln("UnicodeString::releaseBuffer(newLength) without getBuffer(minCapacity) changed the UnicodeString");
1081 }
1082
1083 // test getBuffer(const)
1084 const UChar *q=test1.getBuffer(), *r=test1.getBuffer();
1085 if( test1.length()!=4 ||
1086 q[0]!=1 || q[1]!=2 || q[2]!=3 || q[3]!=4 ||
1087 r[0]!=1 || r[1]!=2 || r[2]!=3 || r[3]!=4
1088 ) {
1089 errln("UnicodeString::getBuffer(const) does not return a usable buffer pointer");
1090 }
1091
1092 // test releaseBuffer() with a NUL-terminated buffer
1093 test1.getBuffer(20)[2]=0;
1094 test1.releaseBuffer(); // implicit -1
1095 if(test1.length()!=2 || test1.charAt(0)!=1 || test1.charAt(1) !=2) {
1096 errln("UnicodeString::releaseBuffer(-1) does not properly set the length of the UnicodeString");
1097 }
1098
1099 // test releaseBuffer() with a non-NUL-terminated buffer
1100 p=test1.getBuffer(256);
1101 for(int32_t i=0; i<test1.getCapacity(); ++i) {
1102 p[i]=(UChar)1; // fill the buffer with all non-NUL code units
1103 }
1104 test1.releaseBuffer(); // implicit -1
1105 if(test1.length()!=test1.getCapacity() || test1.charAt(1)!=1 || test1.charAt(100)!=1 || test1.charAt(test1.getCapacity()-1)!=1) {
1106 errln("UnicodeString::releaseBuffer(-1 but no NUL) does not properly set the length of the UnicodeString");
1107 }
1108
1109 // test getTerminatedBuffer()
1110 test1=UnicodeString("This is another test.", "");
1111 test2=UnicodeString("This is another test.", "");
1112 q=test1.getTerminatedBuffer();
1113 if(q[test1.length()]!=0 || test1!=test2 || test2.compare(q, -1)!=0) {
1114 errln("getTerminatedBuffer()[length]!=0");
1115 }
1116
1117 const UChar u[]={ 5, 6, 7, 8, 0 };
1118 test1.setTo(FALSE, u, 3);
1119 q=test1.getTerminatedBuffer();
1120 if(q==u || q[0]!=5 || q[1]!=6 || q[2]!=7 || q[3]!=0) {
1121 errln("UnicodeString(u[3]).getTerminatedBuffer() returns a bad buffer");
1122 }
1123
1124 test1.setTo(TRUE, u, -1);
1125 q=test1.getTerminatedBuffer();
1126 if(q!=u || test1.length()!=4 || q[3]!=8 || q[4]!=0) {
1127 errln("UnicodeString(u[-1]).getTerminatedBuffer() returns a bad buffer");
1128 }
1129
1130 // NOTE: Some compilers will optimize u"la" to point to the same static memory
1131 // as u" lila", offset by 3 code units
1132 test1=UnicodeString(TRUE, u"la", 2);
1133 test1.append(UnicodeString(TRUE, u" lila", 5).getTerminatedBuffer(), 0, -1);
1134 assertEquals("UnicodeString::append(const UChar *, start, length) failed",
1135 u"la lila", test1);
1136
1137 test1.insert(3, UnicodeString(TRUE, u"dudum ", 6), 0, INT32_MAX);
1138 assertEquals("UnicodeString::insert(start, const UniStr &, start, length) failed",
1139 u"la dudum lila", test1);
1140
1141 static const UChar ucs[]={ 0x68, 0x6d, 0x20, 0 };
1142 test1.insert(9, ucs, -1);
1143 assertEquals("UnicodeString::insert(start, const UChar *, length) failed",
1144 u"la dudum hm lila", test1);
1145
1146 test1.replace(9, 2, (UChar)0x2b);
1147 assertEquals("UnicodeString::replace(start, length, UChar) failed",
1148 u"la dudum + lila", test1);
1149
1150 if(test1.hasMetaData() || UnicodeString().hasMetaData()) {
1151 errln("UnicodeString::hasMetaData() returns TRUE");
1152 }
1153
1154 // test getTerminatedBuffer() on a truncated, shared, heap-allocated string
1155 test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
1156 test1.truncate(36); // ensure length()<getCapacity()
1157 test2=test1; // share the buffer
1158 test1.truncate(5);
1159 if(test1.length()!=5 || test1.getTerminatedBuffer()[5]!=0) {
1160 errln("UnicodeString(shared buffer).truncate() failed");
1161 }
1162 if(test2.length()!=36 || test2[5]!=0x66 || u_strlen(test2.getTerminatedBuffer())!=36) {
1163 errln("UnicodeString(shared buffer).truncate().getTerminatedBuffer() "
1164 "modified another copy of the string!");
1165 }
1166 test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
1167 test1.truncate(36); // ensure length()<getCapacity()
1168 test2=test1; // share the buffer
1169 test1.remove();
1170 if(test1.length()!=0 || test1.getTerminatedBuffer()[0]!=0) {
1171 errln("UnicodeString(shared buffer).remove() failed");
1172 }
1173 if(test2.length()!=36 || test2[0]!=0x61 || u_strlen(test2.getTerminatedBuffer())!=36) {
1174 errln("UnicodeString(shared buffer).remove().getTerminatedBuffer() "
1175 "modified another copy of the string!");
1176 }
1177
1178 // ticket #9740
1179 test1.setTo(TRUE, ucs, 3);
1180 assertEquals("length of read-only alias", 3, test1.length());
1181 test1.trim();
1182 assertEquals("length of read-only alias after trim()", 2, test1.length());
1183 assertEquals("length of terminated buffer of read-only alias + trim()",
1184 2, u_strlen(test1.getTerminatedBuffer()));
1185 }
1186
1187 void
TestStackAllocation()1188 UnicodeStringTest::TestStackAllocation()
1189 {
1190 UChar testString[] ={
1191 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x63, 0x72, 0x61, 0x7a, 0x79, 0x20, 0x74, 0x65, 0x73, 0x74, 0x2e, 0 };
1192 UChar guardWord = 0x4DED;
1193 UnicodeString* test = 0;
1194
1195 test = new UnicodeString(testString);
1196 if (*test != "This is a crazy test.")
1197 errln("Test string failed to initialize properly.");
1198 if (guardWord != 0x04DED)
1199 errln("Test string initialization overwrote guard word!");
1200
1201 test->insert(8, "only ");
1202 test->remove(15, 6);
1203 if (*test != "This is only a test.")
1204 errln("Manipulation of test string failed to work right.");
1205 if (guardWord != 0x4DED)
1206 errln("Manipulation of test string overwrote guard word!");
1207
1208 // we have to deinitialize and release the backing store by calling the destructor
1209 // explicitly, since we can't overload operator delete
1210 delete test;
1211
1212 UChar workingBuffer[] = {
1213 0x4e, 0x6f, 0x77, 0x20, 0x69, 0x73, 0x20, 0x74, 0x68, 0x65, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x20,
1214 0x66, 0x6f, 0x72, 0x20, 0x61, 0x6c, 0x6c, 0x20, 0x6d, 0x65, 0x6e, 0x20, 0x74, 0x6f, 0x20,
1215 0x63, 0x6f, 0x6d, 0x65, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1216 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1217 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1218 UChar guardWord2 = 0x4DED;
1219
1220 test = new UnicodeString(workingBuffer, 35, 100);
1221 if (*test != "Now is the time for all men to come")
1222 errln("Stack-allocated backing store failed to initialize correctly.");
1223 if (guardWord2 != 0x4DED)
1224 errln("Stack-allocated backing store overwrote guard word!");
1225
1226 test->insert(24, "good ");
1227 if (*test != "Now is the time for all good men to come")
1228 errln("insert() on stack-allocated UnicodeString didn't work right");
1229 if (guardWord2 != 0x4DED)
1230 errln("insert() on stack-allocated UnicodeString overwrote guard word!");
1231
1232 if (workingBuffer[24] != 0x67)
1233 errln("insert() on stack-allocated UnicodeString didn't affect backing store");
1234
1235 *test += " to the aid of their country.";
1236 if (*test != "Now is the time for all good men to come to the aid of their country.")
1237 errln("Stack-allocated UnicodeString overflow didn't work");
1238 if (guardWord2 != 0x4DED)
1239 errln("Stack-allocated UnicodeString overflow overwrote guard word!");
1240
1241 *test = "ha!";
1242 if (*test != "ha!")
1243 errln("Assignment to stack-allocated UnicodeString didn't work");
1244 if (workingBuffer[0] != 0x4e)
1245 errln("Change to UnicodeString after overflow are still affecting original buffer");
1246 if (guardWord2 != 0x4DED)
1247 errln("Change to UnicodeString after overflow overwrote guard word!");
1248
1249 // test read-only aliasing with setTo()
1250 workingBuffer[0] = 0x20ac;
1251 workingBuffer[1] = 0x125;
1252 workingBuffer[2] = 0;
1253 test->setTo(TRUE, workingBuffer, 2);
1254 if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x125) {
1255 errln("UnicodeString.setTo(readonly alias) does not alias correctly");
1256 }
1257
1258 UnicodeString *c=test->clone();
1259
1260 workingBuffer[1] = 0x109;
1261 if(test->charAt(1) != 0x109) {
1262 errln("UnicodeString.setTo(readonly alias) made a copy: did not see change in buffer");
1263 }
1264
1265 if(c->length() != 2 || c->charAt(1) != 0x125) {
1266 errln("clone(alias) did not copy the buffer");
1267 }
1268 delete c;
1269
1270 test->setTo(TRUE, workingBuffer, -1);
1271 if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x109) {
1272 errln("UnicodeString.setTo(readonly alias, length -1) does not alias correctly");
1273 }
1274
1275 test->setTo(FALSE, workingBuffer, -1);
1276 if(!test->isBogus()) {
1277 errln("UnicodeString.setTo(unterminated readonly alias, length -1) does not result in isBogus()");
1278 }
1279
1280 delete test;
1281
1282 test=new UnicodeString();
1283 UChar buffer[]={0x0061, 0x0062, 0x20ac, 0x0043, 0x0042, 0x0000};
1284 test->setTo(buffer, 4, 10);
1285 if(test->length() !=4 || test->charAt(0) != 0x0061 || test->charAt(1) != 0x0062 ||
1286 test->charAt(2) != 0x20ac || test->charAt(3) != 0x0043){
1287 errln((UnicodeString)"UnicodeString.setTo(UChar*, length, capacity) does not work correctly\n" + prettify(*test));
1288 }
1289 delete test;
1290
1291
1292 // test the UChar32 constructor
1293 UnicodeString c32Test((UChar32)0x10ff2a);
1294 if( c32Test.length() != U16_LENGTH(0x10ff2a) ||
1295 c32Test.char32At(c32Test.length() - 1) != 0x10ff2a
1296 ) {
1297 errln("The UnicodeString(UChar32) constructor does not work with a 0x10ff2a filler");
1298 }
1299
1300 // test the (new) capacity constructor
1301 UnicodeString capTest(5, (UChar32)0x2a, 5);
1302 if( capTest.length() != 5 * U16_LENGTH(0x2a) ||
1303 capTest.char32At(0) != 0x2a ||
1304 capTest.char32At(4) != 0x2a
1305 ) {
1306 errln("The UnicodeString capacity constructor does not work with an ASCII filler");
1307 }
1308
1309 capTest = UnicodeString(5, (UChar32)0x10ff2a, 5);
1310 if( capTest.length() != 5 * U16_LENGTH(0x10ff2a) ||
1311 capTest.char32At(0) != 0x10ff2a ||
1312 capTest.char32At(4) != 0x10ff2a
1313 ) {
1314 errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
1315 }
1316
1317 capTest = UnicodeString(5, (UChar32)0, 0);
1318 if(capTest.length() != 0) {
1319 errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
1320 }
1321 }
1322
1323 /**
1324 * Test the unescape() function.
1325 */
TestUnescape(void)1326 void UnicodeStringTest::TestUnescape(void) {
1327 UnicodeString IN("abc\\u4567 \\n\\r \\U00101234xyz\\x1\\x{5289}\\x1b", -1, US_INV);
1328 UnicodeString OUT("abc");
1329 OUT.append((UChar)0x4567);
1330 OUT.append(" ");
1331 OUT.append((UChar)0xA);
1332 OUT.append((UChar)0xD);
1333 OUT.append(" ");
1334 OUT.append((UChar32)0x00101234);
1335 OUT.append("xyz");
1336 OUT.append((UChar32)1).append((UChar32)0x5289).append((UChar)0x1b);
1337 UnicodeString result = IN.unescape();
1338 if (result != OUT) {
1339 errln("FAIL: " + prettify(IN) + ".unescape() -> " +
1340 prettify(result) + ", expected " +
1341 prettify(OUT));
1342 }
1343
1344 // test that an empty string is returned in case of an error
1345 if (!UNICODE_STRING("wrong \\u sequence", 17).unescape().isEmpty()) {
1346 errln("FAIL: unescaping of a string with an illegal escape sequence did not return an empty string");
1347 }
1348 }
1349
1350 /* test code point counting functions --------------------------------------- */
1351
1352 /* reference implementation of UnicodeString::hasMoreChar32Than() */
1353 static int32_t
_refUnicodeStringHasMoreChar32Than(const UnicodeString & s,int32_t start,int32_t length,int32_t number)1354 _refUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) {
1355 int32_t count=s.countChar32(start, length);
1356 return count>number;
1357 }
1358
1359 /* compare the real function against the reference */
1360 void
_testUnicodeStringHasMoreChar32Than(const UnicodeString & s,int32_t start,int32_t length,int32_t number)1361 UnicodeStringTest::_testUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) {
1362 if(s.hasMoreChar32Than(start, length, number)!=_refUnicodeStringHasMoreChar32Than(s, start, length, number)) {
1363 errln("hasMoreChar32Than(%d, %d, %d)=%hd is wrong\n",
1364 start, length, number, s.hasMoreChar32Than(start, length, number));
1365 }
1366 }
1367
1368 void
TestCountChar32(void)1369 UnicodeStringTest::TestCountChar32(void) {
1370 {
1371 UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
1372
1373 // test countChar32()
1374 // note that this also calls and tests u_countChar32(length>=0)
1375 if(
1376 s.countChar32()!=4 ||
1377 s.countChar32(1)!=4 ||
1378 s.countChar32(2)!=3 ||
1379 s.countChar32(2, 3)!=2 ||
1380 s.countChar32(2, 0)!=0
1381 ) {
1382 errln("UnicodeString::countChar32() failed");
1383 }
1384
1385 // NUL-terminate the string buffer and test u_countChar32(length=-1)
1386 const UChar *buffer=s.getTerminatedBuffer();
1387 if(
1388 u_countChar32(buffer, -1)!=4 ||
1389 u_countChar32(buffer+1, -1)!=4 ||
1390 u_countChar32(buffer+2, -1)!=3 ||
1391 u_countChar32(buffer+3, -1)!=3 ||
1392 u_countChar32(buffer+4, -1)!=2 ||
1393 u_countChar32(buffer+5, -1)!=1 ||
1394 u_countChar32(buffer+6, -1)!=0
1395 ) {
1396 errln("u_countChar32(length=-1) failed");
1397 }
1398
1399 // test u_countChar32() with bad input
1400 if(u_countChar32(NULL, 5)!=0 || u_countChar32(buffer, -2)!=0) {
1401 errln("u_countChar32(bad input) failed (returned non-zero counts)");
1402 }
1403 }
1404
1405 /* test data and variables for hasMoreChar32Than() */
1406 static const UChar str[]={
1407 0x61, 0x62, 0xd800, 0xdc00,
1408 0xd801, 0xdc01, 0x63, 0xd802,
1409 0x64, 0xdc03, 0x65, 0x66,
1410 0xd804, 0xdc04, 0xd805, 0xdc05,
1411 0x67
1412 };
1413 UnicodeString string(str, UPRV_LENGTHOF(str));
1414 int32_t start, length, number;
1415
1416 /* test hasMoreChar32Than() */
1417 for(length=string.length(); length>=0; --length) {
1418 for(start=0; start<=length; ++start) {
1419 for(number=-1; number<=((length-start)+2); ++number) {
1420 _testUnicodeStringHasMoreChar32Than(string, start, length-start, number);
1421 }
1422 }
1423 }
1424
1425 /* test hasMoreChar32Than() with pinning */
1426 for(start=-1; start<=string.length()+1; ++start) {
1427 for(number=-1; number<=((string.length()-start)+2); ++number) {
1428 _testUnicodeStringHasMoreChar32Than(string, start, 0x7fffffff, number);
1429 }
1430 }
1431
1432 /* test hasMoreChar32Than() with a bogus string */
1433 string.setToBogus();
1434 for(length=-1; length<=1; ++length) {
1435 for(start=-1; start<=length; ++start) {
1436 for(number=-1; number<=((length-start)+2); ++number) {
1437 _testUnicodeStringHasMoreChar32Than(string, start, length-start, number);
1438 }
1439 }
1440 }
1441 }
1442
1443 void
TestBogus()1444 UnicodeStringTest::TestBogus() {
1445 UnicodeString test1("This is a test");
1446 UnicodeString test2("This is a test");
1447 UnicodeString test3("Me too!");
1448
1449 // test isBogus() and setToBogus()
1450 if (test1.isBogus() || test2.isBogus() || test3.isBogus()) {
1451 errln("A string returned TRUE for isBogus()!");
1452 }
1453
1454 // NULL pointers are treated like empty strings
1455 // use other illegal arguments to make a bogus string
1456 test3.setTo(FALSE, test1.getBuffer(), -2);
1457 if(!test3.isBogus()) {
1458 errln("A bogus string returned FALSE for isBogus()!");
1459 }
1460 if (test1.hashCode() != test2.hashCode() || test1.hashCode() == test3.hashCode()) {
1461 errln("hashCode() failed");
1462 }
1463 if(test3.getBuffer()!=0 || test3.getBuffer(20)!=0 || test3.getTerminatedBuffer()!=0) {
1464 errln("bogus.getBuffer()!=0");
1465 }
1466 if (test1.indexOf(test3) != -1) {
1467 errln("bogus.indexOf() != -1");
1468 }
1469 if (test1.lastIndexOf(test3) != -1) {
1470 errln("bogus.lastIndexOf() != -1");
1471 }
1472 if (test1.caseCompare(test3, U_FOLD_CASE_DEFAULT) != 1 || test3.caseCompare(test1, U_FOLD_CASE_DEFAULT) != -1) {
1473 errln("caseCompare() doesn't work with bogus strings");
1474 }
1475 if (test1.compareCodePointOrder(test3) != 1 || test3.compareCodePointOrder(test1) != -1) {
1476 errln("compareCodePointOrder() doesn't work with bogus strings");
1477 }
1478
1479 // verify that non-assignment modifications fail and do not revive a bogus string
1480 test3.setToBogus();
1481 test3.append((UChar)0x61);
1482 if(!test3.isBogus() || test3.getBuffer()!=0) {
1483 errln("bogus.append('a') worked but must not");
1484 }
1485
1486 test3.setToBogus();
1487 test3.findAndReplace(UnicodeString((UChar)0x61), test2);
1488 if(!test3.isBogus() || test3.getBuffer()!=0) {
1489 errln("bogus.findAndReplace() worked but must not");
1490 }
1491
1492 test3.setToBogus();
1493 test3.trim();
1494 if(!test3.isBogus() || test3.getBuffer()!=0) {
1495 errln("bogus.trim() revived bogus but must not");
1496 }
1497
1498 test3.setToBogus();
1499 test3.remove(1);
1500 if(!test3.isBogus() || test3.getBuffer()!=0) {
1501 errln("bogus.remove(1) revived bogus but must not");
1502 }
1503
1504 test3.setToBogus();
1505 if(!test3.setCharAt(0, 0x62).isBogus() || !test3.isEmpty()) {
1506 errln("bogus.setCharAt(0, 'b') worked but must not");
1507 }
1508
1509 test3.setToBogus();
1510 if(test3.truncate(1) || !test3.isBogus() || !test3.isEmpty()) {
1511 errln("bogus.truncate(1) revived bogus but must not");
1512 }
1513
1514 // verify that assignments revive a bogus string
1515 test3.setToBogus();
1516 if(!test3.isBogus() || (test3=test1).isBogus() || test3!=test1) {
1517 errln("bogus.operator=() failed");
1518 }
1519
1520 test3.setToBogus();
1521 if(!test3.isBogus() || test3.fastCopyFrom(test1).isBogus() || test3!=test1) {
1522 errln("bogus.fastCopyFrom() failed");
1523 }
1524
1525 test3.setToBogus();
1526 if(!test3.isBogus() || test3.setTo(test1).isBogus() || test3!=test1) {
1527 errln("bogus.setTo(UniStr) failed");
1528 }
1529
1530 test3.setToBogus();
1531 if(!test3.isBogus() || test3.setTo(test1, 0).isBogus() || test3!=test1) {
1532 errln("bogus.setTo(UniStr, 0) failed");
1533 }
1534
1535 test3.setToBogus();
1536 if(!test3.isBogus() || test3.setTo(test1, 0, 0x7fffffff).isBogus() || test3!=test1) {
1537 errln("bogus.setTo(UniStr, 0, len) failed");
1538 }
1539
1540 test3.setToBogus();
1541 if(!test3.isBogus() || test3.setTo(test1.getBuffer(), test1.length()).isBogus() || test3!=test1) {
1542 errln("bogus.setTo(const UChar *, len) failed");
1543 }
1544
1545 test3.setToBogus();
1546 if(!test3.isBogus() || test3.setTo((UChar)0x2028).isBogus() || test3!=UnicodeString((UChar)0x2028)) {
1547 errln("bogus.setTo(UChar) failed");
1548 }
1549
1550 test3.setToBogus();
1551 if(!test3.isBogus() || test3.setTo((UChar32)0x1d157).isBogus() || test3!=UnicodeString((UChar32)0x1d157)) {
1552 errln("bogus.setTo(UChar32) failed");
1553 }
1554
1555 test3.setToBogus();
1556 if(!test3.isBogus() || test3.setTo(FALSE, test1.getBuffer(), test1.length()).isBogus() || test3!=test1) {
1557 errln("bogus.setTo(readonly alias) failed");
1558 }
1559
1560 // writable alias to another string's buffer: very bad idea, just convenient for this test
1561 test3.setToBogus();
1562 if(!test3.isBogus() ||
1563 test3.setTo(const_cast<UChar *>(test1.getBuffer()),
1564 test1.length(), test1.getCapacity()).isBogus() ||
1565 test3!=test1) {
1566 errln("bogus.setTo(writable alias) failed");
1567 }
1568
1569 // verify simple, documented ways to turn a bogus string into an empty one
1570 test3.setToBogus();
1571 if(!test3.isBogus() || (test3=UnicodeString()).isBogus() || !test3.isEmpty()) {
1572 errln("bogus.operator=(UnicodeString()) failed");
1573 }
1574
1575 test3.setToBogus();
1576 if(!test3.isBogus() || test3.setTo(UnicodeString()).isBogus() || !test3.isEmpty()) {
1577 errln("bogus.setTo(UnicodeString()) failed");
1578 }
1579
1580 test3.setToBogus();
1581 if(test3.remove().isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) {
1582 errln("bogus.remove() failed");
1583 }
1584
1585 test3.setToBogus();
1586 if(test3.remove(0, INT32_MAX).isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) {
1587 errln("bogus.remove(0, INT32_MAX) failed");
1588 }
1589
1590 test3.setToBogus();
1591 if(test3.truncate(0) || test3.isBogus() || !test3.isEmpty()) {
1592 errln("bogus.truncate(0) failed");
1593 }
1594
1595 test3.setToBogus();
1596 if(!test3.isBogus() || test3.setTo((UChar32)-1).isBogus() || !test3.isEmpty()) {
1597 errln("bogus.setTo((UChar32)-1) failed");
1598 }
1599
1600 static const UChar nul=0;
1601
1602 test3.setToBogus();
1603 if(!test3.isBogus() || test3.setTo(&nul, 0).isBogus() || !test3.isEmpty()) {
1604 errln("bogus.setTo(&nul, 0) failed");
1605 }
1606
1607 test3.setToBogus();
1608 if(!test3.isBogus() || test3.getBuffer()!=0) {
1609 errln("setToBogus() failed to make a string bogus");
1610 }
1611
1612 test3.setToBogus();
1613 if(test1.isBogus() || !(test1=test3).isBogus()) {
1614 errln("normal=bogus failed to make the left string bogus");
1615 }
1616
1617 // test that NULL primitive input string values are treated like
1618 // empty strings, not errors (bogus)
1619 test2.setTo((UChar32)0x10005);
1620 if(test2.insert(1, nullptr, 1).length()!=2) {
1621 errln("UniStr.insert(...nullptr...) should not modify the string but does");
1622 }
1623
1624 UErrorCode errorCode=U_ZERO_ERROR;
1625 UnicodeString
1626 test4((const UChar *)NULL),
1627 test5(TRUE, (const UChar *)NULL, 1),
1628 test6((UChar *)NULL, 5, 5),
1629 test7((const char *)NULL, 3, NULL, errorCode);
1630 if(test4.isBogus() || test5.isBogus() || test6.isBogus() || test7.isBogus()) {
1631 errln("a constructor set to bogus for a NULL input string, should be empty");
1632 }
1633
1634 test4.setTo(NULL, 3);
1635 test5.setTo(TRUE, (const UChar *)NULL, 1);
1636 test6.setTo((UChar *)NULL, 5, 5);
1637 if(test4.isBogus() || test5.isBogus() || test6.isBogus()) {
1638 errln("a setTo() set to bogus for a NULL input string, should be empty");
1639 }
1640
1641 // test that bogus==bogus<any
1642 if(test1!=test3 || test1.compare(test3)!=0) {
1643 errln("bogus==bogus failed");
1644 }
1645
1646 test2.remove();
1647 if(test1>=test2 || !(test2>test1) || test1.compare(test2)>=0 || !(test2.compare(test1)>0)) {
1648 errln("bogus<empty failed");
1649 }
1650 }
1651
1652 // StringEnumeration ------------------------------------------------------- ***
1653 // most of StringEnumeration is tested elsewhere
1654 // this test improves code coverage
1655
// Fixture strings handed out, in this order, by TestEnumeration below.
// The last two entries are long on purpose: as the fourth one says, they help
// to test buffer limits.
static const char *const testEnumStrings[] = {
    "a",
    "b",
    "c",
    "this is a long string which helps us test some buffer limits",
    "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"
};
1664
1665 class TestEnumeration : public StringEnumeration {
1666 public:
TestEnumeration()1667 TestEnumeration() : i(0) {}
1668
count(UErrorCode &) const1669 virtual int32_t count(UErrorCode& /*status*/) const {
1670 return UPRV_LENGTHOF(testEnumStrings);
1671 }
1672
snext(UErrorCode & status)1673 virtual const UnicodeString *snext(UErrorCode &status) {
1674 if(U_SUCCESS(status) && i<UPRV_LENGTHOF(testEnumStrings)) {
1675 unistr=UnicodeString(testEnumStrings[i++], "");
1676 return &unistr;
1677 }
1678
1679 return NULL;
1680 }
1681
reset(UErrorCode &)1682 virtual void reset(UErrorCode& /*status*/) {
1683 i=0;
1684 }
1685
getStaticClassID()1686 static inline UClassID getStaticClassID() {
1687 return (UClassID)&fgClassID;
1688 }
getDynamicClassID() const1689 virtual UClassID getDynamicClassID() const {
1690 return getStaticClassID();
1691 }
1692
1693 private:
1694 static const char fgClassID;
1695
1696 int32_t i;
1697 };
1698
1699 const char TestEnumeration::fgClassID=0;
1700
1701 void
TestStringEnumeration()1702 UnicodeStringTest::TestStringEnumeration() {
1703 UnicodeString s;
1704 TestEnumeration ten;
1705 int32_t i, length;
1706 UErrorCode status;
1707
1708 const UChar *pu;
1709 const char *pc;
1710
1711 // test the next() default implementation and ensureCharsCapacity()
1712 for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1713 status=U_ZERO_ERROR;
1714 pc=ten.next(&length, status);
1715 s=UnicodeString(testEnumStrings[i], "");
1716 if(U_FAILURE(status) || pc==NULL || length!=s.length() || UnicodeString(pc, length, "")!=s) {
1717 errln("StringEnumeration.next(%d) failed", i);
1718 }
1719 }
1720 status=U_ZERO_ERROR;
1721 if(ten.next(&length, status)!=NULL) {
1722 errln("StringEnumeration.next(done)!=NULL");
1723 }
1724
1725 // test the unext() default implementation
1726 ten.reset(status);
1727 for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1728 status=U_ZERO_ERROR;
1729 pu=ten.unext(&length, status);
1730 s=UnicodeString(testEnumStrings[i], "");
1731 if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(TRUE, pu, length)!=s) {
1732 errln("StringEnumeration.unext(%d) failed", i);
1733 }
1734 }
1735 status=U_ZERO_ERROR;
1736 if(ten.unext(&length, status)!=NULL) {
1737 errln("StringEnumeration.unext(done)!=NULL");
1738 }
1739
1740 // test that the default clone() implementation works, and returns NULL
1741 if(ten.clone()!=NULL) {
1742 errln("StringEnumeration.clone()!=NULL");
1743 }
1744
1745 // test that uenum_openFromStringEnumeration() works
1746 // Need a heap allocated string enumeration because it is adopted by the UEnumeration.
1747 StringEnumeration *newTen = new TestEnumeration;
1748 status=U_ZERO_ERROR;
1749 UEnumeration *uten = uenum_openFromStringEnumeration(newTen, &status);
1750 if (uten==NULL || U_FAILURE(status)) {
1751 errln("fail at file %s, line %d, UErrorCode is %s\n", __FILE__, __LINE__, u_errorName(status));
1752 return;
1753 }
1754
1755 // test uenum_next()
1756 for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1757 status=U_ZERO_ERROR;
1758 pc=uenum_next(uten, &length, &status);
1759 if(U_FAILURE(status) || pc==NULL || strcmp(pc, testEnumStrings[i]) != 0) {
1760 errln("File %s, line %d, StringEnumeration.next(%d) failed", __FILE__, __LINE__, i);
1761 }
1762 }
1763 status=U_ZERO_ERROR;
1764 if(uenum_next(uten, &length, &status)!=NULL) {
1765 errln("File %s, line %d, uenum_next(done)!=NULL");
1766 }
1767
1768 // test the uenum_unext()
1769 uenum_reset(uten, &status);
1770 for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1771 status=U_ZERO_ERROR;
1772 pu=uenum_unext(uten, &length, &status);
1773 s=UnicodeString(testEnumStrings[i], "");
1774 if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(TRUE, pu, length)!=s) {
1775 errln("File %s, Line %d, uenum_unext(%d) failed", __FILE__, __LINE__, i);
1776 }
1777 }
1778 status=U_ZERO_ERROR;
1779 if(uenum_unext(uten, &length, &status)!=NULL) {
1780 errln("File %s, Line %d, uenum_unext(done)!=NULL" __FILE__, __LINE__);
1781 }
1782
1783 uenum_close(uten);
1784 }
1785
1786 /*
1787 * Namespace test, to make sure that macros like UNICODE_STRING include the
1788 * namespace qualifier.
1789 *
1790 * Define a (bogus) UnicodeString class in another namespace and check for ambiguity.
1791 */
1792 namespace bogus {
1793 class UnicodeString {
1794 public:
1795 enum EInvariant { kInvariant };
UnicodeString()1796 UnicodeString() : i(1) {}
UnicodeString(UBool,const UChar *,int32_t textLength)1797 UnicodeString(UBool /*isTerminated*/, const UChar * /*text*/, int32_t textLength) : i(textLength) {(void)i;}
UnicodeString(const char *,int32_t length,enum EInvariant)1798 UnicodeString(const char * /*src*/, int32_t length, enum EInvariant /*inv*/
1799 ) : i(length) {}
1800 private:
1801 int32_t i;
1802 };
1803 }
1804
1805 void
TestNameSpace()1806 UnicodeStringTest::TestNameSpace() {
1807 // Provoke name collision unless the UnicodeString macros properly
1808 // qualify the icu::UnicodeString class.
1809 using namespace bogus;
1810
1811 // Use all UnicodeString macros from unistr.h.
1812 icu::UnicodeString s1=icu::UnicodeString("abc", 3, US_INV);
1813 icu::UnicodeString s2=UNICODE_STRING("def", 3);
1814 icu::UnicodeString s3=UNICODE_STRING_SIMPLE("ghi");
1815
1816 // Make sure the compiler does not optimize away instantiation of s1, s2, s3.
1817 icu::UnicodeString s4=s1+s2+s3;
1818 if(s4.length()!=9) {
1819 errln("Something wrong with UnicodeString::operator+().");
1820 }
1821 }
1822
1823 void
TestUTF32()1824 UnicodeStringTest::TestUTF32() {
1825 // Input string length US_STACKBUF_SIZE to cause overflow of the
1826 // initially chosen fStackBuffer due to supplementary characters.
1827 static const UChar32 utf32[] = {
1828 0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a,
1829 0x10000, 0x20000, 0xe0000, 0x10ffff
1830 };
1831 static const UChar expected_utf16[] = {
1832 0x41, 0xfffd, 0x61, 0xfffd, 0xfffd, 0xfffd, 0x5a, 0xd900, 0xdc00, 0x7a,
1833 0xd800, 0xdc00, 0xd840, 0xdc00, 0xdb40, 0xdc00, 0xdbff, 0xdfff
1834 };
1835 UnicodeString from32 = UnicodeString::fromUTF32(utf32, UPRV_LENGTHOF(utf32));
1836 UnicodeString expected(FALSE, expected_utf16, UPRV_LENGTHOF(expected_utf16));
1837 if(from32 != expected) {
1838 errln("UnicodeString::fromUTF32() did not create the expected string.");
1839 }
1840
1841 static const UChar utf16[] = {
1842 0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
1843 };
1844 static const UChar32 expected_utf32[] = {
1845 0x41, 0xfffd, 0x61, 0xfffd, 0x5a, 0x50000, 0x7a, 0x10000, 0x10ffff
1846 };
1847 UChar32 result32[16];
1848 UErrorCode errorCode = U_ZERO_ERROR;
1849 int32_t length32 =
1850 UnicodeString(FALSE, utf16, UPRV_LENGTHOF(utf16)).
1851 toUTF32(result32, UPRV_LENGTHOF(result32), errorCode);
1852 if( length32 != UPRV_LENGTHOF(expected_utf32) ||
1853 0 != uprv_memcmp(result32, expected_utf32, length32*4) ||
1854 result32[length32] != 0
1855 ) {
1856 errln("UnicodeString::toUTF32() did not create the expected string.");
1857 }
1858 }
1859
1860 class TestCheckedArrayByteSink : public CheckedArrayByteSink {
1861 public:
TestCheckedArrayByteSink(char * outbuf,int32_t capacity)1862 TestCheckedArrayByteSink(char* outbuf, int32_t capacity)
1863 : CheckedArrayByteSink(outbuf, capacity), calledFlush(FALSE) {}
Flush()1864 virtual void Flush() { calledFlush = TRUE; }
1865 UBool calledFlush;
1866 };
1867
1868 void
TestUTF8()1869 UnicodeStringTest::TestUTF8() {
1870 static const uint8_t utf8[] = {
1871 // Code points:
1872 // 0x41, 0xd900,
1873 // 0x61, 0xdc00,
1874 // 0x110000, 0x5a,
1875 // 0x50000, 0x7a,
1876 // 0x10000, 0x20000,
1877 // 0xe0000, 0x10ffff
1878 0x41, 0xed, 0xa4, 0x80,
1879 0x61, 0xed, 0xb0, 0x80,
1880 0xf4, 0x90, 0x80, 0x80, 0x5a,
1881 0xf1, 0x90, 0x80, 0x80, 0x7a,
1882 0xf0, 0x90, 0x80, 0x80, 0xf0, 0xa0, 0x80, 0x80,
1883 0xf3, 0xa0, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
1884 };
1885 static const UChar expected_utf16[] = {
1886 0x41, 0xfffd, 0xfffd, 0xfffd,
1887 0x61, 0xfffd, 0xfffd, 0xfffd,
1888 0xfffd, 0xfffd, 0xfffd, 0xfffd,0x5a,
1889 0xd900, 0xdc00, 0x7a,
1890 0xd800, 0xdc00, 0xd840, 0xdc00,
1891 0xdb40, 0xdc00, 0xdbff, 0xdfff
1892 };
1893 UnicodeString from8 = UnicodeString::fromUTF8(StringPiece((const char *)utf8, (int32_t)sizeof(utf8)));
1894 UnicodeString expected(FALSE, expected_utf16, UPRV_LENGTHOF(expected_utf16));
1895
1896 if(from8 != expected) {
1897 errln("UnicodeString::fromUTF8(StringPiece) did not create the expected string.");
1898 }
1899 std::string utf8_string((const char *)utf8, sizeof(utf8));
1900 UnicodeString from8b = UnicodeString::fromUTF8(utf8_string);
1901 if(from8b != expected) {
1902 errln("UnicodeString::fromUTF8(std::string) did not create the expected string.");
1903 }
1904
1905 static const UChar utf16[] = {
1906 0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
1907 };
1908 static const uint8_t expected_utf8[] = {
1909 0x41, 0xef, 0xbf, 0xbd, 0x61, 0xef, 0xbf, 0xbd, 0x5a, 0xf1, 0x90, 0x80, 0x80, 0x7a,
1910 0xf0, 0x90, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
1911 };
1912 UnicodeString us(FALSE, utf16, UPRV_LENGTHOF(utf16));
1913
1914 char buffer[64];
1915 TestCheckedArrayByteSink sink(buffer, (int32_t)sizeof(buffer));
1916 us.toUTF8(sink);
1917 if( sink.NumberOfBytesWritten() != (int32_t)sizeof(expected_utf8) ||
1918 0 != uprv_memcmp(buffer, expected_utf8, sizeof(expected_utf8))
1919 ) {
1920 errln("UnicodeString::toUTF8() did not create the expected string.");
1921 }
1922 if(!sink.calledFlush) {
1923 errln("UnicodeString::toUTF8(sink) did not sink.Flush().");
1924 }
1925 // Initial contents for testing that toUTF8String() appends.
1926 std::string result8 = "-->";
1927 std::string expected8 = "-->" + std::string((const char *)expected_utf8, sizeof(expected_utf8));
1928 // Use the return value just for testing.
1929 std::string &result8r = us.toUTF8String(result8);
1930 if(result8r != expected8 || &result8r != &result8) {
1931 errln("UnicodeString::toUTF8String() did not create the expected string.");
1932 }
1933 }
1934
1935 // Test if this compiler supports Return Value Optimization of unnamed temporary objects.
wrapUChars(const UChar * uchars)1936 static UnicodeString wrapUChars(const UChar *uchars) {
1937 return UnicodeString(TRUE, uchars, -1);
1938 }
1939
1940 void
TestReadOnlyAlias()1941 UnicodeStringTest::TestReadOnlyAlias() {
1942 UChar uchars[]={ 0x61, 0x62, 0 };
1943 UnicodeString alias(TRUE, uchars, 2);
1944 if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
1945 errln("UnicodeString read-only-aliasing constructor does not behave as expected.");
1946 return;
1947 }
1948 alias.truncate(1);
1949 if(alias.length()!=1 || alias.getBuffer()!=uchars) {
1950 errln("UnicodeString(read-only-alias).truncate() did not preserve aliasing as expected.");
1951 }
1952 if(alias.getTerminatedBuffer()==uchars) {
1953 errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1954 "did not allocate and copy as expected.");
1955 }
1956 if(uchars[1]!=0x62) {
1957 errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1958 "modified the original buffer.");
1959 }
1960 if(1!=u_strlen(alias.getTerminatedBuffer())) {
1961 errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1962 "does not return a buffer terminated at the proper length.");
1963 }
1964
1965 alias.setTo(TRUE, uchars, 2);
1966 if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
1967 errln("UnicodeString read-only-aliasing setTo() does not behave as expected.");
1968 return;
1969 }
1970 alias.remove();
1971 if(alias.length()!=0) {
1972 errln("UnicodeString(read-only-alias).remove() did not work.");
1973 }
1974 if(alias.getTerminatedBuffer()==uchars) {
1975 errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
1976 "did not un-alias as expected.");
1977 }
1978 if(uchars[0]!=0x61) {
1979 errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
1980 "modified the original buffer.");
1981 }
1982 if(0!=u_strlen(alias.getTerminatedBuffer())) {
1983 errln("UnicodeString.setTo(read-only-alias).remove().getTerminatedBuffer() "
1984 "does not return a buffer terminated at length 0.");
1985 }
1986
1987 UnicodeString longString=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789");
1988 alias.setTo(FALSE, longString.getBuffer(), longString.length());
1989 alias.remove(0, 10);
1990 if(longString.compare(10, INT32_MAX, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+10) {
1991 errln("UnicodeString.setTo(read-only-alias).remove(0, 10) did not preserve aliasing as expected.");
1992 }
1993 alias.setTo(FALSE, longString.getBuffer(), longString.length());
1994 alias.remove(27, 99);
1995 if(longString.compare(0, 27, alias)!=0 || alias.getBuffer()!=longString.getBuffer()) {
1996 errln("UnicodeString.setTo(read-only-alias).remove(27, 99) did not preserve aliasing as expected.");
1997 }
1998 alias.setTo(FALSE, longString.getBuffer(), longString.length());
1999 alias.retainBetween(6, 30);
2000 if(longString.compare(6, 24, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+6) {
2001 errln("UnicodeString.setTo(read-only-alias).retainBetween(6, 30) did not preserve aliasing as expected.");
2002 }
2003
2004 UChar abc[]={ 0x61, 0x62, 0x63, 0 };
2005 UBool hasRVO= wrapUChars(abc).getBuffer()==abc;
2006
2007 UnicodeString temp;
2008 temp.fastCopyFrom(longString.tempSubString());
2009 if(temp!=longString || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
2010 errln("UnicodeString.tempSubString() failed");
2011 }
2012 temp.fastCopyFrom(longString.tempSubString(-3, 5));
2013 if(longString.compare(0, 5, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
2014 errln("UnicodeString.tempSubString(-3, 5) failed");
2015 }
2016 temp.fastCopyFrom(longString.tempSubString(17));
2017 if(longString.compare(17, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+17)) {
2018 errln("UnicodeString.tempSubString(17) failed");
2019 }
2020 temp.fastCopyFrom(longString.tempSubString(99));
2021 if(!temp.isEmpty()) {
2022 errln("UnicodeString.tempSubString(99) failed");
2023 }
2024 temp.fastCopyFrom(longString.tempSubStringBetween(6));
2025 if(longString.compare(6, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+6)) {
2026 errln("UnicodeString.tempSubStringBetween(6) failed");
2027 }
2028 temp.fastCopyFrom(longString.tempSubStringBetween(8, 18));
2029 if(longString.compare(8, 10, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+8)) {
2030 errln("UnicodeString.tempSubStringBetween(8, 18) failed");
2031 }
2032 UnicodeString bogusString;
2033 bogusString.setToBogus();
2034 temp.fastCopyFrom(bogusString.tempSubStringBetween(8, 18));
2035 if(!temp.isBogus()) {
2036 errln("UnicodeString.setToBogus().tempSubStringBetween(8, 18) failed");
2037 }
2038 }
2039
2040 void
doTestAppendable(UnicodeString & dest,Appendable & app)2041 UnicodeStringTest::doTestAppendable(UnicodeString &dest, Appendable &app) {
2042 static const UChar cde[3]={ 0x63, 0x64, 0x65 };
2043 static const UChar fg[3]={ 0x66, 0x67, 0 };
2044 if(!app.reserveAppendCapacity(12)) {
2045 errln("Appendable.reserve(12) failed");
2046 }
2047 app.appendCodeUnit(0x61);
2048 app.appendCodePoint(0x62);
2049 app.appendCodePoint(0x50000);
2050 app.appendString(cde, 3);
2051 app.appendString(fg, -1);
2052 UChar scratch[3];
2053 int32_t capacity=-1;
2054 UChar *buffer=app.getAppendBuffer(3, 3, scratch, 3, &capacity);
2055 if(capacity<3) {
2056 errln("Appendable.getAppendBuffer(min=3) returned capacity=%d<3", (int)capacity);
2057 return;
2058 }
2059 static const UChar hij[3]={ 0x68, 0x69, 0x6a };
2060 u_memcpy(buffer, hij, 3);
2061 app.appendString(buffer, 3);
2062 if(dest!=UNICODE_STRING_SIMPLE("ab\\U00050000cdefghij").unescape()) {
2063 errln("Appendable.append(...) failed");
2064 }
2065 buffer=app.getAppendBuffer(0, 3, scratch, 3, &capacity);
2066 if(buffer!=NULL || capacity!=0) {
2067 errln("Appendable.getAppendBuffer(min=0) failed");
2068 }
2069 capacity=1;
2070 buffer=app.getAppendBuffer(3, 3, scratch, 2, &capacity);
2071 if(buffer!=NULL || capacity!=0) {
2072 errln("Appendable.getAppendBuffer(scratch<min) failed");
2073 }
2074 }
2075
2076 class SimpleAppendable : public Appendable {
2077 public:
SimpleAppendable(UnicodeString & dest)2078 explicit SimpleAppendable(UnicodeString &dest) : str(dest) {}
appendCodeUnit(UChar c)2079 virtual UBool appendCodeUnit(UChar c) { str.append(c); return TRUE; }
reset()2080 SimpleAppendable &reset() { str.remove(); return *this; }
2081 private:
2082 UnicodeString &str;
2083 };
2084
2085 void
TestAppendable()2086 UnicodeStringTest::TestAppendable() {
2087 UnicodeString dest;
2088 SimpleAppendable app(dest);
2089 doTestAppendable(dest, app);
2090 }
2091
2092 void
TestUnicodeStringImplementsAppendable()2093 UnicodeStringTest::TestUnicodeStringImplementsAppendable() {
2094 UnicodeString dest;
2095 UnicodeStringAppendable app(dest);
2096 doTestAppendable(dest, app);
2097 }
2098
2099 void
TestSizeofUnicodeString()2100 UnicodeStringTest::TestSizeofUnicodeString() {
2101 // See the comments in unistr.h near the declaration of UnicodeString's fields.
2102 // See the API comments for UNISTR_OBJECT_SIZE.
2103 size_t sizeofUniStr=sizeof(UnicodeString);
2104 size_t expected=UNISTR_OBJECT_SIZE;
2105 if(expected!=sizeofUniStr) {
2106 // Possible cause: UNISTR_OBJECT_SIZE may not be a multiple of sizeof(pointer),
2107 // of the compiler might add more internal padding than expected.
2108 errln("sizeof(UnicodeString)=%d, expected UNISTR_OBJECT_SIZE=%d",
2109 (int)sizeofUniStr, (int)expected);
2110 }
2111 if(sizeofUniStr<32) {
2112 errln("sizeof(UnicodeString)=%d < 32, probably too small", (int)sizeofUniStr);
2113 }
2114 // We assume that the entire UnicodeString object,
2115 // minus the vtable pointer and 2 bytes for flags and short length,
2116 // is available for internal storage of UChars.
2117 int32_t expectedStackBufferLength=((int32_t)UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR;
2118 UnicodeString s;
2119 const UChar *emptyBuffer=s.getBuffer();
2120 for(int32_t i=0; i<expectedStackBufferLength; ++i) {
2121 s.append((UChar)0x2e);
2122 }
2123 const UChar *fullBuffer=s.getBuffer();
2124 if(fullBuffer!=emptyBuffer) {
2125 errln("unexpected reallocation when filling with assumed stack buffer size of %d",
2126 expectedStackBufferLength);
2127 }
2128 const UChar *terminatedBuffer=s.getTerminatedBuffer();
2129 if(terminatedBuffer==emptyBuffer) {
2130 errln("unexpected keeping stack buffer when overfilling assumed stack buffer size of %d",
2131 expectedStackBufferLength);
2132 }
2133 }
2134
2135 // Try to avoid clang -Wself-move warnings from s1 = std::move(s1);
moveFrom(UnicodeString & dest,UnicodeString & src)2136 void moveFrom(UnicodeString &dest, UnicodeString &src) {
2137 dest = std::move(src);
2138 }
2139
2140 void
TestMoveSwap()2141 UnicodeStringTest::TestMoveSwap() {
2142 static const UChar abc[3] = { 0x61, 0x62, 0x63 }; // "abc"
2143 UnicodeString s1(FALSE, abc, UPRV_LENGTHOF(abc)); // read-only alias
2144 UnicodeString s2(100, 0x7a, 100); // 100 * 'z' should be on the heap
2145 UnicodeString s3("defg", 4, US_INV); // in stack buffer
2146 const UChar *p = s2.getBuffer();
2147 s1.swap(s2);
2148 if(s1.getBuffer() != p || s1.length() != 100 || s2.getBuffer() != abc || s2.length() != 3) {
2149 errln("UnicodeString.swap() did not swap");
2150 }
2151 swap(s2, s3);
2152 if(s2 != UNICODE_STRING_SIMPLE("defg") || s3.getBuffer() != abc || s3.length() != 3) {
2153 errln("swap(UnicodeString) did not swap back");
2154 }
2155 UnicodeString s4;
2156 s4 = std::move(s1);
2157 if(s4.getBuffer() != p || s4.length() != 100 || !s1.isBogus()) {
2158 errln("UnicodeString = std::move(heap) did not move");
2159 }
2160 UnicodeString s5;
2161 s5 = std::move(s2);
2162 if(s5 != UNICODE_STRING_SIMPLE("defg")) {
2163 errln("UnicodeString = std::move(stack) did not move");
2164 }
2165 UnicodeString s6;
2166 s6 = std::move(s3);
2167 if(s6.getBuffer() != abc || s6.length() != 3) {
2168 errln("UnicodeString = std::move(alias) did not move");
2169 }
2170 infoln("TestMoveSwap() with rvalue references");
2171 s1 = static_cast<UnicodeString &&>(s6);
2172 if(s1.getBuffer() != abc || s1.length() != 3) {
2173 errln("UnicodeString move assignment operator did not move");
2174 }
2175 UnicodeString s7(static_cast<UnicodeString &&>(s4));
2176 if(s7.getBuffer() != p || s7.length() != 100 || !s4.isBogus()) {
2177 errln("UnicodeString move constructor did not move");
2178 }
2179
2180 // Move self assignment leaves the object valid but in an undefined state.
2181 // Do it to make sure there is no crash,
2182 // but do not check for any particular resulting value.
2183 moveFrom(s1, s1);
2184 moveFrom(s2, s2);
2185 moveFrom(s3, s3);
2186 moveFrom(s4, s4);
2187 moveFrom(s5, s5);
2188 moveFrom(s6, s6);
2189 moveFrom(s7, s7);
2190 // Simple copy assignment must work.
2191 UnicodeString simple = UNICODE_STRING_SIMPLE("simple");
2192 s1 = s6 = s4 = s7 = simple;
2193 if(s1 != simple || s4 != simple || s6 != simple || s7 != simple) {
2194 errln("UnicodeString copy after self-move did not work");
2195 }
2196 }
2197
2198 void
TestUInt16Pointers()2199 UnicodeStringTest::TestUInt16Pointers() {
2200 static const uint16_t carr[] = { 0x61, 0x62, 0x63, 0 };
2201 uint16_t arr[4];
2202
2203 UnicodeString expected(u"abc");
2204 assertEquals("abc from pointer", expected, UnicodeString(carr));
2205 assertEquals("abc from pointer+length", expected, UnicodeString(carr, 3));
2206 assertEquals("abc from read-only-alias pointer", expected, UnicodeString(TRUE, carr, 3));
2207
2208 UnicodeString alias(arr, 0, 4);
2209 alias.append(u'a').append(u'b').append(u'c');
2210 assertEquals("abc from writable alias", expected, alias);
2211 assertEquals("buffer=abc from writable alias", expected, UnicodeString(arr, 3));
2212
2213 UErrorCode errorCode = U_ZERO_ERROR;
2214 int32_t length = UnicodeString(u"def").extract(arr, 4, errorCode);
2215 assertSuccess(WHERE, errorCode);
2216 assertEquals("def from extract()", UnicodeString(u"def"), UnicodeString(arr, length));
2217 }
2218
2219 void
TestWCharPointers()2220 UnicodeStringTest::TestWCharPointers() {
2221 #if U_SIZEOF_WCHAR_T==2
2222 static const wchar_t carr[] = { 0x61, 0x62, 0x63, 0 };
2223 wchar_t arr[4];
2224
2225 UnicodeString expected(u"abc");
2226 assertEquals("abc from pointer", expected, UnicodeString(carr));
2227 assertEquals("abc from pointer+length", expected, UnicodeString(carr, 3));
2228 assertEquals("abc from read-only-alias pointer", expected, UnicodeString(TRUE, carr, 3));
2229
2230 UnicodeString alias(arr, 0, 4);
2231 alias.append(u'a').append(u'b').append(u'c');
2232 assertEquals("abc from writable alias", expected, alias);
2233 assertEquals("buffer=abc from writable alias", expected, UnicodeString(arr, 3));
2234
2235 UErrorCode errorCode = U_ZERO_ERROR;
2236 int32_t length = UnicodeString(u"def").extract(arr, 4, errorCode);
2237 assertSuccess(WHERE, errorCode);
2238 assertEquals("def from extract()", UnicodeString(u"def"), UnicodeString(arr, length));
2239 #endif
2240 }
2241
2242 void
TestNullPointers()2243 UnicodeStringTest::TestNullPointers() {
2244 assertTrue("empty from nullptr", UnicodeString(nullptr).isEmpty());
2245 assertTrue("empty from nullptr+length", UnicodeString(nullptr, 2).isEmpty());
2246 assertTrue("empty from read-only-alias nullptr", UnicodeString(TRUE, nullptr, 3).isEmpty());
2247
2248 UnicodeString alias(nullptr, 4, 4); // empty, no alias
2249 assertTrue("empty from writable alias", alias.isEmpty());
2250 alias.append(u'a').append(u'b').append(u'c');
2251 UnicodeString expected(u"abc");
2252 assertEquals("abc from writable alias", expected, alias);
2253
2254 UErrorCode errorCode = U_ZERO_ERROR;
2255 UnicodeString(u"def").extract(nullptr, 0, errorCode);
2256 assertEquals("buffer overflow extracting to nullptr", U_BUFFER_OVERFLOW_ERROR, errorCode);
2257 }
2258
TestUnicodeStringInsertAppendToSelf()2259 void UnicodeStringTest::TestUnicodeStringInsertAppendToSelf() {
2260 IcuTestErrorCode status(*this, "TestUnicodeStringAppendToSelf");
2261
2262 // Test append operation
2263 UnicodeString str(u"foo ");
2264 str.append(str);
2265 str.append(str);
2266 str.append(str);
2267 assertEquals("", u"foo foo foo foo foo foo foo foo ", str);
2268
2269 // Test append operation with readonly alias to start
2270 str = UnicodeString(TRUE, u"foo ", 4);
2271 str.append(str);
2272 str.append(str);
2273 str.append(str);
2274 assertEquals("", u"foo foo foo foo foo foo foo foo ", str);
2275
2276 // Test append operation with aliased substring
2277 str = u"abcde";
2278 UnicodeString sub = str.tempSubString(1, 2);
2279 str.append(sub);
2280 assertEquals("", u"abcdebc", str);
2281
2282 // Test append operation with double-aliased substring
2283 str = UnicodeString(TRUE, u"abcde", 5);
2284 sub = str.tempSubString(1, 2);
2285 str.append(sub);
2286 assertEquals("", u"abcdebc", str);
2287
2288 // Test insert operation
2289 str = u"a-*b";
2290 str.insert(2, str);
2291 str.insert(4, str);
2292 str.insert(8, str);
2293 assertEquals("", u"a-a-a-a-a-a-a-a-*b*b*b*b*b*b*b*b", str);
2294
2295 // Test insert operation with readonly alias to start
2296 str = UnicodeString(TRUE, u"a-*b", 4);
2297 str.insert(2, str);
2298 str.insert(4, str);
2299 str.insert(8, str);
2300 assertEquals("", u"a-a-a-a-a-a-a-a-*b*b*b*b*b*b*b*b", str);
2301
2302 // Test insert operation with aliased substring
2303 str = u"abcde";
2304 sub = str.tempSubString(1, 3);
2305 str.insert(2, sub);
2306 assertEquals("", u"abbcdcde", str);
2307
2308 // Test insert operation with double-aliased substring
2309 str = UnicodeString(TRUE, u"abcde", 5);
2310 sub = str.tempSubString(1, 3);
2311 str.insert(2, sub);
2312 assertEquals("", u"abbcdcde", str);
2313 }
2314
TestLargeAppend()2315 void UnicodeStringTest::TestLargeAppend() {
2316 if(quick) return;
2317
2318 IcuTestErrorCode status(*this, "TestLargeAppend");
2319 // Make a large UnicodeString
2320 int32_t len = 0xAFFFFFF;
2321 UnicodeString str;
2322 char16_t *buf = str.getBuffer(len);
2323 // A fast way to set buffer to valid Unicode.
2324 // 4E4E is a valid unicode character
2325 uprv_memset(buf, 0x4e, len * 2);
2326 str.releaseBuffer(len);
2327 UnicodeString dest;
2328 // Append it 16 times
2329 // 0xAFFFFFF times 16 is 0xA4FFFFF1,
2330 // which is greater than INT32_MAX, which is 0x7FFFFFFF.
2331 int64_t total = 0;
2332 for (int32_t i = 0; i < 16; i++) {
2333 dest.append(str);
2334 total += len;
2335 if (total <= INT32_MAX) {
2336 assertFalse("dest is not bogus", dest.isBogus());
2337 } else {
2338 assertTrue("dest should be bogus", dest.isBogus());
2339 }
2340 }
2341 dest.remove();
2342 total = 0;
2343 for (int32_t i = 0; i < 16; i++) {
2344 dest.append(str);
2345 total += len;
2346 if (total + len <= INT32_MAX) {
2347 assertFalse("dest is not bogus", dest.isBogus());
2348 } else if (total <= INT32_MAX) {
2349 // Check that a string of exactly the maximum size works
2350 UnicodeString str2;
2351 int32_t remain = INT32_MAX - total;
2352 char16_t *buf2 = str2.getBuffer(remain);
2353 if (buf2 == nullptr) {
2354 // if somehow memory allocation fail, return the test
2355 return;
2356 }
2357 uprv_memset(buf2, 0x4e, remain * 2);
2358 str2.releaseBuffer(remain);
2359 dest.append(str2);
2360 total += remain;
2361 assertEquals("When a string of exactly the maximum size works", (int64_t)INT32_MAX, total);
2362 assertEquals("When a string of exactly the maximum size works", INT32_MAX, dest.length());
2363 assertFalse("dest is not bogus", dest.isBogus());
2364
2365 // Check that a string size+1 goes bogus
2366 str2.truncate(1);
2367 dest.append(str2);
2368 total++;
2369 assertTrue("dest should be bogus", dest.isBogus());
2370 } else {
2371 assertTrue("dest should be bogus", dest.isBogus());
2372 }
2373 }
2374 }
2375