1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (c) 1997-2015, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6
7 #include "ustrtest.h"
8 #include "unicode/appendable.h"
9 #include "unicode/std_string.h"
10 #include "unicode/unistr.h"
11 #include "unicode/uchar.h"
12 #include "unicode/ustring.h"
13 #include "unicode/locid.h"
14 #include "unicode/ucnv.h"
15 #include "unicode/uenum.h"
16 #include "unicode/utf16.h"
17 #include "cmemory.h"
18 #include "charstr.h"
19
20 #if 0
21 #include "unicode/ustream.h"
22
23 #include <iostream>
24 using namespace std;
25
26 #endif
27
~UnicodeStringTest()28 UnicodeStringTest::~UnicodeStringTest() {}
29
runIndexedTest(int32_t index,UBool exec,const char * & name,char * par)30 void UnicodeStringTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char *par)
31 {
32 if (exec) logln("TestSuite UnicodeStringTest: ");
33 TESTCASE_AUTO_BEGIN;
34 TESTCASE_AUTO_CLASS(StringCaseTest);
35 TESTCASE_AUTO(TestBasicManipulation);
36 TESTCASE_AUTO(TestCompare);
37 TESTCASE_AUTO(TestExtract);
38 TESTCASE_AUTO(TestRemoveReplace);
39 TESTCASE_AUTO(TestSearching);
40 TESTCASE_AUTO(TestSpacePadding);
41 TESTCASE_AUTO(TestPrefixAndSuffix);
42 TESTCASE_AUTO(TestFindAndReplace);
43 TESTCASE_AUTO(TestBogus);
44 TESTCASE_AUTO(TestReverse);
45 TESTCASE_AUTO(TestMiscellaneous);
46 TESTCASE_AUTO(TestStackAllocation);
47 TESTCASE_AUTO(TestUnescape);
48 TESTCASE_AUTO(TestCountChar32);
49 TESTCASE_AUTO(TestStringEnumeration);
50 TESTCASE_AUTO(TestNameSpace);
51 TESTCASE_AUTO(TestUTF32);
52 TESTCASE_AUTO(TestUTF8);
53 TESTCASE_AUTO(TestReadOnlyAlias);
54 TESTCASE_AUTO(TestAppendable);
55 TESTCASE_AUTO(TestUnicodeStringImplementsAppendable);
56 TESTCASE_AUTO(TestSizeofUnicodeString);
57 TESTCASE_AUTO(TestStartsWithAndEndsWithNulTerminated);
58 TESTCASE_AUTO(TestMoveSwap);
59 TESTCASE_AUTO_END;
60 }
61
62 void
TestBasicManipulation()63 UnicodeStringTest::TestBasicManipulation()
64 {
65 UnicodeString test1("Now is the time for all men to come swiftly to the aid of the party.\n");
66 UnicodeString expectedValue;
67 UnicodeString *c;
68
69 c=(UnicodeString *)test1.clone();
70 test1.insert(24, "good ");
71 expectedValue = "Now is the time for all good men to come swiftly to the aid of the party.\n";
72 if (test1 != expectedValue)
73 errln("insert() failed: expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
74
75 c->insert(24, "good ");
76 if(*c != expectedValue) {
77 errln("clone()->insert() failed: expected \"" + expectedValue + "\"\n,got \"" + *c + "\"");
78 }
79 delete c;
80
81 test1.remove(41, 8);
82 expectedValue = "Now is the time for all good men to come to the aid of the party.\n";
83 if (test1 != expectedValue)
84 errln("remove() failed: expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
85
86 test1.replace(58, 6, "ir country");
87 expectedValue = "Now is the time for all good men to come to the aid of their country.\n";
88 if (test1 != expectedValue)
89 errln("replace() failed: expected \"" + expectedValue + "\"\n,got \"" + test1 + "\"");
90
91 UChar temp[80];
92 test1.extract(0, 15, temp);
93
94 UnicodeString test2(temp, 15);
95
96 expectedValue = "Now is the time";
97 if (test2 != expectedValue)
98 errln("extract() failed: expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
99
100 test2 += " for me to go!\n";
101 expectedValue = "Now is the time for me to go!\n";
102 if (test2 != expectedValue)
103 errln("operator+=() failed: expected \"" + expectedValue + "\"\n,got \"" + test2 + "\"");
104
105 if (test1.length() != 70)
106 errln(UnicodeString("length() failed: expected 70, got ") + test1.length());
107 if (test2.length() != 30)
108 errln(UnicodeString("length() failed: expected 30, got ") + test2.length());
109
110 UnicodeString test3;
111 test3.append((UChar32)0x20402);
112 if(test3 != CharsToUnicodeString("\\uD841\\uDC02")){
113 errln((UnicodeString)"append failed for UChar32, expected \"\\\\ud841\\\\udc02\", got " + prettify(test3));
114 }
115 if(test3.length() != 2){
116 errln(UnicodeString("append or length failed for UChar32, expected 2, got ") + test3.length());
117 }
118 test3.append((UChar32)0x0074);
119 if(test3 != CharsToUnicodeString("\\uD841\\uDC02t")){
120 errln((UnicodeString)"append failed for UChar32, expected \"\\\\uD841\\\\uDC02t\", got " + prettify(test3));
121 }
122 if(test3.length() != 3){
123 errln((UnicodeString)"append or length failed for UChar32, expected 2, got " + test3.length());
124 }
125
126 // test some UChar32 overloads
127 if( test3.setTo((UChar32)0x10330).length() != 2 ||
128 test3.insert(0, (UChar32)0x20100).length() != 4 ||
129 test3.replace(2, 2, (UChar32)0xe0061).length() != 4 ||
130 (test3 = (UChar32)0x14001).length() != 2
131 ) {
132 errln((UnicodeString)"simple UChar32 overloads for replace, insert, setTo or = failed");
133 }
134
135 {
136 // test moveIndex32()
137 UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
138
139 if(
140 s.moveIndex32(2, -1)!=0 ||
141 s.moveIndex32(2, 1)!=4 ||
142 s.moveIndex32(2, 2)!=5 ||
143 s.moveIndex32(5, -2)!=2 ||
144 s.moveIndex32(0, -1)!=0 ||
145 s.moveIndex32(6, 1)!=6
146 ) {
147 errln("UnicodeString::moveIndex32() failed");
148 }
149
150 if(s.getChar32Start(1)!=0 || s.getChar32Start(2)!=2) {
151 errln("UnicodeString::getChar32Start() failed");
152 }
153
154 if(s.getChar32Limit(1)!=2 || s.getChar32Limit(2)!=2) {
155 errln("UnicodeString::getChar32Limit() failed");
156 }
157 }
158
159 {
160 // test new 2.2 constructors and setTo function that parallel Java's substring function.
161 UnicodeString src("Hello folks how are you?");
162 UnicodeString target1("how are you?");
163 if (target1 != UnicodeString(src, 12)) {
164 errln("UnicodeString(const UnicodeString&, int32_t) failed");
165 }
166 UnicodeString target2("folks");
167 if (target2 != UnicodeString(src, 6, 5)) {
168 errln("UnicodeString(const UnicodeString&, int32_t, int32_t) failed");
169 }
170 if (target1 != target2.setTo(src, 12)) {
171 errln("UnicodeString::setTo(const UnicodeString&, int32_t) failed");
172 }
173 }
174
175 {
176 // op+ is new in ICU 2.8
177 UnicodeString s=UnicodeString("abc", "")+UnicodeString("def", "")+UnicodeString("ghi", "");
178 if(s!=UnicodeString("abcdefghi", "")) {
179 errln("operator+(UniStr, UniStr) failed");
180 }
181 }
182
183 {
184 // tests for Jitterbug 2360
185 // verify that APIs with source pointer + length accept length == -1
186 // mostly test only where modified, only few functions did not already do this
187 if(UnicodeString("abc", -1, "")!=UnicodeString("abc", "")) {
188 errln("UnicodeString(codepageData, dataLength, codepage) does not work with dataLength==-1");
189 }
190
191 UChar buffer[10]={ 0x61, 0x62, 0x20ac, 0xd900, 0xdc05, 0, 0x62, 0xffff, 0xdbff, 0xdfff };
192 UnicodeString s, t(buffer, -1, UPRV_LENGTHOF(buffer));
193
194 if(s.setTo(buffer, -1, UPRV_LENGTHOF(buffer)).length()!=u_strlen(buffer)) {
195 errln("UnicodeString.setTo(buffer, length, capacity) does not work with length==-1");
196 }
197 if(t.length()!=u_strlen(buffer)) {
198 errln("UnicodeString(buffer, length, capacity) does not work with length==-1");
199 }
200
201 if(0!=s.caseCompare(buffer, -1, U_FOLD_CASE_DEFAULT)) {
202 errln("UnicodeString.caseCompare(const UChar *, length, options) does not work with length==-1");
203 }
204 if(0!=s.caseCompare(0, s.length(), buffer, U_FOLD_CASE_DEFAULT)) {
205 errln("UnicodeString.caseCompare(start, _length, const UChar *, options) does not work");
206 }
207
208 buffer[u_strlen(buffer)]=0xe4;
209 UnicodeString u(buffer, -1, UPRV_LENGTHOF(buffer));
210 if(s.setTo(buffer, -1, UPRV_LENGTHOF(buffer)).length()!=UPRV_LENGTHOF(buffer)) {
211 errln("UnicodeString.setTo(buffer without NUL, length, capacity) does not work with length==-1");
212 }
213 if(u.length()!=UPRV_LENGTHOF(buffer)) {
214 errln("UnicodeString(buffer without NUL, length, capacity) does not work with length==-1");
215 }
216
217 static const char cs[]={ 0x61, (char)0xe4, (char)0x85, 0 };
218 UConverter *cnv;
219 UErrorCode errorCode=U_ZERO_ERROR;
220
221 cnv=ucnv_open("ISO-8859-1", &errorCode);
222 UnicodeString v(cs, -1, cnv, errorCode);
223 ucnv_close(cnv);
224 if(v!=CharsToUnicodeString("a\\xe4\\x85")) {
225 errln("UnicodeString(const char *, length, cnv, errorCode) does not work with length==-1");
226 }
227 }
228
229 #if U_CHARSET_IS_UTF8
230 {
231 // Test the hardcoded-UTF-8 UnicodeString optimizations.
232 static const uint8_t utf8[]={ 0x61, 0xC3, 0xA4, 0xC3, 0x9F, 0xE4, 0xB8, 0x80, 0 };
233 static const UChar utf16[]={ 0x61, 0xE4, 0xDF, 0x4E00 };
234 UnicodeString from8a = UnicodeString((const char *)utf8);
235 UnicodeString from8b = UnicodeString((const char *)utf8, (int32_t)sizeof(utf8)-1);
236 UnicodeString from16(FALSE, utf16, UPRV_LENGTHOF(utf16));
237 if(from8a != from16 || from8b != from16) {
238 errln("UnicodeString(const char * U_CHARSET_IS_UTF8) failed");
239 }
240 char buffer[16];
241 int32_t length8=from16.extract(0, 0x7fffffff, buffer, (uint32_t)sizeof(buffer));
242 if(length8!=((int32_t)sizeof(utf8)-1) || 0!=uprv_memcmp(buffer, utf8, sizeof(utf8))) {
243 errln("UnicodeString::extract(char * U_CHARSET_IS_UTF8) failed");
244 }
245 length8=from16.extract(1, 2, buffer, (uint32_t)sizeof(buffer));
246 if(length8!=4 || buffer[length8]!=0 || 0!=uprv_memcmp(buffer, utf8+1, length8)) {
247 errln("UnicodeString::extract(substring to char * U_CHARSET_IS_UTF8) failed");
248 }
249 }
250 #endif
251 }
252
253 void
TestCompare()254 UnicodeStringTest::TestCompare()
255 {
256 UnicodeString test1("this is a test");
257 UnicodeString test2("this is a test");
258 UnicodeString test3("this is a test of the emergency broadcast system");
259 UnicodeString test4("never say, \"this is a test\"!!");
260
261 UnicodeString test5((UChar)0x5000);
262 UnicodeString test6((UChar)0x5100);
263
264 UChar uniChars[] = { 0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73,
265 0x20, 0x61, 0x20, 0x74, 0x65, 0x73, 0x74, 0 };
266 char chars[] = "this is a test";
267
268 // test operator== and operator!=
269 if (test1 != test2 || test1 == test3 || test1 == test4)
270 errln("operator== or operator!= failed");
271
272 // test operator> and operator<
273 if (test1 > test2 || test1 < test2 || !(test1 < test3) || !(test1 > test4) ||
274 !(test5 < test6)
275 ) {
276 errln("operator> or operator< failed");
277 }
278
279 // test operator>= and operator<=
280 if (!(test1 >= test2) || !(test1 <= test2) || !(test1 <= test3) || !(test1 >= test4))
281 errln("operator>= or operator<= failed");
282
283 // test compare(UnicodeString)
284 if (test1.compare(test2) != 0 || test1.compare(test3) >= 0 || test1.compare(test4) <= 0)
285 errln("compare(UnicodeString) failed");
286
287 //test compare(offset, length, UnicodeString)
288 if(test1.compare(0, 14, test2) != 0 ||
289 test3.compare(0, 14, test2) != 0 ||
290 test4.compare(12, 14, test2) != 0 ||
291 test3.compare(0, 18, test1) <=0 )
292 errln("compare(offset, length, UnicodeString) failes");
293
294 // test compare(UChar*)
295 if (test2.compare(uniChars) != 0 || test3.compare(uniChars) <= 0 || test4.compare(uniChars) >= 0)
296 errln("compare(UChar*) failed");
297
298 // test compare(char*)
299 if (test2.compare(chars) != 0 || test3.compare(chars) <= 0 || test4.compare(chars) >= 0)
300 errln("compare(char*) failed");
301
302 // test compare(UChar*, length)
303 if (test1.compare(uniChars, 4) <= 0 || test1.compare(uniChars, 4) <= 0)
304 errln("compare(UChar*, length) failed");
305
306 // test compare(thisOffset, thisLength, that, thatOffset, thatLength)
307 if (test1.compare(0, 14, test2, 0, 14) != 0
308 || test1.compare(0, 14, test3, 0, 14) != 0
309 || test1.compare(0, 14, test4, 12, 14) != 0)
310 errln("1. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
311
312 if (test1.compare(10, 4, test2, 0, 4) >= 0
313 || test1.compare(10, 4, test3, 22, 9) <= 0
314 || test1.compare(10, 4, test4, 22, 4) != 0)
315 errln("2. compare(thisOffset, thisLength, that, thatOffset, thatLength) failed");
316
317 // test compareBetween
318 if (test1.compareBetween(0, 14, test2, 0, 14) != 0 || test1.compareBetween(0, 14, test3, 0, 14) != 0
319 || test1.compareBetween(0, 14, test4, 12, 26) != 0)
320 errln("compareBetween failed");
321
322 if (test1.compareBetween(10, 14, test2, 0, 4) >= 0 || test1.compareBetween(10, 14, test3, 22, 31) <= 0
323 || test1.compareBetween(10, 14, test4, 22, 26) != 0)
324 errln("compareBetween failed");
325
326 // test compare() etc. with strings that share a buffer but are not equal
327 test2=test1; // share the buffer, length() too large for the stackBuffer
328 test2.truncate(1); // change only the length, not the buffer
329 if( test1==test2 || test1<=test2 ||
330 test1.compare(test2)<=0 ||
331 test1.compareCodePointOrder(test2)<=0 ||
332 test1.compareCodePointOrder(0, INT32_MAX, test2)<=0 ||
333 test1.compareCodePointOrder(0, INT32_MAX, test2, 0, INT32_MAX)<=0 ||
334 test1.compareCodePointOrderBetween(0, INT32_MAX, test2, 0, INT32_MAX)<=0 ||
335 test1.caseCompare(test2, U_FOLD_CASE_DEFAULT)<=0
336 ) {
337 errln("UnicodeStrings that share a buffer but have different lengths compare as equal");
338 }
339
340 /* test compareCodePointOrder() */
341 {
342 /* these strings are in ascending order */
343 static const UChar strings[][4]={
344 { 0x61, 0 }, /* U+0061 */
345 { 0x20ac, 0xd801, 0 }, /* U+20ac U+d801 */
346 { 0x20ac, 0xd800, 0xdc00, 0 }, /* U+20ac U+10000 */
347 { 0xd800, 0 }, /* U+d800 */
348 { 0xd800, 0xff61, 0 }, /* U+d800 U+ff61 */
349 { 0xdfff, 0 }, /* U+dfff */
350 { 0xff61, 0xdfff, 0 }, /* U+ff61 U+dfff */
351 { 0xff61, 0xd800, 0xdc02, 0 }, /* U+ff61 U+10002 */
352 { 0xd800, 0xdc02, 0 }, /* U+10002 */
353 { 0xd84d, 0xdc56, 0 } /* U+23456 */
354 };
355 UnicodeString u[20]; // must be at least as long as strings[]
356 int32_t i;
357
358 for(i=0; i<(int32_t)(sizeof(strings)/sizeof(strings[0])); ++i) {
359 u[i]=UnicodeString(TRUE, strings[i], -1);
360 }
361
362 for(i=0; i<(int32_t)(sizeof(strings)/sizeof(strings[0])-1); ++i) {
363 if(u[i].compareCodePointOrder(u[i+1])>=0 || u[i].compareCodePointOrder(0, INT32_MAX, u[i+1].getBuffer())>=0) {
364 errln("error: UnicodeString::compareCodePointOrder() fails for string %d and the following one\n", i);
365 }
366 }
367 }
368
369 /* test caseCompare() */
370 {
371 static const UChar
372 _mixed[]= { 0x61, 0x42, 0x131, 0x3a3, 0xdf, 0x130, 0x49, 0xfb03, 0xd93f, 0xdfff, 0 },
373 _otherDefault[]= { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x69, 0x307, 0x69, 0x46, 0x66, 0x49, 0xd93f, 0xdfff, 0 },
374 _otherExcludeSpecialI[]={ 0x41, 0x62, 0x131, 0x3c3, 0x53, 0x73, 0x69, 0x131, 0x66, 0x46, 0x69, 0xd93f, 0xdfff, 0 },
375 _different[]= { 0x41, 0x62, 0x131, 0x3c3, 0x73, 0x53, 0x130, 0x49, 0x46, 0x66, 0x49, 0xd93f, 0xdffd, 0 };
376
377 UnicodeString
378 mixed(TRUE, _mixed, -1),
379 otherDefault(TRUE, _otherDefault, -1),
380 otherExcludeSpecialI(TRUE, _otherExcludeSpecialI, -1),
381 different(TRUE, _different, -1);
382
383 int8_t result;
384
385 /* test caseCompare() */
386 result=mixed.caseCompare(otherDefault, U_FOLD_CASE_DEFAULT);
387 if(result!=0 || 0!=mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_DEFAULT)) {
388 errln("error: mixed.caseCompare(other, default)=%ld instead of 0\n", result);
389 }
390 result=mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
391 if(result!=0) {
392 errln("error: mixed.caseCompare(otherExcludeSpecialI, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=%ld instead of 0\n", result);
393 }
394 result=mixed.caseCompare(otherDefault, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
395 if(result==0 || 0==mixed.caseCompareBetween(0, INT32_MAX, otherDefault, 0, INT32_MAX, U_FOLD_CASE_EXCLUDE_SPECIAL_I)) {
396 errln("error: mixed.caseCompare(other, U_FOLD_CASE_EXCLUDE_SPECIAL_I)=0 instead of !=0\n");
397 }
398
399 /* test caseCompare() */
400 result=mixed.caseCompare(different, U_FOLD_CASE_DEFAULT);
401 if(result<=0) {
402 errln("error: mixed.caseCompare(different, default)=%ld instead of positive\n", result);
403 }
404
405 /* test caseCompare() - include the folded sharp s (U+00df) with different lengths */
406 result=mixed.caseCompare(1, 4, different, 1, 5, U_FOLD_CASE_DEFAULT);
407 if(result!=0 || 0!=mixed.caseCompareBetween(1, 5, different, 1, 6, U_FOLD_CASE_DEFAULT)) {
408 errln("error: mixed.caseCompare(mixed, 1, 4, different, 1, 5, default)=%ld instead of 0\n", result);
409 }
410
411 /* test caseCompare() - stop in the middle of the sharp s (U+00df) */
412 result=mixed.caseCompare(1, 4, different, 1, 4, U_FOLD_CASE_DEFAULT);
413 if(result<=0) {
414 errln("error: mixed.caseCompare(1, 4, different, 1, 4, default)=%ld instead of positive\n", result);
415 }
416 }
417
418 // test that srcLength=-1 is handled in functions that
419 // take input const UChar */int32_t srcLength (j785)
420 {
421 static const UChar u[]={ 0x61, 0x308, 0x62, 0 };
422 UnicodeString s=UNICODE_STRING("a\\u0308b", 8).unescape();
423
424 if(s.compare(u, -1)!=0 || s.compare(0, 999, u, 0, -1)!=0) {
425 errln("error UnicodeString::compare(..., const UChar *, srcLength=-1) does not work");
426 }
427
428 if(s.compareCodePointOrder(u, -1)!=0 || s.compareCodePointOrder(0, 999, u, 0, -1)!=0) {
429 errln("error UnicodeString::compareCodePointOrder(..., const UChar *, srcLength=-1, ...) does not work");
430 }
431
432 if(s.caseCompare(u, -1, U_FOLD_CASE_DEFAULT)!=0 || s.caseCompare(0, 999, u, 0, -1, U_FOLD_CASE_DEFAULT)!=0) {
433 errln("error UnicodeString::caseCompare(..., const UChar *, srcLength=-1, ...) does not work");
434 }
435
436 if(s.indexOf(u, 1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0, 999)!=1 || s.indexOf(u+1, -1, 0)!=1) {
437 errln("error UnicodeString::indexOf(const UChar *, srcLength=-1, ...) does not work");
438 }
439
440 if(s.lastIndexOf(u, 1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0, 999)!=1 || s.lastIndexOf(u+1, -1, 0)!=1) {
441 errln("error UnicodeString::lastIndexOf(const UChar *, srcLength=-1, ...) does not work");
442 }
443
444 UnicodeString s2, s3;
445 s2.replace(0, 0, u+1, -1);
446 s3.replace(0, 0, u, 1, -1);
447 if(s.compare(1, 999, s2)!=0 || s2!=s3) {
448 errln("error UnicodeString::replace(..., const UChar *, srcLength=-1, ...) does not work");
449 }
450 }
451 }
452
453 void
TestExtract()454 UnicodeStringTest::TestExtract()
455 {
456 UnicodeString test1("Now is the time for all good men to come to the aid of their country.", "");
457 UnicodeString test2;
458 UChar test3[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
459 char test4[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
460 UnicodeString test5;
461 char test6[13] = {1, 2, 3, 4, 5, 6, 7, 8, 8, 10, 11, 12, 13};
462
463 test1.extract(11, 12, test2);
464 test1.extract(11, 12, test3);
465 if (test1.extract(11, 12, test4) != 12 || test4[12] != 0) {
466 errln("UnicodeString.extract(char *) failed to return the correct size of destination buffer.");
467 }
468
469 // test proper pinning in extractBetween()
470 test1.extractBetween(-3, 7, test5);
471 if(test5!=UNICODE_STRING("Now is ", 7)) {
472 errln("UnicodeString.extractBetween(-3, 7) did not pin properly.");
473 }
474
475 test1.extractBetween(11, 23, test5);
476 if (test1.extract(60, 71, test6) != 9) {
477 errln("UnicodeString.extract() failed to return the correct size of destination buffer for end of buffer.");
478 }
479 if (test1.extract(11, 12, test6) != 12) {
480 errln("UnicodeString.extract() failed to return the correct size of destination buffer.");
481 }
482
483 // convert test4 back to Unicode for comparison
484 UnicodeString test4b(test4, 12);
485
486 if (test1.extract(11, 12, (char *)NULL) != 12) {
487 errln("UnicodeString.extract(NULL) failed to return the correct size of destination buffer.");
488 }
489 if (test1.extract(11, -1, test6) != 0) {
490 errln("UnicodeString.extract(-1) failed to stop reading the string.");
491 }
492
493 for (int32_t i = 0; i < 12; i++) {
494 if (test1.charAt((int32_t)(11 + i)) != test2.charAt(i)) {
495 errln(UnicodeString("extracting into a UnicodeString failed at position ") + i);
496 break;
497 }
498 if (test1.charAt((int32_t)(11 + i)) != test3[i]) {
499 errln(UnicodeString("extracting into an array of UChar failed at position ") + i);
500 break;
501 }
502 if (((char)test1.charAt((int32_t)(11 + i))) != test4b.charAt(i)) {
503 errln(UnicodeString("extracting into an array of char failed at position ") + i);
504 break;
505 }
506 if (test1.charAt((int32_t)(11 + i)) != test5.charAt(i)) {
507 errln(UnicodeString("extracting with extractBetween failed at position ") + i);
508 break;
509 }
510 }
511
512 // test preflighting and overflows with invariant conversion
513 if (test1.extract(0, 10, (char *)NULL, "") != 10) {
514 errln("UnicodeString.extract(0, 10, (char *)NULL, \"\") != 10");
515 }
516
517 test4[2] = (char)0xff;
518 if (test1.extract(0, 10, test4, 2, "") != 10) {
519 errln("UnicodeString.extract(0, 10, test4, 2, \"\") != 10");
520 }
521 if (test4[2] != (char)0xff) {
522 errln("UnicodeString.extract(0, 10, test4, 2, \"\") overwrote test4[2]");
523 }
524
525 {
526 // test new, NUL-terminating extract() function
527 UnicodeString s("terminate", "");
528 UChar dest[20]={
529 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5,
530 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5, 0xa5
531 };
532 UErrorCode errorCode;
533 int32_t length;
534
535 errorCode=U_ZERO_ERROR;
536 length=s.extract((UChar *)NULL, 0, errorCode);
537 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) {
538 errln("UnicodeString.extract(NULL, 0)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)", length, s.length(), u_errorName(errorCode));
539 }
540
541 errorCode=U_ZERO_ERROR;
542 length=s.extract(dest, s.length()-1, errorCode);
543 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=s.length()) {
544 errln("UnicodeString.extract(dest too short)==%d (%s) expected %d (U_BUFFER_OVERFLOW_ERROR)",
545 length, u_errorName(errorCode), s.length());
546 }
547
548 errorCode=U_ZERO_ERROR;
549 length=s.extract(dest, s.length(), errorCode);
550 if(errorCode!=U_STRING_NOT_TERMINATED_WARNING || length!=s.length()) {
551 errln("UnicodeString.extract(dest just right without NUL)==%d (%s) expected %d (U_STRING_NOT_TERMINATED_WARNING)",
552 length, u_errorName(errorCode), s.length());
553 }
554 if(dest[length-1]!=s[length-1] || dest[length]!=0xa5) {
555 errln("UnicodeString.extract(dest just right without NUL) did not extract the string correctly");
556 }
557
558 errorCode=U_ZERO_ERROR;
559 length=s.extract(dest, s.length()+1, errorCode);
560 if(errorCode!=U_ZERO_ERROR || length!=s.length()) {
561 errln("UnicodeString.extract(dest large enough)==%d (%s) expected %d (U_ZERO_ERROR)",
562 length, u_errorName(errorCode), s.length());
563 }
564 if(dest[length-1]!=s[length-1] || dest[length]!=0 || dest[length+1]!=0xa5) {
565 errln("UnicodeString.extract(dest large enough) did not extract the string correctly");
566 }
567 }
568
569 {
570 // test new UConverter extract() and constructor
571 UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
572 char buffer[32];
573 static const char expect[]={
574 (char)0xf0, (char)0xaf, (char)0xa6, (char)0x99,
575 (char)0xf0, (char)0x9d, (char)0x85, (char)0x9f,
576 (char)0xc3, (char)0x84,
577 (char)0xe1, (char)0xbb, (char)0x90
578 };
579 UErrorCode errorCode=U_ZERO_ERROR;
580 UConverter *cnv=ucnv_open("UTF-8", &errorCode);
581 int32_t length;
582
583 if(U_SUCCESS(errorCode)) {
584 // test preflighting
585 if( (length=s.extract(NULL, 0, cnv, errorCode))!=13 ||
586 errorCode!=U_BUFFER_OVERFLOW_ERROR
587 ) {
588 errln("UnicodeString::extract(NULL, UConverter) preflighting failed (length=%ld, %s)",
589 length, u_errorName(errorCode));
590 }
591 errorCode=U_ZERO_ERROR;
592 if( (length=s.extract(buffer, 2, cnv, errorCode))!=13 ||
593 errorCode!=U_BUFFER_OVERFLOW_ERROR
594 ) {
595 errln("UnicodeString::extract(too small, UConverter) preflighting failed (length=%ld, %s)",
596 length, u_errorName(errorCode));
597 }
598
599 // try error cases
600 errorCode=U_ZERO_ERROR;
601 if( s.extract(NULL, 2, cnv, errorCode)==13 || U_SUCCESS(errorCode)) {
602 errln("UnicodeString::extract(UConverter) succeeded with an illegal destination");
603 }
604 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
605 if( s.extract(NULL, 0, cnv, errorCode)==13 || U_SUCCESS(errorCode)) {
606 errln("UnicodeString::extract(UConverter) succeeded with a previous error code");
607 }
608 errorCode=U_ZERO_ERROR;
609
610 // extract for real
611 if( (length=s.extract(buffer, sizeof(buffer), cnv, errorCode))!=13 ||
612 uprv_memcmp(buffer, expect, 13)!=0 ||
613 buffer[13]!=0 ||
614 U_FAILURE(errorCode)
615 ) {
616 errln("UnicodeString::extract(UConverter) conversion failed (length=%ld, %s)",
617 length, u_errorName(errorCode));
618 }
619 // Test again with just the converter name.
620 if( (length=s.extract(0, s.length(), buffer, sizeof(buffer), "UTF-8"))!=13 ||
621 uprv_memcmp(buffer, expect, 13)!=0 ||
622 buffer[13]!=0 ||
623 U_FAILURE(errorCode)
624 ) {
625 errln("UnicodeString::extract(\"UTF-8\") conversion failed (length=%ld, %s)",
626 length, u_errorName(errorCode));
627 }
628
629 // try the constructor
630 UnicodeString t(expect, sizeof(expect), cnv, errorCode);
631 if(U_FAILURE(errorCode) || s!=t) {
632 errln("UnicodeString(UConverter) conversion failed (%s)",
633 u_errorName(errorCode));
634 }
635
636 ucnv_close(cnv);
637 }
638 }
639 }
640
641 void
TestRemoveReplace()642 UnicodeStringTest::TestRemoveReplace()
643 {
644 UnicodeString test1("The rain in Spain stays mainly on the plain");
645 UnicodeString test2("eat SPAMburgers!");
646 UChar test3[] = { 0x53, 0x50, 0x41, 0x4d, 0x4d, 0 };
647 char test4[] = "SPAM";
648 UnicodeString& test5 = test1;
649
650 test1.replace(4, 4, test2, 4, 4);
651 test1.replace(12, 5, test3, 4);
652 test3[4] = 0;
653 test1.replace(17, 4, test3);
654 test1.replace(23, 4, test4);
655 test1.replaceBetween(37, 42, test2, 4, 8);
656
657 if (test1 != "The SPAM in SPAM SPAMs SPAMly on the SPAM")
658 errln("One of the replace methods failed:\n"
659 " expected \"The SPAM in SPAM SPAMs SPAMly on the SPAM\",\n"
660 " got \"" + test1 + "\"");
661
662 test1.remove(21, 1);
663 test1.removeBetween(26, 28);
664
665 if (test1 != "The SPAM in SPAM SPAM SPAM on the SPAM")
666 errln("One of the remove methods failed:\n"
667 " expected \"The SPAM in SPAM SPAM SPAM on the SPAM\",\n"
668 " got \"" + test1 + "\"");
669
670 for (int32_t i = 0; i < test1.length(); i++) {
671 if (test5[i] != 0x53 && test5[i] != 0x50 && test5[i] != 0x41 && test5[i] != 0x4d && test5[i] != 0x20) {
672 test1.setCharAt(i, 0x78);
673 }
674 }
675
676 if (test1 != "xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM")
677 errln("One of the remove methods failed:\n"
678 " expected \"xxx SPAM xx SPAM SPAM SPAM xx xxx SPAM\",\n"
679 " got \"" + test1 + "\"");
680
681 test1.remove();
682 if (test1.length() != 0)
683 errln("Remove() failed: expected empty string, got \"" + test1 + "\"");
684 }
685
686 void
TestSearching()687 UnicodeStringTest::TestSearching()
688 {
689 UnicodeString test1("test test ttest tetest testesteststt");
690 UnicodeString test2("test");
691 UChar testChar = 0x74;
692
693 UChar32 testChar32 = 0x20402;
694 UChar testData[]={
695 // 0 1 2 3 4 5 6 7
696 0xd841, 0xdc02, 0x0071, 0xdc02, 0xd841, 0x0071, 0xd841, 0xdc02,
697
698 // 8 9 10 11 12 13 14 15
699 0x0071, 0x0072, 0xd841, 0xdc02, 0x0071, 0xd841, 0xdc02, 0x0071,
700
701 // 16 17 18 19
702 0xdc02, 0xd841, 0x0073, 0x0000
703 };
704 UnicodeString test3(testData);
705 UnicodeString test4(testChar32);
706
707 uint16_t occurrences = 0;
708 int32_t startPos = 0;
709 for ( ;
710 startPos != -1 && startPos < test1.length();
711 (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
712 ;
713 if (occurrences != 6)
714 errln(UnicodeString("indexOf failed: expected to find 6 occurrences, found ") + occurrences);
715
716 for ( occurrences = 0, startPos = 10;
717 startPos != -1 && startPos < test1.length();
718 (startPos = test1.indexOf(test2, startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
719 ;
720 if (occurrences != 4)
721 errln(UnicodeString("indexOf with starting offset failed: "
722 "expected to find 4 occurrences, found ") + occurrences);
723
724 int32_t endPos = 28;
725 for ( occurrences = 0, startPos = 5;
726 startPos != -1 && startPos < test1.length();
727 (startPos = test1.indexOf(test2, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 4) : 0)
728 ;
729 if (occurrences != 4)
730 errln(UnicodeString("indexOf with starting and ending offsets failed: "
731 "expected to find 4 occurrences, found ") + occurrences);
732
733 //using UChar32 string
734 for ( startPos=0, occurrences=0;
735 startPos != -1 && startPos < test3.length();
736 (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
737 ;
738 if (occurrences != 4)
739 errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences);
740
741 for ( startPos=10, occurrences=0;
742 startPos != -1 && startPos < test3.length();
743 (startPos = test3.indexOf(test4, startPos)) != -1 ? (++occurrences, startPos += 2) : 0)
744 ;
745 if (occurrences != 2)
746 errln(UnicodeString("indexOf failed: expected to find 2 occurrences, found ") + occurrences);
747 //---
748
749 for ( occurrences = 0, startPos = 0;
750 startPos != -1 && startPos < test1.length();
751 (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
752 ;
753 if (occurrences != 16)
754 errln(UnicodeString("indexOf with character failed: "
755 "expected to find 16 occurrences, found ") + occurrences);
756
757 for ( occurrences = 0, startPos = 10;
758 startPos != -1 && startPos < test1.length();
759 (startPos = test1.indexOf(testChar, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
760 ;
761 if (occurrences != 12)
762 errln(UnicodeString("indexOf with character & start offset failed: "
763 "expected to find 12 occurrences, found ") + occurrences);
764
765 for ( occurrences = 0, startPos = 5, endPos = 28;
766 startPos != -1 && startPos < test1.length();
767 (startPos = test1.indexOf(testChar, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
768 ;
769 if (occurrences != 10)
770 errln(UnicodeString("indexOf with character & start & end offsets failed: "
771 "expected to find 10 occurrences, found ") + occurrences);
772
773 //testing for UChar32
774 UnicodeString subString;
775 for( occurrences =0, startPos=0; startPos < test3.length(); startPos +=1){
776 subString.append(test3, startPos, test3.length());
777 if(subString.indexOf(testChar32) != -1 ){
778 ++occurrences;
779 }
780 subString.remove();
781 }
782 if (occurrences != 14)
783 errln((UnicodeString)"indexOf failed: expected to find 14 occurrences, found " + occurrences);
784
785 for ( occurrences = 0, startPos = 0;
786 startPos != -1 && startPos < test3.length();
787 (startPos = test3.indexOf(testChar32, startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
788 ;
789 if (occurrences != 4)
790 errln((UnicodeString)"indexOf failed: expected to find 4 occurrences, found " + occurrences);
791
792 endPos=test3.length();
793 for ( occurrences = 0, startPos = 5;
794 startPos != -1 && startPos < test3.length();
795 (startPos = test3.indexOf(testChar32, startPos, endPos - startPos)) != -1 ? (++occurrences, startPos += 1) : 0)
796 ;
797 if (occurrences != 3)
798 errln((UnicodeString)"indexOf with character & start & end offsets failed: expected to find 2 occurrences, found " + occurrences);
799 //---
800
801 if(test1.lastIndexOf(test2)!=29) {
802 errln("test1.lastIndexOf(test2)!=29");
803 }
804
805 if(test1.lastIndexOf(test2, 15)!=29 || test1.lastIndexOf(test2, 29)!=29 || test1.lastIndexOf(test2, 30)!=-1) {
806 errln("test1.lastIndexOf(test2, start) failed");
807 }
808
809 for ( occurrences = 0, startPos = 32;
810 startPos != -1;
811 (startPos = test1.lastIndexOf(test2, 5, startPos - 5)) != -1 ? ++occurrences : 0)
812 ;
813 if (occurrences != 4)
814 errln(UnicodeString("lastIndexOf with starting and ending offsets failed: "
815 "expected to find 4 occurrences, found ") + occurrences);
816
817 for ( occurrences = 0, startPos = 32;
818 startPos != -1;
819 (startPos = test1.lastIndexOf(testChar, 5, startPos - 5)) != -1 ? ++occurrences : 0)
820 ;
821 if (occurrences != 11)
822 errln(UnicodeString("lastIndexOf with character & start & end offsets failed: "
823 "expected to find 11 occurrences, found ") + occurrences);
824
825 //testing UChar32
826 startPos=test3.length();
827 for ( occurrences = 0;
828 startPos != -1;
829 (startPos = test3.lastIndexOf(testChar32, 5, startPos - 5)) != -1 ? ++occurrences : 0)
830 ;
831 if (occurrences != 3)
832 errln((UnicodeString)"lastIndexOf with character & start & end offsets failed: expected to find 3 occurrences, found " + occurrences);
833
834
835 for ( occurrences = 0, endPos = test3.length(); endPos > 0; endPos -= 1){
836 subString.remove();
837 subString.append(test3, 0, endPos);
838 if(subString.lastIndexOf(testChar32) != -1 ){
839 ++occurrences;
840 }
841 }
842 if (occurrences != 18)
843 errln((UnicodeString)"indexOf failed: expected to find 18 occurrences, found " + occurrences);
844 //---
845
846 // test that indexOf(UChar32) and lastIndexOf(UChar32)
847 // do not find surrogate code points when they are part of matched pairs
848 // (= part of supplementary code points)
849 // Jitterbug 1542
850 if(test3.indexOf((UChar32)0xd841) != 4 || test3.indexOf((UChar32)0xdc02) != 3) {
851 errln("error: UnicodeString::indexOf(UChar32 surrogate) finds a partial supplementary code point");
852 }
853 if( UnicodeString(test3, 0, 17).lastIndexOf((UChar)0xd841, 0) != 4 ||
854 UnicodeString(test3, 0, 17).lastIndexOf((UChar32)0xd841, 2) != 4 ||
855 test3.lastIndexOf((UChar32)0xd841, 0, 17) != 4 || test3.lastIndexOf((UChar32)0xdc02, 0, 17) != 16
856 ) {
857 errln("error: UnicodeString::lastIndexOf(UChar32 surrogate) finds a partial supplementary code point");
858 }
859 }
860
861 void
TestSpacePadding()862 UnicodeStringTest::TestSpacePadding()
863 {
864 UnicodeString test1("hello");
865 UnicodeString test2(" there");
866 UnicodeString test3("Hi! How ya doin'? Beautiful day, isn't it?");
867 UnicodeString test4;
868 UBool returnVal;
869 UnicodeString expectedValue;
870
871 returnVal = test1.padLeading(15);
872 expectedValue = " hello";
873 if (returnVal == FALSE || test1 != expectedValue)
874 errln("padLeading() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
875
876 returnVal = test2.padTrailing(15);
877 expectedValue = " there ";
878 if (returnVal == FALSE || test2 != expectedValue)
879 errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
880
881 expectedValue = test3;
882 returnVal = test3.padTrailing(15);
883 if (returnVal == TRUE || test3 != expectedValue)
884 errln("padTrailing() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
885
886 expectedValue = "hello";
887 test4.setTo(test1).trim();
888
889 if (test4 != expectedValue || test1 == expectedValue || test4 != expectedValue)
890 errln("trim(UnicodeString&) failed");
891
892 test1.trim();
893 if (test1 != expectedValue)
894 errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
895
896 test2.trim();
897 expectedValue = "there";
898 if (test2 != expectedValue)
899 errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
900
901 test3.trim();
902 expectedValue = "Hi! How ya doin'? Beautiful day, isn't it?";
903 if (test3 != expectedValue)
904 errln("trim() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
905
906 returnVal = test1.truncate(15);
907 expectedValue = "hello";
908 if (returnVal == TRUE || test1 != expectedValue)
909 errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
910
911 returnVal = test2.truncate(15);
912 expectedValue = "there";
913 if (returnVal == TRUE || test2 != expectedValue)
914 errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test2 + "\".");
915
916 returnVal = test3.truncate(15);
917 expectedValue = "Hi! How ya doi";
918 if (returnVal == FALSE || test3 != expectedValue)
919 errln("truncate() failed: expected \"" + expectedValue + "\", got \"" + test3 + "\".");
920 }
921
922 void
TestPrefixAndSuffix()923 UnicodeStringTest::TestPrefixAndSuffix()
924 {
925 UnicodeString test1("Now is the time for all good men to come to the aid of their country.");
926 UnicodeString test2("Now");
927 UnicodeString test3("country.");
928 UnicodeString test4("count");
929
930 if (!test1.startsWith(test2) || !test1.startsWith(test2, 0, test2.length())) {
931 errln("startsWith() failed: \"" + test2 + "\" should be a prefix of \"" + test1 + "\".");
932 }
933
934 if (test1.startsWith(test3) ||
935 test1.startsWith(test3.getBuffer(), test3.length()) ||
936 test1.startsWith(test3.getTerminatedBuffer(), 0, -1)
937 ) {
938 errln("startsWith() failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test1 + "\".");
939 }
940
941 if (test1.endsWith(test2)) {
942 errln("endsWith() failed: \"" + test2 + "\" shouldn't be a suffix of \"" + test1 + "\".");
943 }
944
945 if (!test1.endsWith(test3)) {
946 errln("endsWith(test3) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
947 }
948 if (!test1.endsWith(test3, 0, INT32_MAX)) {
949 errln("endsWith(test3, 0, INT32_MAX) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
950 }
951
952 if(!test1.endsWith(test3.getBuffer(), test3.length())) {
953 errln("endsWith(test3.getBuffer(), test3.length()) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
954 }
955 if(!test1.endsWith(test3.getTerminatedBuffer(), 0, -1)) {
956 errln("endsWith(test3.getTerminatedBuffer(), 0, -1) failed: \"" + test3 + "\" should be a suffix of \"" + test1 + "\".");
957 }
958
959 if (!test3.startsWith(test4)) {
960 errln("endsWith(test4) failed: \"" + test4 + "\" should be a prefix of \"" + test3 + "\".");
961 }
962
963 if (test4.startsWith(test3)) {
964 errln("startsWith(test3) failed: \"" + test3 + "\" shouldn't be a prefix of \"" + test4 + "\".");
965 }
966 }
967
968 void
TestStartsWithAndEndsWithNulTerminated()969 UnicodeStringTest::TestStartsWithAndEndsWithNulTerminated() {
970 UnicodeString test("abcde");
971 const UChar ab[] = { 0x61, 0x62, 0 };
972 const UChar de[] = { 0x64, 0x65, 0 };
973 assertTrue("abcde.startsWith(ab, -1)", test.startsWith(ab, -1));
974 assertTrue("abcde.startsWith(ab, 0, -1)", test.startsWith(ab, 0, -1));
975 assertTrue("abcde.endsWith(de, -1)", test.endsWith(de, -1));
976 assertTrue("abcde.endsWith(de, 0, -1)", test.endsWith(de, 0, -1));
977 }
978
979 void
TestFindAndReplace()980 UnicodeStringTest::TestFindAndReplace()
981 {
982 UnicodeString test1("One potato, two potato, three potato, four\n");
983 UnicodeString test2("potato");
984 UnicodeString test3("MISSISSIPPI");
985
986 UnicodeString expectedValue;
987
988 test1.findAndReplace(test2, test3);
989 expectedValue = "One MISSISSIPPI, two MISSISSIPPI, three MISSISSIPPI, four\n";
990 if (test1 != expectedValue)
991 errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
992 test1.findAndReplace(2, 32, test3, test2);
993 expectedValue = "One potato, two potato, three MISSISSIPPI, four\n";
994 if (test1 != expectedValue)
995 errln("findAndReplace failed: expected \"" + expectedValue + "\", got \"" + test1 + "\".");
996 }
997
998 void
TestReverse()999 UnicodeStringTest::TestReverse()
1000 {
1001 UnicodeString test("backwards words say to used I");
1002
1003 test.reverse();
1004 test.reverse(2, 4);
1005 test.reverse(7, 2);
1006 test.reverse(10, 3);
1007 test.reverse(14, 5);
1008 test.reverse(20, 9);
1009
1010 if (test != "I used to say words backwards")
1011 errln("reverse() failed: Expected \"I used to say words backwards\",\n got \""
1012 + test + "\"");
1013
1014 test=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
1015 test.reverse();
1016 if(test.char32At(0)!=0x1ed0 || test.char32At(1)!=0xc4 || test.char32At(2)!=0x1d15f || test.char32At(4)!=0x2f999) {
1017 errln("reverse() failed with supplementary characters");
1018 }
1019
1020 // Test case for ticket #8091:
1021 // UnicodeString::reverse() failed to see a lead surrogate in the middle of
1022 // an odd-length string that contains no other lead surrogates.
1023 test=UNICODE_STRING_SIMPLE("ab\\U0001F4A9e").unescape();
1024 UnicodeString expected=UNICODE_STRING_SIMPLE("e\\U0001F4A9ba").unescape();
1025 test.reverse();
1026 if(test!=expected) {
1027 errln("reverse() failed with only lead surrogate in the middle");
1028 }
1029 }
1030
/**
 * Covers assorted UnicodeString entry points that have no test of their own:
 * getBuffer(minCapacity)/releaseBuffer(), getBuffer() const,
 * getTerminatedBuffer(), setTo() aliasing, append/insert/replace overloads,
 * hasMetaData(), and getTerminatedBuffer() on strings that share a buffer.
 *
 * The steps are order-dependent: each section relies on the state that the
 * previous section left in test1/test2.
 */
void
UnicodeStringTest::TestMiscellaneous()
{
    UnicodeString test1("This is a test");
    UnicodeString test2("This is a test");
    UnicodeString test3("Me too!");   // not referenced again in this function

    // test getBuffer(minCapacity) and releaseBuffer()
    test1=UnicodeString(); // make sure that it starts with its stackBuffer
    UChar *p=test1.getBuffer(20);
    if(test1.getCapacity()<20) {
        errln("UnicodeString::getBuffer(20).getCapacity()<20");
    }

    // While the buffer from getBuffer(20) has not been released, the checks
    // below expect the string to look empty and to hand out no further buffers.
    test1.append((UChar)7); // must not be able to modify the string here
    test1.setCharAt(3, 7);
    test1.reverse();
    if( test1.length()!=0 ||
        test1.charAt(0)!=0xffff || test1.charAt(3)!=0xffff ||
        test1.getBuffer(10)!=0 || test1.getBuffer()!=0
    ) {
        errln("UnicodeString::getBuffer(minCapacity) allows read or write access to the UnicodeString");
    }

    // Write through the raw pointer, then hand the buffer back with length 3.
    p[0]=1;
    p[1]=2;
    p[2]=3;
    test1.releaseBuffer(3);
    test1.append((UChar)4);

    if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) {
        errln("UnicodeString::releaseBuffer(newLength) does not properly reallow access to the UnicodeString");
    }

    // test releaseBuffer() without getBuffer(minCapacity) - must not have any effect
    test1.releaseBuffer(1);
    if(test1.length()!=4 || test1.charAt(0)!=1 || test1.charAt(1)!=2 || test1.charAt(2)!=3 || test1.charAt(3)!=4) {
        errln("UnicodeString::releaseBuffer(newLength) without getBuffer(minCapacity) changed the UnicodeString");
    }

    // test getBuffer(const)
    // Two consecutive calls must both yield readable contents.
    const UChar *q=test1.getBuffer(), *r=test1.getBuffer();
    if( test1.length()!=4 ||
        q[0]!=1 || q[1]!=2 || q[2]!=3 || q[3]!=4 ||
        r[0]!=1 || r[1]!=2 || r[2]!=3 || r[3]!=4
    ) {
        errln("UnicodeString::getBuffer(const) does not return a usable buffer pointer");
    }

    // test releaseBuffer() with a NUL-terminated buffer
    // A NUL is written at index 2, so the length found on release should be 2.
    test1.getBuffer(20)[2]=0;
    test1.releaseBuffer(); // implicit -1
    if(test1.length()!=2 || test1.charAt(0)!=1 || test1.charAt(1) !=2) {
        errln("UnicodeString::releaseBuffer(-1) does not properly set the length of the UnicodeString");
    }

    // test releaseBuffer() with a non-NUL-terminated buffer
    p=test1.getBuffer(256);
    for(int32_t i=0; i<test1.getCapacity(); ++i) {
        p[i]=(UChar)1; // fill the buffer with all non-NUL code units
    }
    test1.releaseBuffer(); // implicit -1
    // With no NUL anywhere, the length is expected to equal the capacity.
    if(test1.length()!=test1.getCapacity() || test1.charAt(1)!=1 || test1.charAt(100)!=1 || test1.charAt(test1.getCapacity()-1)!=1) {
        errln("UnicodeString::releaseBuffer(-1 but no NUL) does not properly set the length of the UnicodeString");
    }

    // test getTerminatedBuffer()
    test1=UnicodeString("This is another test.", "");
    test2=UnicodeString("This is another test.", "");
    q=test1.getTerminatedBuffer();
    if(q[test1.length()]!=0 || test1!=test2 || test2.compare(q, -1)!=0) {
        errln("getTerminatedBuffer()[length]!=0");
    }

    // u[3] is 8, not NUL: with a length-3 setTo(FALSE, ...) the check below
    // expects getTerminatedBuffer() to return a different pointer than u,
    // holding 5, 6, 7 followed by a NUL.
    const UChar u[]={ 5, 6, 7, 8, 0 };
    test1.setTo(FALSE, u, 3);
    q=test1.getTerminatedBuffer();
    if(q==u || q[0]!=5 || q[1]!=6 || q[2]!=7 || q[3]!=0) {
        errln("UnicodeString(u[3]).getTerminatedBuffer() returns a bad buffer");
    }

    // With setTo(TRUE, u, -1) the check expects the original pointer back
    // and a length of 4 (u is NUL-terminated at index 4).
    test1.setTo(TRUE, u, -1);
    q=test1.getTerminatedBuffer();
    if(q!=u || test1.length()!=4 || q[3]!=8 || q[4]!=0) {
        errln("UnicodeString(u[-1]).getTerminatedBuffer() returns a bad buffer");
    }

    // append(const UChar *, start, length) with length -1
    test1=UNICODE_STRING("la", 2);
    test1.append(UNICODE_STRING(" lila", 5).getTerminatedBuffer(), 0, -1);
    if(test1!=UNICODE_STRING("la lila", 7)) {
        errln("UnicodeString::append(const UChar *, start, length) failed");
    }

    // insert(start, const UnicodeString &, srcStart, srcLength) with an over-long length
    test1.insert(3, UNICODE_STRING("dudum ", 6), 0, INT32_MAX);
    if(test1!=UNICODE_STRING("la dudum lila", 13)) {
        errln("UnicodeString::insert(start, const UniStr &, start, length) failed");
    }

    // insert(start, const UChar *, length) with length -1; ucs is "hm " plus NUL
    static const UChar ucs[]={ 0x68, 0x6d, 0x20, 0 };
    test1.insert(9, ucs, -1);
    if(test1!=UNICODE_STRING("la dudum hm lila", 16)) {
        errln("UnicodeString::insert(start, const UChar *, length) failed");
    }

    // replace(start, length, UChar): "hm" becomes "+"
    test1.replace(9, 2, (UChar)0x2b);
    if(test1!=UNICODE_STRING("la dudum + lila", 15)) {
        errln("UnicodeString::replace(start, length, UChar) failed");
    }

    if(test1.hasMetaData() || UnicodeString().hasMetaData()) {
        errln("UnicodeString::hasMetaData() returns TRUE");
    }

    // test getTerminatedBuffer() on a truncated, shared, heap-allocated string
    test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
    test1.truncate(36); // ensure length()<getCapacity()
    test2=test1; // share the buffer
    test1.truncate(5);
    if(test1.length()!=5 || test1.getTerminatedBuffer()[5]!=0) {
        errln("UnicodeString(shared buffer).truncate() failed");
    }
    // Terminating test1 must not have written a NUL into test2's contents.
    if(test2.length()!=36 || test2[5]!=0x66 || u_strlen(test2.getTerminatedBuffer())!=36) {
        errln("UnicodeString(shared buffer).truncate().getTerminatedBuffer() "
              "modified another copy of the string!");
    }
    // Same scenario, but emptying test1 with remove() instead of truncate(5).
    test1=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789.");
    test1.truncate(36); // ensure length()<getCapacity()
    test2=test1; // share the buffer
    test1.remove();
    if(test1.length()!=0 || test1.getTerminatedBuffer()[0]!=0) {
        errln("UnicodeString(shared buffer).remove() failed");
    }
    if(test2.length()!=36 || test2[0]!=0x61 || u_strlen(test2.getTerminatedBuffer())!=36) {
        errln("UnicodeString(shared buffer).remove().getTerminatedBuffer() "
              "modified another copy of the string!");
    }

    // ticket #9740
    // trim() on a read-only alias of "hm " must shorten it to 2, and the
    // terminated buffer must agree with that length.
    test1.setTo(TRUE, ucs, 3);
    assertEquals("length of read-only alias", 3, test1.length());
    test1.trim();
    assertEquals("length of read-only alias after trim()", 2, test1.length());
    assertEquals("length of terminated buffer of read-only alias + trim()",
                 2, u_strlen(test1.getTerminatedBuffer()));
}
1176
1177 void
TestStackAllocation()1178 UnicodeStringTest::TestStackAllocation()
1179 {
1180 UChar testString[] ={
1181 0x54, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x61, 0x20, 0x63, 0x72, 0x61, 0x7a, 0x79, 0x20, 0x74, 0x65, 0x73, 0x74, 0x2e, 0 };
1182 UChar guardWord = 0x4DED;
1183 UnicodeString* test = 0;
1184
1185 test = new UnicodeString(testString);
1186 if (*test != "This is a crazy test.")
1187 errln("Test string failed to initialize properly.");
1188 if (guardWord != 0x04DED)
1189 errln("Test string initialization overwrote guard word!");
1190
1191 test->insert(8, "only ");
1192 test->remove(15, 6);
1193 if (*test != "This is only a test.")
1194 errln("Manipulation of test string failed to work right.");
1195 if (guardWord != 0x4DED)
1196 errln("Manipulation of test string overwrote guard word!");
1197
1198 // we have to deinitialize and release the backing store by calling the destructor
1199 // explicitly, since we can't overload operator delete
1200 delete test;
1201
1202 UChar workingBuffer[] = {
1203 0x4e, 0x6f, 0x77, 0x20, 0x69, 0x73, 0x20, 0x74, 0x68, 0x65, 0x20, 0x74, 0x69, 0x6d, 0x65, 0x20,
1204 0x66, 0x6f, 0x72, 0x20, 0x61, 0x6c, 0x6c, 0x20, 0x6d, 0x65, 0x6e, 0x20, 0x74, 0x6f, 0x20,
1205 0x63, 0x6f, 0x6d, 0x65, 0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1206 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1207 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1208 UChar guardWord2 = 0x4DED;
1209
1210 test = new UnicodeString(workingBuffer, 35, 100);
1211 if (*test != "Now is the time for all men to come")
1212 errln("Stack-allocated backing store failed to initialize correctly.");
1213 if (guardWord2 != 0x4DED)
1214 errln("Stack-allocated backing store overwrote guard word!");
1215
1216 test->insert(24, "good ");
1217 if (*test != "Now is the time for all good men to come")
1218 errln("insert() on stack-allocated UnicodeString didn't work right");
1219 if (guardWord2 != 0x4DED)
1220 errln("insert() on stack-allocated UnicodeString overwrote guard word!");
1221
1222 if (workingBuffer[24] != 0x67)
1223 errln("insert() on stack-allocated UnicodeString didn't affect backing store");
1224
1225 *test += " to the aid of their country.";
1226 if (*test != "Now is the time for all good men to come to the aid of their country.")
1227 errln("Stack-allocated UnicodeString overflow didn't work");
1228 if (guardWord2 != 0x4DED)
1229 errln("Stack-allocated UnicodeString overflow overwrote guard word!");
1230
1231 *test = "ha!";
1232 if (*test != "ha!")
1233 errln("Assignment to stack-allocated UnicodeString didn't work");
1234 if (workingBuffer[0] != 0x4e)
1235 errln("Change to UnicodeString after overflow are still affecting original buffer");
1236 if (guardWord2 != 0x4DED)
1237 errln("Change to UnicodeString after overflow overwrote guard word!");
1238
1239 // test read-only aliasing with setTo()
1240 workingBuffer[0] = 0x20ac;
1241 workingBuffer[1] = 0x125;
1242 workingBuffer[2] = 0;
1243 test->setTo(TRUE, workingBuffer, 2);
1244 if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x125) {
1245 errln("UnicodeString.setTo(readonly alias) does not alias correctly");
1246 }
1247
1248 UnicodeString *c=(UnicodeString *)test->clone();
1249
1250 workingBuffer[1] = 0x109;
1251 if(test->charAt(1) != 0x109) {
1252 errln("UnicodeString.setTo(readonly alias) made a copy: did not see change in buffer");
1253 }
1254
1255 if(c->length() != 2 || c->charAt(1) != 0x125) {
1256 errln("clone(alias) did not copy the buffer");
1257 }
1258 delete c;
1259
1260 test->setTo(TRUE, workingBuffer, -1);
1261 if(test->length() != 2 || test->charAt(0) != 0x20ac || test->charAt(1) != 0x109) {
1262 errln("UnicodeString.setTo(readonly alias, length -1) does not alias correctly");
1263 }
1264
1265 test->setTo(FALSE, workingBuffer, -1);
1266 if(!test->isBogus()) {
1267 errln("UnicodeString.setTo(unterminated readonly alias, length -1) does not result in isBogus()");
1268 }
1269
1270 delete test;
1271
1272 test=new UnicodeString();
1273 UChar buffer[]={0x0061, 0x0062, 0x20ac, 0x0043, 0x0042, 0x0000};
1274 test->setTo(buffer, 4, 10);
1275 if(test->length() !=4 || test->charAt(0) != 0x0061 || test->charAt(1) != 0x0062 ||
1276 test->charAt(2) != 0x20ac || test->charAt(3) != 0x0043){
1277 errln((UnicodeString)"UnicodeString.setTo(UChar*, length, capacity) does not work correctly\n" + prettify(*test));
1278 }
1279 delete test;
1280
1281
1282 // test the UChar32 constructor
1283 UnicodeString c32Test((UChar32)0x10ff2a);
1284 if( c32Test.length() != U16_LENGTH(0x10ff2a) ||
1285 c32Test.char32At(c32Test.length() - 1) != 0x10ff2a
1286 ) {
1287 errln("The UnicodeString(UChar32) constructor does not work with a 0x10ff2a filler");
1288 }
1289
1290 // test the (new) capacity constructor
1291 UnicodeString capTest(5, (UChar32)0x2a, 5);
1292 if( capTest.length() != 5 * U16_LENGTH(0x2a) ||
1293 capTest.char32At(0) != 0x2a ||
1294 capTest.char32At(4) != 0x2a
1295 ) {
1296 errln("The UnicodeString capacity constructor does not work with an ASCII filler");
1297 }
1298
1299 capTest = UnicodeString(5, (UChar32)0x10ff2a, 5);
1300 if( capTest.length() != 5 * U16_LENGTH(0x10ff2a) ||
1301 capTest.char32At(0) != 0x10ff2a ||
1302 capTest.char32At(4) != 0x10ff2a
1303 ) {
1304 errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
1305 }
1306
1307 capTest = UnicodeString(5, (UChar32)0, 0);
1308 if(capTest.length() != 0) {
1309 errln("The UnicodeString capacity constructor does not work with a 0x10ff2a filler");
1310 }
1311 }
1312
1313 /**
1314 * Test the unescape() function.
1315 */
TestUnescape(void)1316 void UnicodeStringTest::TestUnescape(void) {
1317 UnicodeString IN("abc\\u4567 \\n\\r \\U00101234xyz\\x1\\x{5289}\\x1b", -1, US_INV);
1318 UnicodeString OUT("abc");
1319 OUT.append((UChar)0x4567);
1320 OUT.append(" ");
1321 OUT.append((UChar)0xA);
1322 OUT.append((UChar)0xD);
1323 OUT.append(" ");
1324 OUT.append((UChar32)0x00101234);
1325 OUT.append("xyz");
1326 OUT.append((UChar32)1).append((UChar32)0x5289).append((UChar)0x1b);
1327 UnicodeString result = IN.unescape();
1328 if (result != OUT) {
1329 errln("FAIL: " + prettify(IN) + ".unescape() -> " +
1330 prettify(result) + ", expected " +
1331 prettify(OUT));
1332 }
1333
1334 // test that an empty string is returned in case of an error
1335 if (!UNICODE_STRING("wrong \\u sequence", 17).unescape().isEmpty()) {
1336 errln("FAIL: unescaping of a string with an illegal escape sequence did not return an empty string");
1337 }
1338 }
1339
1340 /* test code point counting functions --------------------------------------- */
1341
1342 /* reference implementation of UnicodeString::hasMoreChar32Than() */
1343 static int32_t
_refUnicodeStringHasMoreChar32Than(const UnicodeString & s,int32_t start,int32_t length,int32_t number)1344 _refUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) {
1345 int32_t count=s.countChar32(start, length);
1346 return count>number;
1347 }
1348
1349 /* compare the real function against the reference */
1350 void
_testUnicodeStringHasMoreChar32Than(const UnicodeString & s,int32_t start,int32_t length,int32_t number)1351 UnicodeStringTest::_testUnicodeStringHasMoreChar32Than(const UnicodeString &s, int32_t start, int32_t length, int32_t number) {
1352 if(s.hasMoreChar32Than(start, length, number)!=_refUnicodeStringHasMoreChar32Than(s, start, length, number)) {
1353 errln("hasMoreChar32Than(%d, %d, %d)=%hd is wrong\n",
1354 start, length, number, s.hasMoreChar32Than(start, length, number));
1355 }
1356 }
1357
1358 void
TestCountChar32(void)1359 UnicodeStringTest::TestCountChar32(void) {
1360 {
1361 UnicodeString s=UNICODE_STRING("\\U0002f999\\U0001d15f\\u00c4\\u1ed0", 32).unescape();
1362
1363 // test countChar32()
1364 // note that this also calls and tests u_countChar32(length>=0)
1365 if(
1366 s.countChar32()!=4 ||
1367 s.countChar32(1)!=4 ||
1368 s.countChar32(2)!=3 ||
1369 s.countChar32(2, 3)!=2 ||
1370 s.countChar32(2, 0)!=0
1371 ) {
1372 errln("UnicodeString::countChar32() failed");
1373 }
1374
1375 // NUL-terminate the string buffer and test u_countChar32(length=-1)
1376 const UChar *buffer=s.getTerminatedBuffer();
1377 if(
1378 u_countChar32(buffer, -1)!=4 ||
1379 u_countChar32(buffer+1, -1)!=4 ||
1380 u_countChar32(buffer+2, -1)!=3 ||
1381 u_countChar32(buffer+3, -1)!=3 ||
1382 u_countChar32(buffer+4, -1)!=2 ||
1383 u_countChar32(buffer+5, -1)!=1 ||
1384 u_countChar32(buffer+6, -1)!=0
1385 ) {
1386 errln("u_countChar32(length=-1) failed");
1387 }
1388
1389 // test u_countChar32() with bad input
1390 if(u_countChar32(NULL, 5)!=0 || u_countChar32(buffer, -2)!=0) {
1391 errln("u_countChar32(bad input) failed (returned non-zero counts)");
1392 }
1393 }
1394
1395 /* test data and variables for hasMoreChar32Than() */
1396 static const UChar str[]={
1397 0x61, 0x62, 0xd800, 0xdc00,
1398 0xd801, 0xdc01, 0x63, 0xd802,
1399 0x64, 0xdc03, 0x65, 0x66,
1400 0xd804, 0xdc04, 0xd805, 0xdc05,
1401 0x67
1402 };
1403 UnicodeString string(str, UPRV_LENGTHOF(str));
1404 int32_t start, length, number;
1405
1406 /* test hasMoreChar32Than() */
1407 for(length=string.length(); length>=0; --length) {
1408 for(start=0; start<=length; ++start) {
1409 for(number=-1; number<=((length-start)+2); ++number) {
1410 _testUnicodeStringHasMoreChar32Than(string, start, length-start, number);
1411 }
1412 }
1413 }
1414
1415 /* test hasMoreChar32Than() with pinning */
1416 for(start=-1; start<=string.length()+1; ++start) {
1417 for(number=-1; number<=((string.length()-start)+2); ++number) {
1418 _testUnicodeStringHasMoreChar32Than(string, start, 0x7fffffff, number);
1419 }
1420 }
1421
1422 /* test hasMoreChar32Than() with a bogus string */
1423 string.setToBogus();
1424 for(length=-1; length<=1; ++length) {
1425 for(start=-1; start<=length; ++start) {
1426 for(number=-1; number<=((length-start)+2); ++number) {
1427 _testUnicodeStringHasMoreChar32Than(string, start, length-start, number);
1428 }
1429 }
1430 }
1431 }
1432
/**
 * Tests the "bogus" state of UnicodeString: how a string becomes bogus,
 * which operations must leave it bogus, which operations revive it, how
 * NULL primitive inputs are treated, and how bogus strings compare.
 *
 * Order matters: test1 is deliberately made bogus part-way through and is
 * used as a bogus operand in the comparison checks at the end.
 */
void
UnicodeStringTest::TestBogus() {
    UnicodeString test1("This is a test");
    UnicodeString test2("This is a test");
    UnicodeString test3("Me too!");

    // test isBogus() and setToBogus()
    if (test1.isBogus() || test2.isBogus() || test3.isBogus()) {
        errln("A string returned TRUE for isBogus()!");
    }

    // NULL pointers are treated like empty strings
    // use other illegal arguments to make a bogus string
    test3.setTo(FALSE, test1.getBuffer(), -2);
    if(!test3.isBogus()) {
        errln("A bogus string returned FALSE for isBogus()!");
    }
    // Equal strings must hash equally; the bogus string is expected to hash differently.
    if (test1.hashCode() != test2.hashCode() || test1.hashCode() == test3.hashCode()) {
        errln("hashCode() failed");
    }
    // Every buffer accessor is expected to return 0 for a bogus string.
    if(test3.getBuffer()!=0 || test3.getBuffer(20)!=0 || test3.getTerminatedBuffer()!=0) {
        errln("bogus.getBuffer()!=0");
    }
    if (test1.indexOf(test3) != -1) {
        errln("bogus.indexOf() != -1");
    }
    if (test1.lastIndexOf(test3) != -1) {
        errln("bogus.lastIndexOf() != -1");
    }
    // In comparisons the bogus string is expected to sort before a normal string.
    if (test1.caseCompare(test3, U_FOLD_CASE_DEFAULT) != 1 || test3.caseCompare(test1, U_FOLD_CASE_DEFAULT) != -1) {
        errln("caseCompare() doesn't work with bogus strings");
    }
    if (test1.compareCodePointOrder(test3) != 1 || test3.compareCodePointOrder(test1) != -1) {
        errln("compareCodePointOrder() doesn't work with bogus strings");
    }

    // verify that non-assignment modifications fail and do not revive a bogus string
    test3.setToBogus();
    test3.append((UChar)0x61);
    if(!test3.isBogus() || test3.getBuffer()!=0) {
        errln("bogus.append('a') worked but must not");
    }

    test3.setToBogus();
    test3.findAndReplace(UnicodeString((UChar)0x61), test2);
    if(!test3.isBogus() || test3.getBuffer()!=0) {
        errln("bogus.findAndReplace() worked but must not");
    }

    test3.setToBogus();
    test3.trim();
    if(!test3.isBogus() || test3.getBuffer()!=0) {
        errln("bogus.trim() revived bogus but must not");
    }

    test3.setToBogus();
    test3.remove(1);
    if(!test3.isBogus() || test3.getBuffer()!=0) {
        errln("bogus.remove(1) revived bogus but must not");
    }

    test3.setToBogus();
    if(!test3.setCharAt(0, 0x62).isBogus() || !test3.isEmpty()) {
        errln("bogus.setCharAt(0, 'b') worked but must not");
    }

    test3.setToBogus();
    if(test3.truncate(1) || !test3.isBogus() || !test3.isEmpty()) {
        errln("bogus.truncate(1) revived bogus but must not");
    }

    // verify that assignments revive a bogus string
    test3.setToBogus();
    if(!test3.isBogus() || (test3=test1).isBogus() || test3!=test1) {
        errln("bogus.operator=() failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.fastCopyFrom(test1).isBogus() || test3!=test1) {
        errln("bogus.fastCopyFrom() failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(test1).isBogus() || test3!=test1) {
        errln("bogus.setTo(UniStr) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(test1, 0).isBogus() || test3!=test1) {
        errln("bogus.setTo(UniStr, 0) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(test1, 0, 0x7fffffff).isBogus() || test3!=test1) {
        errln("bogus.setTo(UniStr, 0, len) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(test1.getBuffer(), test1.length()).isBogus() || test3!=test1) {
        errln("bogus.setTo(const UChar *, len) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo((UChar)0x2028).isBogus() || test3!=UnicodeString((UChar)0x2028)) {
        errln("bogus.setTo(UChar) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo((UChar32)0x1d157).isBogus() || test3!=UnicodeString((UChar32)0x1d157)) {
        errln("bogus.setTo(UChar32) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(FALSE, test1.getBuffer(), test1.length()).isBogus() || test3!=test1) {
        errln("bogus.setTo(readonly alias) failed");
    }

    // writable alias to another string's buffer: very bad idea, just convenient for this test
    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo((UChar *)test1.getBuffer(), test1.length(), test1.getCapacity()).isBogus() || test3!=test1) {
        errln("bogus.setTo(writable alias) failed");
    }

    // verify simple, documented ways to turn a bogus string into an empty one
    test3.setToBogus();
    if(!test3.isBogus() || (test3=UnicodeString()).isBogus() || !test3.isEmpty()) {
        errln("bogus.operator=(UnicodeString()) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(UnicodeString()).isBogus() || !test3.isEmpty()) {
        errln("bogus.setTo(UnicodeString()) failed");
    }

    // Unlike remove(1) above, remove() with no arguments is expected to revive the string.
    test3.setToBogus();
    if(test3.remove().isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) {
        errln("bogus.remove() failed");
    }

    test3.setToBogus();
    if(test3.remove(0, INT32_MAX).isBogus() || test3.getBuffer()==0 || !test3.isEmpty()) {
        errln("bogus.remove(0, INT32_MAX) failed");
    }

    // Unlike truncate(1) above, truncate(0) is expected to revive the string
    // (while still returning FALSE).
    test3.setToBogus();
    if(test3.truncate(0) || test3.isBogus() || !test3.isEmpty()) {
        errln("bogus.truncate(0) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo((UChar32)-1).isBogus() || !test3.isEmpty()) {
        errln("bogus.setTo((UChar32)-1) failed");
    }

    static const UChar nul=0;

    test3.setToBogus();
    if(!test3.isBogus() || test3.setTo(&nul, 0).isBogus() || !test3.isEmpty()) {
        errln("bogus.setTo(&nul, 0) failed");
    }

    test3.setToBogus();
    if(!test3.isBogus() || test3.getBuffer()!=0) {
        errln("setToBogus() failed to make a string bogus");
    }

    // Assigning a bogus string makes the target bogus as well.
    // From here on test1 is bogus; the comparison checks at the end rely on that.
    test3.setToBogus();
    if(test1.isBogus() || !(test1=test3).isBogus()) {
        errln("normal=bogus failed to make the left string bogus");
    }

    // test that NULL primitive input string values are treated like
    // empty strings, not errors (bogus)
    test2.setTo((UChar32)0x10005);
    if(test2.insert(1, NULL, 1).length()!=2) {
        errln("UniStr.insert(...NULL...) should not modify the string but does");
    }

    UErrorCode errorCode=U_ZERO_ERROR;
    UnicodeString
        test4((const UChar *)NULL),
        test5(TRUE, (const UChar *)NULL, 1),
        test6((UChar *)NULL, 5, 5),
        test7((const char *)NULL, 3, NULL, errorCode);
    if(test4.isBogus() || test5.isBogus() || test6.isBogus() || test7.isBogus()) {
        errln("a constructor set to bogus for a NULL input string, should be empty");
    }

    test4.setTo(NULL, 3);
    test5.setTo(TRUE, (const UChar *)NULL, 1);
    test6.setTo((UChar *)NULL, 5, 5);
    if(test4.isBogus() || test5.isBogus() || test6.isBogus()) {
        errln("a setTo() set to bogus for a NULL input string, should be empty");
    }

    // test that bogus==bogus<any
    // (test1 and test3 are both bogus at this point)
    if(test1!=test3 || test1.compare(test3)!=0) {
        errln("bogus==bogus failed");
    }

    // test2 is emptied; a bogus string must compare less than an empty one.
    test2.remove();
    if(test1>=test2 || !(test2>test1) || test1.compare(test2)>=0 || !(test2.compare(test1)>0)) {
        errln("bogus<empty failed");
    }
}
1638
1639 // StringEnumeration ------------------------------------------------------- ***
1640 // most of StringEnumeration is tested elsewhere
1641 // this test improves code coverage
1642
// Fixture strings handed out, in this order, by TestEnumeration below.
// The last two entries are intentionally long ("buffer limits").
static const char *const testEnumStrings[] = {
    "a", "b", "c",
    "this is a long string which helps us test some buffer limits",
    "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"
};
1651
1652 class TestEnumeration : public StringEnumeration {
1653 public:
TestEnumeration()1654 TestEnumeration() : i(0) {}
1655
count(UErrorCode &) const1656 virtual int32_t count(UErrorCode& /*status*/) const {
1657 return UPRV_LENGTHOF(testEnumStrings);
1658 }
1659
snext(UErrorCode & status)1660 virtual const UnicodeString *snext(UErrorCode &status) {
1661 if(U_SUCCESS(status) && i<UPRV_LENGTHOF(testEnumStrings)) {
1662 unistr=UnicodeString(testEnumStrings[i++], "");
1663 return &unistr;
1664 }
1665
1666 return NULL;
1667 }
1668
reset(UErrorCode &)1669 virtual void reset(UErrorCode& /*status*/) {
1670 i=0;
1671 }
1672
getStaticClassID()1673 static inline UClassID getStaticClassID() {
1674 return (UClassID)&fgClassID;
1675 }
getDynamicClassID() const1676 virtual UClassID getDynamicClassID() const {
1677 return getStaticClassID();
1678 }
1679
1680 private:
1681 static const char fgClassID;
1682
1683 int32_t i;
1684 };
1685
1686 const char TestEnumeration::fgClassID=0;
1687
1688 void
TestStringEnumeration()1689 UnicodeStringTest::TestStringEnumeration() {
1690 UnicodeString s;
1691 TestEnumeration ten;
1692 int32_t i, length;
1693 UErrorCode status;
1694
1695 const UChar *pu;
1696 const char *pc;
1697
1698 // test the next() default implementation and ensureCharsCapacity()
1699 for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1700 status=U_ZERO_ERROR;
1701 pc=ten.next(&length, status);
1702 s=UnicodeString(testEnumStrings[i], "");
1703 if(U_FAILURE(status) || pc==NULL || length!=s.length() || UnicodeString(pc, length, "")!=s) {
1704 errln("StringEnumeration.next(%d) failed", i);
1705 }
1706 }
1707 status=U_ZERO_ERROR;
1708 if(ten.next(&length, status)!=NULL) {
1709 errln("StringEnumeration.next(done)!=NULL");
1710 }
1711
1712 // test the unext() default implementation
1713 ten.reset(status);
1714 for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1715 status=U_ZERO_ERROR;
1716 pu=ten.unext(&length, status);
1717 s=UnicodeString(testEnumStrings[i], "");
1718 if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(TRUE, pu, length)!=s) {
1719 errln("StringEnumeration.unext(%d) failed", i);
1720 }
1721 }
1722 status=U_ZERO_ERROR;
1723 if(ten.unext(&length, status)!=NULL) {
1724 errln("StringEnumeration.unext(done)!=NULL");
1725 }
1726
1727 // test that the default clone() implementation works, and returns NULL
1728 if(ten.clone()!=NULL) {
1729 errln("StringEnumeration.clone()!=NULL");
1730 }
1731
1732 // test that uenum_openFromStringEnumeration() works
1733 // Need a heap allocated string enumeration because it is adopted by the UEnumeration.
1734 StringEnumeration *newTen = new TestEnumeration;
1735 status=U_ZERO_ERROR;
1736 UEnumeration *uten = uenum_openFromStringEnumeration(newTen, &status);
1737 if (uten==NULL || U_FAILURE(status)) {
1738 errln("fail at file %s, line %d, UErrorCode is %s\n", __FILE__, __LINE__, u_errorName(status));
1739 return;
1740 }
1741
1742 // test uenum_next()
1743 for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1744 status=U_ZERO_ERROR;
1745 pc=uenum_next(uten, &length, &status);
1746 if(U_FAILURE(status) || pc==NULL || strcmp(pc, testEnumStrings[i]) != 0) {
1747 errln("File %s, line %d, StringEnumeration.next(%d) failed", __FILE__, __LINE__, i);
1748 }
1749 }
1750 status=U_ZERO_ERROR;
1751 if(uenum_next(uten, &length, &status)!=NULL) {
1752 errln("File %s, line %d, uenum_next(done)!=NULL");
1753 }
1754
1755 // test the uenum_unext()
1756 uenum_reset(uten, &status);
1757 for(i=0; i<UPRV_LENGTHOF(testEnumStrings); ++i) {
1758 status=U_ZERO_ERROR;
1759 pu=uenum_unext(uten, &length, &status);
1760 s=UnicodeString(testEnumStrings[i], "");
1761 if(U_FAILURE(status) || pu==NULL || length!=s.length() || UnicodeString(TRUE, pu, length)!=s) {
1762 errln("File %s, Line %d, uenum_unext(%d) failed", __FILE__, __LINE__, i);
1763 }
1764 }
1765 status=U_ZERO_ERROR;
1766 if(uenum_unext(uten, &length, &status)!=NULL) {
1767 errln("File %s, Line %d, uenum_unext(done)!=NULL" __FILE__, __LINE__);
1768 }
1769
1770 uenum_close(uten);
1771 }
1772
1773 /*
1774 * Namespace test, to make sure that macros like UNICODE_STRING include the
1775 * namespace qualifier.
1776 *
1777 * Define a (bogus) UnicodeString class in another namespace and check for ambiguity.
1778 */
1779 namespace bogus {
1780 class UnicodeString {
1781 public:
1782 enum EInvariant { kInvariant };
UnicodeString()1783 UnicodeString() : i(1) {}
UnicodeString(UBool,const UChar *,int32_t textLength)1784 UnicodeString(UBool /*isTerminated*/, const UChar * /*text*/, int32_t textLength) : i(textLength) {(void)i;}
UnicodeString(const char *,int32_t length,enum EInvariant)1785 UnicodeString(const char * /*src*/, int32_t length, enum EInvariant /*inv*/
1786 ) : i(length) {}
1787 private:
1788 int32_t i;
1789 };
1790 }
1791
1792 void
TestNameSpace()1793 UnicodeStringTest::TestNameSpace() {
1794 // Provoke name collision unless the UnicodeString macros properly
1795 // qualify the icu::UnicodeString class.
1796 using namespace bogus;
1797
1798 // Use all UnicodeString macros from unistr.h.
1799 icu::UnicodeString s1=icu::UnicodeString("abc", 3, US_INV);
1800 icu::UnicodeString s2=UNICODE_STRING("def", 3);
1801 icu::UnicodeString s3=UNICODE_STRING_SIMPLE("ghi");
1802
1803 // Make sure the compiler does not optimize away instantiation of s1, s2, s3.
1804 icu::UnicodeString s4=s1+s2+s3;
1805 if(s4.length()!=9) {
1806 errln("Something wrong with UnicodeString::operator+().");
1807 }
1808 }
1809
1810 void
TestUTF32()1811 UnicodeStringTest::TestUTF32() {
1812 // Input string length US_STACKBUF_SIZE to cause overflow of the
1813 // initially chosen fStackBuffer due to supplementary characters.
1814 static const UChar32 utf32[] = {
1815 0x41, 0xd900, 0x61, 0xdc00, -1, 0x110000, 0x5a, 0x50000, 0x7a,
1816 0x10000, 0x20000, 0xe0000, 0x10ffff
1817 };
1818 static const UChar expected_utf16[] = {
1819 0x41, 0xfffd, 0x61, 0xfffd, 0xfffd, 0xfffd, 0x5a, 0xd900, 0xdc00, 0x7a,
1820 0xd800, 0xdc00, 0xd840, 0xdc00, 0xdb40, 0xdc00, 0xdbff, 0xdfff
1821 };
1822 UnicodeString from32 = UnicodeString::fromUTF32(utf32, UPRV_LENGTHOF(utf32));
1823 UnicodeString expected(FALSE, expected_utf16, UPRV_LENGTHOF(expected_utf16));
1824 if(from32 != expected) {
1825 errln("UnicodeString::fromUTF32() did not create the expected string.");
1826 }
1827
1828 static const UChar utf16[] = {
1829 0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
1830 };
1831 static const UChar32 expected_utf32[] = {
1832 0x41, 0xfffd, 0x61, 0xfffd, 0x5a, 0x50000, 0x7a, 0x10000, 0x10ffff
1833 };
1834 UChar32 result32[16];
1835 UErrorCode errorCode = U_ZERO_ERROR;
1836 int32_t length32 =
1837 UnicodeString(FALSE, utf16, UPRV_LENGTHOF(utf16)).
1838 toUTF32(result32, UPRV_LENGTHOF(result32), errorCode);
1839 if( length32 != UPRV_LENGTHOF(expected_utf32) ||
1840 0 != uprv_memcmp(result32, expected_utf32, length32*4) ||
1841 result32[length32] != 0
1842 ) {
1843 errln("UnicodeString::toUTF32() did not create the expected string.");
1844 }
1845 }
1846
1847 class TestCheckedArrayByteSink : public CheckedArrayByteSink {
1848 public:
TestCheckedArrayByteSink(char * outbuf,int32_t capacity)1849 TestCheckedArrayByteSink(char* outbuf, int32_t capacity)
1850 : CheckedArrayByteSink(outbuf, capacity), calledFlush(FALSE) {}
Flush()1851 virtual void Flush() { calledFlush = TRUE; }
1852 UBool calledFlush;
1853 };
1854
1855 void
TestUTF8()1856 UnicodeStringTest::TestUTF8() {
1857 static const uint8_t utf8[] = {
1858 // Code points:
1859 // 0x41, 0xd900,
1860 // 0x61, 0xdc00,
1861 // 0x110000, 0x5a,
1862 // 0x50000, 0x7a,
1863 // 0x10000, 0x20000,
1864 // 0xe0000, 0x10ffff
1865 0x41, 0xed, 0xa4, 0x80,
1866 0x61, 0xed, 0xb0, 0x80,
1867 0xf4, 0x90, 0x80, 0x80, 0x5a,
1868 0xf1, 0x90, 0x80, 0x80, 0x7a,
1869 0xf0, 0x90, 0x80, 0x80, 0xf0, 0xa0, 0x80, 0x80,
1870 0xf3, 0xa0, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
1871 };
1872 static const UChar expected_utf16[] = {
1873 0x41, 0xfffd,
1874 0x61, 0xfffd,
1875 0xfffd, 0x5a,
1876 0xd900, 0xdc00, 0x7a,
1877 0xd800, 0xdc00, 0xd840, 0xdc00,
1878 0xdb40, 0xdc00, 0xdbff, 0xdfff
1879 };
1880 UnicodeString from8 = UnicodeString::fromUTF8(StringPiece((const char *)utf8, (int32_t)sizeof(utf8)));
1881 UnicodeString expected(FALSE, expected_utf16, UPRV_LENGTHOF(expected_utf16));
1882
1883 if(from8 != expected) {
1884 errln("UnicodeString::fromUTF8(StringPiece) did not create the expected string.");
1885 }
1886 #if U_HAVE_STD_STRING
1887 std::string utf8_string((const char *)utf8, sizeof(utf8));
1888 UnicodeString from8b = UnicodeString::fromUTF8(utf8_string);
1889 if(from8b != expected) {
1890 errln("UnicodeString::fromUTF8(std::string) did not create the expected string.");
1891 }
1892 #endif
1893
1894 static const UChar utf16[] = {
1895 0x41, 0xd900, 0x61, 0xdc00, 0x5a, 0xd900, 0xdc00, 0x7a, 0xd800, 0xdc00, 0xdbff, 0xdfff
1896 };
1897 static const uint8_t expected_utf8[] = {
1898 0x41, 0xef, 0xbf, 0xbd, 0x61, 0xef, 0xbf, 0xbd, 0x5a, 0xf1, 0x90, 0x80, 0x80, 0x7a,
1899 0xf0, 0x90, 0x80, 0x80, 0xf4, 0x8f, 0xbf, 0xbf
1900 };
1901 UnicodeString us(FALSE, utf16, UPRV_LENGTHOF(utf16));
1902
1903 char buffer[64];
1904 TestCheckedArrayByteSink sink(buffer, (int32_t)sizeof(buffer));
1905 us.toUTF8(sink);
1906 if( sink.NumberOfBytesWritten() != (int32_t)sizeof(expected_utf8) ||
1907 0 != uprv_memcmp(buffer, expected_utf8, sizeof(expected_utf8))
1908 ) {
1909 errln("UnicodeString::toUTF8() did not create the expected string.");
1910 }
1911 if(!sink.calledFlush) {
1912 errln("UnicodeString::toUTF8(sink) did not sink.Flush().");
1913 }
1914 #if U_HAVE_STD_STRING
1915 // Initial contents for testing that toUTF8String() appends.
1916 std::string result8 = "-->";
1917 std::string expected8 = "-->" + std::string((const char *)expected_utf8, sizeof(expected_utf8));
1918 // Use the return value just for testing.
1919 std::string &result8r = us.toUTF8String(result8);
1920 if(result8r != expected8 || &result8r != &result8) {
1921 errln("UnicodeString::toUTF8String() did not create the expected string.");
1922 }
1923 #endif
1924 }
1925
1926 // Test if this compiler supports Return Value Optimization of unnamed temporary objects.
wrapUChars(const UChar * uchars)1927 static UnicodeString wrapUChars(const UChar *uchars) {
1928 return UnicodeString(TRUE, uchars, -1);
1929 }
1930
1931 void
TestReadOnlyAlias()1932 UnicodeStringTest::TestReadOnlyAlias() {
1933 UChar uchars[]={ 0x61, 0x62, 0 };
1934 UnicodeString alias(TRUE, uchars, 2);
1935 if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
1936 errln("UnicodeString read-only-aliasing constructor does not behave as expected.");
1937 return;
1938 }
1939 alias.truncate(1);
1940 if(alias.length()!=1 || alias.getBuffer()!=uchars) {
1941 errln("UnicodeString(read-only-alias).truncate() did not preserve aliasing as expected.");
1942 }
1943 if(alias.getTerminatedBuffer()==uchars) {
1944 errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1945 "did not allocate and copy as expected.");
1946 }
1947 if(uchars[1]!=0x62) {
1948 errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1949 "modified the original buffer.");
1950 }
1951 if(1!=u_strlen(alias.getTerminatedBuffer())) {
1952 errln("UnicodeString(read-only-alias).truncate().getTerminatedBuffer() "
1953 "does not return a buffer terminated at the proper length.");
1954 }
1955
1956 alias.setTo(TRUE, uchars, 2);
1957 if(alias.length()!=2 || alias.getBuffer()!=uchars || alias.getTerminatedBuffer()!=uchars) {
1958 errln("UnicodeString read-only-aliasing setTo() does not behave as expected.");
1959 return;
1960 }
1961 alias.remove();
1962 if(alias.length()!=0) {
1963 errln("UnicodeString(read-only-alias).remove() did not work.");
1964 }
1965 if(alias.getTerminatedBuffer()==uchars) {
1966 errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
1967 "did not un-alias as expected.");
1968 }
1969 if(uchars[0]!=0x61) {
1970 errln("UnicodeString(read-only-alias).remove().getTerminatedBuffer() "
1971 "modified the original buffer.");
1972 }
1973 if(0!=u_strlen(alias.getTerminatedBuffer())) {
1974 errln("UnicodeString.setTo(read-only-alias).remove().getTerminatedBuffer() "
1975 "does not return a buffer terminated at length 0.");
1976 }
1977
1978 UnicodeString longString=UNICODE_STRING_SIMPLE("abcdefghijklmnopqrstuvwxyz0123456789");
1979 alias.setTo(FALSE, longString.getBuffer(), longString.length());
1980 alias.remove(0, 10);
1981 if(longString.compare(10, INT32_MAX, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+10) {
1982 errln("UnicodeString.setTo(read-only-alias).remove(0, 10) did not preserve aliasing as expected.");
1983 }
1984 alias.setTo(FALSE, longString.getBuffer(), longString.length());
1985 alias.remove(27, 99);
1986 if(longString.compare(0, 27, alias)!=0 || alias.getBuffer()!=longString.getBuffer()) {
1987 errln("UnicodeString.setTo(read-only-alias).remove(27, 99) did not preserve aliasing as expected.");
1988 }
1989 alias.setTo(FALSE, longString.getBuffer(), longString.length());
1990 alias.retainBetween(6, 30);
1991 if(longString.compare(6, 24, alias)!=0 || alias.getBuffer()!=longString.getBuffer()+6) {
1992 errln("UnicodeString.setTo(read-only-alias).retainBetween(6, 30) did not preserve aliasing as expected.");
1993 }
1994
1995 UChar abc[]={ 0x61, 0x62, 0x63, 0 };
1996 UBool hasRVO= wrapUChars(abc).getBuffer()==abc;
1997
1998 UnicodeString temp;
1999 temp.fastCopyFrom(longString.tempSubString());
2000 if(temp!=longString || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
2001 errln("UnicodeString.tempSubString() failed");
2002 }
2003 temp.fastCopyFrom(longString.tempSubString(-3, 5));
2004 if(longString.compare(0, 5, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer())) {
2005 errln("UnicodeString.tempSubString(-3, 5) failed");
2006 }
2007 temp.fastCopyFrom(longString.tempSubString(17));
2008 if(longString.compare(17, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+17)) {
2009 errln("UnicodeString.tempSubString(17) failed");
2010 }
2011 temp.fastCopyFrom(longString.tempSubString(99));
2012 if(!temp.isEmpty()) {
2013 errln("UnicodeString.tempSubString(99) failed");
2014 }
2015 temp.fastCopyFrom(longString.tempSubStringBetween(6));
2016 if(longString.compare(6, INT32_MAX, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+6)) {
2017 errln("UnicodeString.tempSubStringBetween(6) failed");
2018 }
2019 temp.fastCopyFrom(longString.tempSubStringBetween(8, 18));
2020 if(longString.compare(8, 10, temp)!=0 || (hasRVO && temp.getBuffer()!=longString.getBuffer()+8)) {
2021 errln("UnicodeString.tempSubStringBetween(8, 18) failed");
2022 }
2023 UnicodeString bogusString;
2024 bogusString.setToBogus();
2025 temp.fastCopyFrom(bogusString.tempSubStringBetween(8, 18));
2026 if(!temp.isBogus()) {
2027 errln("UnicodeString.setToBogus().tempSubStringBetween(8, 18) failed");
2028 }
2029 }
2030
2031 void
doTestAppendable(UnicodeString & dest,Appendable & app)2032 UnicodeStringTest::doTestAppendable(UnicodeString &dest, Appendable &app) {
2033 static const UChar cde[3]={ 0x63, 0x64, 0x65 };
2034 static const UChar fg[3]={ 0x66, 0x67, 0 };
2035 if(!app.reserveAppendCapacity(12)) {
2036 errln("Appendable.reserve(12) failed");
2037 }
2038 app.appendCodeUnit(0x61);
2039 app.appendCodePoint(0x62);
2040 app.appendCodePoint(0x50000);
2041 app.appendString(cde, 3);
2042 app.appendString(fg, -1);
2043 UChar scratch[3];
2044 int32_t capacity=-1;
2045 UChar *buffer=app.getAppendBuffer(3, 3, scratch, 3, &capacity);
2046 if(capacity<3) {
2047 errln("Appendable.getAppendBuffer(min=3) returned capacity=%d<3", (int)capacity);
2048 return;
2049 }
2050 static const UChar hij[3]={ 0x68, 0x69, 0x6a };
2051 u_memcpy(buffer, hij, 3);
2052 app.appendString(buffer, 3);
2053 if(dest!=UNICODE_STRING_SIMPLE("ab\\U00050000cdefghij").unescape()) {
2054 errln("Appendable.append(...) failed");
2055 }
2056 buffer=app.getAppendBuffer(0, 3, scratch, 3, &capacity);
2057 if(buffer!=NULL || capacity!=0) {
2058 errln("Appendable.getAppendBuffer(min=0) failed");
2059 }
2060 capacity=1;
2061 buffer=app.getAppendBuffer(3, 3, scratch, 2, &capacity);
2062 if(buffer!=NULL || capacity!=0) {
2063 errln("Appendable.getAppendBuffer(scratch<min) failed");
2064 }
2065 }
2066
2067 class SimpleAppendable : public Appendable {
2068 public:
SimpleAppendable(UnicodeString & dest)2069 explicit SimpleAppendable(UnicodeString &dest) : str(dest) {}
appendCodeUnit(UChar c)2070 virtual UBool appendCodeUnit(UChar c) { str.append(c); return TRUE; }
reset()2071 SimpleAppendable &reset() { str.remove(); return *this; }
2072 private:
2073 UnicodeString &str;
2074 };
2075
2076 void
TestAppendable()2077 UnicodeStringTest::TestAppendable() {
2078 UnicodeString dest;
2079 SimpleAppendable app(dest);
2080 doTestAppendable(dest, app);
2081 }
2082
2083 void
TestUnicodeStringImplementsAppendable()2084 UnicodeStringTest::TestUnicodeStringImplementsAppendable() {
2085 UnicodeString dest;
2086 UnicodeStringAppendable app(dest);
2087 doTestAppendable(dest, app);
2088 }
2089
2090 void
TestSizeofUnicodeString()2091 UnicodeStringTest::TestSizeofUnicodeString() {
2092 // See the comments in unistr.h near the declaration of UnicodeString's fields.
2093 // See the API comments for UNISTR_OBJECT_SIZE.
2094 size_t sizeofUniStr=sizeof(UnicodeString);
2095 size_t expected=UNISTR_OBJECT_SIZE;
2096 if(expected!=sizeofUniStr) {
2097 // Possible cause: UNISTR_OBJECT_SIZE may not be a multiple of sizeof(pointer),
2098 // of the compiler might add more internal padding than expected.
2099 errln("sizeof(UnicodeString)=%d, expected UNISTR_OBJECT_SIZE=%d",
2100 (int)sizeofUniStr, (int)expected);
2101 }
2102 if(sizeofUniStr<32) {
2103 errln("sizeof(UnicodeString)=%d < 32, probably too small", (int)sizeofUniStr);
2104 }
2105 // We assume that the entire UnicodeString object,
2106 // minus the vtable pointer and 2 bytes for flags and short length,
2107 // is available for internal storage of UChars.
2108 int32_t expectedStackBufferLength=((int32_t)UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR;
2109 UnicodeString s;
2110 const UChar *emptyBuffer=s.getBuffer();
2111 for(int32_t i=0; i<expectedStackBufferLength; ++i) {
2112 s.append((UChar)0x2e);
2113 }
2114 const UChar *fullBuffer=s.getBuffer();
2115 if(fullBuffer!=emptyBuffer) {
2116 errln("unexpected reallocation when filling with assumed stack buffer size of %d",
2117 expectedStackBufferLength);
2118 }
2119 const UChar *terminatedBuffer=s.getTerminatedBuffer();
2120 if(terminatedBuffer==emptyBuffer) {
2121 errln("unexpected keeping stack buffer when overfilling assumed stack buffer size of %d",
2122 expectedStackBufferLength);
2123 }
2124 }
2125
2126 void
TestMoveSwap()2127 UnicodeStringTest::TestMoveSwap() {
2128 static const UChar abc[3] = { 0x61, 0x62, 0x63 }; // "abc"
2129 UnicodeString s1(FALSE, abc, UPRV_LENGTHOF(abc)); // read-only alias
2130 UnicodeString s2(100, 0x7a, 100); // 100 * 'z' should be on the heap
2131 UnicodeString s3("defg", 4, US_INV); // in stack buffer
2132 const UChar *p = s2.getBuffer();
2133 s1.swap(s2);
2134 if(s1.getBuffer() != p || s1.length() != 100 || s2.getBuffer() != abc || s2.length() != 3) {
2135 errln("UnicodeString.swap() did not swap");
2136 }
2137 swap(s2, s3);
2138 if(s2 != UNICODE_STRING_SIMPLE("defg") || s3.getBuffer() != abc || s3.length() != 3) {
2139 errln("swap(UnicodeString) did not swap back");
2140 }
2141 UnicodeString s4;
2142 s4.moveFrom(s1);
2143 if(s4.getBuffer() != p || s4.length() != 100 || !s1.isBogus()) {
2144 errln("UnicodeString.moveFrom(heap) did not move");
2145 }
2146 UnicodeString s5;
2147 s5.moveFrom(s2);
2148 if(s5 != UNICODE_STRING_SIMPLE("defg")) {
2149 errln("UnicodeString.moveFrom(stack) did not move");
2150 }
2151 UnicodeString s6;
2152 s6.moveFrom(s3);
2153 if(s6.getBuffer() != abc || s6.length() != 3) {
2154 errln("UnicodeString.moveFrom(alias) did not move");
2155 }
2156 #if U_HAVE_RVALUE_REFERENCES
2157 infoln("TestMoveSwap() with rvalue references");
2158 s1 = static_cast<UnicodeString &&>(s6);
2159 if(s1.getBuffer() != abc || s1.length() != 3) {
2160 errln("UnicodeString move assignment operator did not move");
2161 }
2162 UnicodeString s7(static_cast<UnicodeString &&>(s4));
2163 if(s7.getBuffer() != p || s7.length() != 100 || !s4.isBogus()) {
2164 errln("UnicodeString move constructor did not move");
2165 }
2166 #else
2167 infoln("TestMoveSwap() without rvalue references");
2168 UnicodeString s7;
2169 #endif
2170
2171 // Move self assignment leaves the object valid but in an undefined state.
2172 // Do it to make sure there is no crash,
2173 // but do not check for any particular resulting value.
2174 s1.moveFrom(s1);
2175 s2.moveFrom(s2);
2176 s3.moveFrom(s3);
2177 s4.moveFrom(s4);
2178 s5.moveFrom(s5);
2179 s6.moveFrom(s6);
2180 s7.moveFrom(s7);
2181 // Simple copy assignment must work.
2182 UnicodeString simple = UNICODE_STRING_SIMPLE("simple");
2183 s1 = s6 = s4 = s7 = simple;
2184 if(s1 != simple || s4 != simple || s6 != simple || s7 != simple) {
2185 errln("UnicodeString copy after self-move did not work");
2186 }
2187 }
2188