1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2015, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /* file name: strtest.cpp
9 * encoding: UTF-8
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 1999nov22
14 * created by: Markus W. Scherer
15 */
16
17 #ifdef U_HAVE_STRING_VIEW
18 #include <string_view>
19 #endif
20
21 #include <cstddef>
22 #include <string.h>
23 #include <limits>
24
25 #include "unicode/utypes.h"
26 #include "unicode/putil.h"
27 #include "unicode/std_string.h"
28 #include "unicode/stringpiece.h"
29 #include "unicode/unistr.h"
30 #include "unicode/ustring.h"
31 #include "unicode/utf_old.h" // for UTF8_COUNT_TRAIL_BYTES
32 #include "unicode/utf8.h"
33 #include "charstr.h"
34 #include "cstr.h"
35 #include "intltest.h"
36 #include "strtest.h"
37 #include "uinvchar.h"
38
~StringTest()39 StringTest::~StringTest() {}
40
TestEndian(void)41 void StringTest::TestEndian(void) {
42 union {
43 uint8_t byte;
44 uint16_t word;
45 } u;
46 u.word=0x0100;
47 if(U_IS_BIG_ENDIAN!=u.byte) {
48 errln("TestEndian: U_IS_BIG_ENDIAN needs to be fixed in platform.h");
49 }
50 }
51
TestSizeofTypes(void)52 void StringTest::TestSizeofTypes(void) {
53 if(U_SIZEOF_WCHAR_T!=sizeof(wchar_t)) {
54 errln("TestSizeofWCharT: U_SIZEOF_WCHAR_T!=sizeof(wchar_t) - U_SIZEOF_WCHAR_T needs to be fixed in platform.h");
55 }
56 #ifdef U_INT64_T_UNAVAILABLE
57 errln("int64_t and uint64_t are undefined.");
58 #else
59 if(8!=sizeof(int64_t)) {
60 errln("TestSizeofTypes: 8!=sizeof(int64_t) - int64_t needs to be fixed in platform.h");
61 }
62 if(8!=sizeof(uint64_t)) {
63 errln("TestSizeofTypes: 8!=sizeof(uint64_t) - uint64_t needs to be fixed in platform.h");
64 }
65 #endif
66 if(8!=sizeof(double)) {
67 errln("8!=sizeof(double) - putil.c code may not work");
68 }
69 if(4!=sizeof(int32_t)) {
70 errln("4!=sizeof(int32_t)");
71 }
72 if(4!=sizeof(uint32_t)) {
73 errln("4!=sizeof(uint32_t)");
74 }
75 if(2!=sizeof(int16_t)) {
76 errln("2!=sizeof(int16_t)");
77 }
78 if(2!=sizeof(uint16_t)) {
79 errln("2!=sizeof(uint16_t)");
80 }
81 if(2!=sizeof(UChar)) {
82 errln("2!=sizeof(UChar)");
83 }
84 if(1!=sizeof(int8_t)) {
85 errln("1!=sizeof(int8_t)");
86 }
87 if(1!=sizeof(uint8_t)) {
88 errln("1!=sizeof(uint8_t)");
89 }
90 if(1!=sizeof(UBool)) {
91 errln("1!=sizeof(UBool)");
92 }
93 }
94
TestCharsetFamily(void)95 void StringTest::TestCharsetFamily(void) {
96 unsigned char c='A';
97 if( (U_CHARSET_FAMILY==U_ASCII_FAMILY && c!=0x41) ||
98 (U_CHARSET_FAMILY==U_EBCDIC_FAMILY && c!=0xc1)
99 ) {
100 errln("TestCharsetFamily: U_CHARSET_FAMILY needs to be fixed in platform.h");
101 }
102 }
103
104 U_STRING_DECL(ustringVar, "aZ0 -", 5);
105
106 void
Test_U_STRING()107 StringTest::Test_U_STRING() {
108 U_STRING_INIT(ustringVar, "aZ0 -", 5);
109 if( u_strlen(ustringVar)!=5 ||
110 ustringVar[0]!=0x61 ||
111 ustringVar[1]!=0x5a ||
112 ustringVar[2]!=0x30 ||
113 ustringVar[3]!=0x20 ||
114 ustringVar[4]!=0x2d ||
115 ustringVar[5]!=0
116 ) {
117 errln("Test_U_STRING: U_STRING_DECL with U_STRING_INIT does not work right! "
118 "See putil.h and utypes.h with platform.h.");
119 }
120 }
121
122 void
Test_UNICODE_STRING()123 StringTest::Test_UNICODE_STRING() {
124 UnicodeString ustringVar=UNICODE_STRING("aZ0 -", 5);
125 if( ustringVar.length()!=5 ||
126 ustringVar[0]!=0x61 ||
127 ustringVar[1]!=0x5a ||
128 ustringVar[2]!=0x30 ||
129 ustringVar[3]!=0x20 ||
130 ustringVar[4]!=0x2d
131 ) {
132 errln("Test_UNICODE_STRING: UNICODE_STRING does not work right! "
133 "See unistr.h and utypes.h with platform.h.");
134 }
135 }
136
137 void
Test_UNICODE_STRING_SIMPLE()138 StringTest::Test_UNICODE_STRING_SIMPLE() {
139 UnicodeString ustringVar=UNICODE_STRING_SIMPLE("aZ0 -");
140 if( ustringVar.length()!=5 ||
141 ustringVar[0]!=0x61 ||
142 ustringVar[1]!=0x5a ||
143 ustringVar[2]!=0x30 ||
144 ustringVar[3]!=0x20 ||
145 ustringVar[4]!=0x2d
146 ) {
147 errln("Test_UNICODE_STRING_SIMPLE: UNICODE_STRING_SIMPLE does not work right! "
148 "See unistr.h and utypes.h with platform.h.");
149 }
150 }
151
namespace {

// Two parallel tables holding the same character sequence: once in the
// compiler's native execution charset (see U_CHARSET_FAMILY in
// unicode/platform.h) and once as UTF-16 literals.
// Entry i of one table corresponds to entry i of the other.
const char *nativeInvChars =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"    // uppercase letters
    "abcdefghijklmnopqrstuvwxyz"    // lowercase letters
    "0123456789"                    // digits
    " \"%&'()*+,-./:;<=>?_";        // space and punctuation
const char16_t *asciiInvChars =
    u"ABCDEFGHIJKLMNOPQRSTUVWXYZ"   // uppercase letters
    u"abcdefghijklmnopqrstuvwxyz"   // lowercase letters
    u"0123456789"                   // digits
    u" \"%&'()*+,-./:;<=>?_";       // space and punctuation

}  // namespace
165
166 void
TestUpperOrdinal()167 StringTest::TestUpperOrdinal() {
168 for (int32_t i = 0;; ++i) {
169 char ic = nativeInvChars[i];
170 uint8_t ac = static_cast<uint8_t>(asciiInvChars[i]);
171 int32_t expected = ac - 'A';
172 int32_t actual = uprv_upperOrdinal(ic);
173 if (0 <= expected && expected <= 25) {
174 if (actual != expected) {
175 errln("uprv_upperOrdinal('%c')=%d != expected %d",
176 ic, (int)actual, (int)expected);
177 }
178 } else {
179 if (0 <= actual && actual <= 25) {
180 errln("uprv_upperOrdinal('%c')=%d should have been outside 0..25",
181 ic, (int)actual);
182 }
183 }
184 if (ic == 0) { break; }
185 }
186 }
187
188 void
TestLowerOrdinal()189 StringTest::TestLowerOrdinal() {
190 for (int32_t i = 0;; ++i) {
191 char ic = nativeInvChars[i];
192 uint8_t ac = static_cast<uint8_t>(asciiInvChars[i]);
193 int32_t expected = ac - 'a';
194 int32_t actual = uprv_lowerOrdinal(ic);
195 if (0 <= expected && expected <= 25) {
196 if (actual != expected) {
197 errln("uprv_lowerOrdinal('%c')=%d != expected %d",
198 ic, (int)actual, (int)expected);
199 }
200 } else {
201 if (0 <= actual && actual <= 25) {
202 errln("uprv_lowerOrdinal('%c')=%d should have been outside 0..25",
203 ic, (int)actual);
204 }
205 }
206 if (ic == 0) { break; }
207 }
208 }
209
210 void
Test_UTF8_COUNT_TRAIL_BYTES()211 StringTest::Test_UTF8_COUNT_TRAIL_BYTES() {
212 #if !U_HIDE_OBSOLETE_UTF_OLD_H
213 if(UTF8_COUNT_TRAIL_BYTES(0x7F) != 0
214 || UTF8_COUNT_TRAIL_BYTES(0xC2) != 1
215 || UTF8_COUNT_TRAIL_BYTES(0xE0) != 2
216 || UTF8_COUNT_TRAIL_BYTES(0xF0) != 3) {
217 errln("UTF8_COUNT_TRAIL_BYTES does not work right! See utf_old.h.");
218 }
219 #endif
220 // Note: U8_COUNT_TRAIL_BYTES (current) and UTF8_COUNT_TRAIL_BYTES (deprecated)
221 // have completely different implementations.
222 if (U8_COUNT_TRAIL_BYTES(0x7F) != 0
223 || U8_COUNT_TRAIL_BYTES(0xC2) != 1
224 || U8_COUNT_TRAIL_BYTES(0xE0) != 2
225 || U8_COUNT_TRAIL_BYTES(0xF0) != 3) {
226 errln("U8_COUNT_TRAIL_BYTES does not work right! See utf8.h.");
227 }
228 }
229
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)230 void StringTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
231 if(exec) {
232 logln("TestSuite Character and String Test: ");
233 }
234 TESTCASE_AUTO_BEGIN;
235 TESTCASE_AUTO(TestEndian);
236 TESTCASE_AUTO(TestSizeofTypes);
237 TESTCASE_AUTO(TestCharsetFamily);
238 TESTCASE_AUTO(Test_U_STRING);
239 TESTCASE_AUTO(Test_UNICODE_STRING);
240 TESTCASE_AUTO(Test_UNICODE_STRING_SIMPLE);
241 TESTCASE_AUTO(TestUpperOrdinal);
242 TESTCASE_AUTO(TestLowerOrdinal);
243 TESTCASE_AUTO(Test_UTF8_COUNT_TRAIL_BYTES);
244 TESTCASE_AUTO(TestSTLCompatibility);
245 TESTCASE_AUTO(TestStringPiece);
246 TESTCASE_AUTO(TestStringPieceComparisons);
247 TESTCASE_AUTO(TestStringPieceFind);
248 TESTCASE_AUTO(TestStringPieceOther);
249 #ifdef U_HAVE_STRING_VIEW
250 TESTCASE_AUTO(TestStringPieceStringView);
251 #endif
252 TESTCASE_AUTO(TestStringPieceU8);
253 TESTCASE_AUTO(TestByteSink);
254 TESTCASE_AUTO(TestCheckedArrayByteSink);
255 TESTCASE_AUTO(TestStringByteSink);
256 TESTCASE_AUTO(TestStringByteSinkAppendU8);
257 TESTCASE_AUTO(TestCharString);
258 TESTCASE_AUTO(TestCStr);
259 TESTCASE_AUTO(TestCharStrAppendNumber);
260 TESTCASE_AUTO(Testctou);
261 TESTCASE_AUTO_END;
262 }
263
264 void
TestStringPiece()265 StringTest::TestStringPiece() {
266 // Default constructor.
267 StringPiece empty;
268 if(!empty.empty() || empty.data()!=NULL || empty.length()!=0 || empty.size()!=0) {
269 errln("StringPiece() failed");
270 }
271 // Construct from NULL const char * pointer.
272 StringPiece null((const char *)nullptr);
273 if(!null.empty() || null.data()!=NULL || null.length()!=0 || null.size()!=0) {
274 errln("StringPiece(NULL) failed");
275 }
276 // Construct from const char * pointer.
277 static const char *abc_chars="abc";
278 StringPiece abc(abc_chars);
279 if(abc.empty() || abc.data()!=abc_chars || abc.length()!=3 || abc.size()!=3) {
280 errln("StringPiece(abc_chars) failed");
281 }
282 // Construct from const char * pointer and length.
283 static const char *abcdefg_chars="abcdefg";
284 StringPiece abcd(abcdefg_chars, 4);
285 if(abcd.empty() || abcd.data()!=abcdefg_chars || abcd.length()!=4 || abcd.size()!=4) {
286 errln("StringPiece(abcdefg_chars, 4) failed");
287 }
288 // Construct from std::string.
289 std::string uvwxyz_string("uvwxyz");
290 StringPiece uvwxyz(uvwxyz_string);
291 if(uvwxyz.empty() || uvwxyz.data()!=uvwxyz_string.data() || uvwxyz.length()!=6 || uvwxyz.size()!=6) {
292 errln("StringPiece(uvwxyz_string) failed");
293 }
294 // Substring constructor with pos.
295 StringPiece sp(abcd, -1);
296 if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
297 errln("StringPiece(abcd, -1) failed");
298 }
299 sp=StringPiece(abcd, 5);
300 if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
301 errln("StringPiece(abcd, 5) failed");
302 }
303 sp=StringPiece(abcd, 2);
304 if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) {
305 errln("StringPiece(abcd, -1) failed");
306 }
307 // Substring constructor with pos and len.
308 sp=StringPiece(abcd, -1, 8);
309 if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
310 errln("StringPiece(abcd, -1, 8) failed");
311 }
312 sp=StringPiece(abcd, 5, 8);
313 if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
314 errln("StringPiece(abcd, 5, 8) failed");
315 }
316 sp=StringPiece(abcd, 2, 8);
317 if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) {
318 errln("StringPiece(abcd, -1) failed");
319 }
320 sp=StringPiece(abcd, 2, -1);
321 if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
322 errln("StringPiece(abcd, 5, -1) failed");
323 }
324 // static const npos
325 const int32_t *ptr_npos=&StringPiece::npos;
326 if(StringPiece::npos!=0x7fffffff || *ptr_npos!=0x7fffffff) {
327 errln("StringPiece::npos!=0x7fffffff");
328 }
329 // substr() method with pos, using len=npos.
330 sp=abcd.substr(-1);
331 if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
332 errln("abcd.substr(-1) failed");
333 }
334 sp=abcd.substr(5);
335 if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
336 errln("abcd.substr(5) failed");
337 }
338 sp=abcd.substr(2);
339 if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) {
340 errln("abcd.substr(-1) failed");
341 }
342 // substr() method with pos and len.
343 sp=abcd.substr(-1, 8);
344 if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
345 errln("abcd.substr(-1, 8) failed");
346 }
347 sp=abcd.substr(5, 8);
348 if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
349 errln("abcd.substr(5, 8) failed");
350 }
351 sp=abcd.substr(2, 8);
352 if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) {
353 errln("abcd.substr(-1) failed");
354 }
355 sp=abcd.substr(2, -1);
356 if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
357 errln("abcd.substr(5, -1) failed");
358 }
359 // clear()
360 sp=abcd;
361 sp.clear();
362 if(!sp.empty() || sp.data()!=NULL || sp.length()!=0 || sp.size()!=0) {
363 errln("abcd.clear() failed");
364 }
365 // remove_prefix()
366 sp=abcd;
367 sp.remove_prefix(-1);
368 if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
369 errln("abcd.remove_prefix(-1) failed");
370 }
371 sp=abcd;
372 sp.remove_prefix(2);
373 if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) {
374 errln("abcd.remove_prefix(2) failed");
375 }
376 sp=abcd;
377 sp.remove_prefix(5);
378 if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
379 errln("abcd.remove_prefix(5) failed");
380 }
381 // remove_suffix()
382 sp=abcd;
383 sp.remove_suffix(-1);
384 if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
385 errln("abcd.remove_suffix(-1) failed");
386 }
387 sp=abcd;
388 sp.remove_suffix(2);
389 if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=2 || sp.size()!=2) {
390 errln("abcd.remove_suffix(2) failed");
391 }
392 sp=abcd;
393 sp.remove_suffix(5);
394 if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
395 errln("abcd.remove_suffix(5) failed");
396 }
397 }
398
399 void
TestStringPieceComparisons()400 StringTest::TestStringPieceComparisons() {
401 StringPiece empty;
402 StringPiece null(nullptr);
403 StringPiece abc("abc");
404 StringPiece abcd("abcdefg", 4);
405 StringPiece abx("abx");
406 if(empty!=null) {
407 errln("empty!=null");
408 }
409 if(empty==abc) {
410 errln("empty==abc");
411 }
412 if(abc==abcd) {
413 errln("abc==abcd");
414 }
415
416 assertTrue("null<abc", null.compare(abc) < 0);
417 assertTrue("abc>null", abc.compare(null) > 0);
418 assertTrue("abc<abcd", abc.compare(abcd) < 0);
419 assertTrue("abcd>abc", abcd.compare(abc) > 0);
420 assertTrue("abc<abx", abc.compare(abx) < 0);
421 assertTrue("abx>abc", abx.compare(abc) > 0);
422 assertTrue("abx>abcd", abx.compare(abcd) > 0);
423 assertTrue("abcd<abx", abcd.compare(abx) < 0);
424 assertTrue("abx==abx", abx.compare(abx) == 0);
425
426 // Behavior should be the same as std::string::compare
427 {
428 std::string null("");
429 std::string abc("abc");
430 std::string abcd("abcdefg", 4);
431 std::string abx("abx");
432
433 assertTrue("std: null<abc", null.compare(abc) < 0);
434 assertTrue("std: abc>null", abc.compare(null) > 0);
435 assertTrue("std: abc<abcd", abc.compare(abcd) < 0);
436 assertTrue("std: abcd>abc", abcd.compare(abc) > 0);
437 assertTrue("std: abc<abx", abc.compare(abx) < 0);
438 assertTrue("std: abx>abc", abx.compare(abc) > 0);
439 assertTrue("std: abx>abcd", abx.compare(abcd) > 0);
440 assertTrue("std: abcd<abx", abcd.compare(abx) < 0);
441 assertTrue("std: abx==abx", abx.compare(abx) == 0);
442 }
443
444 abcd.remove_suffix(1);
445 if(abc!=abcd) {
446 errln("abc!=abcd.remove_suffix(1)");
447 }
448 if(abc==abx) {
449 errln("abc==abx");
450 }
451 }
452
453 void
TestStringPieceFind()454 StringTest::TestStringPieceFind() {
455 struct TestCase {
456 const char* haystack;
457 const char* needle;
458 int32_t expected;
459 } cases[] = {
460 { "", "", 0 },
461 { "", "x", -1 },
462 { "x", "", 0 },
463 { "x", "x", 0 },
464 { "xy", "x", 0 },
465 { "xy", "y", 1 },
466 { "xy", "xy", 0 },
467 { "xy", "xyz", -1 },
468 { "qwerty", "qqw", -1 },
469 { "qwerty", "qw", 0 },
470 { "qwerty", "er", 2 },
471 { "qwerty", "err", -1 },
472 { "qwerty", "ert", 2 },
473 { "qwerty", "ty", 4 },
474 { "qwerty", "tyy", -1 },
475 { "qwerty", "a", -1 },
476 { "qwerty", "abc", -1 }
477 };
478 int32_t caseNumber = 0;
479 for (auto& cas : cases) {
480 StringPiece haystack(cas.haystack);
481 StringPiece needle(cas.needle);
482 assertEquals(Int64ToUnicodeString(caseNumber),
483 cas.expected, haystack.find(needle, 0));
484 // Should be same as std::string::find
485 std::string stdhaystack(cas.haystack);
486 std::string stdneedle(cas.needle);
487 assertEquals(Int64ToUnicodeString(caseNumber) + u" (std)",
488 cas.expected, static_cast<int32_t>(stdhaystack.find(stdneedle, 0)));
489 // Test offsets against std::string::find
490 for (int32_t offset = 0; offset < haystack.length(); offset++) {
491 assertEquals(Int64ToUnicodeString(caseNumber) + "u @ " + Int64ToUnicodeString(offset),
492 static_cast<int32_t>(stdhaystack.find(stdneedle, offset)), haystack.find(needle, offset));
493 }
494 caseNumber++;
495 }
496 }
497
498 void
TestStringPieceOther()499 StringTest::TestStringPieceOther() {
500 static constexpr char msg[] = "Kapow!";
501
502 // Another string piece implementation.
503 struct Other {
504 const char* data() { return msg; }
505 size_t size() { return sizeof msg - 1; }
506 };
507
508 Other other;
509 StringPiece piece(other);
510
511 assertEquals("size()", piece.size(), static_cast<int32_t>(other.size()));
512 assertEquals("data()", piece.data(), other.data());
513 }
514
#ifdef U_HAVE_STRING_VIEW
/**
 * Checks that a StringPiece can be constructed from a std::string_view and
 * then reports the same size and data.
 */
void
StringTest::TestStringPieceStringView() {
    static constexpr char kText[] = "Kapow!";

    std::string_view source(kText);  // C++17
    StringPiece converted(source);

    // StringPiece sizes are int32_t; the size_t is converted explicitly.
    assertEquals("size()", converted.size(), static_cast<int32_t>(source.size()));
    assertEquals("data()", converted.data(), source.data());
}
#endif
527
528 void
TestStringPieceU8()529 StringTest::TestStringPieceU8() {
530 // ICU-20984 "mitigate some C++20 char8_t breakages"
531 // For the following APIs there are overloads for both
532 // const char * and const char8_t *.
533 // A u8"string literal" has one type or the other
534 // depending on C++ version and compiler settings.
535 StringPiece abc(u8"abc");
536 assertEquals("abc.length", 3, abc.length());
537 assertEquals("abc", "\x61\x62\x63", abc.data());
538
539 StringPiece abc3(u8"abcdef", 3);
540 assertEquals("abc3.length", 3, abc3.length());
541 assertEquals("abc3[0]", 0x61, abc3.data()[0]);
542 assertEquals("abc3[1]", 0x62, abc3.data()[1]);
543 assertEquals("abc3[2]", 0x63, abc3.data()[2]);
544
545 StringPiece uvw("q");
546 uvw.set(u8"uvw");
547 assertEquals("uvw.length", 3, uvw.length());
548 assertEquals("uvw", "\x75\x76\x77", uvw.data());
549
550 StringPiece xyz("r");
551 xyz.set(u8"xyzXYZ", 3);
552 assertEquals("xyz.length", 3, xyz.length());
553 assertEquals("xyz[0]", 0x78, xyz.data()[0]);
554 assertEquals("xyz[1]", 0x79, xyz.data()[1]);
555 assertEquals("xyz[2]", 0x7a, xyz.data()[2]);
556
557 StringPiece null(nullptr);
558 assertTrue("null is empty", null.empty());
559 assertTrue("null is null", null.data() == nullptr);
560
561 #ifdef __cpp_lib_char8_t
562 std::u8string_view u8sv(u8"sv"); // C++20
563 StringPiece u8svsp(u8sv);
564 assertEquals("u8svsp.length", 2, u8svsp.length());
565 assertEquals("u8svsp", "\x73\x76", u8svsp.data());
566
567 std::u8string u8str(u8"str"); // C++20
568 StringPiece u8strsp(u8str);
569 assertEquals("u8strsp.length", 3, u8strsp.length());
570 assertEquals("u8strsp", "\x73\x74\x72", u8strsp.data());
571 #endif // __cpp_lib_char8_t
572 }
573
574 // Verify that ByteSink is subclassable and Flush() overridable.
575 class SimpleByteSink : public ByteSink {
576 public:
SimpleByteSink(char * outbuf)577 SimpleByteSink(char *outbuf) : fOutbuf(outbuf), fLength(0) {}
Append(const char * bytes,int32_t n)578 virtual void Append(const char *bytes, int32_t n) override {
579 if(fOutbuf != bytes) {
580 memcpy(fOutbuf, bytes, n);
581 }
582 fOutbuf += n;
583 fLength += n;
584 }
Flush()585 virtual void Flush() override { Append("z", 1); }
length()586 int32_t length() { return fLength; }
587 private:
588 char *fOutbuf;
589 int32_t fLength;
590 };
591
592 // Test the ByteSink base class.
593 void
TestByteSink()594 StringTest::TestByteSink() {
595 char buffer[20];
596 buffer[4] = '!';
597 SimpleByteSink sink(buffer);
598 sink.Append("abc", 3);
599 sink.Flush();
600 if(!(sink.length() == 4 && 0 == memcmp("abcz", buffer, 4) && buffer[4] == '!')) {
601 errln("ByteSink (SimpleByteSink) did not Append() or Flush() as expected");
602 return;
603 }
604 char scratch[20];
605 int32_t capacity = -1;
606 char *dest = sink.GetAppendBuffer(0, 50, scratch, (int32_t)sizeof(scratch), &capacity);
607 if(dest != NULL || capacity != 0) {
608 errln("ByteSink.GetAppendBuffer(min_capacity<1) did not properly return NULL[0]");
609 return;
610 }
611 dest = sink.GetAppendBuffer(10, 50, scratch, 9, &capacity);
612 if(dest != NULL || capacity != 0) {
613 errln("ByteSink.GetAppendBuffer(scratch_capacity<min_capacity) did not properly return NULL[0]");
614 return;
615 }
616 dest = sink.GetAppendBuffer(5, 50, scratch, (int32_t)sizeof(scratch), &capacity);
617 if(dest != scratch || capacity != (int32_t)sizeof(scratch)) {
618 errln("ByteSink.GetAppendBuffer() did not properly return the scratch buffer");
619 }
620 }
621
622 void
TestCheckedArrayByteSink()623 StringTest::TestCheckedArrayByteSink() {
624 char buffer[20]; // < 26 for the test code to work
625 buffer[3] = '!';
626 CheckedArrayByteSink sink(buffer, (int32_t)sizeof(buffer));
627 sink.Append("abc", 3);
628 if(!(sink.NumberOfBytesAppended() == 3 && sink.NumberOfBytesWritten() == 3 &&
629 0 == memcmp("abc", buffer, 3) && buffer[3] == '!') &&
630 !sink.Overflowed()
631 ) {
632 errln("CheckedArrayByteSink did not Append() as expected");
633 return;
634 }
635 char scratch[10];
636 int32_t capacity = -1;
637 char *dest = sink.GetAppendBuffer(0, 50, scratch, (int32_t)sizeof(scratch), &capacity);
638 if(dest != NULL || capacity != 0) {
639 errln("CheckedArrayByteSink.GetAppendBuffer(min_capacity<1) did not properly return NULL[0]");
640 return;
641 }
642 dest = sink.GetAppendBuffer(10, 50, scratch, 9, &capacity);
643 if(dest != NULL || capacity != 0) {
644 errln("CheckedArrayByteSink.GetAppendBuffer(scratch_capacity<min_capacity) did not properly return NULL[0]");
645 return;
646 }
647 dest = sink.GetAppendBuffer(10, 50, scratch, (int32_t)sizeof(scratch), &capacity);
648 if(dest != buffer + 3 || capacity != (int32_t)sizeof(buffer) - 3) {
649 errln("CheckedArrayByteSink.GetAppendBuffer() did not properly return its own buffer");
650 return;
651 }
652 memcpy(dest, "defghijklm", 10);
653 sink.Append(dest, 10);
654 if(!(sink.NumberOfBytesAppended() == 13 && sink.NumberOfBytesWritten() == 13 &&
655 0 == memcmp("abcdefghijklm", buffer, 13) &&
656 !sink.Overflowed())
657 ) {
658 errln("CheckedArrayByteSink did not Append(its own buffer) as expected");
659 return;
660 }
661 dest = sink.GetAppendBuffer(10, 50, scratch, (int32_t)sizeof(scratch), &capacity);
662 if(dest != scratch || capacity != (int32_t)sizeof(scratch)) {
663 errln("CheckedArrayByteSink.GetAppendBuffer() did not properly return the scratch buffer");
664 }
665 memcpy(dest, "nopqrstuvw", 10);
666 sink.Append(dest, 10);
667 if(!(sink.NumberOfBytesAppended() == 23 &&
668 sink.NumberOfBytesWritten() == (int32_t)sizeof(buffer) &&
669 0 == memcmp("abcdefghijklmnopqrstuvwxyz", buffer, (int32_t)sizeof(buffer)) &&
670 sink.Overflowed())
671 ) {
672 errln("CheckedArrayByteSink did not Append(scratch buffer) as expected");
673 return;
674 }
675 sink.Reset().Append("123", 3);
676 if(!(sink.NumberOfBytesAppended() == 3 && sink.NumberOfBytesWritten() == 3 &&
677 0 == memcmp("123defghijklmnopqrstuvwxyz", buffer, (int32_t)sizeof(buffer)) &&
678 !sink.Overflowed())
679 ) {
680 errln("CheckedArrayByteSink did not Reset().Append() as expected");
681 return;
682 }
683 }
684
685 void
TestStringByteSink()686 StringTest::TestStringByteSink() {
687 // Not much to test because only the constructors and Append()
688 // are implemented, and trivially so.
689 std::string result("abc"); // std::string
690 StringByteSink<std::string> sink(&result);
691 sink.Append("def", 3);
692 if(result != "abcdef") {
693 errln("StringByteSink did not Append() as expected");
694 }
695 StringByteSink<std::string> sink2(&result, 20);
696 if(result.capacity() < (result.length() + 20)) {
697 errln("StringByteSink should have 20 append capacity, has only %d",
698 (int)(result.capacity() - result.length()));
699 }
700 sink.Append("ghi", 3);
701 if(result != "abcdefghi") {
702 errln("StringByteSink did not Append() as expected");
703 }
704 }
705
706 void
TestStringByteSinkAppendU8()707 StringTest::TestStringByteSinkAppendU8() {
708 // ICU-20984 "mitigate some C++20 char8_t breakages"
709 // For the following APIs there are overloads for both
710 // const char * and const char8_t *.
711 // A u8"string literal" has one type or the other
712 // depending on C++ version and compiler settings.
713 std::string result("abc");
714 StringByteSink<std::string> sink(&result);
715 sink.AppendU8("def", 3);
716 sink.AppendU8(u8"ghijkl", 4);
717 assertEquals("abcdefghij", "abcdef\x67\x68\x69\x6a", result.c_str());
718 }
719
720 #if defined(_MSC_VER)
721 #include <vector>
722 #endif
723
724 void
TestSTLCompatibility()725 StringTest::TestSTLCompatibility() {
726 #if defined(_MSC_VER)
727 /* Just make sure that it compiles with STL's placement new usage. */
728 std::vector<UnicodeString> myvect;
729 myvect.push_back(UnicodeString("blah"));
730 #endif
731 }
732
733 void
TestCharString()734 StringTest::TestCharString() {
735 IcuTestErrorCode errorCode(*this, "TestCharString()");
736 char expected[400];
737 static const char longStr[] =
738 "This is a long string that is meant to cause reallocation of the internal buffer of CharString.";
739 CharString chStr(longStr, errorCode);
740 if (0 != strcmp(longStr, chStr.data()) || (int32_t)strlen(longStr) != chStr.length()) {
741 errln("CharString(longStr) failed.");
742 }
743 CharString test("Test", errorCode);
744 CharString copy(test,errorCode);
745 copy.copyFrom(chStr, errorCode);
746 if (0 != strcmp(longStr, copy.data()) || (int32_t)strlen(longStr) != copy.length()) {
747 errln("CharString.copyFrom() failed.");
748 }
749 StringPiece sp(chStr.toStringPiece());
750 sp.remove_prefix(4);
751 chStr.append(sp, errorCode).append(chStr, errorCode);
752 strcpy(expected, longStr);
753 strcat(expected, longStr+4);
754 strcat(expected, longStr);
755 strcat(expected, longStr+4);
756 if (0 != strcmp(expected, chStr.data()) || (int32_t)strlen(expected) != chStr.length()) {
757 errln("CharString(longStr).append(substring of self).append(self) failed.");
758 }
759 chStr.clear().append("abc", errorCode).append("defghij", 3, errorCode);
760 if (0 != strcmp("abcdef", chStr.data()) || 6 != chStr.length()) {
761 errln("CharString.clear().append(abc).append(defghij, 3) failed.");
762 }
763 chStr.appendInvariantChars(UNICODE_STRING_SIMPLE(
764 "This is a long string that is meant to cause reallocation of the internal buffer of CharString."),
765 errorCode);
766 strcpy(expected, "abcdef");
767 strcat(expected, longStr);
768 if (0 != strcmp(expected, chStr.data()) || (int32_t)strlen(expected) != chStr.length()) {
769 errln("CharString.appendInvariantChars(longStr) failed.");
770 }
771 int32_t appendCapacity = 0;
772 char *buffer = chStr.getAppendBuffer(5, 10, appendCapacity, errorCode);
773 if (errorCode.isFailure()) {
774 return;
775 }
776 memcpy(buffer, "*****", 5);
777 chStr.append(buffer, 5, errorCode);
778 chStr.truncate(chStr.length()-3);
779 strcat(expected, "**");
780 if (0 != strcmp(expected, chStr.data()) || (int32_t)strlen(expected) != chStr.length()) {
781 errln("CharString.getAppendBuffer().append(**) failed.");
782 }
783
784 UErrorCode ec = U_ZERO_ERROR;
785 chStr.clear();
786 chStr.appendInvariantChars(UnicodeString("The '@' character is not invariant."), ec);
787 if (ec != U_INVARIANT_CONVERSION_ERROR) {
788 errln("%s:%d expected U_INVARIANT_CONVERSION_ERROR, got %s", __FILE__, __LINE__, u_errorName(ec));
789 }
790 if (chStr.length() != 0) {
791 errln("%s:%d expected length() = 0, got %d", __FILE__, __LINE__, chStr.length());
792 }
793
794 {
795 CharString s1("Short string", errorCode);
796 CharString s2(std::move(s1));
797 assertEquals("s2 should have content of s1", "Short string", s2.data());
798 CharString s3("Dummy", errorCode);
799 s3 = std::move(s2);
800 assertEquals("s3 should have content of s2", "Short string", s3.data());
801 }
802
803 {
804 CharString s1("Long string over 40 characters to trigger heap allocation", errorCode);
805 CharString s2(std::move(s1));
806 assertEquals("s2 should have content of s1",
807 "Long string over 40 characters to trigger heap allocation",
808 s2.data());
809 CharString s3("Dummy string with over 40 characters to trigger heap allocation", errorCode);
810 s3 = std::move(s2);
811 assertEquals("s3 should have content of s2",
812 "Long string over 40 characters to trigger heap allocation",
813 s3.data());
814 }
815
816 {
817 // extract()
818 errorCode.reset();
819 CharString s("abc", errorCode);
820 char buffer[10];
821
822 s.extract(buffer, 10, errorCode);
823 assertEquals("abc.extract(10) success", U_ZERO_ERROR, errorCode.get());
824 assertEquals("abc.extract(10) output", "abc", buffer);
825
826 strcpy(buffer, "012345");
827 s.extract(buffer, 3, errorCode);
828 assertEquals("abc.extract(3) not terminated",
829 U_STRING_NOT_TERMINATED_WARNING, errorCode.reset());
830 assertEquals("abc.extract(3) output", "abc345", buffer);
831
832 strcpy(buffer, "012345");
833 s.extract(buffer, 2, errorCode);
834 assertEquals("abc.extract(2) overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset());
835 }
836 }
837
838 void
TestCStr()839 StringTest::TestCStr() {
840 const char *cs = "This is a test string.";
841 UnicodeString us(cs);
842 if (0 != strcmp(CStr(us)(), cs)) {
843 errln("%s:%d CStr(s)() failed. Expected \"%s\", got \"%s\"", __FILE__, __LINE__, cs, CStr(us)());
844 }
845 }
846
TestCharStrAppendNumber()847 void StringTest::TestCharStrAppendNumber() {
848 IcuTestErrorCode errorCode(*this, "TestCharStrAppendNumber()");
849
850 CharString testString;
851 testString.appendNumber(1, errorCode);
852 assertEquals("TestAppendNumber 1", "1", testString.data());
853
854 testString.clear();
855 testString.appendNumber(-1, errorCode);
856 assertEquals("TestAppendNumber -1", "-1", testString.data());
857
858 testString.clear();
859 testString.appendNumber(12345, errorCode);
860 assertEquals("TestAppendNumber 12345", "12345", testString.data());
861 testString.appendNumber(123, errorCode);
862 assertEquals("TestAppendNumber 12345 and then 123", "12345123", testString.data());
863
864 testString.clear();
865 testString.appendNumber(std::numeric_limits<int32_t>::max(), errorCode);
866 assertEquals("TestAppendNumber when appending the biggest int32", "2147483647", testString.data());
867
868 testString.clear();
869 testString.appendNumber(std::numeric_limits<int32_t>::min(), errorCode);
870 assertEquals("TestAppendNumber when appending the smallest int32", "-2147483648", testString.data());
871
872 testString.clear();
873 testString.appendNumber(0, errorCode);
874 assertEquals("TestAppendNumber when appending zero", "0", testString.data());
875 }
876
877 void
Testctou()878 StringTest::Testctou() {
879 const char *cs = "Fa\\u0127mu";
880 UnicodeString u = ctou(cs);
881 assertEquals("Testing unescape@0", (int32_t)0x0046, u.charAt(0));
882 assertEquals("Testing unescape@2", (int32_t)295, u.charAt(2));
883 }
884