1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2015, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /* file name: strtest.cpp
9 * encoding: UTF-8
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 1999nov22
14 * created by: Markus W. Scherer
15 */
16
17 #include <string_view>
18 #include <cstddef>
19 #include <string.h>
20 #include <limits>
21
22 #include "unicode/utypes.h"
23 #include "unicode/putil.h"
24 #include "unicode/std_string.h"
25 #include "unicode/stringpiece.h"
26 #include "unicode/unistr.h"
27 #include "unicode/ustring.h"
28 #include "unicode/utf_old.h" // for UTF8_COUNT_TRAIL_BYTES
29 #include "unicode/utf8.h"
30 #include "charstr.h"
31 #include "cstr.h"
32 #include "intltest.h"
33 #include "strtest.h"
34 #include "uinvchar.h"
35
~StringTest()36 StringTest::~StringTest() {}
37
TestEndian()38 void StringTest::TestEndian() {
39 union {
40 uint8_t byte;
41 uint16_t word;
42 } u;
43 u.word=0x0100;
44 if(U_IS_BIG_ENDIAN!=u.byte) {
45 errln("TestEndian: U_IS_BIG_ENDIAN needs to be fixed in platform.h");
46 }
47 }
48
TestSizeofTypes()49 void StringTest::TestSizeofTypes() {
50 if(U_SIZEOF_WCHAR_T!=sizeof(wchar_t)) {
51 errln("TestSizeofWCharT: U_SIZEOF_WCHAR_T!=sizeof(wchar_t) - U_SIZEOF_WCHAR_T needs to be fixed in platform.h");
52 }
53 #ifdef U_INT64_T_UNAVAILABLE
54 errln("int64_t and uint64_t are undefined.");
55 #else
56 if(8!=sizeof(int64_t)) {
57 errln("TestSizeofTypes: 8!=sizeof(int64_t) - int64_t needs to be fixed in platform.h");
58 }
59 if(8!=sizeof(uint64_t)) {
60 errln("TestSizeofTypes: 8!=sizeof(uint64_t) - uint64_t needs to be fixed in platform.h");
61 }
62 #endif
63 if(8!=sizeof(double)) {
64 errln("8!=sizeof(double) - putil.c code may not work");
65 }
66 if(4!=sizeof(int32_t)) {
67 errln("4!=sizeof(int32_t)");
68 }
69 if(4!=sizeof(uint32_t)) {
70 errln("4!=sizeof(uint32_t)");
71 }
72 if(2!=sizeof(int16_t)) {
73 errln("2!=sizeof(int16_t)");
74 }
75 if(2!=sizeof(uint16_t)) {
76 errln("2!=sizeof(uint16_t)");
77 }
78 if(2!=sizeof(char16_t)) {
79 errln("2!=sizeof(char16_t)");
80 }
81 if(1!=sizeof(int8_t)) {
82 errln("1!=sizeof(int8_t)");
83 }
84 if(1!=sizeof(uint8_t)) {
85 errln("1!=sizeof(uint8_t)");
86 }
87 if(1!=sizeof(UBool)) {
88 errln("1!=sizeof(UBool)");
89 }
90 }
91
TestCharsetFamily()92 void StringTest::TestCharsetFamily() {
93 unsigned char c='A';
94 if( (U_CHARSET_FAMILY==U_ASCII_FAMILY && c!=0x41) ||
95 (U_CHARSET_FAMILY==U_EBCDIC_FAMILY && c!=0xc1)
96 ) {
97 errln("TestCharsetFamily: U_CHARSET_FAMILY needs to be fixed in platform.h");
98 }
99 }
100
101 U_STRING_DECL(ustringVar, "aZ0 -", 5);
102
103 void
Test_U_STRING()104 StringTest::Test_U_STRING() {
105 U_STRING_INIT(ustringVar, "aZ0 -", 5);
106 if( u_strlen(ustringVar)!=5 ||
107 ustringVar[0]!=0x61 ||
108 ustringVar[1]!=0x5a ||
109 ustringVar[2]!=0x30 ||
110 ustringVar[3]!=0x20 ||
111 ustringVar[4]!=0x2d ||
112 ustringVar[5]!=0
113 ) {
114 errln("Test_U_STRING: U_STRING_DECL with U_STRING_INIT does not work right! "
115 "See putil.h and utypes.h with platform.h.");
116 }
117 }
118
119 void
Test_UNICODE_STRING()120 StringTest::Test_UNICODE_STRING() {
121 UnicodeString ustringVar=UNICODE_STRING("aZ0 -", 5);
122 if( ustringVar.length()!=5 ||
123 ustringVar[0]!=0x61 ||
124 ustringVar[1]!=0x5a ||
125 ustringVar[2]!=0x30 ||
126 ustringVar[3]!=0x20 ||
127 ustringVar[4]!=0x2d
128 ) {
129 errln("Test_UNICODE_STRING: UNICODE_STRING does not work right! "
130 "See unistr.h and utypes.h with platform.h.");
131 }
132 }
133
134 void
Test_UNICODE_STRING_SIMPLE()135 StringTest::Test_UNICODE_STRING_SIMPLE() {
136 UnicodeString ustringVar=UNICODE_STRING_SIMPLE("aZ0 -");
137 if( ustringVar.length()!=5 ||
138 ustringVar[0]!=0x61 ||
139 ustringVar[1]!=0x5a ||
140 ustringVar[2]!=0x30 ||
141 ustringVar[3]!=0x20 ||
142 ustringVar[4]!=0x2d
143 ) {
144 errln("Test_UNICODE_STRING_SIMPLE: UNICODE_STRING_SIMPLE does not work right! "
145 "See unistr.h and utypes.h with platform.h.");
146 }
147 }
148
namespace {

// Two parallel tables listing the same characters in the same order, so that
// index i in one table corresponds to index i in the other.
// See U_CHARSET_FAMILY in unicode/platform.h.

// The characters as UTF-16 literals.
const char16_t *asciiInvChars =
    u"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    u"abcdefghijklmnopqrstuvwxyz"
    u"0123456789 \"%&'()*+,-./:;<=>?_";

// The same characters as narrow literals in the compiler's execution charset.
const char *nativeInvChars =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789 \"%&'()*+,-./:;<=>?_";

}  // namespace
162
163 void
TestUpperOrdinal()164 StringTest::TestUpperOrdinal() {
165 for (int32_t i = 0;; ++i) {
166 char ic = nativeInvChars[i];
167 uint8_t ac = static_cast<uint8_t>(asciiInvChars[i]);
168 int32_t expected = ac - 'A';
169 int32_t actual = uprv_upperOrdinal(ic);
170 if (0 <= expected && expected <= 25) {
171 if (actual != expected) {
172 errln("uprv_upperOrdinal('%c')=%d != expected %d",
173 ic, (int)actual, (int)expected);
174 }
175 } else {
176 if (0 <= actual && actual <= 25) {
177 errln("uprv_upperOrdinal('%c')=%d should have been outside 0..25",
178 ic, (int)actual);
179 }
180 }
181 if (ic == 0) { break; }
182 }
183 }
184
185 void
TestLowerOrdinal()186 StringTest::TestLowerOrdinal() {
187 for (int32_t i = 0;; ++i) {
188 char ic = nativeInvChars[i];
189 uint8_t ac = static_cast<uint8_t>(asciiInvChars[i]);
190 int32_t expected = ac - 'a';
191 int32_t actual = uprv_lowerOrdinal(ic);
192 if (0 <= expected && expected <= 25) {
193 if (actual != expected) {
194 errln("uprv_lowerOrdinal('%c')=%d != expected %d",
195 ic, (int)actual, (int)expected);
196 }
197 } else {
198 if (0 <= actual && actual <= 25) {
199 errln("uprv_lowerOrdinal('%c')=%d should have been outside 0..25",
200 ic, (int)actual);
201 }
202 }
203 if (ic == 0) { break; }
204 }
205 }
206
207 void
Test_UTF8_COUNT_TRAIL_BYTES()208 StringTest::Test_UTF8_COUNT_TRAIL_BYTES() {
209 #if !U_HIDE_OBSOLETE_UTF_OLD_H
210 if(UTF8_COUNT_TRAIL_BYTES(0x7F) != 0
211 || UTF8_COUNT_TRAIL_BYTES(0xC2) != 1
212 || UTF8_COUNT_TRAIL_BYTES(0xE0) != 2
213 || UTF8_COUNT_TRAIL_BYTES(0xF0) != 3) {
214 errln("UTF8_COUNT_TRAIL_BYTES does not work right! See utf_old.h.");
215 }
216 #endif
217 // Note: U8_COUNT_TRAIL_BYTES (current) and UTF8_COUNT_TRAIL_BYTES (deprecated)
218 // have completely different implementations.
219 if (U8_COUNT_TRAIL_BYTES(0x7F) != 0
220 || U8_COUNT_TRAIL_BYTES(0xC2) != 1
221 || U8_COUNT_TRAIL_BYTES(0xE0) != 2
222 || U8_COUNT_TRAIL_BYTES(0xF0) != 3) {
223 errln("U8_COUNT_TRAIL_BYTES does not work right! See utf8.h.");
224 }
225 }
226
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)227 void StringTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
228 if(exec) {
229 logln("TestSuite Character and String Test: ");
230 }
231 TESTCASE_AUTO_BEGIN;
232 TESTCASE_AUTO(TestEndian);
233 TESTCASE_AUTO(TestSizeofTypes);
234 TESTCASE_AUTO(TestCharsetFamily);
235 TESTCASE_AUTO(Test_U_STRING);
236 TESTCASE_AUTO(Test_UNICODE_STRING);
237 TESTCASE_AUTO(Test_UNICODE_STRING_SIMPLE);
238 TESTCASE_AUTO(TestUpperOrdinal);
239 TESTCASE_AUTO(TestLowerOrdinal);
240 TESTCASE_AUTO(Test_UTF8_COUNT_TRAIL_BYTES);
241 TESTCASE_AUTO(TestSTLCompatibility);
242 TESTCASE_AUTO(TestStringPiece);
243 TESTCASE_AUTO(TestStringPieceComparisons);
244 TESTCASE_AUTO(TestStringPieceFind);
245 TESTCASE_AUTO(TestStringPieceOther);
246 TESTCASE_AUTO(TestStringPieceStringView);
247 TESTCASE_AUTO(TestStringPieceU8);
248 TESTCASE_AUTO(TestByteSink);
249 TESTCASE_AUTO(TestCheckedArrayByteSink);
250 TESTCASE_AUTO(TestStringByteSink);
251 TESTCASE_AUTO(TestStringByteSinkAppendU8);
252 TESTCASE_AUTO(TestCharString);
253 TESTCASE_AUTO(TestCStr);
254 TESTCASE_AUTO(TestCharStrAppendNumber);
255 TESTCASE_AUTO(Testctou);
256 TESTCASE_AUTO_END;
257 }
258
259 void
TestStringPiece()260 StringTest::TestStringPiece() {
261 // Default constructor.
262 StringPiece empty;
263 if(!empty.empty() || empty.data()!=nullptr || empty.length()!=0 || empty.size()!=0) {
264 errln("StringPiece() failed");
265 }
266 // Construct from nullptr const char * pointer.
267 StringPiece null((const char *)nullptr);
268 if(!null.empty() || null.data()!=nullptr || null.length()!=0 || null.size()!=0) {
269 errln("StringPiece(nullptr) failed");
270 }
271 // Construct from const char * pointer.
272 static const char *abc_chars="abc";
273 StringPiece abc(abc_chars);
274 if(abc.empty() || abc.data()!=abc_chars || abc.length()!=3 || abc.size()!=3) {
275 errln("StringPiece(abc_chars) failed");
276 }
277 // Construct from const char * pointer and length.
278 static const char *abcdefg_chars="abcdefg";
279 StringPiece abcd(abcdefg_chars, 4);
280 if(abcd.empty() || abcd.data()!=abcdefg_chars || abcd.length()!=4 || abcd.size()!=4) {
281 errln("StringPiece(abcdefg_chars, 4) failed");
282 }
283 // Construct from std::string.
284 std::string uvwxyz_string("uvwxyz");
285 StringPiece uvwxyz(uvwxyz_string);
286 if(uvwxyz.empty() || uvwxyz.data()!=uvwxyz_string.data() || uvwxyz.length()!=6 || uvwxyz.size()!=6) {
287 errln("StringPiece(uvwxyz_string) failed");
288 }
289 // Substring constructor with pos.
290 StringPiece sp(abcd, -1);
291 if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
292 errln("StringPiece(abcd, -1) failed");
293 }
294 sp=StringPiece(abcd, 5);
295 if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
296 errln("StringPiece(abcd, 5) failed");
297 }
298 sp=StringPiece(abcd, 2);
299 if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) {
300 errln("StringPiece(abcd, -1) failed");
301 }
302 // Substring constructor with pos and len.
303 sp=StringPiece(abcd, -1, 8);
304 if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
305 errln("StringPiece(abcd, -1, 8) failed");
306 }
307 sp=StringPiece(abcd, 5, 8);
308 if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
309 errln("StringPiece(abcd, 5, 8) failed");
310 }
311 sp=StringPiece(abcd, 2, 8);
312 if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) {
313 errln("StringPiece(abcd, -1) failed");
314 }
315 sp=StringPiece(abcd, 2, -1);
316 if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
317 errln("StringPiece(abcd, 5, -1) failed");
318 }
319 // static const npos
320 const int32_t *ptr_npos=&StringPiece::npos;
321 if(StringPiece::npos!=0x7fffffff || *ptr_npos!=0x7fffffff) {
322 errln("StringPiece::npos!=0x7fffffff");
323 }
324 // substr() method with pos, using len=npos.
325 sp=abcd.substr(-1);
326 if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
327 errln("abcd.substr(-1) failed");
328 }
329 sp=abcd.substr(5);
330 if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
331 errln("abcd.substr(5) failed");
332 }
333 sp=abcd.substr(2);
334 if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) {
335 errln("abcd.substr(-1) failed");
336 }
337 // substr() method with pos and len.
338 sp=abcd.substr(-1, 8);
339 if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
340 errln("abcd.substr(-1, 8) failed");
341 }
342 sp=abcd.substr(5, 8);
343 if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
344 errln("abcd.substr(5, 8) failed");
345 }
346 sp=abcd.substr(2, 8);
347 if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) {
348 errln("abcd.substr(-1) failed");
349 }
350 sp=abcd.substr(2, -1);
351 if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
352 errln("abcd.substr(5, -1) failed");
353 }
354 // clear()
355 sp=abcd;
356 sp.clear();
357 if(!sp.empty() || sp.data()!=nullptr || sp.length()!=0 || sp.size()!=0) {
358 errln("abcd.clear() failed");
359 }
360 // remove_prefix()
361 sp=abcd;
362 sp.remove_prefix(-1);
363 if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
364 errln("abcd.remove_prefix(-1) failed");
365 }
366 sp=abcd;
367 sp.remove_prefix(2);
368 if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) {
369 errln("abcd.remove_prefix(2) failed");
370 }
371 sp=abcd;
372 sp.remove_prefix(5);
373 if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
374 errln("abcd.remove_prefix(5) failed");
375 }
376 // remove_suffix()
377 sp=abcd;
378 sp.remove_suffix(-1);
379 if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
380 errln("abcd.remove_suffix(-1) failed");
381 }
382 sp=abcd;
383 sp.remove_suffix(2);
384 if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=2 || sp.size()!=2) {
385 errln("abcd.remove_suffix(2) failed");
386 }
387 sp=abcd;
388 sp.remove_suffix(5);
389 if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
390 errln("abcd.remove_suffix(5) failed");
391 }
392 }
393
394 void
TestStringPieceComparisons()395 StringTest::TestStringPieceComparisons() {
396 StringPiece empty;
397 StringPiece null(nullptr);
398 StringPiece abc("abc");
399 StringPiece abcd("abcdefg", 4);
400 StringPiece abx("abx");
401 if(empty!=null) {
402 errln("empty!=null");
403 }
404 if(empty==abc) {
405 errln("empty==abc");
406 }
407 if(abc==abcd) {
408 errln("abc==abcd");
409 }
410
411 assertTrue("null<abc", null.compare(abc) < 0);
412 assertTrue("abc>null", abc.compare(null) > 0);
413 assertTrue("abc<abcd", abc.compare(abcd) < 0);
414 assertTrue("abcd>abc", abcd.compare(abc) > 0);
415 assertTrue("abc<abx", abc.compare(abx) < 0);
416 assertTrue("abx>abc", abx.compare(abc) > 0);
417 assertTrue("abx>abcd", abx.compare(abcd) > 0);
418 assertTrue("abcd<abx", abcd.compare(abx) < 0);
419 assertTrue("abx==abx", abx.compare(abx) == 0);
420
421 // Behavior should be the same as std::string::compare
422 {
423 std::string null("");
424 std::string abc("abc");
425 std::string abcd("abcdefg", 4);
426 std::string abx("abx");
427
428 assertTrue("std: null<abc", null.compare(abc) < 0);
429 assertTrue("std: abc>null", abc.compare(null) > 0);
430 assertTrue("std: abc<abcd", abc.compare(abcd) < 0);
431 assertTrue("std: abcd>abc", abcd.compare(abc) > 0);
432 assertTrue("std: abc<abx", abc.compare(abx) < 0);
433 assertTrue("std: abx>abc", abx.compare(abc) > 0);
434 assertTrue("std: abx>abcd", abx.compare(abcd) > 0);
435 assertTrue("std: abcd<abx", abcd.compare(abx) < 0);
436 assertTrue("std: abx==abx", abx.compare(abx) == 0);
437 }
438
439 abcd.remove_suffix(1);
440 if(abc!=abcd) {
441 errln("abc!=abcd.remove_suffix(1)");
442 }
443 if(abc==abx) {
444 errln("abc==abx");
445 }
446 }
447
448 void
TestStringPieceFind()449 StringTest::TestStringPieceFind() {
450 struct TestCase {
451 const char* haystack;
452 const char* needle;
453 int32_t expected;
454 } cases[] = {
455 { "", "", 0 },
456 { "", "x", -1 },
457 { "x", "", 0 },
458 { "x", "x", 0 },
459 { "xy", "x", 0 },
460 { "xy", "y", 1 },
461 { "xy", "xy", 0 },
462 { "xy", "xyz", -1 },
463 { "qwerty", "qqw", -1 },
464 { "qwerty", "qw", 0 },
465 { "qwerty", "er", 2 },
466 { "qwerty", "err", -1 },
467 { "qwerty", "ert", 2 },
468 { "qwerty", "ty", 4 },
469 { "qwerty", "tyy", -1 },
470 { "qwerty", "a", -1 },
471 { "qwerty", "abc", -1 }
472 };
473 int32_t caseNumber = 0;
474 for (auto& cas : cases) {
475 StringPiece haystack(cas.haystack);
476 StringPiece needle(cas.needle);
477 assertEquals(Int64ToUnicodeString(caseNumber),
478 cas.expected, haystack.find(needle, 0));
479 // Should be same as std::string::find
480 std::string stdhaystack(cas.haystack);
481 std::string stdneedle(cas.needle);
482 assertEquals(Int64ToUnicodeString(caseNumber) + u" (std)",
483 cas.expected, static_cast<int32_t>(stdhaystack.find(stdneedle, 0)));
484 // Test offsets against std::string::find
485 for (int32_t offset = 0; offset < haystack.length(); offset++) {
486 assertEquals(Int64ToUnicodeString(caseNumber) + "u @ " + Int64ToUnicodeString(offset),
487 static_cast<int32_t>(stdhaystack.find(stdneedle, offset)), haystack.find(needle, offset));
488 }
489 caseNumber++;
490 }
491 }
492
493 void
TestStringPieceOther()494 StringTest::TestStringPieceOther() {
495 static constexpr char msg[] = "Kapow!";
496
497 // Another string piece implementation.
498 struct Other {
499 const char* data() { return msg; }
500 size_t size() { return sizeof msg - 1; }
501 };
502
503 Other other;
504 StringPiece piece(other);
505
506 assertEquals("size()", piece.size(), static_cast<int32_t>(other.size()));
507 assertEquals("data()", piece.data(), other.data());
508 }
509
510 void
TestStringPieceStringView()511 StringTest::TestStringPieceStringView() {
512 static constexpr char msg[] = "Kapow!";
513
514 std::string_view view(msg); // C++17
515 StringPiece piece(view);
516
517 assertEquals("size()", piece.size(), view.size());
518 assertEquals("data()", piece.data(), view.data());
519 }
520
521 void
TestStringPieceU8()522 StringTest::TestStringPieceU8() {
523 // ICU-20984 "mitigate some C++20 char8_t breakages"
524 // For the following APIs there are overloads for both
525 // const char * and const char8_t *.
526 // A u8"string literal" has one type or the other
527 // depending on C++ version and compiler settings.
528 StringPiece abc(u8"abc");
529 assertEquals("abc.length", 3, abc.length());
530 assertEquals("abc", "\x61\x62\x63", abc.data());
531
532 StringPiece abc3(u8"abcdef", 3);
533 assertEquals("abc3.length", 3, abc3.length());
534 assertEquals("abc3[0]", 0x61, abc3.data()[0]);
535 assertEquals("abc3[1]", 0x62, abc3.data()[1]);
536 assertEquals("abc3[2]", 0x63, abc3.data()[2]);
537
538 StringPiece uvw("q");
539 uvw.set(u8"uvw");
540 assertEquals("uvw.length", 3, uvw.length());
541 assertEquals("uvw", "\x75\x76\x77", uvw.data());
542
543 StringPiece xyz("r");
544 xyz.set(u8"xyzXYZ", 3);
545 assertEquals("xyz.length", 3, xyz.length());
546 assertEquals("xyz[0]", 0x78, xyz.data()[0]);
547 assertEquals("xyz[1]", 0x79, xyz.data()[1]);
548 assertEquals("xyz[2]", 0x7a, xyz.data()[2]);
549
550 StringPiece null(nullptr);
551 assertTrue("null is empty", null.empty());
552 assertTrue("null is null", null.data() == nullptr);
553
554 #ifdef __cpp_lib_char8_t
555 std::u8string_view u8sv(u8"sv"); // C++20
556 StringPiece u8svsp(u8sv);
557 assertEquals("u8svsp.length", 2, u8svsp.length());
558 assertEquals("u8svsp", "\x73\x76", u8svsp.data());
559
560 std::u8string u8str(u8"str"); // C++20
561 StringPiece u8strsp(u8str);
562 assertEquals("u8strsp.length", 3, u8strsp.length());
563 assertEquals("u8strsp", "\x73\x74\x72", u8strsp.data());
564 #endif // __cpp_lib_char8_t
565 }
566
567 // Verify that ByteSink is subclassable and Flush() overridable.
568 class SimpleByteSink : public ByteSink {
569 public:
SimpleByteSink(char * outbuf)570 SimpleByteSink(char *outbuf) : fOutbuf(outbuf), fLength(0) {}
Append(const char * bytes,int32_t n)571 virtual void Append(const char *bytes, int32_t n) override {
572 if(fOutbuf != bytes) {
573 memcpy(fOutbuf, bytes, n);
574 }
575 fOutbuf += n;
576 fLength += n;
577 }
Flush()578 virtual void Flush() override { Append("z", 1); }
length()579 int32_t length() { return fLength; }
580 private:
581 char *fOutbuf;
582 int32_t fLength;
583 };
584
585 // Test the ByteSink base class.
586 void
TestByteSink()587 StringTest::TestByteSink() {
588 char buffer[20];
589 buffer[4] = '!';
590 SimpleByteSink sink(buffer);
591 sink.Append("abc", 3);
592 sink.Flush();
593 if(!(sink.length() == 4 && 0 == memcmp("abcz", buffer, 4) && buffer[4] == '!')) {
594 errln("ByteSink (SimpleByteSink) did not Append() or Flush() as expected");
595 return;
596 }
597 char scratch[20];
598 int32_t capacity = -1;
599 char *dest = sink.GetAppendBuffer(0, 50, scratch, (int32_t)sizeof(scratch), &capacity);
600 if(dest != nullptr || capacity != 0) {
601 errln("ByteSink.GetAppendBuffer(min_capacity<1) did not properly return nullptr[0]");
602 return;
603 }
604 dest = sink.GetAppendBuffer(10, 50, scratch, 9, &capacity);
605 if(dest != nullptr || capacity != 0) {
606 errln("ByteSink.GetAppendBuffer(scratch_capacity<min_capacity) did not properly return nullptr[0]");
607 return;
608 }
609 dest = sink.GetAppendBuffer(5, 50, scratch, (int32_t)sizeof(scratch), &capacity);
610 if(dest != scratch || capacity != (int32_t)sizeof(scratch)) {
611 errln("ByteSink.GetAppendBuffer() did not properly return the scratch buffer");
612 }
613 }
614
615 void
TestCheckedArrayByteSink()616 StringTest::TestCheckedArrayByteSink() {
617 char buffer[20]; // < 26 for the test code to work
618 buffer[3] = '!';
619 CheckedArrayByteSink sink(buffer, (int32_t)sizeof(buffer));
620 sink.Append("abc", 3);
621 if(!(sink.NumberOfBytesAppended() == 3 && sink.NumberOfBytesWritten() == 3 &&
622 0 == memcmp("abc", buffer, 3) && buffer[3] == '!') &&
623 !sink.Overflowed()
624 ) {
625 errln("CheckedArrayByteSink did not Append() as expected");
626 return;
627 }
628 char scratch[10];
629 int32_t capacity = -1;
630 char *dest = sink.GetAppendBuffer(0, 50, scratch, (int32_t)sizeof(scratch), &capacity);
631 if(dest != nullptr || capacity != 0) {
632 errln("CheckedArrayByteSink.GetAppendBuffer(min_capacity<1) did not properly return nullptr[0]");
633 return;
634 }
635 dest = sink.GetAppendBuffer(10, 50, scratch, 9, &capacity);
636 if(dest != nullptr || capacity != 0) {
637 errln("CheckedArrayByteSink.GetAppendBuffer(scratch_capacity<min_capacity) did not properly return nullptr[0]");
638 return;
639 }
640 dest = sink.GetAppendBuffer(10, 50, scratch, (int32_t)sizeof(scratch), &capacity);
641 if(dest != buffer + 3 || capacity != (int32_t)sizeof(buffer) - 3) {
642 errln("CheckedArrayByteSink.GetAppendBuffer() did not properly return its own buffer");
643 return;
644 }
645 memcpy(dest, "defghijklm", 10);
646 sink.Append(dest, 10);
647 if(!(sink.NumberOfBytesAppended() == 13 && sink.NumberOfBytesWritten() == 13 &&
648 0 == memcmp("abcdefghijklm", buffer, 13) &&
649 !sink.Overflowed())
650 ) {
651 errln("CheckedArrayByteSink did not Append(its own buffer) as expected");
652 return;
653 }
654 dest = sink.GetAppendBuffer(10, 50, scratch, (int32_t)sizeof(scratch), &capacity);
655 if(dest != scratch || capacity != (int32_t)sizeof(scratch)) {
656 errln("CheckedArrayByteSink.GetAppendBuffer() did not properly return the scratch buffer");
657 }
658 memcpy(dest, "nopqrstuvw", 10);
659 sink.Append(dest, 10);
660 if(!(sink.NumberOfBytesAppended() == 23 &&
661 sink.NumberOfBytesWritten() == (int32_t)sizeof(buffer) &&
662 0 == memcmp("abcdefghijklmnopqrstuvwxyz", buffer, (int32_t)sizeof(buffer)) &&
663 sink.Overflowed())
664 ) {
665 errln("CheckedArrayByteSink did not Append(scratch buffer) as expected");
666 return;
667 }
668 sink.Reset().Append("123", 3);
669 if(!(sink.NumberOfBytesAppended() == 3 && sink.NumberOfBytesWritten() == 3 &&
670 0 == memcmp("123defghijklmnopqrstuvwxyz", buffer, (int32_t)sizeof(buffer)) &&
671 !sink.Overflowed())
672 ) {
673 errln("CheckedArrayByteSink did not Reset().Append() as expected");
674 return;
675 }
676 }
677
678 void
TestStringByteSink()679 StringTest::TestStringByteSink() {
680 // Not much to test because only the constructors and Append()
681 // are implemented, and trivially so.
682 std::string result("abc"); // std::string
683 StringByteSink<std::string> sink(&result);
684 sink.Append("def", 3);
685 if(result != "abcdef") {
686 errln("StringByteSink did not Append() as expected");
687 }
688 StringByteSink<std::string> sink2(&result, 20);
689 if(result.capacity() < (result.length() + 20)) {
690 errln("StringByteSink should have 20 append capacity, has only %d",
691 (int)(result.capacity() - result.length()));
692 }
693 sink.Append("ghi", 3);
694 if(result != "abcdefghi") {
695 errln("StringByteSink did not Append() as expected");
696 }
697 }
698
699 void
TestStringByteSinkAppendU8()700 StringTest::TestStringByteSinkAppendU8() {
701 // ICU-20984 "mitigate some C++20 char8_t breakages"
702 // For the following APIs there are overloads for both
703 // const char * and const char8_t *.
704 // A u8"string literal" has one type or the other
705 // depending on C++ version and compiler settings.
706 std::string result("abc");
707 StringByteSink<std::string> sink(&result);
708 sink.AppendU8("def", 3);
709 sink.AppendU8(u8"ghijkl", 4);
710 assertEquals("abcdefghij", "abcdef\x67\x68\x69\x6a", result.c_str());
711 }
712
713 #if defined(_MSC_VER)
714 #include <vector>
715 #endif
716
717 void
TestSTLCompatibility()718 StringTest::TestSTLCompatibility() {
719 #if defined(_MSC_VER)
720 /* Just make sure that it compiles with STL's placement new usage. */
721 std::vector<UnicodeString> myvect;
722 myvect.push_back(UnicodeString("blah"));
723 #endif
724 }
725
726 void
TestCharString()727 StringTest::TestCharString() {
728 IcuTestErrorCode errorCode(*this, "TestCharString()");
729 char expected[400];
730 static const char longStr[] =
731 "This is a long string that is meant to cause reallocation of the internal buffer of CharString.";
732 CharString chStr(longStr, errorCode);
733 if (0 != strcmp(longStr, chStr.data()) || (int32_t)strlen(longStr) != chStr.length()) {
734 errln("CharString(longStr) failed.");
735 }
736 CharString test("Test", errorCode);
737 CharString copy(test,errorCode);
738 copy.copyFrom(chStr, errorCode);
739 if (0 != strcmp(longStr, copy.data()) || (int32_t)strlen(longStr) != copy.length()) {
740 errln("CharString.copyFrom() failed.");
741 }
742 StringPiece sp(chStr.toStringPiece());
743 sp.remove_prefix(4);
744 chStr.append(sp, errorCode).append(chStr, errorCode);
745 strcpy(expected, longStr);
746 strcat(expected, longStr+4);
747 strcat(expected, longStr);
748 strcat(expected, longStr+4);
749 if (0 != strcmp(expected, chStr.data()) || (int32_t)strlen(expected) != chStr.length()) {
750 errln("CharString(longStr).append(substring of self).append(self) failed.");
751 }
752 chStr.clear().append("abc", errorCode).append("defghij", 3, errorCode);
753 if (0 != strcmp("abcdef", chStr.data()) || 6 != chStr.length()) {
754 errln("CharString.clear().append(abc).append(defghij, 3) failed.");
755 }
756 chStr.appendInvariantChars(UNICODE_STRING_SIMPLE(
757 "This is a long string that is meant to cause reallocation of the internal buffer of CharString."),
758 errorCode);
759 strcpy(expected, "abcdef");
760 strcat(expected, longStr);
761 if (0 != strcmp(expected, chStr.data()) || (int32_t)strlen(expected) != chStr.length()) {
762 errln("CharString.appendInvariantChars(longStr) failed.");
763 }
764 int32_t appendCapacity = 0;
765 char *buffer = chStr.getAppendBuffer(5, 10, appendCapacity, errorCode);
766 if (errorCode.isFailure()) {
767 return;
768 }
769 memcpy(buffer, "*****", 5);
770 chStr.append(buffer, 5, errorCode);
771 chStr.truncate(chStr.length()-3);
772 strcat(expected, "**");
773 if (0 != strcmp(expected, chStr.data()) || (int32_t)strlen(expected) != chStr.length()) {
774 errln("CharString.getAppendBuffer().append(**) failed.");
775 }
776
777 UErrorCode ec = U_ZERO_ERROR;
778 chStr.clear();
779 chStr.appendInvariantChars(UnicodeString("The '@' character is not invariant."), ec);
780 if (ec != U_INVARIANT_CONVERSION_ERROR) {
781 errln("%s:%d expected U_INVARIANT_CONVERSION_ERROR, got %s", __FILE__, __LINE__, u_errorName(ec));
782 }
783 if (chStr.length() != 0) {
784 errln("%s:%d expected length() = 0, got %d", __FILE__, __LINE__, chStr.length());
785 }
786
787 {
788 CharString s1("Short string", errorCode);
789 CharString s2(std::move(s1));
790 assertEquals("s2 should have content of s1", "Short string", s2.data());
791 CharString s3("Dummy", errorCode);
792 s3 = std::move(s2);
793 assertEquals("s3 should have content of s2", "Short string", s3.data());
794 }
795
796 {
797 CharString s1("Long string over 40 characters to trigger heap allocation", errorCode);
798 CharString s2(std::move(s1));
799 assertEquals("s2 should have content of s1",
800 "Long string over 40 characters to trigger heap allocation",
801 s2.data());
802 CharString s3("Dummy string with over 40 characters to trigger heap allocation", errorCode);
803 s3 = std::move(s2);
804 assertEquals("s3 should have content of s2",
805 "Long string over 40 characters to trigger heap allocation",
806 s3.data());
807 }
808
809 {
810 // extract()
811 errorCode.reset();
812 CharString s("abc", errorCode);
813 char buffer[10];
814
815 s.extract(buffer, 10, errorCode);
816 assertEquals("abc.extract(10) success", U_ZERO_ERROR, errorCode.get());
817 assertEquals("abc.extract(10) output", "abc", buffer);
818
819 strcpy(buffer, "012345");
820 s.extract(buffer, 3, errorCode);
821 assertEquals("abc.extract(3) not terminated",
822 U_STRING_NOT_TERMINATED_WARNING, errorCode.reset());
823 assertEquals("abc.extract(3) output", "abc345", buffer);
824
825 strcpy(buffer, "012345");
826 s.extract(buffer, 2, errorCode);
827 assertEquals("abc.extract(2) overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset());
828 }
829 }
830
831 void
TestCStr()832 StringTest::TestCStr() {
833 const char *cs = "This is a test string.";
834 UnicodeString us(cs);
835 if (0 != strcmp(CStr(us)(), cs)) {
836 errln("%s:%d CStr(s)() failed. Expected \"%s\", got \"%s\"", __FILE__, __LINE__, cs, CStr(us)());
837 }
838 }
839
TestCharStrAppendNumber()840 void StringTest::TestCharStrAppendNumber() {
841 IcuTestErrorCode errorCode(*this, "TestCharStrAppendNumber()");
842
843 CharString testString;
844 testString.appendNumber(1, errorCode);
845 assertEquals("TestAppendNumber 1", "1", testString.data());
846
847 testString.clear();
848 testString.appendNumber(-1, errorCode);
849 assertEquals("TestAppendNumber -1", "-1", testString.data());
850
851 testString.clear();
852 testString.appendNumber(12345, errorCode);
853 assertEquals("TestAppendNumber 12345", "12345", testString.data());
854 testString.appendNumber(123, errorCode);
855 assertEquals("TestAppendNumber 12345 and then 123", "12345123", testString.data());
856
857 testString.clear();
858 testString.appendNumber(std::numeric_limits<int32_t>::max(), errorCode);
859 assertEquals("TestAppendNumber when appending the biggest int32", "2147483647", testString.data());
860
861 testString.clear();
862 testString.appendNumber(std::numeric_limits<int32_t>::min(), errorCode);
863 assertEquals("TestAppendNumber when appending the smallest int32", "-2147483648", testString.data());
864
865 testString.clear();
866 testString.appendNumber(0, errorCode);
867 assertEquals("TestAppendNumber when appending zero", "0", testString.data());
868 }
869
870 void
Testctou()871 StringTest::Testctou() {
872 const char *cs = "Fa\\u0127mu";
873 UnicodeString u = ctou(cs);
874 assertEquals("Testing unescape@0", (int32_t)0x0046, u.charAt(0));
875 assertEquals("Testing unescape@2", (int32_t)295, u.charAt(2));
876 }
877