• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2015, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /*   file name:  strtest.cpp
9 *   encoding:   UTF-8
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 1999nov22
14 *   created by: Markus W. Scherer
15 */
16 
17 #ifdef U_HAVE_STRING_VIEW
18 #include <string_view>
19 #endif
20 
21 #include <cstddef>
22 #include <string.h>
23 #include <limits>
24 
25 #include "unicode/utypes.h"
26 #include "unicode/putil.h"
27 #include "unicode/std_string.h"
28 #include "unicode/stringpiece.h"
29 #include "unicode/unistr.h"
30 #include "unicode/ustring.h"
31 #include "unicode/utf_old.h"    // for UTF8_COUNT_TRAIL_BYTES
32 #include "unicode/utf8.h"
33 #include "charstr.h"
34 #include "cstr.h"
35 #include "intltest.h"
36 #include "strtest.h"
37 #include "uinvchar.h"
38 
~StringTest()39 StringTest::~StringTest() {}
40 
TestEndian(void)41 void StringTest::TestEndian(void) {
42     union {
43         uint8_t byte;
44         uint16_t word;
45     } u;
46     u.word=0x0100;
47     if(U_IS_BIG_ENDIAN!=u.byte) {
48         errln("TestEndian: U_IS_BIG_ENDIAN needs to be fixed in platform.h");
49     }
50 }
51 
TestSizeofTypes(void)52 void StringTest::TestSizeofTypes(void) {
53     if(U_SIZEOF_WCHAR_T!=sizeof(wchar_t)) {
54         errln("TestSizeofWCharT: U_SIZEOF_WCHAR_T!=sizeof(wchar_t) - U_SIZEOF_WCHAR_T needs to be fixed in platform.h");
55     }
56 #ifdef U_INT64_T_UNAVAILABLE
57     errln("int64_t and uint64_t are undefined.");
58 #else
59     if(8!=sizeof(int64_t)) {
60         errln("TestSizeofTypes: 8!=sizeof(int64_t) - int64_t needs to be fixed in platform.h");
61     }
62     if(8!=sizeof(uint64_t)) {
63         errln("TestSizeofTypes: 8!=sizeof(uint64_t) - uint64_t needs to be fixed in platform.h");
64     }
65 #endif
66     if(8!=sizeof(double)) {
67         errln("8!=sizeof(double) - putil.c code may not work");
68     }
69     if(4!=sizeof(int32_t)) {
70         errln("4!=sizeof(int32_t)");
71     }
72     if(4!=sizeof(uint32_t)) {
73         errln("4!=sizeof(uint32_t)");
74     }
75     if(2!=sizeof(int16_t)) {
76         errln("2!=sizeof(int16_t)");
77     }
78     if(2!=sizeof(uint16_t)) {
79         errln("2!=sizeof(uint16_t)");
80     }
81     if(2!=sizeof(UChar)) {
82         errln("2!=sizeof(UChar)");
83     }
84     if(1!=sizeof(int8_t)) {
85         errln("1!=sizeof(int8_t)");
86     }
87     if(1!=sizeof(uint8_t)) {
88         errln("1!=sizeof(uint8_t)");
89     }
90     if(1!=sizeof(UBool)) {
91         errln("1!=sizeof(UBool)");
92     }
93 }
94 
TestCharsetFamily(void)95 void StringTest::TestCharsetFamily(void) {
96     unsigned char c='A';
97     if( (U_CHARSET_FAMILY==U_ASCII_FAMILY && c!=0x41) ||
98         (U_CHARSET_FAMILY==U_EBCDIC_FAMILY && c!=0xc1)
99     ) {
100         errln("TestCharsetFamily: U_CHARSET_FAMILY needs to be fixed in platform.h");
101     }
102 }
103 
104 U_STRING_DECL(ustringVar, "aZ0 -", 5);
105 
106 void
Test_U_STRING()107 StringTest::Test_U_STRING() {
108     U_STRING_INIT(ustringVar, "aZ0 -", 5);
109     if( u_strlen(ustringVar)!=5 ||
110         ustringVar[0]!=0x61 ||
111         ustringVar[1]!=0x5a ||
112         ustringVar[2]!=0x30 ||
113         ustringVar[3]!=0x20 ||
114         ustringVar[4]!=0x2d ||
115         ustringVar[5]!=0
116     ) {
117         errln("Test_U_STRING: U_STRING_DECL with U_STRING_INIT does not work right! "
118               "See putil.h and utypes.h with platform.h.");
119     }
120 }
121 
122 void
Test_UNICODE_STRING()123 StringTest::Test_UNICODE_STRING() {
124     UnicodeString ustringVar=UNICODE_STRING("aZ0 -", 5);
125     if( ustringVar.length()!=5 ||
126         ustringVar[0]!=0x61 ||
127         ustringVar[1]!=0x5a ||
128         ustringVar[2]!=0x30 ||
129         ustringVar[3]!=0x20 ||
130         ustringVar[4]!=0x2d
131     ) {
132         errln("Test_UNICODE_STRING: UNICODE_STRING does not work right! "
133               "See unistr.h and utypes.h with platform.h.");
134     }
135 }
136 
137 void
Test_UNICODE_STRING_SIMPLE()138 StringTest::Test_UNICODE_STRING_SIMPLE() {
139     UnicodeString ustringVar=UNICODE_STRING_SIMPLE("aZ0 -");
140     if( ustringVar.length()!=5 ||
141         ustringVar[0]!=0x61 ||
142         ustringVar[1]!=0x5a ||
143         ustringVar[2]!=0x30 ||
144         ustringVar[3]!=0x20 ||
145         ustringVar[4]!=0x2d
146     ) {
147         errln("Test_UNICODE_STRING_SIMPLE: UNICODE_STRING_SIMPLE does not work right! "
148               "See unistr.h and utypes.h with platform.h.");
149     }
150 }
151 
namespace {

// Two parallel tables listing the same characters in the same order.
// The first uses ordinary narrow literals, so its byte values follow the
// native charset (see U_CHARSET_FAMILY in unicode/platform.h).
const char *nativeInvChars =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz"
    "0123456789" " \"%&'()*+,-./:;<=>?_";

// The second uses char16_t (u"...") literals.
const char16_t *asciiInvChars =
    u"ABCDEFGHIJKLMNOPQRSTUVWXYZ" u"abcdefghijklmnopqrstuvwxyz"
    u"0123456789" u" \"%&'()*+,-./:;<=>?_";

}  // namespace
165 
166 void
TestUpperOrdinal()167 StringTest::TestUpperOrdinal() {
168     for (int32_t i = 0;; ++i) {
169         char ic = nativeInvChars[i];
170         uint8_t ac = static_cast<uint8_t>(asciiInvChars[i]);
171         int32_t expected = ac - 'A';
172         int32_t actual = uprv_upperOrdinal(ic);
173         if (0 <= expected && expected <= 25) {
174             if (actual != expected) {
175                 errln("uprv_upperOrdinal('%c')=%d != expected %d",
176                       ic, (int)actual, (int)expected);
177             }
178         } else {
179             if (0 <= actual && actual <= 25) {
180                 errln("uprv_upperOrdinal('%c')=%d should have been outside 0..25",
181                       ic, (int)actual);
182             }
183         }
184         if (ic == 0) { break; }
185     }
186 }
187 
188 void
TestLowerOrdinal()189 StringTest::TestLowerOrdinal() {
190     for (int32_t i = 0;; ++i) {
191         char ic = nativeInvChars[i];
192         uint8_t ac = static_cast<uint8_t>(asciiInvChars[i]);
193         int32_t expected = ac - 'a';
194         int32_t actual = uprv_lowerOrdinal(ic);
195         if (0 <= expected && expected <= 25) {
196             if (actual != expected) {
197                 errln("uprv_lowerOrdinal('%c')=%d != expected %d",
198                       ic, (int)actual, (int)expected);
199             }
200         } else {
201             if (0 <= actual && actual <= 25) {
202                 errln("uprv_lowerOrdinal('%c')=%d should have been outside 0..25",
203                       ic, (int)actual);
204             }
205         }
206         if (ic == 0) { break; }
207     }
208 }
209 
210 void
Test_UTF8_COUNT_TRAIL_BYTES()211 StringTest::Test_UTF8_COUNT_TRAIL_BYTES() {
212 #if !U_HIDE_OBSOLETE_UTF_OLD_H
213     if(UTF8_COUNT_TRAIL_BYTES(0x7F) != 0
214             || UTF8_COUNT_TRAIL_BYTES(0xC2) != 1
215             || UTF8_COUNT_TRAIL_BYTES(0xE0) != 2
216             || UTF8_COUNT_TRAIL_BYTES(0xF0) != 3) {
217         errln("UTF8_COUNT_TRAIL_BYTES does not work right! See utf_old.h.");
218     }
219 #endif
220     // Note: U8_COUNT_TRAIL_BYTES (current) and UTF8_COUNT_TRAIL_BYTES (deprecated)
221     //       have completely different implementations.
222     if (U8_COUNT_TRAIL_BYTES(0x7F) != 0
223             || U8_COUNT_TRAIL_BYTES(0xC2) != 1
224             || U8_COUNT_TRAIL_BYTES(0xE0) != 2
225             || U8_COUNT_TRAIL_BYTES(0xF0) != 3) {
226         errln("U8_COUNT_TRAIL_BYTES does not work right! See utf8.h.");
227     }
228 }
229 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)230 void StringTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
231     if(exec) {
232         logln("TestSuite Character and String Test: ");
233     }
234     TESTCASE_AUTO_BEGIN;
235     TESTCASE_AUTO(TestEndian);
236     TESTCASE_AUTO(TestSizeofTypes);
237     TESTCASE_AUTO(TestCharsetFamily);
238     TESTCASE_AUTO(Test_U_STRING);
239     TESTCASE_AUTO(Test_UNICODE_STRING);
240     TESTCASE_AUTO(Test_UNICODE_STRING_SIMPLE);
241     TESTCASE_AUTO(TestUpperOrdinal);
242     TESTCASE_AUTO(TestLowerOrdinal);
243     TESTCASE_AUTO(Test_UTF8_COUNT_TRAIL_BYTES);
244     TESTCASE_AUTO(TestSTLCompatibility);
245     TESTCASE_AUTO(TestStringPiece);
246     TESTCASE_AUTO(TestStringPieceComparisons);
247     TESTCASE_AUTO(TestStringPieceFind);
248     TESTCASE_AUTO(TestStringPieceOther);
249 #ifdef U_HAVE_STRING_VIEW
250     TESTCASE_AUTO(TestStringPieceStringView);
251 #endif
252     TESTCASE_AUTO(TestStringPieceU8);
253     TESTCASE_AUTO(TestByteSink);
254     TESTCASE_AUTO(TestCheckedArrayByteSink);
255     TESTCASE_AUTO(TestStringByteSink);
256     TESTCASE_AUTO(TestStringByteSinkAppendU8);
257     TESTCASE_AUTO(TestCharString);
258     TESTCASE_AUTO(TestCStr);
259     TESTCASE_AUTO(TestCharStrAppendNumber);
260     TESTCASE_AUTO(Testctou);
261     TESTCASE_AUTO_END;
262 }
263 
264 void
TestStringPiece()265 StringTest::TestStringPiece() {
266     // Default constructor.
267     StringPiece empty;
268     if(!empty.empty() || empty.data()!=NULL || empty.length()!=0 || empty.size()!=0) {
269         errln("StringPiece() failed");
270     }
271     // Construct from NULL const char * pointer.
272     StringPiece null((const char *)nullptr);
273     if(!null.empty() || null.data()!=NULL || null.length()!=0 || null.size()!=0) {
274         errln("StringPiece(NULL) failed");
275     }
276     // Construct from const char * pointer.
277     static const char *abc_chars="abc";
278     StringPiece abc(abc_chars);
279     if(abc.empty() || abc.data()!=abc_chars || abc.length()!=3 || abc.size()!=3) {
280         errln("StringPiece(abc_chars) failed");
281     }
282     // Construct from const char * pointer and length.
283     static const char *abcdefg_chars="abcdefg";
284     StringPiece abcd(abcdefg_chars, 4);
285     if(abcd.empty() || abcd.data()!=abcdefg_chars || abcd.length()!=4 || abcd.size()!=4) {
286         errln("StringPiece(abcdefg_chars, 4) failed");
287     }
288     // Construct from std::string.
289     std::string uvwxyz_string("uvwxyz");
290     StringPiece uvwxyz(uvwxyz_string);
291     if(uvwxyz.empty() || uvwxyz.data()!=uvwxyz_string.data() || uvwxyz.length()!=6 || uvwxyz.size()!=6) {
292         errln("StringPiece(uvwxyz_string) failed");
293     }
294     // Substring constructor with pos.
295     StringPiece sp(abcd, -1);
296     if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
297         errln("StringPiece(abcd, -1) failed");
298     }
299     sp=StringPiece(abcd, 5);
300     if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
301         errln("StringPiece(abcd, 5) failed");
302     }
303     sp=StringPiece(abcd, 2);
304     if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) {
305         errln("StringPiece(abcd, -1) failed");
306     }
307     // Substring constructor with pos and len.
308     sp=StringPiece(abcd, -1, 8);
309     if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
310         errln("StringPiece(abcd, -1, 8) failed");
311     }
312     sp=StringPiece(abcd, 5, 8);
313     if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
314         errln("StringPiece(abcd, 5, 8) failed");
315     }
316     sp=StringPiece(abcd, 2, 8);
317     if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) {
318         errln("StringPiece(abcd, -1) failed");
319     }
320     sp=StringPiece(abcd, 2, -1);
321     if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
322         errln("StringPiece(abcd, 5, -1) failed");
323     }
324     // static const npos
325     const int32_t *ptr_npos=&StringPiece::npos;
326     if(StringPiece::npos!=0x7fffffff || *ptr_npos!=0x7fffffff) {
327         errln("StringPiece::npos!=0x7fffffff");
328     }
329     // substr() method with pos, using len=npos.
330     sp=abcd.substr(-1);
331     if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
332         errln("abcd.substr(-1) failed");
333     }
334     sp=abcd.substr(5);
335     if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
336         errln("abcd.substr(5) failed");
337     }
338     sp=abcd.substr(2);
339     if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) {
340         errln("abcd.substr(-1) failed");
341     }
342     // substr() method with pos and len.
343     sp=abcd.substr(-1, 8);
344     if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
345         errln("abcd.substr(-1, 8) failed");
346     }
347     sp=abcd.substr(5, 8);
348     if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
349         errln("abcd.substr(5, 8) failed");
350     }
351     sp=abcd.substr(2, 8);
352     if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) {
353         errln("abcd.substr(-1) failed");
354     }
355     sp=abcd.substr(2, -1);
356     if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
357         errln("abcd.substr(5, -1) failed");
358     }
359     // clear()
360     sp=abcd;
361     sp.clear();
362     if(!sp.empty() || sp.data()!=NULL || sp.length()!=0 || sp.size()!=0) {
363         errln("abcd.clear() failed");
364     }
365     // remove_prefix()
366     sp=abcd;
367     sp.remove_prefix(-1);
368     if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
369         errln("abcd.remove_prefix(-1) failed");
370     }
371     sp=abcd;
372     sp.remove_prefix(2);
373     if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) {
374         errln("abcd.remove_prefix(2) failed");
375     }
376     sp=abcd;
377     sp.remove_prefix(5);
378     if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
379         errln("abcd.remove_prefix(5) failed");
380     }
381     // remove_suffix()
382     sp=abcd;
383     sp.remove_suffix(-1);
384     if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
385         errln("abcd.remove_suffix(-1) failed");
386     }
387     sp=abcd;
388     sp.remove_suffix(2);
389     if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=2 || sp.size()!=2) {
390         errln("abcd.remove_suffix(2) failed");
391     }
392     sp=abcd;
393     sp.remove_suffix(5);
394     if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
395         errln("abcd.remove_suffix(5) failed");
396     }
397 }
398 
399 void
TestStringPieceComparisons()400 StringTest::TestStringPieceComparisons() {
401     StringPiece empty;
402     StringPiece null(nullptr);
403     StringPiece abc("abc");
404     StringPiece abcd("abcdefg", 4);
405     StringPiece abx("abx");
406     if(empty!=null) {
407         errln("empty!=null");
408     }
409     if(empty==abc) {
410         errln("empty==abc");
411     }
412     if(abc==abcd) {
413         errln("abc==abcd");
414     }
415 
416     assertTrue("null<abc", null.compare(abc) < 0);
417     assertTrue("abc>null", abc.compare(null) > 0);
418     assertTrue("abc<abcd", abc.compare(abcd) < 0);
419     assertTrue("abcd>abc", abcd.compare(abc) > 0);
420     assertTrue("abc<abx", abc.compare(abx) < 0);
421     assertTrue("abx>abc", abx.compare(abc) > 0);
422     assertTrue("abx>abcd", abx.compare(abcd) > 0);
423     assertTrue("abcd<abx", abcd.compare(abx) < 0);
424     assertTrue("abx==abx", abx.compare(abx) == 0);
425 
426     // Behavior should be the same as std::string::compare
427     {
428         std::string null("");
429         std::string abc("abc");
430         std::string abcd("abcdefg", 4);
431         std::string abx("abx");
432 
433         assertTrue("std: null<abc", null.compare(abc) < 0);
434         assertTrue("std: abc>null", abc.compare(null) > 0);
435         assertTrue("std: abc<abcd", abc.compare(abcd) < 0);
436         assertTrue("std: abcd>abc", abcd.compare(abc) > 0);
437         assertTrue("std: abc<abx", abc.compare(abx) < 0);
438         assertTrue("std: abx>abc", abx.compare(abc) > 0);
439         assertTrue("std: abx>abcd", abx.compare(abcd) > 0);
440         assertTrue("std: abcd<abx", abcd.compare(abx) < 0);
441         assertTrue("std: abx==abx", abx.compare(abx) == 0);
442     }
443 
444     abcd.remove_suffix(1);
445     if(abc!=abcd) {
446         errln("abc!=abcd.remove_suffix(1)");
447     }
448     if(abc==abx) {
449         errln("abc==abx");
450     }
451 }
452 
453 void
TestStringPieceFind()454 StringTest::TestStringPieceFind() {
455     struct TestCase {
456         const char* haystack;
457         const char* needle;
458         int32_t expected;
459     } cases[] = {
460         { "", "", 0 },
461         { "", "x", -1 },
462         { "x", "", 0 },
463         { "x", "x", 0 },
464         { "xy", "x", 0 },
465         { "xy", "y", 1 },
466         { "xy", "xy", 0 },
467         { "xy", "xyz", -1 },
468         { "qwerty", "qqw", -1 },
469         { "qwerty", "qw", 0 },
470         { "qwerty", "er", 2 },
471         { "qwerty", "err", -1 },
472         { "qwerty", "ert", 2 },
473         { "qwerty", "ty", 4 },
474         { "qwerty", "tyy", -1 },
475         { "qwerty", "a", -1 },
476         { "qwerty", "abc", -1 }
477     };
478     int32_t caseNumber = 0;
479     for (auto& cas : cases) {
480         StringPiece haystack(cas.haystack);
481         StringPiece needle(cas.needle);
482         assertEquals(Int64ToUnicodeString(caseNumber),
483             cas.expected, haystack.find(needle, 0));
484         // Should be same as std::string::find
485         std::string stdhaystack(cas.haystack);
486         std::string stdneedle(cas.needle);
487         assertEquals(Int64ToUnicodeString(caseNumber) + u" (std)",
488             cas.expected, static_cast<int32_t>(stdhaystack.find(stdneedle, 0)));
489         // Test offsets against std::string::find
490         for (int32_t offset = 0; offset < haystack.length(); offset++) {
491             assertEquals(Int64ToUnicodeString(caseNumber) + "u @ " + Int64ToUnicodeString(offset),
492                 static_cast<int32_t>(stdhaystack.find(stdneedle, offset)), haystack.find(needle, offset));
493         }
494         caseNumber++;
495     }
496 }
497 
498 void
TestStringPieceOther()499 StringTest::TestStringPieceOther() {
500     static constexpr char msg[] = "Kapow!";
501 
502     // Another string piece implementation.
503     struct Other {
504         const char* data() { return msg; }
505         size_t size() { return sizeof msg - 1; }
506     };
507 
508     Other other;
509     StringPiece piece(other);
510 
511     assertEquals("size()", piece.size(), static_cast<int32_t>(other.size()));
512     assertEquals("data()", piece.data(), other.data());
513 }
514 
#ifdef U_HAVE_STRING_VIEW
/**
 * Checks that a StringPiece constructed from a std::string_view reports the
 * same size and data pointer as the view.
 */
void
StringTest::TestStringPieceStringView() {
    static constexpr char msg[] = "Kapow!";

    std::string_view view(msg);  // C++17
    StringPiece piece(view);

    // view.size() is a size_t; convert it explicitly, as TestStringPieceOther
    // does, so both arguments have the same int32_t type.
    assertEquals("size()", piece.size(), static_cast<int32_t>(view.size()));
    assertEquals("data()", piece.data(), view.data());
}
#endif
527 
528 void
TestStringPieceU8()529 StringTest::TestStringPieceU8() {
530     // ICU-20984 "mitigate some C++20 char8_t breakages"
531     // For the following APIs there are overloads for both
532     // const char * and const char8_t *.
533     // A u8"string literal" has one type or the other
534     // depending on C++ version and compiler settings.
535     StringPiece abc(u8"abc");
536     assertEquals("abc.length", 3, abc.length());
537     assertEquals("abc", "\x61\x62\x63", abc.data());
538 
539     StringPiece abc3(u8"abcdef", 3);
540     assertEquals("abc3.length", 3, abc3.length());
541     assertEquals("abc3[0]", 0x61, abc3.data()[0]);
542     assertEquals("abc3[1]", 0x62, abc3.data()[1]);
543     assertEquals("abc3[2]", 0x63, abc3.data()[2]);
544 
545     StringPiece uvw("q");
546     uvw.set(u8"uvw");
547     assertEquals("uvw.length", 3, uvw.length());
548     assertEquals("uvw", "\x75\x76\x77", uvw.data());
549 
550     StringPiece xyz("r");
551     xyz.set(u8"xyzXYZ", 3);
552     assertEquals("xyz.length", 3, xyz.length());
553     assertEquals("xyz[0]", 0x78, xyz.data()[0]);
554     assertEquals("xyz[1]", 0x79, xyz.data()[1]);
555     assertEquals("xyz[2]", 0x7a, xyz.data()[2]);
556 
557     StringPiece null(nullptr);
558     assertTrue("null is empty", null.empty());
559     assertTrue("null is null", null.data() == nullptr);
560 
561 #ifdef __cpp_lib_char8_t
562     std::u8string_view u8sv(u8"sv");  // C++20
563     StringPiece u8svsp(u8sv);
564     assertEquals("u8svsp.length", 2, u8svsp.length());
565     assertEquals("u8svsp", "\x73\x76", u8svsp.data());
566 
567     std::u8string u8str(u8"str");  // C++20
568     StringPiece u8strsp(u8str);
569     assertEquals("u8strsp.length", 3, u8strsp.length());
570     assertEquals("u8strsp", "\x73\x74\x72", u8strsp.data());
571 #endif  // __cpp_lib_char8_t
572 }
573 
574 // Verify that ByteSink is subclassable and Flush() overridable.
575 class SimpleByteSink : public ByteSink {
576 public:
SimpleByteSink(char * outbuf)577     SimpleByteSink(char *outbuf) : fOutbuf(outbuf), fLength(0) {}
Append(const char * bytes,int32_t n)578     virtual void Append(const char *bytes, int32_t n) override {
579         if(fOutbuf != bytes) {
580             memcpy(fOutbuf, bytes, n);
581         }
582         fOutbuf += n;
583         fLength += n;
584     }
Flush()585     virtual void Flush() override { Append("z", 1); }
length()586     int32_t length() { return fLength; }
587 private:
588     char *fOutbuf;
589     int32_t fLength;
590 };
591 
592 // Test the ByteSink base class.
593 void
TestByteSink()594 StringTest::TestByteSink() {
595     char buffer[20];
596     buffer[4] = '!';
597     SimpleByteSink sink(buffer);
598     sink.Append("abc", 3);
599     sink.Flush();
600     if(!(sink.length() == 4 && 0 == memcmp("abcz", buffer, 4) && buffer[4] == '!')) {
601         errln("ByteSink (SimpleByteSink) did not Append() or Flush() as expected");
602         return;
603     }
604     char scratch[20];
605     int32_t capacity = -1;
606     char *dest = sink.GetAppendBuffer(0, 50, scratch, (int32_t)sizeof(scratch), &capacity);
607     if(dest != NULL || capacity != 0) {
608         errln("ByteSink.GetAppendBuffer(min_capacity<1) did not properly return NULL[0]");
609         return;
610     }
611     dest = sink.GetAppendBuffer(10, 50, scratch, 9, &capacity);
612     if(dest != NULL || capacity != 0) {
613         errln("ByteSink.GetAppendBuffer(scratch_capacity<min_capacity) did not properly return NULL[0]");
614         return;
615     }
616     dest = sink.GetAppendBuffer(5, 50, scratch, (int32_t)sizeof(scratch), &capacity);
617     if(dest != scratch || capacity != (int32_t)sizeof(scratch)) {
618         errln("ByteSink.GetAppendBuffer() did not properly return the scratch buffer");
619     }
620 }
621 
622 void
TestCheckedArrayByteSink()623 StringTest::TestCheckedArrayByteSink() {
624     char buffer[20];  // < 26 for the test code to work
625     buffer[3] = '!';
626     CheckedArrayByteSink sink(buffer, (int32_t)sizeof(buffer));
627     sink.Append("abc", 3);
628     if(!(sink.NumberOfBytesAppended() == 3 && sink.NumberOfBytesWritten() == 3 &&
629          0 == memcmp("abc", buffer, 3) && buffer[3] == '!') &&
630          !sink.Overflowed()
631     ) {
632         errln("CheckedArrayByteSink did not Append() as expected");
633         return;
634     }
635     char scratch[10];
636     int32_t capacity = -1;
637     char *dest = sink.GetAppendBuffer(0, 50, scratch, (int32_t)sizeof(scratch), &capacity);
638     if(dest != NULL || capacity != 0) {
639         errln("CheckedArrayByteSink.GetAppendBuffer(min_capacity<1) did not properly return NULL[0]");
640         return;
641     }
642     dest = sink.GetAppendBuffer(10, 50, scratch, 9, &capacity);
643     if(dest != NULL || capacity != 0) {
644         errln("CheckedArrayByteSink.GetAppendBuffer(scratch_capacity<min_capacity) did not properly return NULL[0]");
645         return;
646     }
647     dest = sink.GetAppendBuffer(10, 50, scratch, (int32_t)sizeof(scratch), &capacity);
648     if(dest != buffer + 3 || capacity != (int32_t)sizeof(buffer) - 3) {
649         errln("CheckedArrayByteSink.GetAppendBuffer() did not properly return its own buffer");
650         return;
651     }
652     memcpy(dest, "defghijklm", 10);
653     sink.Append(dest, 10);
654     if(!(sink.NumberOfBytesAppended() == 13 && sink.NumberOfBytesWritten() == 13 &&
655          0 == memcmp("abcdefghijklm", buffer, 13) &&
656          !sink.Overflowed())
657     ) {
658         errln("CheckedArrayByteSink did not Append(its own buffer) as expected");
659         return;
660     }
661     dest = sink.GetAppendBuffer(10, 50, scratch, (int32_t)sizeof(scratch), &capacity);
662     if(dest != scratch || capacity != (int32_t)sizeof(scratch)) {
663         errln("CheckedArrayByteSink.GetAppendBuffer() did not properly return the scratch buffer");
664     }
665     memcpy(dest, "nopqrstuvw", 10);
666     sink.Append(dest, 10);
667     if(!(sink.NumberOfBytesAppended() == 23 &&
668          sink.NumberOfBytesWritten() == (int32_t)sizeof(buffer) &&
669          0 == memcmp("abcdefghijklmnopqrstuvwxyz", buffer, (int32_t)sizeof(buffer)) &&
670          sink.Overflowed())
671     ) {
672         errln("CheckedArrayByteSink did not Append(scratch buffer) as expected");
673         return;
674     }
675     sink.Reset().Append("123", 3);
676     if(!(sink.NumberOfBytesAppended() == 3 && sink.NumberOfBytesWritten() == 3 &&
677          0 == memcmp("123defghijklmnopqrstuvwxyz", buffer, (int32_t)sizeof(buffer)) &&
678          !sink.Overflowed())
679     ) {
680         errln("CheckedArrayByteSink did not Reset().Append() as expected");
681         return;
682     }
683 }
684 
685 void
TestStringByteSink()686 StringTest::TestStringByteSink() {
687     // Not much to test because only the constructors and Append()
688     // are implemented, and trivially so.
689     std::string result("abc");  // std::string
690     StringByteSink<std::string> sink(&result);
691     sink.Append("def", 3);
692     if(result != "abcdef") {
693         errln("StringByteSink did not Append() as expected");
694     }
695     StringByteSink<std::string> sink2(&result, 20);
696     if(result.capacity() < (result.length() + 20)) {
697         errln("StringByteSink should have 20 append capacity, has only %d",
698               (int)(result.capacity() - result.length()));
699     }
700     sink.Append("ghi", 3);
701     if(result != "abcdefghi") {
702         errln("StringByteSink did not Append() as expected");
703     }
704 }
705 
706 void
TestStringByteSinkAppendU8()707 StringTest::TestStringByteSinkAppendU8() {
708     // ICU-20984 "mitigate some C++20 char8_t breakages"
709     // For the following APIs there are overloads for both
710     // const char * and const char8_t *.
711     // A u8"string literal" has one type or the other
712     // depending on C++ version and compiler settings.
713     std::string result("abc");
714     StringByteSink<std::string> sink(&result);
715     sink.AppendU8("def", 3);
716     sink.AppendU8(u8"ghijkl", 4);
717     assertEquals("abcdefghij", "abcdef\x67\x68\x69\x6a", result.c_str());
718 }
719 
720 #if defined(_MSC_VER)
721 #include <vector>
722 #endif
723 
724 void
TestSTLCompatibility()725 StringTest::TestSTLCompatibility() {
726 #if defined(_MSC_VER)
727     /* Just make sure that it compiles with STL's placement new usage. */
728     std::vector<UnicodeString> myvect;
729     myvect.push_back(UnicodeString("blah"));
730 #endif
731 }
732 
733 void
TestCharString()734 StringTest::TestCharString() {
735     IcuTestErrorCode errorCode(*this, "TestCharString()");
736     char expected[400];
737     static const char longStr[] =
738         "This is a long string that is meant to cause reallocation of the internal buffer of CharString.";
739     CharString chStr(longStr, errorCode);
740     if (0 != strcmp(longStr, chStr.data()) || (int32_t)strlen(longStr) != chStr.length()) {
741         errln("CharString(longStr) failed.");
742     }
743     CharString test("Test", errorCode);
744     CharString copy(test,errorCode);
745     copy.copyFrom(chStr, errorCode);
746     if (0 != strcmp(longStr, copy.data()) || (int32_t)strlen(longStr) != copy.length()) {
747         errln("CharString.copyFrom() failed.");
748     }
749     StringPiece sp(chStr.toStringPiece());
750     sp.remove_prefix(4);
751     chStr.append(sp, errorCode).append(chStr, errorCode);
752     strcpy(expected, longStr);
753     strcat(expected, longStr+4);
754     strcat(expected, longStr);
755     strcat(expected, longStr+4);
756     if (0 != strcmp(expected, chStr.data()) || (int32_t)strlen(expected) != chStr.length()) {
757         errln("CharString(longStr).append(substring of self).append(self) failed.");
758     }
759     chStr.clear().append("abc", errorCode).append("defghij", 3, errorCode);
760     if (0 != strcmp("abcdef", chStr.data()) || 6 != chStr.length()) {
761         errln("CharString.clear().append(abc).append(defghij, 3) failed.");
762     }
763     chStr.appendInvariantChars(UNICODE_STRING_SIMPLE(
764         "This is a long string that is meant to cause reallocation of the internal buffer of CharString."),
765         errorCode);
766     strcpy(expected, "abcdef");
767     strcat(expected, longStr);
768     if (0 != strcmp(expected, chStr.data()) || (int32_t)strlen(expected) != chStr.length()) {
769         errln("CharString.appendInvariantChars(longStr) failed.");
770     }
771     int32_t appendCapacity = 0;
772     char *buffer = chStr.getAppendBuffer(5, 10, appendCapacity, errorCode);
773     if (errorCode.isFailure()) {
774         return;
775     }
776     memcpy(buffer, "*****", 5);
777     chStr.append(buffer, 5, errorCode);
778     chStr.truncate(chStr.length()-3);
779     strcat(expected, "**");
780     if (0 != strcmp(expected, chStr.data()) || (int32_t)strlen(expected) != chStr.length()) {
781         errln("CharString.getAppendBuffer().append(**) failed.");
782     }
783 
784     UErrorCode ec = U_ZERO_ERROR;
785     chStr.clear();
786     chStr.appendInvariantChars(UnicodeString("The '@' character is not invariant."), ec);
787     if (ec != U_INVARIANT_CONVERSION_ERROR) {
788         errln("%s:%d expected U_INVARIANT_CONVERSION_ERROR, got %s", __FILE__, __LINE__, u_errorName(ec));
789     }
790     if (chStr.length() != 0) {
791         errln("%s:%d expected length() = 0, got %d", __FILE__, __LINE__, chStr.length());
792     }
793 
794     {
795         CharString s1("Short string", errorCode);
796         CharString s2(std::move(s1));
797         assertEquals("s2 should have content of s1", "Short string", s2.data());
798         CharString s3("Dummy", errorCode);
799         s3 = std::move(s2);
800         assertEquals("s3 should have content of s2", "Short string", s3.data());
801     }
802 
803     {
804         CharString s1("Long string over 40 characters to trigger heap allocation", errorCode);
805         CharString s2(std::move(s1));
806         assertEquals("s2 should have content of s1",
807                 "Long string over 40 characters to trigger heap allocation",
808                 s2.data());
809         CharString s3("Dummy string with over 40 characters to trigger heap allocation", errorCode);
810         s3 = std::move(s2);
811         assertEquals("s3 should have content of s2",
812                 "Long string over 40 characters to trigger heap allocation",
813                 s3.data());
814     }
815 
816     {
817         // extract()
818         errorCode.reset();
819         CharString s("abc", errorCode);
820         char buffer[10];
821 
822         s.extract(buffer, 10, errorCode);
823         assertEquals("abc.extract(10) success", U_ZERO_ERROR, errorCode.get());
824         assertEquals("abc.extract(10) output", "abc", buffer);
825 
826         strcpy(buffer, "012345");
827         s.extract(buffer, 3, errorCode);
828         assertEquals("abc.extract(3) not terminated",
829                      U_STRING_NOT_TERMINATED_WARNING, errorCode.reset());
830         assertEquals("abc.extract(3) output", "abc345", buffer);
831 
832         strcpy(buffer, "012345");
833         s.extract(buffer, 2, errorCode);
834         assertEquals("abc.extract(2) overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset());
835     }
836 }
837 
838 void
TestCStr()839 StringTest::TestCStr() {
840     const char *cs = "This is a test string.";
841     UnicodeString us(cs);
842     if (0 != strcmp(CStr(us)(), cs)) {
843         errln("%s:%d CStr(s)() failed. Expected \"%s\", got \"%s\"", __FILE__, __LINE__, cs, CStr(us)());
844     }
845 }
846 
TestCharStrAppendNumber()847 void StringTest::TestCharStrAppendNumber() {
848     IcuTestErrorCode errorCode(*this, "TestCharStrAppendNumber()");
849 
850     CharString testString;
851     testString.appendNumber(1, errorCode);
852     assertEquals("TestAppendNumber 1", "1", testString.data());
853 
854     testString.clear();
855     testString.appendNumber(-1, errorCode);
856     assertEquals("TestAppendNumber -1", "-1", testString.data());
857 
858     testString.clear();
859     testString.appendNumber(12345, errorCode);
860     assertEquals("TestAppendNumber 12345", "12345", testString.data());
861     testString.appendNumber(123, errorCode);
862     assertEquals("TestAppendNumber 12345 and then 123", "12345123", testString.data());
863 
864     testString.clear();
865     testString.appendNumber(std::numeric_limits<int32_t>::max(), errorCode);
866     assertEquals("TestAppendNumber when appending the biggest int32", "2147483647", testString.data());
867 
868     testString.clear();
869     testString.appendNumber(std::numeric_limits<int32_t>::min(), errorCode);
870     assertEquals("TestAppendNumber when appending the smallest int32", "-2147483648", testString.data());
871 
872     testString.clear();
873     testString.appendNumber(0, errorCode);
874     assertEquals("TestAppendNumber when appending zero", "0", testString.data());
875 }
876 
877 void
Testctou()878 StringTest::Testctou() {
879   const char *cs = "Fa\\u0127mu";
880   UnicodeString u = ctou(cs);
881   assertEquals("Testing unescape@0", (int32_t)0x0046, u.charAt(0));
882   assertEquals("Testing unescape@2", (int32_t)295, u.charAt(2));
883 }
884