• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2015, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /*   file name:  strtest.cpp
9 *   encoding:   UTF-8
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 1999nov22
14 *   created by: Markus W. Scherer
15 */
16 
17 #ifdef U_HAVE_STRING_VIEW
18 #include <string_view>
19 #endif
20 
21 #include <cstddef>
22 #include <string.h>
23 
24 #include "unicode/utypes.h"
25 #include "unicode/putil.h"
26 #include "unicode/std_string.h"
27 #include "unicode/stringpiece.h"
28 #include "unicode/unistr.h"
29 #include "unicode/ustring.h"
30 #include "unicode/utf_old.h"    // for UTF8_COUNT_TRAIL_BYTES
31 #include "unicode/utf8.h"
32 #include "charstr.h"
33 #include "cstr.h"
34 #include "intltest.h"
35 #include "strtest.h"
36 #include "uinvchar.h"
37 
~StringTest()38 StringTest::~StringTest() {}
39 
TestEndian(void)40 void StringTest::TestEndian(void) {
41     union {
42         uint8_t byte;
43         uint16_t word;
44     } u;
45     u.word=0x0100;
46     if(U_IS_BIG_ENDIAN!=u.byte) {
47         errln("TestEndian: U_IS_BIG_ENDIAN needs to be fixed in platform.h");
48     }
49 }
50 
TestSizeofTypes(void)51 void StringTest::TestSizeofTypes(void) {
52     if(U_SIZEOF_WCHAR_T!=sizeof(wchar_t)) {
53         errln("TestSizeofWCharT: U_SIZEOF_WCHAR_T!=sizeof(wchar_t) - U_SIZEOF_WCHAR_T needs to be fixed in platform.h");
54     }
55 #ifdef U_INT64_T_UNAVAILABLE
56     errln("int64_t and uint64_t are undefined.");
57 #else
58     if(8!=sizeof(int64_t)) {
59         errln("TestSizeofTypes: 8!=sizeof(int64_t) - int64_t needs to be fixed in platform.h");
60     }
61     if(8!=sizeof(uint64_t)) {
62         errln("TestSizeofTypes: 8!=sizeof(uint64_t) - uint64_t needs to be fixed in platform.h");
63     }
64 #endif
65     if(8!=sizeof(double)) {
66         errln("8!=sizeof(double) - putil.c code may not work");
67     }
68     if(4!=sizeof(int32_t)) {
69         errln("4!=sizeof(int32_t)");
70     }
71     if(4!=sizeof(uint32_t)) {
72         errln("4!=sizeof(uint32_t)");
73     }
74     if(2!=sizeof(int16_t)) {
75         errln("2!=sizeof(int16_t)");
76     }
77     if(2!=sizeof(uint16_t)) {
78         errln("2!=sizeof(uint16_t)");
79     }
80     if(2!=sizeof(UChar)) {
81         errln("2!=sizeof(UChar)");
82     }
83     if(1!=sizeof(int8_t)) {
84         errln("1!=sizeof(int8_t)");
85     }
86     if(1!=sizeof(uint8_t)) {
87         errln("1!=sizeof(uint8_t)");
88     }
89     if(1!=sizeof(UBool)) {
90         errln("1!=sizeof(UBool)");
91     }
92 }
93 
TestCharsetFamily(void)94 void StringTest::TestCharsetFamily(void) {
95     unsigned char c='A';
96     if( (U_CHARSET_FAMILY==U_ASCII_FAMILY && c!=0x41) ||
97         (U_CHARSET_FAMILY==U_EBCDIC_FAMILY && c!=0xc1)
98     ) {
99         errln("TestCharsetFamily: U_CHARSET_FAMILY needs to be fixed in platform.h");
100     }
101 }
102 
103 U_STRING_DECL(ustringVar, "aZ0 -", 5);
104 
105 void
Test_U_STRING()106 StringTest::Test_U_STRING() {
107     U_STRING_INIT(ustringVar, "aZ0 -", 5);
108     if( u_strlen(ustringVar)!=5 ||
109         ustringVar[0]!=0x61 ||
110         ustringVar[1]!=0x5a ||
111         ustringVar[2]!=0x30 ||
112         ustringVar[3]!=0x20 ||
113         ustringVar[4]!=0x2d ||
114         ustringVar[5]!=0
115     ) {
116         errln("Test_U_STRING: U_STRING_DECL with U_STRING_INIT does not work right! "
117               "See putil.h and utypes.h with platform.h.");
118     }
119 }
120 
121 void
Test_UNICODE_STRING()122 StringTest::Test_UNICODE_STRING() {
123     UnicodeString ustringVar=UNICODE_STRING("aZ0 -", 5);
124     if( ustringVar.length()!=5 ||
125         ustringVar[0]!=0x61 ||
126         ustringVar[1]!=0x5a ||
127         ustringVar[2]!=0x30 ||
128         ustringVar[3]!=0x20 ||
129         ustringVar[4]!=0x2d
130     ) {
131         errln("Test_UNICODE_STRING: UNICODE_STRING does not work right! "
132               "See unistr.h and utypes.h with platform.h.");
133     }
134 }
135 
136 void
Test_UNICODE_STRING_SIMPLE()137 StringTest::Test_UNICODE_STRING_SIMPLE() {
138     UnicodeString ustringVar=UNICODE_STRING_SIMPLE("aZ0 -");
139     if( ustringVar.length()!=5 ||
140         ustringVar[0]!=0x61 ||
141         ustringVar[1]!=0x5a ||
142         ustringVar[2]!=0x30 ||
143         ustringVar[3]!=0x20 ||
144         ustringVar[4]!=0x2d
145     ) {
146         errln("Test_UNICODE_STRING_SIMPLE: UNICODE_STRING_SIMPLE does not work right! "
147               "See unistr.h and utypes.h with platform.h.");
148     }
149 }
150 
namespace {

// The same character sequence spelled twice: once as a narrow string in the
// compiler's native charset and once as a UTF-16 literal, so that entries at
// the same index can be compared. See U_CHARSET_FAMILY in unicode/platform.h.
const char *nativeInvChars =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"    // upper-case letters
    "abcdefghijklmnopqrstuvwxyz"    // lower-case letters
    "0123456789"                    // digits
    " \"%&'()*+,-./:;<=>?_";        // space and punctuation
const char16_t *asciiInvChars =
    u"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    u"abcdefghijklmnopqrstuvwxyz"
    u"0123456789"
    u" \"%&'()*+,-./:;<=>?_";

}  // namespace
164 
165 void
TestUpperOrdinal()166 StringTest::TestUpperOrdinal() {
167     for (int32_t i = 0;; ++i) {
168         char ic = nativeInvChars[i];
169         uint8_t ac = static_cast<uint8_t>(asciiInvChars[i]);
170         int32_t expected = ac - 'A';
171         int32_t actual = uprv_upperOrdinal(ic);
172         if (0 <= expected && expected <= 25) {
173             if (actual != expected) {
174                 errln("uprv_upperOrdinal('%c')=%d != expected %d",
175                       ic, (int)actual, (int)expected);
176             }
177         } else {
178             if (0 <= actual && actual <= 25) {
179                 errln("uprv_upperOrdinal('%c')=%d should have been outside 0..25",
180                       ic, (int)actual);
181             }
182         }
183         if (ic == 0) { break; }
184     }
185 }
186 
187 void
TestLowerOrdinal()188 StringTest::TestLowerOrdinal() {
189     for (int32_t i = 0;; ++i) {
190         char ic = nativeInvChars[i];
191         uint8_t ac = static_cast<uint8_t>(asciiInvChars[i]);
192         int32_t expected = ac - 'a';
193         int32_t actual = uprv_lowerOrdinal(ic);
194         if (0 <= expected && expected <= 25) {
195             if (actual != expected) {
196                 errln("uprv_lowerOrdinal('%c')=%d != expected %d",
197                       ic, (int)actual, (int)expected);
198             }
199         } else {
200             if (0 <= actual && actual <= 25) {
201                 errln("uprv_lowerOrdinal('%c')=%d should have been outside 0..25",
202                       ic, (int)actual);
203             }
204         }
205         if (ic == 0) { break; }
206     }
207 }
208 
209 void
Test_UTF8_COUNT_TRAIL_BYTES()210 StringTest::Test_UTF8_COUNT_TRAIL_BYTES() {
211 #if !U_HIDE_OBSOLETE_UTF_OLD_H
212     if(UTF8_COUNT_TRAIL_BYTES(0x7F) != 0
213             || UTF8_COUNT_TRAIL_BYTES(0xC2) != 1
214             || UTF8_COUNT_TRAIL_BYTES(0xE0) != 2
215             || UTF8_COUNT_TRAIL_BYTES(0xF0) != 3) {
216         errln("UTF8_COUNT_TRAIL_BYTES does not work right! See utf_old.h.");
217     }
218 #endif
219     // Note: U8_COUNT_TRAIL_BYTES (current) and UTF8_COUNT_TRAIL_BYTES (deprecated)
220     //       have completely different implementations.
221     if (U8_COUNT_TRAIL_BYTES(0x7F) != 0
222             || U8_COUNT_TRAIL_BYTES(0xC2) != 1
223             || U8_COUNT_TRAIL_BYTES(0xE0) != 2
224             || U8_COUNT_TRAIL_BYTES(0xF0) != 3) {
225         errln("U8_COUNT_TRAIL_BYTES does not work right! See utf8.h.");
226     }
227 }
228 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)229 void StringTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
230     if(exec) {
231         logln("TestSuite Character and String Test: ");
232     }
233     TESTCASE_AUTO_BEGIN;
234     TESTCASE_AUTO(TestEndian);
235     TESTCASE_AUTO(TestSizeofTypes);
236     TESTCASE_AUTO(TestCharsetFamily);
237     TESTCASE_AUTO(Test_U_STRING);
238     TESTCASE_AUTO(Test_UNICODE_STRING);
239     TESTCASE_AUTO(Test_UNICODE_STRING_SIMPLE);
240     TESTCASE_AUTO(TestUpperOrdinal);
241     TESTCASE_AUTO(TestLowerOrdinal);
242     TESTCASE_AUTO(Test_UTF8_COUNT_TRAIL_BYTES);
243     TESTCASE_AUTO(TestSTLCompatibility);
244     TESTCASE_AUTO(TestStringPiece);
245     TESTCASE_AUTO(TestStringPieceComparisons);
246     TESTCASE_AUTO(TestStringPieceFind);
247     TESTCASE_AUTO(TestStringPieceOther);
248 #ifdef U_HAVE_STRING_VIEW
249     TESTCASE_AUTO(TestStringPieceStringView);
250 #endif
251     TESTCASE_AUTO(TestStringPieceU8);
252     TESTCASE_AUTO(TestByteSink);
253     TESTCASE_AUTO(TestCheckedArrayByteSink);
254     TESTCASE_AUTO(TestStringByteSink);
255     TESTCASE_AUTO(TestStringByteSinkAppendU8);
256     TESTCASE_AUTO(TestCharString);
257     TESTCASE_AUTO(TestCStr);
258     TESTCASE_AUTO(TestCharStrAppendNumber);
259     TESTCASE_AUTO(Testctou);
260     TESTCASE_AUTO_END;
261 }
262 
263 void
TestStringPiece()264 StringTest::TestStringPiece() {
265     // Default constructor.
266     StringPiece empty;
267     if(!empty.empty() || empty.data()!=NULL || empty.length()!=0 || empty.size()!=0) {
268         errln("StringPiece() failed");
269     }
270     // Construct from NULL const char * pointer.
271     StringPiece null((const char *)nullptr);
272     if(!null.empty() || null.data()!=NULL || null.length()!=0 || null.size()!=0) {
273         errln("StringPiece(NULL) failed");
274     }
275     // Construct from const char * pointer.
276     static const char *abc_chars="abc";
277     StringPiece abc(abc_chars);
278     if(abc.empty() || abc.data()!=abc_chars || abc.length()!=3 || abc.size()!=3) {
279         errln("StringPiece(abc_chars) failed");
280     }
281     // Construct from const char * pointer and length.
282     static const char *abcdefg_chars="abcdefg";
283     StringPiece abcd(abcdefg_chars, 4);
284     if(abcd.empty() || abcd.data()!=abcdefg_chars || abcd.length()!=4 || abcd.size()!=4) {
285         errln("StringPiece(abcdefg_chars, 4) failed");
286     }
287     // Construct from std::string.
288     std::string uvwxyz_string("uvwxyz");
289     StringPiece uvwxyz(uvwxyz_string);
290     if(uvwxyz.empty() || uvwxyz.data()!=uvwxyz_string.data() || uvwxyz.length()!=6 || uvwxyz.size()!=6) {
291         errln("StringPiece(uvwxyz_string) failed");
292     }
293     // Substring constructor with pos.
294     StringPiece sp(abcd, -1);
295     if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
296         errln("StringPiece(abcd, -1) failed");
297     }
298     sp=StringPiece(abcd, 5);
299     if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
300         errln("StringPiece(abcd, 5) failed");
301     }
302     sp=StringPiece(abcd, 2);
303     if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) {
304         errln("StringPiece(abcd, -1) failed");
305     }
306     // Substring constructor with pos and len.
307     sp=StringPiece(abcd, -1, 8);
308     if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
309         errln("StringPiece(abcd, -1, 8) failed");
310     }
311     sp=StringPiece(abcd, 5, 8);
312     if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
313         errln("StringPiece(abcd, 5, 8) failed");
314     }
315     sp=StringPiece(abcd, 2, 8);
316     if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) {
317         errln("StringPiece(abcd, -1) failed");
318     }
319     sp=StringPiece(abcd, 2, -1);
320     if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
321         errln("StringPiece(abcd, 5, -1) failed");
322     }
323     // static const npos
324     const int32_t *ptr_npos=&StringPiece::npos;
325     if(StringPiece::npos!=0x7fffffff || *ptr_npos!=0x7fffffff) {
326         errln("StringPiece::npos!=0x7fffffff");
327     }
328     // substr() method with pos, using len=npos.
329     sp=abcd.substr(-1);
330     if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
331         errln("abcd.substr(-1) failed");
332     }
333     sp=abcd.substr(5);
334     if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
335         errln("abcd.substr(5) failed");
336     }
337     sp=abcd.substr(2);
338     if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) {
339         errln("abcd.substr(-1) failed");
340     }
341     // substr() method with pos and len.
342     sp=abcd.substr(-1, 8);
343     if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
344         errln("abcd.substr(-1, 8) failed");
345     }
346     sp=abcd.substr(5, 8);
347     if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
348         errln("abcd.substr(5, 8) failed");
349     }
350     sp=abcd.substr(2, 8);
351     if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) {
352         errln("abcd.substr(-1) failed");
353     }
354     sp=abcd.substr(2, -1);
355     if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
356         errln("abcd.substr(5, -1) failed");
357     }
358     // clear()
359     sp=abcd;
360     sp.clear();
361     if(!sp.empty() || sp.data()!=NULL || sp.length()!=0 || sp.size()!=0) {
362         errln("abcd.clear() failed");
363     }
364     // remove_prefix()
365     sp=abcd;
366     sp.remove_prefix(-1);
367     if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
368         errln("abcd.remove_prefix(-1) failed");
369     }
370     sp=abcd;
371     sp.remove_prefix(2);
372     if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) {
373         errln("abcd.remove_prefix(2) failed");
374     }
375     sp=abcd;
376     sp.remove_prefix(5);
377     if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
378         errln("abcd.remove_prefix(5) failed");
379     }
380     // remove_suffix()
381     sp=abcd;
382     sp.remove_suffix(-1);
383     if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
384         errln("abcd.remove_suffix(-1) failed");
385     }
386     sp=abcd;
387     sp.remove_suffix(2);
388     if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=2 || sp.size()!=2) {
389         errln("abcd.remove_suffix(2) failed");
390     }
391     sp=abcd;
392     sp.remove_suffix(5);
393     if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
394         errln("abcd.remove_suffix(5) failed");
395     }
396 }
397 
398 void
TestStringPieceComparisons()399 StringTest::TestStringPieceComparisons() {
400     StringPiece empty;
401     StringPiece null(nullptr);
402     StringPiece abc("abc");
403     StringPiece abcd("abcdefg", 4);
404     StringPiece abx("abx");
405     if(empty!=null) {
406         errln("empty!=null");
407     }
408     if(empty==abc) {
409         errln("empty==abc");
410     }
411     if(abc==abcd) {
412         errln("abc==abcd");
413     }
414 
415     assertTrue("null<abc", null.compare(abc) < 0);
416     assertTrue("abc>null", abc.compare(null) > 0);
417     assertTrue("abc<abcd", abc.compare(abcd) < 0);
418     assertTrue("abcd>abc", abcd.compare(abc) > 0);
419     assertTrue("abc<abx", abc.compare(abx) < 0);
420     assertTrue("abx>abc", abx.compare(abc) > 0);
421     assertTrue("abx>abcd", abx.compare(abcd) > 0);
422     assertTrue("abcd<abx", abcd.compare(abx) < 0);
423     assertTrue("abx==abx", abx.compare(abx) == 0);
424 
425     // Behavior should be the same as std::string::compare
426     {
427         std::string null("");
428         std::string abc("abc");
429         std::string abcd("abcdefg", 4);
430         std::string abx("abx");
431 
432         assertTrue("std: null<abc", null.compare(abc) < 0);
433         assertTrue("std: abc>null", abc.compare(null) > 0);
434         assertTrue("std: abc<abcd", abc.compare(abcd) < 0);
435         assertTrue("std: abcd>abc", abcd.compare(abc) > 0);
436         assertTrue("std: abc<abx", abc.compare(abx) < 0);
437         assertTrue("std: abx>abc", abx.compare(abc) > 0);
438         assertTrue("std: abx>abcd", abx.compare(abcd) > 0);
439         assertTrue("std: abcd<abx", abcd.compare(abx) < 0);
440         assertTrue("std: abx==abx", abx.compare(abx) == 0);
441     }
442 
443     abcd.remove_suffix(1);
444     if(abc!=abcd) {
445         errln("abc!=abcd.remove_suffix(1)");
446     }
447     if(abc==abx) {
448         errln("abc==abx");
449     }
450 }
451 
452 void
TestStringPieceFind()453 StringTest::TestStringPieceFind() {
454     struct TestCase {
455         const char* haystack;
456         const char* needle;
457         int32_t expected;
458     } cases[] = {
459         { "", "", 0 },
460         { "", "x", -1 },
461         { "x", "", 0 },
462         { "x", "x", 0 },
463         { "xy", "x", 0 },
464         { "xy", "y", 1 },
465         { "xy", "xy", 0 },
466         { "xy", "xyz", -1 },
467         { "qwerty", "qqw", -1 },
468         { "qwerty", "qw", 0 },
469         { "qwerty", "er", 2 },
470         { "qwerty", "err", -1 },
471         { "qwerty", "ert", 2 },
472         { "qwerty", "ty", 4 },
473         { "qwerty", "tyy", -1 },
474         { "qwerty", "a", -1 },
475         { "qwerty", "abc", -1 }
476     };
477     int32_t caseNumber = 0;
478     for (auto& cas : cases) {
479         StringPiece haystack(cas.haystack);
480         StringPiece needle(cas.needle);
481         assertEquals(Int64ToUnicodeString(caseNumber),
482             cas.expected, haystack.find(needle, 0));
483         // Should be same as std::string::find
484         std::string stdhaystack(cas.haystack);
485         std::string stdneedle(cas.needle);
486         assertEquals(Int64ToUnicodeString(caseNumber) + u" (std)",
487             cas.expected, static_cast<int32_t>(stdhaystack.find(stdneedle, 0)));
488         // Test offsets against std::string::find
489         for (int32_t offset = 0; offset < haystack.length(); offset++) {
490             assertEquals(Int64ToUnicodeString(caseNumber) + "u @ " + Int64ToUnicodeString(offset),
491                 static_cast<int32_t>(stdhaystack.find(stdneedle, offset)), haystack.find(needle, offset));
492         }
493         caseNumber++;
494     }
495 }
496 
497 void
TestStringPieceOther()498 StringTest::TestStringPieceOther() {
499     static constexpr char msg[] = "Kapow!";
500 
501     // Another string piece implementation.
502     struct Other {
503         const char* data() { return msg; }
504         size_t size() { return sizeof msg - 1; }
505     };
506 
507     Other other;
508     StringPiece piece(other);
509 
510     assertEquals("size()", piece.size(), static_cast<int32_t>(other.size()));
511     assertEquals("data()", piece.data(), other.data());
512 }
513 
#ifdef U_HAVE_STRING_VIEW
/**
 * Checks that a StringPiece can be constructed from a std::string_view and
 * reports the same size and data.
 */
void
StringTest::TestStringPieceStringView() {
    static constexpr char msg[] = "Kapow!";

    std::string_view view(msg);  // C++17
    StringPiece piece(view);

    // view.size() is a size_t while piece.size() is an int32_t; cast so both
    // arguments have the same type, as TestStringPieceOther() does.
    assertEquals("size()", piece.size(), static_cast<int32_t>(view.size()));
    assertEquals("data()", piece.data(), view.data());
}
#endif
526 
527 void
TestStringPieceU8()528 StringTest::TestStringPieceU8() {
529     // ICU-20984 "mitigate some C++20 char8_t breakages"
530     // For the following APIs there are overloads for both
531     // const char * and const char8_t *.
532     // A u8"string literal" has one type or the other
533     // depending on C++ version and compiler settings.
534     StringPiece abc(u8"abc");
535     assertEquals("abc.length", 3, abc.length());
536     assertEquals("abc", "\x61\x62\x63", abc.data());
537 
538     StringPiece abc3(u8"abcdef", 3);
539     assertEquals("abc3.length", 3, abc3.length());
540     assertEquals("abc3[0]", 0x61, abc3.data()[0]);
541     assertEquals("abc3[1]", 0x62, abc3.data()[1]);
542     assertEquals("abc3[2]", 0x63, abc3.data()[2]);
543 
544     StringPiece uvw("q");
545     uvw.set(u8"uvw");
546     assertEquals("uvw.length", 3, uvw.length());
547     assertEquals("uvw", "\x75\x76\x77", uvw.data());
548 
549     StringPiece xyz("r");
550     xyz.set(u8"xyzXYZ", 3);
551     assertEquals("xyz.length", 3, xyz.length());
552     assertEquals("xyz[0]", 0x78, xyz.data()[0]);
553     assertEquals("xyz[1]", 0x79, xyz.data()[1]);
554     assertEquals("xyz[2]", 0x7a, xyz.data()[2]);
555 
556     StringPiece null(nullptr);
557     assertTrue("null is empty", null.empty());
558     assertTrue("null is null", null.data() == nullptr);
559 
560 #ifdef __cpp_lib_char8_t
561     std::u8string_view u8sv(u8"sv");  // C++20
562     StringPiece u8svsp(u8sv);
563     assertEquals("u8svsp.length", 2, u8svsp.length());
564     assertEquals("u8svsp", "\x73\x76", u8svsp.data());
565 
566     std::u8string u8str(u8"str");  // C++20
567     StringPiece u8strsp(u8str);
568     assertEquals("u8strsp.length", 3, u8strsp.length());
569     assertEquals("u8strsp", "\x73\x74\x72", u8strsp.data());
570 #endif  // __cpp_lib_char8_t
571 }
572 
573 // Verify that ByteSink is subclassable and Flush() overridable.
574 class SimpleByteSink : public ByteSink {
575 public:
SimpleByteSink(char * outbuf)576     SimpleByteSink(char *outbuf) : fOutbuf(outbuf), fLength(0) {}
Append(const char * bytes,int32_t n)577     virtual void Append(const char *bytes, int32_t n) {
578         if(fOutbuf != bytes) {
579             memcpy(fOutbuf, bytes, n);
580         }
581         fOutbuf += n;
582         fLength += n;
583     }
Flush()584     virtual void Flush() { Append("z", 1); }
length()585     int32_t length() { return fLength; }
586 private:
587     char *fOutbuf;
588     int32_t fLength;
589 };
590 
591 // Test the ByteSink base class.
592 void
TestByteSink()593 StringTest::TestByteSink() {
594     char buffer[20];
595     buffer[4] = '!';
596     SimpleByteSink sink(buffer);
597     sink.Append("abc", 3);
598     sink.Flush();
599     if(!(sink.length() == 4 && 0 == memcmp("abcz", buffer, 4) && buffer[4] == '!')) {
600         errln("ByteSink (SimpleByteSink) did not Append() or Flush() as expected");
601         return;
602     }
603     char scratch[20];
604     int32_t capacity = -1;
605     char *dest = sink.GetAppendBuffer(0, 50, scratch, (int32_t)sizeof(scratch), &capacity);
606     if(dest != NULL || capacity != 0) {
607         errln("ByteSink.GetAppendBuffer(min_capacity<1) did not properly return NULL[0]");
608         return;
609     }
610     dest = sink.GetAppendBuffer(10, 50, scratch, 9, &capacity);
611     if(dest != NULL || capacity != 0) {
612         errln("ByteSink.GetAppendBuffer(scratch_capacity<min_capacity) did not properly return NULL[0]");
613         return;
614     }
615     dest = sink.GetAppendBuffer(5, 50, scratch, (int32_t)sizeof(scratch), &capacity);
616     if(dest != scratch || capacity != (int32_t)sizeof(scratch)) {
617         errln("ByteSink.GetAppendBuffer() did not properly return the scratch buffer");
618     }
619 }
620 
621 void
TestCheckedArrayByteSink()622 StringTest::TestCheckedArrayByteSink() {
623     char buffer[20];  // < 26 for the test code to work
624     buffer[3] = '!';
625     CheckedArrayByteSink sink(buffer, (int32_t)sizeof(buffer));
626     sink.Append("abc", 3);
627     if(!(sink.NumberOfBytesAppended() == 3 && sink.NumberOfBytesWritten() == 3 &&
628          0 == memcmp("abc", buffer, 3) && buffer[3] == '!') &&
629          !sink.Overflowed()
630     ) {
631         errln("CheckedArrayByteSink did not Append() as expected");
632         return;
633     }
634     char scratch[10];
635     int32_t capacity = -1;
636     char *dest = sink.GetAppendBuffer(0, 50, scratch, (int32_t)sizeof(scratch), &capacity);
637     if(dest != NULL || capacity != 0) {
638         errln("CheckedArrayByteSink.GetAppendBuffer(min_capacity<1) did not properly return NULL[0]");
639         return;
640     }
641     dest = sink.GetAppendBuffer(10, 50, scratch, 9, &capacity);
642     if(dest != NULL || capacity != 0) {
643         errln("CheckedArrayByteSink.GetAppendBuffer(scratch_capacity<min_capacity) did not properly return NULL[0]");
644         return;
645     }
646     dest = sink.GetAppendBuffer(10, 50, scratch, (int32_t)sizeof(scratch), &capacity);
647     if(dest != buffer + 3 || capacity != (int32_t)sizeof(buffer) - 3) {
648         errln("CheckedArrayByteSink.GetAppendBuffer() did not properly return its own buffer");
649         return;
650     }
651     memcpy(dest, "defghijklm", 10);
652     sink.Append(dest, 10);
653     if(!(sink.NumberOfBytesAppended() == 13 && sink.NumberOfBytesWritten() == 13 &&
654          0 == memcmp("abcdefghijklm", buffer, 13) &&
655          !sink.Overflowed())
656     ) {
657         errln("CheckedArrayByteSink did not Append(its own buffer) as expected");
658         return;
659     }
660     dest = sink.GetAppendBuffer(10, 50, scratch, (int32_t)sizeof(scratch), &capacity);
661     if(dest != scratch || capacity != (int32_t)sizeof(scratch)) {
662         errln("CheckedArrayByteSink.GetAppendBuffer() did not properly return the scratch buffer");
663     }
664     memcpy(dest, "nopqrstuvw", 10);
665     sink.Append(dest, 10);
666     if(!(sink.NumberOfBytesAppended() == 23 &&
667          sink.NumberOfBytesWritten() == (int32_t)sizeof(buffer) &&
668          0 == memcmp("abcdefghijklmnopqrstuvwxyz", buffer, (int32_t)sizeof(buffer)) &&
669          sink.Overflowed())
670     ) {
671         errln("CheckedArrayByteSink did not Append(scratch buffer) as expected");
672         return;
673     }
674     sink.Reset().Append("123", 3);
675     if(!(sink.NumberOfBytesAppended() == 3 && sink.NumberOfBytesWritten() == 3 &&
676          0 == memcmp("123defghijklmnopqrstuvwxyz", buffer, (int32_t)sizeof(buffer)) &&
677          !sink.Overflowed())
678     ) {
679         errln("CheckedArrayByteSink did not Reset().Append() as expected");
680         return;
681     }
682 }
683 
684 void
TestStringByteSink()685 StringTest::TestStringByteSink() {
686     // Not much to test because only the constructors and Append()
687     // are implemented, and trivially so.
688     std::string result("abc");  // std::string
689     StringByteSink<std::string> sink(&result);
690     sink.Append("def", 3);
691     if(result != "abcdef") {
692         errln("StringByteSink did not Append() as expected");
693     }
694     StringByteSink<std::string> sink2(&result, 20);
695     if(result.capacity() < (result.length() + 20)) {
696         errln("StringByteSink should have 20 append capacity, has only %d",
697               (int)(result.capacity() - result.length()));
698     }
699     sink.Append("ghi", 3);
700     if(result != "abcdefghi") {
701         errln("StringByteSink did not Append() as expected");
702     }
703 }
704 
705 void
TestStringByteSinkAppendU8()706 StringTest::TestStringByteSinkAppendU8() {
707     // ICU-20984 "mitigate some C++20 char8_t breakages"
708     // For the following APIs there are overloads for both
709     // const char * and const char8_t *.
710     // A u8"string literal" has one type or the other
711     // depending on C++ version and compiler settings.
712     std::string result("abc");
713     StringByteSink<std::string> sink(&result);
714     sink.AppendU8("def", 3);
715     sink.AppendU8(u8"ghijkl", 4);
716     assertEquals("abcdefghij", "abcdef\x67\x68\x69\x6a", result.c_str());
717 }
718 
719 #if defined(_MSC_VER)
720 #include <vector>
721 #endif
722 
723 void
TestSTLCompatibility()724 StringTest::TestSTLCompatibility() {
725 #if defined(_MSC_VER)
726     /* Just make sure that it compiles with STL's placement new usage. */
727     std::vector<UnicodeString> myvect;
728     myvect.push_back(UnicodeString("blah"));
729 #endif
730 }
731 
732 void
TestCharString()733 StringTest::TestCharString() {
734     IcuTestErrorCode errorCode(*this, "TestCharString()");
735     char expected[400];
736     static const char longStr[] =
737         "This is a long string that is meant to cause reallocation of the internal buffer of CharString.";
738     CharString chStr(longStr, errorCode);
739     if (0 != strcmp(longStr, chStr.data()) || (int32_t)strlen(longStr) != chStr.length()) {
740         errln("CharString(longStr) failed.");
741     }
742     CharString test("Test", errorCode);
743     CharString copy(test,errorCode);
744     copy.copyFrom(chStr, errorCode);
745     if (0 != strcmp(longStr, copy.data()) || (int32_t)strlen(longStr) != copy.length()) {
746         errln("CharString.copyFrom() failed.");
747     }
748     StringPiece sp(chStr.toStringPiece());
749     sp.remove_prefix(4);
750     chStr.append(sp, errorCode).append(chStr, errorCode);
751     strcpy(expected, longStr);
752     strcat(expected, longStr+4);
753     strcat(expected, longStr);
754     strcat(expected, longStr+4);
755     if (0 != strcmp(expected, chStr.data()) || (int32_t)strlen(expected) != chStr.length()) {
756         errln("CharString(longStr).append(substring of self).append(self) failed.");
757     }
758     chStr.clear().append("abc", errorCode).append("defghij", 3, errorCode);
759     if (0 != strcmp("abcdef", chStr.data()) || 6 != chStr.length()) {
760         errln("CharString.clear().append(abc).append(defghij, 3) failed.");
761     }
762     chStr.appendInvariantChars(UNICODE_STRING_SIMPLE(
763         "This is a long string that is meant to cause reallocation of the internal buffer of CharString."),
764         errorCode);
765     strcpy(expected, "abcdef");
766     strcat(expected, longStr);
767     if (0 != strcmp(expected, chStr.data()) || (int32_t)strlen(expected) != chStr.length()) {
768         errln("CharString.appendInvariantChars(longStr) failed.");
769     }
770     int32_t appendCapacity = 0;
771     char *buffer = chStr.getAppendBuffer(5, 10, appendCapacity, errorCode);
772     if (errorCode.isFailure()) {
773         return;
774     }
775     memcpy(buffer, "*****", 5);
776     chStr.append(buffer, 5, errorCode);
777     chStr.truncate(chStr.length()-3);
778     strcat(expected, "**");
779     if (0 != strcmp(expected, chStr.data()) || (int32_t)strlen(expected) != chStr.length()) {
780         errln("CharString.getAppendBuffer().append(**) failed.");
781     }
782 
783     UErrorCode ec = U_ZERO_ERROR;
784     chStr.clear();
785     chStr.appendInvariantChars(UnicodeString("The '@' character is not invariant."), ec);
786     if (ec != U_INVARIANT_CONVERSION_ERROR) {
787         errln("%s:%d expected U_INVARIANT_CONVERSION_ERROR, got %s", __FILE__, __LINE__, u_errorName(ec));
788     }
789     if (chStr.length() != 0) {
790         errln("%s:%d expected length() = 0, got %d", __FILE__, __LINE__, chStr.length());
791     }
792 
793     {
794         CharString s1("Short string", errorCode);
795         CharString s2(std::move(s1));
796         assertEquals("s2 should have content of s1", "Short string", s2.data());
797         CharString s3("Dummy", errorCode);
798         s3 = std::move(s2);
799         assertEquals("s3 should have content of s2", "Short string", s3.data());
800     }
801 
802     {
803         CharString s1("Long string over 40 characters to trigger heap allocation", errorCode);
804         CharString s2(std::move(s1));
805         assertEquals("s2 should have content of s1",
806                 "Long string over 40 characters to trigger heap allocation",
807                 s2.data());
808         CharString s3("Dummy string with over 40 characters to trigger heap allocation", errorCode);
809         s3 = std::move(s2);
810         assertEquals("s3 should have content of s2",
811                 "Long string over 40 characters to trigger heap allocation",
812                 s3.data());
813     }
814 
815     {
816         // extract()
817         errorCode.reset();
818         CharString s("abc", errorCode);
819         char buffer[10];
820 
821         s.extract(buffer, 10, errorCode);
822         assertEquals("abc.extract(10) success", U_ZERO_ERROR, errorCode.get());
823         assertEquals("abc.extract(10) output", "abc", buffer);
824 
825         strcpy(buffer, "012345");
826         s.extract(buffer, 3, errorCode);
827         assertEquals("abc.extract(3) not terminated",
828                      U_STRING_NOT_TERMINATED_WARNING, errorCode.reset());
829         assertEquals("abc.extract(3) output", "abc345", buffer);
830 
831         strcpy(buffer, "012345");
832         s.extract(buffer, 2, errorCode);
833         assertEquals("abc.extract(2) overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset());
834     }
835 }
836 
837 void
TestCStr()838 StringTest::TestCStr() {
839     const char *cs = "This is a test string.";
840     UnicodeString us(cs);
841     if (0 != strcmp(CStr(us)(), cs)) {
842         errln("%s:%d CStr(s)() failed. Expected \"%s\", got \"%s\"", __FILE__, __LINE__, cs, CStr(us)());
843     }
844 }
845 
TestCharStrAppendNumber()846 void StringTest::TestCharStrAppendNumber() {
847     IcuTestErrorCode errorCode(*this, "TestCharStrAppendNumber()");
848 
849     CharString testString;
850     testString.appendNumber(1, errorCode);
851     assertEquals("TestAppendNumber 1", "1", testString.data());
852 
853     testString.clear();
854     testString.appendNumber(-1, errorCode);
855     assertEquals("TestAppendNumber -1", "-1", testString.data());
856 
857     testString.clear();
858     testString.appendNumber(12345, errorCode);
859     assertEquals("TestAppendNumber 12345", "12345", testString.data());
860     testString.appendNumber(123, errorCode);
861     assertEquals("TestAppendNumber 12345 and then 123", "12345123", testString.data());
862 
863     testString.clear();
864     testString.appendNumber(2147483647, errorCode);
865     assertEquals("TestAppendNumber when appending the biggest int32", "2147483647", testString.data());
866 
867     testString.clear();
868     testString.appendNumber(-2147483648, errorCode);
869     assertEquals("TestAppendNumber when appending the smallest int32", "-2147483648", testString.data());
870 
871     testString.clear();
872     testString.appendNumber(0, errorCode);
873     assertEquals("TestAppendNumber when appending zero", "0", testString.data());
874 }
875 
876 void
Testctou()877 StringTest::Testctou() {
878   const char *cs = "Fa\\u0127mu";
879   UnicodeString u = ctou(cs);
880   assertEquals("Testing unescape@0", (int32_t)0x0046, u.charAt(0));
881   assertEquals("Testing unescape@2", (int32_t)295, u.charAt(2));
882 }
883