• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2015, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /*   file name:  strtest.cpp
9 *   encoding:   UTF-8
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 1999nov22
14 *   created by: Markus W. Scherer
15 */
16 
17 #include <string_view>
18 #include <cstddef>
19 #include <string.h>
20 #include <limits>
21 
22 #include "unicode/utypes.h"
23 #include "unicode/putil.h"
24 #include "unicode/std_string.h"
25 #include "unicode/stringpiece.h"
26 #include "unicode/unistr.h"
27 #include "unicode/ustring.h"
28 #include "unicode/utf_old.h"    // for UTF8_COUNT_TRAIL_BYTES
29 #include "unicode/utf8.h"
30 #include "charstr.h"
31 #include "cstr.h"
32 #include "intltest.h"
33 #include "strtest.h"
34 #include "uinvchar.h"
35 
~StringTest()36 StringTest::~StringTest() {}
37 
TestEndian()38 void StringTest::TestEndian() {
39     union {
40         uint8_t byte;
41         uint16_t word;
42     } u;
43     u.word=0x0100;
44     if(U_IS_BIG_ENDIAN!=u.byte) {
45         errln("TestEndian: U_IS_BIG_ENDIAN needs to be fixed in platform.h");
46     }
47 }
48 
TestSizeofTypes()49 void StringTest::TestSizeofTypes() {
50     if(U_SIZEOF_WCHAR_T!=sizeof(wchar_t)) {
51         errln("TestSizeofWCharT: U_SIZEOF_WCHAR_T!=sizeof(wchar_t) - U_SIZEOF_WCHAR_T needs to be fixed in platform.h");
52     }
53 #ifdef U_INT64_T_UNAVAILABLE
54     errln("int64_t and uint64_t are undefined.");
55 #else
56     if(8!=sizeof(int64_t)) {
57         errln("TestSizeofTypes: 8!=sizeof(int64_t) - int64_t needs to be fixed in platform.h");
58     }
59     if(8!=sizeof(uint64_t)) {
60         errln("TestSizeofTypes: 8!=sizeof(uint64_t) - uint64_t needs to be fixed in platform.h");
61     }
62 #endif
63     if(8!=sizeof(double)) {
64         errln("8!=sizeof(double) - putil.c code may not work");
65     }
66     if(4!=sizeof(int32_t)) {
67         errln("4!=sizeof(int32_t)");
68     }
69     if(4!=sizeof(uint32_t)) {
70         errln("4!=sizeof(uint32_t)");
71     }
72     if(2!=sizeof(int16_t)) {
73         errln("2!=sizeof(int16_t)");
74     }
75     if(2!=sizeof(uint16_t)) {
76         errln("2!=sizeof(uint16_t)");
77     }
78     if(2!=sizeof(char16_t)) {
79         errln("2!=sizeof(char16_t)");
80     }
81     if(1!=sizeof(int8_t)) {
82         errln("1!=sizeof(int8_t)");
83     }
84     if(1!=sizeof(uint8_t)) {
85         errln("1!=sizeof(uint8_t)");
86     }
87     if(1!=sizeof(UBool)) {
88         errln("1!=sizeof(UBool)");
89     }
90 }
91 
TestCharsetFamily()92 void StringTest::TestCharsetFamily() {
93     unsigned char c='A';
94     if( (U_CHARSET_FAMILY==U_ASCII_FAMILY && c!=0x41) ||
95         (U_CHARSET_FAMILY==U_EBCDIC_FAMILY && c!=0xc1)
96     ) {
97         errln("TestCharsetFamily: U_CHARSET_FAMILY needs to be fixed in platform.h");
98     }
99 }
100 
101 U_STRING_DECL(ustringVar, "aZ0 -", 5);
102 
103 void
Test_U_STRING()104 StringTest::Test_U_STRING() {
105     U_STRING_INIT(ustringVar, "aZ0 -", 5);
106     if( u_strlen(ustringVar)!=5 ||
107         ustringVar[0]!=0x61 ||
108         ustringVar[1]!=0x5a ||
109         ustringVar[2]!=0x30 ||
110         ustringVar[3]!=0x20 ||
111         ustringVar[4]!=0x2d ||
112         ustringVar[5]!=0
113     ) {
114         errln("Test_U_STRING: U_STRING_DECL with U_STRING_INIT does not work right! "
115               "See putil.h and utypes.h with platform.h.");
116     }
117 }
118 
119 void
Test_UNICODE_STRING()120 StringTest::Test_UNICODE_STRING() {
121     UnicodeString ustringVar=UNICODE_STRING("aZ0 -", 5);
122     if( ustringVar.length()!=5 ||
123         ustringVar[0]!=0x61 ||
124         ustringVar[1]!=0x5a ||
125         ustringVar[2]!=0x30 ||
126         ustringVar[3]!=0x20 ||
127         ustringVar[4]!=0x2d
128     ) {
129         errln("Test_UNICODE_STRING: UNICODE_STRING does not work right! "
130               "See unistr.h and utypes.h with platform.h.");
131     }
132 }
133 
134 void
Test_UNICODE_STRING_SIMPLE()135 StringTest::Test_UNICODE_STRING_SIMPLE() {
136     UnicodeString ustringVar=UNICODE_STRING_SIMPLE("aZ0 -");
137     if( ustringVar.length()!=5 ||
138         ustringVar[0]!=0x61 ||
139         ustringVar[1]!=0x5a ||
140         ustringVar[2]!=0x30 ||
141         ustringVar[3]!=0x20 ||
142         ustringVar[4]!=0x2d
143     ) {
144         errln("Test_UNICODE_STRING_SIMPLE: UNICODE_STRING_SIMPLE does not work right! "
145               "See unistr.h and utypes.h with platform.h.");
146     }
147 }
148 
namespace {

// Two parallel strings holding the same characters at the same indexes.
// nativeInvChars uses ordinary (native-charset) literals, asciiInvChars uses
// u"" literals. See U_CHARSET_FAMILY in unicode/platform.h.
const char *nativeInvChars =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"        // uppercase letters
    "abcdefghijklmnopqrstuvwxyz"        // lowercase letters
    "0123456789"                        // digits
    " \"%&'()*+,-./:;<=>?_";            // space and punctuation
const char16_t *asciiInvChars =
    u"ABCDEFGHIJKLMNOPQRSTUVWXYZ"       // uppercase letters
    u"abcdefghijklmnopqrstuvwxyz"       // lowercase letters
    u"0123456789"                       // digits
    u" \"%&'()*+,-./:;<=>?_";           // space and punctuation

}  // namespace
162 
163 void
TestUpperOrdinal()164 StringTest::TestUpperOrdinal() {
165     for (int32_t i = 0;; ++i) {
166         char ic = nativeInvChars[i];
167         uint8_t ac = static_cast<uint8_t>(asciiInvChars[i]);
168         int32_t expected = ac - 'A';
169         int32_t actual = uprv_upperOrdinal(ic);
170         if (0 <= expected && expected <= 25) {
171             if (actual != expected) {
172                 errln("uprv_upperOrdinal('%c')=%d != expected %d",
173                       ic, (int)actual, (int)expected);
174             }
175         } else {
176             if (0 <= actual && actual <= 25) {
177                 errln("uprv_upperOrdinal('%c')=%d should have been outside 0..25",
178                       ic, (int)actual);
179             }
180         }
181         if (ic == 0) { break; }
182     }
183 }
184 
185 void
TestLowerOrdinal()186 StringTest::TestLowerOrdinal() {
187     for (int32_t i = 0;; ++i) {
188         char ic = nativeInvChars[i];
189         uint8_t ac = static_cast<uint8_t>(asciiInvChars[i]);
190         int32_t expected = ac - 'a';
191         int32_t actual = uprv_lowerOrdinal(ic);
192         if (0 <= expected && expected <= 25) {
193             if (actual != expected) {
194                 errln("uprv_lowerOrdinal('%c')=%d != expected %d",
195                       ic, (int)actual, (int)expected);
196             }
197         } else {
198             if (0 <= actual && actual <= 25) {
199                 errln("uprv_lowerOrdinal('%c')=%d should have been outside 0..25",
200                       ic, (int)actual);
201             }
202         }
203         if (ic == 0) { break; }
204     }
205 }
206 
207 void
Test_UTF8_COUNT_TRAIL_BYTES()208 StringTest::Test_UTF8_COUNT_TRAIL_BYTES() {
209 #if !U_HIDE_OBSOLETE_UTF_OLD_H
210     if(UTF8_COUNT_TRAIL_BYTES(0x7F) != 0
211             || UTF8_COUNT_TRAIL_BYTES(0xC2) != 1
212             || UTF8_COUNT_TRAIL_BYTES(0xE0) != 2
213             || UTF8_COUNT_TRAIL_BYTES(0xF0) != 3) {
214         errln("UTF8_COUNT_TRAIL_BYTES does not work right! See utf_old.h.");
215     }
216 #endif
217     // Note: U8_COUNT_TRAIL_BYTES (current) and UTF8_COUNT_TRAIL_BYTES (deprecated)
218     //       have completely different implementations.
219     if (U8_COUNT_TRAIL_BYTES(0x7F) != 0
220             || U8_COUNT_TRAIL_BYTES(0xC2) != 1
221             || U8_COUNT_TRAIL_BYTES(0xE0) != 2
222             || U8_COUNT_TRAIL_BYTES(0xF0) != 3) {
223         errln("U8_COUNT_TRAIL_BYTES does not work right! See utf8.h.");
224     }
225 }
226 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)227 void StringTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char * /*par*/) {
228     if(exec) {
229         logln("TestSuite Character and String Test: ");
230     }
231     TESTCASE_AUTO_BEGIN;
232     TESTCASE_AUTO(TestEndian);
233     TESTCASE_AUTO(TestSizeofTypes);
234     TESTCASE_AUTO(TestCharsetFamily);
235     TESTCASE_AUTO(Test_U_STRING);
236     TESTCASE_AUTO(Test_UNICODE_STRING);
237     TESTCASE_AUTO(Test_UNICODE_STRING_SIMPLE);
238     TESTCASE_AUTO(TestUpperOrdinal);
239     TESTCASE_AUTO(TestLowerOrdinal);
240     TESTCASE_AUTO(Test_UTF8_COUNT_TRAIL_BYTES);
241     TESTCASE_AUTO(TestSTLCompatibility);
242     TESTCASE_AUTO(TestStringPiece);
243     TESTCASE_AUTO(TestStringPieceComparisons);
244     TESTCASE_AUTO(TestStringPieceFind);
245     TESTCASE_AUTO(TestStringPieceOther);
246     TESTCASE_AUTO(TestStringPieceStringView);
247     TESTCASE_AUTO(TestStringPieceU8);
248     TESTCASE_AUTO(TestByteSink);
249     TESTCASE_AUTO(TestCheckedArrayByteSink);
250     TESTCASE_AUTO(TestStringByteSink);
251     TESTCASE_AUTO(TestStringByteSinkAppendU8);
252     TESTCASE_AUTO(TestCharString);
253     TESTCASE_AUTO(TestCStr);
254     TESTCASE_AUTO(TestCharStrAppendNumber);
255     TESTCASE_AUTO(Testctou);
256     TESTCASE_AUTO_END;
257 }
258 
259 void
TestStringPiece()260 StringTest::TestStringPiece() {
261     // Default constructor.
262     StringPiece empty;
263     if(!empty.empty() || empty.data()!=nullptr || empty.length()!=0 || empty.size()!=0) {
264         errln("StringPiece() failed");
265     }
266     // Construct from nullptr const char * pointer.
267     StringPiece null((const char *)nullptr);
268     if(!null.empty() || null.data()!=nullptr || null.length()!=0 || null.size()!=0) {
269         errln("StringPiece(nullptr) failed");
270     }
271     // Construct from const char * pointer.
272     static const char *abc_chars="abc";
273     StringPiece abc(abc_chars);
274     if(abc.empty() || abc.data()!=abc_chars || abc.length()!=3 || abc.size()!=3) {
275         errln("StringPiece(abc_chars) failed");
276     }
277     // Construct from const char * pointer and length.
278     static const char *abcdefg_chars="abcdefg";
279     StringPiece abcd(abcdefg_chars, 4);
280     if(abcd.empty() || abcd.data()!=abcdefg_chars || abcd.length()!=4 || abcd.size()!=4) {
281         errln("StringPiece(abcdefg_chars, 4) failed");
282     }
283     // Construct from std::string.
284     std::string uvwxyz_string("uvwxyz");
285     StringPiece uvwxyz(uvwxyz_string);
286     if(uvwxyz.empty() || uvwxyz.data()!=uvwxyz_string.data() || uvwxyz.length()!=6 || uvwxyz.size()!=6) {
287         errln("StringPiece(uvwxyz_string) failed");
288     }
289     // Substring constructor with pos.
290     StringPiece sp(abcd, -1);
291     if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
292         errln("StringPiece(abcd, -1) failed");
293     }
294     sp=StringPiece(abcd, 5);
295     if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
296         errln("StringPiece(abcd, 5) failed");
297     }
298     sp=StringPiece(abcd, 2);
299     if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) {
300         errln("StringPiece(abcd, -1) failed");
301     }
302     // Substring constructor with pos and len.
303     sp=StringPiece(abcd, -1, 8);
304     if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
305         errln("StringPiece(abcd, -1, 8) failed");
306     }
307     sp=StringPiece(abcd, 5, 8);
308     if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
309         errln("StringPiece(abcd, 5, 8) failed");
310     }
311     sp=StringPiece(abcd, 2, 8);
312     if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) {
313         errln("StringPiece(abcd, -1) failed");
314     }
315     sp=StringPiece(abcd, 2, -1);
316     if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
317         errln("StringPiece(abcd, 5, -1) failed");
318     }
319     // static const npos
320     const int32_t *ptr_npos=&StringPiece::npos;
321     if(StringPiece::npos!=0x7fffffff || *ptr_npos!=0x7fffffff) {
322         errln("StringPiece::npos!=0x7fffffff");
323     }
324     // substr() method with pos, using len=npos.
325     sp=abcd.substr(-1);
326     if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
327         errln("abcd.substr(-1) failed");
328     }
329     sp=abcd.substr(5);
330     if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
331         errln("abcd.substr(5) failed");
332     }
333     sp=abcd.substr(2);
334     if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) {
335         errln("abcd.substr(-1) failed");
336     }
337     // substr() method with pos and len.
338     sp=abcd.substr(-1, 8);
339     if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
340         errln("abcd.substr(-1, 8) failed");
341     }
342     sp=abcd.substr(5, 8);
343     if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
344         errln("abcd.substr(5, 8) failed");
345     }
346     sp=abcd.substr(2, 8);
347     if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) {
348         errln("abcd.substr(-1) failed");
349     }
350     sp=abcd.substr(2, -1);
351     if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
352         errln("abcd.substr(5, -1) failed");
353     }
354     // clear()
355     sp=abcd;
356     sp.clear();
357     if(!sp.empty() || sp.data()!=nullptr || sp.length()!=0 || sp.size()!=0) {
358         errln("abcd.clear() failed");
359     }
360     // remove_prefix()
361     sp=abcd;
362     sp.remove_prefix(-1);
363     if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
364         errln("abcd.remove_prefix(-1) failed");
365     }
366     sp=abcd;
367     sp.remove_prefix(2);
368     if(sp.empty() || sp.data()!=abcdefg_chars+2 || sp.length()!=2 || sp.size()!=2) {
369         errln("abcd.remove_prefix(2) failed");
370     }
371     sp=abcd;
372     sp.remove_prefix(5);
373     if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
374         errln("abcd.remove_prefix(5) failed");
375     }
376     // remove_suffix()
377     sp=abcd;
378     sp.remove_suffix(-1);
379     if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=4 || sp.size()!=4) {
380         errln("abcd.remove_suffix(-1) failed");
381     }
382     sp=abcd;
383     sp.remove_suffix(2);
384     if(sp.empty() || sp.data()!=abcdefg_chars || sp.length()!=2 || sp.size()!=2) {
385         errln("abcd.remove_suffix(2) failed");
386     }
387     sp=abcd;
388     sp.remove_suffix(5);
389     if(!sp.empty() || sp.length()!=0 || sp.size()!=0) {
390         errln("abcd.remove_suffix(5) failed");
391     }
392 }
393 
394 void
TestStringPieceComparisons()395 StringTest::TestStringPieceComparisons() {
396     StringPiece empty;
397     StringPiece null(nullptr);
398     StringPiece abc("abc");
399     StringPiece abcd("abcdefg", 4);
400     StringPiece abx("abx");
401     if(empty!=null) {
402         errln("empty!=null");
403     }
404     if(empty==abc) {
405         errln("empty==abc");
406     }
407     if(abc==abcd) {
408         errln("abc==abcd");
409     }
410 
411     assertTrue("null<abc", null.compare(abc) < 0);
412     assertTrue("abc>null", abc.compare(null) > 0);
413     assertTrue("abc<abcd", abc.compare(abcd) < 0);
414     assertTrue("abcd>abc", abcd.compare(abc) > 0);
415     assertTrue("abc<abx", abc.compare(abx) < 0);
416     assertTrue("abx>abc", abx.compare(abc) > 0);
417     assertTrue("abx>abcd", abx.compare(abcd) > 0);
418     assertTrue("abcd<abx", abcd.compare(abx) < 0);
419     assertTrue("abx==abx", abx.compare(abx) == 0);
420 
421     // Behavior should be the same as std::string::compare
422     {
423         std::string null("");
424         std::string abc("abc");
425         std::string abcd("abcdefg", 4);
426         std::string abx("abx");
427 
428         assertTrue("std: null<abc", null.compare(abc) < 0);
429         assertTrue("std: abc>null", abc.compare(null) > 0);
430         assertTrue("std: abc<abcd", abc.compare(abcd) < 0);
431         assertTrue("std: abcd>abc", abcd.compare(abc) > 0);
432         assertTrue("std: abc<abx", abc.compare(abx) < 0);
433         assertTrue("std: abx>abc", abx.compare(abc) > 0);
434         assertTrue("std: abx>abcd", abx.compare(abcd) > 0);
435         assertTrue("std: abcd<abx", abcd.compare(abx) < 0);
436         assertTrue("std: abx==abx", abx.compare(abx) == 0);
437     }
438 
439     abcd.remove_suffix(1);
440     if(abc!=abcd) {
441         errln("abc!=abcd.remove_suffix(1)");
442     }
443     if(abc==abx) {
444         errln("abc==abx");
445     }
446 }
447 
448 void
TestStringPieceFind()449 StringTest::TestStringPieceFind() {
450     struct TestCase {
451         const char* haystack;
452         const char* needle;
453         int32_t expected;
454     } cases[] = {
455         { "", "", 0 },
456         { "", "x", -1 },
457         { "x", "", 0 },
458         { "x", "x", 0 },
459         { "xy", "x", 0 },
460         { "xy", "y", 1 },
461         { "xy", "xy", 0 },
462         { "xy", "xyz", -1 },
463         { "qwerty", "qqw", -1 },
464         { "qwerty", "qw", 0 },
465         { "qwerty", "er", 2 },
466         { "qwerty", "err", -1 },
467         { "qwerty", "ert", 2 },
468         { "qwerty", "ty", 4 },
469         { "qwerty", "tyy", -1 },
470         { "qwerty", "a", -1 },
471         { "qwerty", "abc", -1 }
472     };
473     int32_t caseNumber = 0;
474     for (auto& cas : cases) {
475         StringPiece haystack(cas.haystack);
476         StringPiece needle(cas.needle);
477         assertEquals(Int64ToUnicodeString(caseNumber),
478             cas.expected, haystack.find(needle, 0));
479         // Should be same as std::string::find
480         std::string stdhaystack(cas.haystack);
481         std::string stdneedle(cas.needle);
482         assertEquals(Int64ToUnicodeString(caseNumber) + u" (std)",
483             cas.expected, static_cast<int32_t>(stdhaystack.find(stdneedle, 0)));
484         // Test offsets against std::string::find
485         for (int32_t offset = 0; offset < haystack.length(); offset++) {
486             assertEquals(Int64ToUnicodeString(caseNumber) + "u @ " + Int64ToUnicodeString(offset),
487                 static_cast<int32_t>(stdhaystack.find(stdneedle, offset)), haystack.find(needle, offset));
488         }
489         caseNumber++;
490     }
491 }
492 
493 void
TestStringPieceOther()494 StringTest::TestStringPieceOther() {
495     static constexpr char msg[] = "Kapow!";
496 
497     // Another string piece implementation.
498     struct Other {
499         const char* data() { return msg; }
500         size_t size() { return sizeof msg - 1; }
501     };
502 
503     Other other;
504     StringPiece piece(other);
505 
506     assertEquals("size()", piece.size(), static_cast<int32_t>(other.size()));
507     assertEquals("data()", piece.data(), other.data());
508 }
509 
510 void
TestStringPieceStringView()511 StringTest::TestStringPieceStringView() {
512     static constexpr char msg[] = "Kapow!";
513 
514     std::string_view view(msg);  // C++17
515     StringPiece piece(view);
516 
517     assertEquals("size()", piece.size(), view.size());
518     assertEquals("data()", piece.data(), view.data());
519 }
520 
521 void
TestStringPieceU8()522 StringTest::TestStringPieceU8() {
523     // ICU-20984 "mitigate some C++20 char8_t breakages"
524     // For the following APIs there are overloads for both
525     // const char * and const char8_t *.
526     // A u8"string literal" has one type or the other
527     // depending on C++ version and compiler settings.
528     StringPiece abc(u8"abc");
529     assertEquals("abc.length", 3, abc.length());
530     assertEquals("abc", "\x61\x62\x63", abc.data());
531 
532     StringPiece abc3(u8"abcdef", 3);
533     assertEquals("abc3.length", 3, abc3.length());
534     assertEquals("abc3[0]", 0x61, abc3.data()[0]);
535     assertEquals("abc3[1]", 0x62, abc3.data()[1]);
536     assertEquals("abc3[2]", 0x63, abc3.data()[2]);
537 
538     StringPiece uvw("q");
539     uvw.set(u8"uvw");
540     assertEquals("uvw.length", 3, uvw.length());
541     assertEquals("uvw", "\x75\x76\x77", uvw.data());
542 
543     StringPiece xyz("r");
544     xyz.set(u8"xyzXYZ", 3);
545     assertEquals("xyz.length", 3, xyz.length());
546     assertEquals("xyz[0]", 0x78, xyz.data()[0]);
547     assertEquals("xyz[1]", 0x79, xyz.data()[1]);
548     assertEquals("xyz[2]", 0x7a, xyz.data()[2]);
549 
550     StringPiece null(nullptr);
551     assertTrue("null is empty", null.empty());
552     assertTrue("null is null", null.data() == nullptr);
553 
554 #ifdef __cpp_lib_char8_t
555     std::u8string_view u8sv(u8"sv");  // C++20
556     StringPiece u8svsp(u8sv);
557     assertEquals("u8svsp.length", 2, u8svsp.length());
558     assertEquals("u8svsp", "\x73\x76", u8svsp.data());
559 
560     std::u8string u8str(u8"str");  // C++20
561     StringPiece u8strsp(u8str);
562     assertEquals("u8strsp.length", 3, u8strsp.length());
563     assertEquals("u8strsp", "\x73\x74\x72", u8strsp.data());
564 #endif  // __cpp_lib_char8_t
565 }
566 
567 // Verify that ByteSink is subclassable and Flush() overridable.
568 class SimpleByteSink : public ByteSink {
569 public:
SimpleByteSink(char * outbuf)570     SimpleByteSink(char *outbuf) : fOutbuf(outbuf), fLength(0) {}
Append(const char * bytes,int32_t n)571     virtual void Append(const char *bytes, int32_t n) override {
572         if(fOutbuf != bytes) {
573             memcpy(fOutbuf, bytes, n);
574         }
575         fOutbuf += n;
576         fLength += n;
577     }
Flush()578     virtual void Flush() override { Append("z", 1); }
length()579     int32_t length() { return fLength; }
580 private:
581     char *fOutbuf;
582     int32_t fLength;
583 };
584 
585 // Test the ByteSink base class.
586 void
TestByteSink()587 StringTest::TestByteSink() {
588     char buffer[20];
589     buffer[4] = '!';
590     SimpleByteSink sink(buffer);
591     sink.Append("abc", 3);
592     sink.Flush();
593     if(!(sink.length() == 4 && 0 == memcmp("abcz", buffer, 4) && buffer[4] == '!')) {
594         errln("ByteSink (SimpleByteSink) did not Append() or Flush() as expected");
595         return;
596     }
597     char scratch[20];
598     int32_t capacity = -1;
599     char *dest = sink.GetAppendBuffer(0, 50, scratch, (int32_t)sizeof(scratch), &capacity);
600     if(dest != nullptr || capacity != 0) {
601         errln("ByteSink.GetAppendBuffer(min_capacity<1) did not properly return nullptr[0]");
602         return;
603     }
604     dest = sink.GetAppendBuffer(10, 50, scratch, 9, &capacity);
605     if(dest != nullptr || capacity != 0) {
606         errln("ByteSink.GetAppendBuffer(scratch_capacity<min_capacity) did not properly return nullptr[0]");
607         return;
608     }
609     dest = sink.GetAppendBuffer(5, 50, scratch, (int32_t)sizeof(scratch), &capacity);
610     if(dest != scratch || capacity != (int32_t)sizeof(scratch)) {
611         errln("ByteSink.GetAppendBuffer() did not properly return the scratch buffer");
612     }
613 }
614 
615 void
TestCheckedArrayByteSink()616 StringTest::TestCheckedArrayByteSink() {
617     char buffer[20];  // < 26 for the test code to work
618     buffer[3] = '!';
619     CheckedArrayByteSink sink(buffer, (int32_t)sizeof(buffer));
620     sink.Append("abc", 3);
621     if(!(sink.NumberOfBytesAppended() == 3 && sink.NumberOfBytesWritten() == 3 &&
622          0 == memcmp("abc", buffer, 3) && buffer[3] == '!') &&
623          !sink.Overflowed()
624     ) {
625         errln("CheckedArrayByteSink did not Append() as expected");
626         return;
627     }
628     char scratch[10];
629     int32_t capacity = -1;
630     char *dest = sink.GetAppendBuffer(0, 50, scratch, (int32_t)sizeof(scratch), &capacity);
631     if(dest != nullptr || capacity != 0) {
632         errln("CheckedArrayByteSink.GetAppendBuffer(min_capacity<1) did not properly return nullptr[0]");
633         return;
634     }
635     dest = sink.GetAppendBuffer(10, 50, scratch, 9, &capacity);
636     if(dest != nullptr || capacity != 0) {
637         errln("CheckedArrayByteSink.GetAppendBuffer(scratch_capacity<min_capacity) did not properly return nullptr[0]");
638         return;
639     }
640     dest = sink.GetAppendBuffer(10, 50, scratch, (int32_t)sizeof(scratch), &capacity);
641     if(dest != buffer + 3 || capacity != (int32_t)sizeof(buffer) - 3) {
642         errln("CheckedArrayByteSink.GetAppendBuffer() did not properly return its own buffer");
643         return;
644     }
645     memcpy(dest, "defghijklm", 10);
646     sink.Append(dest, 10);
647     if(!(sink.NumberOfBytesAppended() == 13 && sink.NumberOfBytesWritten() == 13 &&
648          0 == memcmp("abcdefghijklm", buffer, 13) &&
649          !sink.Overflowed())
650     ) {
651         errln("CheckedArrayByteSink did not Append(its own buffer) as expected");
652         return;
653     }
654     dest = sink.GetAppendBuffer(10, 50, scratch, (int32_t)sizeof(scratch), &capacity);
655     if(dest != scratch || capacity != (int32_t)sizeof(scratch)) {
656         errln("CheckedArrayByteSink.GetAppendBuffer() did not properly return the scratch buffer");
657     }
658     memcpy(dest, "nopqrstuvw", 10);
659     sink.Append(dest, 10);
660     if(!(sink.NumberOfBytesAppended() == 23 &&
661          sink.NumberOfBytesWritten() == (int32_t)sizeof(buffer) &&
662          0 == memcmp("abcdefghijklmnopqrstuvwxyz", buffer, (int32_t)sizeof(buffer)) &&
663          sink.Overflowed())
664     ) {
665         errln("CheckedArrayByteSink did not Append(scratch buffer) as expected");
666         return;
667     }
668     sink.Reset().Append("123", 3);
669     if(!(sink.NumberOfBytesAppended() == 3 && sink.NumberOfBytesWritten() == 3 &&
670          0 == memcmp("123defghijklmnopqrstuvwxyz", buffer, (int32_t)sizeof(buffer)) &&
671          !sink.Overflowed())
672     ) {
673         errln("CheckedArrayByteSink did not Reset().Append() as expected");
674         return;
675     }
676 }
677 
678 void
TestStringByteSink()679 StringTest::TestStringByteSink() {
680     // Not much to test because only the constructors and Append()
681     // are implemented, and trivially so.
682     std::string result("abc");  // std::string
683     StringByteSink<std::string> sink(&result);
684     sink.Append("def", 3);
685     if(result != "abcdef") {
686         errln("StringByteSink did not Append() as expected");
687     }
688     StringByteSink<std::string> sink2(&result, 20);
689     if(result.capacity() < (result.length() + 20)) {
690         errln("StringByteSink should have 20 append capacity, has only %d",
691               (int)(result.capacity() - result.length()));
692     }
693     sink.Append("ghi", 3);
694     if(result != "abcdefghi") {
695         errln("StringByteSink did not Append() as expected");
696     }
697 }
698 
699 void
TestStringByteSinkAppendU8()700 StringTest::TestStringByteSinkAppendU8() {
701     // ICU-20984 "mitigate some C++20 char8_t breakages"
702     // For the following APIs there are overloads for both
703     // const char * and const char8_t *.
704     // A u8"string literal" has one type or the other
705     // depending on C++ version and compiler settings.
706     std::string result("abc");
707     StringByteSink<std::string> sink(&result);
708     sink.AppendU8("def", 3);
709     sink.AppendU8(u8"ghijkl", 4);
710     assertEquals("abcdefghij", "abcdef\x67\x68\x69\x6a", result.c_str());
711 }
712 
713 #if defined(_MSC_VER)
714 #include <vector>
715 #endif
716 
717 void
TestSTLCompatibility()718 StringTest::TestSTLCompatibility() {
719 #if defined(_MSC_VER)
720     /* Just make sure that it compiles with STL's placement new usage. */
721     std::vector<UnicodeString> myvect;
722     myvect.push_back(UnicodeString("blah"));
723 #endif
724 }
725 
726 void
TestCharString()727 StringTest::TestCharString() {
728     IcuTestErrorCode errorCode(*this, "TestCharString()");
729     char expected[400];
730     static const char longStr[] =
731         "This is a long string that is meant to cause reallocation of the internal buffer of CharString.";
732     CharString chStr(longStr, errorCode);
733     if (0 != strcmp(longStr, chStr.data()) || (int32_t)strlen(longStr) != chStr.length()) {
734         errln("CharString(longStr) failed.");
735     }
736     CharString test("Test", errorCode);
737     CharString copy(test,errorCode);
738     copy.copyFrom(chStr, errorCode);
739     if (0 != strcmp(longStr, copy.data()) || (int32_t)strlen(longStr) != copy.length()) {
740         errln("CharString.copyFrom() failed.");
741     }
742     StringPiece sp(chStr.toStringPiece());
743     sp.remove_prefix(4);
744     chStr.append(sp, errorCode).append(chStr, errorCode);
745     strcpy(expected, longStr);
746     strcat(expected, longStr+4);
747     strcat(expected, longStr);
748     strcat(expected, longStr+4);
749     if (0 != strcmp(expected, chStr.data()) || (int32_t)strlen(expected) != chStr.length()) {
750         errln("CharString(longStr).append(substring of self).append(self) failed.");
751     }
752     chStr.clear().append("abc", errorCode).append("defghij", 3, errorCode);
753     if (0 != strcmp("abcdef", chStr.data()) || 6 != chStr.length()) {
754         errln("CharString.clear().append(abc).append(defghij, 3) failed.");
755     }
756     chStr.appendInvariantChars(UNICODE_STRING_SIMPLE(
757         "This is a long string that is meant to cause reallocation of the internal buffer of CharString."),
758         errorCode);
759     strcpy(expected, "abcdef");
760     strcat(expected, longStr);
761     if (0 != strcmp(expected, chStr.data()) || (int32_t)strlen(expected) != chStr.length()) {
762         errln("CharString.appendInvariantChars(longStr) failed.");
763     }
764     int32_t appendCapacity = 0;
765     char *buffer = chStr.getAppendBuffer(5, 10, appendCapacity, errorCode);
766     if (errorCode.isFailure()) {
767         return;
768     }
769     memcpy(buffer, "*****", 5);
770     chStr.append(buffer, 5, errorCode);
771     chStr.truncate(chStr.length()-3);
772     strcat(expected, "**");
773     if (0 != strcmp(expected, chStr.data()) || (int32_t)strlen(expected) != chStr.length()) {
774         errln("CharString.getAppendBuffer().append(**) failed.");
775     }
776 
777     UErrorCode ec = U_ZERO_ERROR;
778     chStr.clear();
779     chStr.appendInvariantChars(UnicodeString("The '@' character is not invariant."), ec);
780     if (ec != U_INVARIANT_CONVERSION_ERROR) {
781         errln("%s:%d expected U_INVARIANT_CONVERSION_ERROR, got %s", __FILE__, __LINE__, u_errorName(ec));
782     }
783     if (chStr.length() != 0) {
784         errln("%s:%d expected length() = 0, got %d", __FILE__, __LINE__, chStr.length());
785     }
786 
787     {
788         CharString s1("Short string", errorCode);
789         CharString s2(std::move(s1));
790         assertEquals("s2 should have content of s1", "Short string", s2.data());
791         CharString s3("Dummy", errorCode);
792         s3 = std::move(s2);
793         assertEquals("s3 should have content of s2", "Short string", s3.data());
794     }
795 
796     {
797         CharString s1("Long string over 40 characters to trigger heap allocation", errorCode);
798         CharString s2(std::move(s1));
799         assertEquals("s2 should have content of s1",
800                 "Long string over 40 characters to trigger heap allocation",
801                 s2.data());
802         CharString s3("Dummy string with over 40 characters to trigger heap allocation", errorCode);
803         s3 = std::move(s2);
804         assertEquals("s3 should have content of s2",
805                 "Long string over 40 characters to trigger heap allocation",
806                 s3.data());
807     }
808 
809     {
810         // extract()
811         errorCode.reset();
812         CharString s("abc", errorCode);
813         char buffer[10];
814 
815         s.extract(buffer, 10, errorCode);
816         assertEquals("abc.extract(10) success", U_ZERO_ERROR, errorCode.get());
817         assertEquals("abc.extract(10) output", "abc", buffer);
818 
819         strcpy(buffer, "012345");
820         s.extract(buffer, 3, errorCode);
821         assertEquals("abc.extract(3) not terminated",
822                      U_STRING_NOT_TERMINATED_WARNING, errorCode.reset());
823         assertEquals("abc.extract(3) output", "abc345", buffer);
824 
825         strcpy(buffer, "012345");
826         s.extract(buffer, 2, errorCode);
827         assertEquals("abc.extract(2) overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset());
828     }
829 }
830 
831 void
TestCStr()832 StringTest::TestCStr() {
833     const char *cs = "This is a test string.";
834     UnicodeString us(cs);
835     if (0 != strcmp(CStr(us)(), cs)) {
836         errln("%s:%d CStr(s)() failed. Expected \"%s\", got \"%s\"", __FILE__, __LINE__, cs, CStr(us)());
837     }
838 }
839 
TestCharStrAppendNumber()840 void StringTest::TestCharStrAppendNumber() {
841     IcuTestErrorCode errorCode(*this, "TestCharStrAppendNumber()");
842 
843     CharString testString;
844     testString.appendNumber(1, errorCode);
845     assertEquals("TestAppendNumber 1", "1", testString.data());
846 
847     testString.clear();
848     testString.appendNumber(-1, errorCode);
849     assertEquals("TestAppendNumber -1", "-1", testString.data());
850 
851     testString.clear();
852     testString.appendNumber(12345, errorCode);
853     assertEquals("TestAppendNumber 12345", "12345", testString.data());
854     testString.appendNumber(123, errorCode);
855     assertEquals("TestAppendNumber 12345 and then 123", "12345123", testString.data());
856 
857     testString.clear();
858     testString.appendNumber(std::numeric_limits<int32_t>::max(), errorCode);
859     assertEquals("TestAppendNumber when appending the biggest int32", "2147483647", testString.data());
860 
861     testString.clear();
862     testString.appendNumber(std::numeric_limits<int32_t>::min(), errorCode);
863     assertEquals("TestAppendNumber when appending the smallest int32", "-2147483648", testString.data());
864 
865     testString.clear();
866     testString.appendNumber(0, errorCode);
867     assertEquals("TestAppendNumber when appending zero", "0", testString.data());
868 }
869 
870 void
Testctou()871 StringTest::Testctou() {
872   const char *cs = "Fa\\u0127mu";
873   UnicodeString u = ctou(cs);
874   assertEquals("Testing unescape@0", (int32_t)0x0046, u.charAt(0));
875   assertEquals("Testing unescape@2", (int32_t)295, u.charAt(2));
876 }
877