// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 
8 #include "unicode/utf16.h"
9 #include "putilimp.h"
10 #include "intltest.h"
11 #include "formatted_string_builder.h"
12 #include "formattedval_impl.h"
13 #include "unicode/unum.h"
14 
15 
16 class FormattedStringBuilderTest : public IntlTest {
17   public:
18     void testInsertAppendUnicodeString();
19     void testSplice();
20     void testInsertAppendCodePoint();
21     void testCopy();
22     void testFields();
23     void testUnlimitedCapacity();
24     void testCodePoints();
25     void testInsertOverflow();
26 
27     void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0) override;
28 
29   private:
30     void assertEqualsImpl(const UnicodeString &a, const FormattedStringBuilder &b);
31 };
32 
// Sample inputs shared by the tests below: the empty string, ASCII text of various lengths,
// and text containing supplementary-plane code points (each is a surrogate pair in UTF-16).
//
// NOTE(review): in this copy of the file the non-ASCII literals had been mangled into pairs
// of U+FFFD. They are restored here as \U escapes so the source no longer depends on the
// file's encoding; each mangled pair is assumed to have been one supplementary code point.
// Confirm the exact characters against upstream.
static const char16_t *EXAMPLE_STRINGS[] = {
        u"",
        u"xyz",
        u"The quick brown fox jumps over the lazy dog",
        u"\U0001F601",
        u"mixed \U0001F607 and ASCII",
        u"with combining characters like \U0001F1E6\U0001F1E7\U0001F1E8\U0001F1E9",
        u"A very very very very very very very very very very long string to force heap"};
41 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)42 void FormattedStringBuilderTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char *) {
43     if (exec) {
44         logln("TestSuite FormattedStringBuilderTest: ");
45     }
46     TESTCASE_AUTO_BEGIN;
47         TESTCASE_AUTO(testInsertAppendUnicodeString);
48         TESTCASE_AUTO(testSplice);
49         TESTCASE_AUTO(testInsertAppendCodePoint);
50         TESTCASE_AUTO(testCopy);
51         TESTCASE_AUTO(testFields);
52         TESTCASE_AUTO(testUnlimitedCapacity);
53         TESTCASE_AUTO(testCodePoints);
54         TESTCASE_AUTO(testInsertOverflow);
55     TESTCASE_AUTO_END;
56 }
57 
testInsertAppendUnicodeString()58 void FormattedStringBuilderTest::testInsertAppendUnicodeString() {
59     UErrorCode status = U_ZERO_ERROR;
60     UnicodeString sb1;
61     FormattedStringBuilder sb2;
62     for (const char16_t* strPtr : EXAMPLE_STRINGS) {
63         UnicodeString str(strPtr);
64 
65         FormattedStringBuilder sb3;
66         sb1.append(str);
67         sb2.append(str, kUndefinedField, status);
68         assertSuccess("Appending to sb2", status);
69         sb3.append(str, kUndefinedField, status);
70         assertSuccess("Appending to sb3", status);
71         assertEqualsImpl(sb1, sb2);
72         assertEqualsImpl(str, sb3);
73 
74         UnicodeString sb4;
75         FormattedStringBuilder sb5;
76         sb4.append(u"��");
77         sb4.append(str);
78         sb4.append(u"xx");
79         sb5.append(u"��xx", kUndefinedField, status);
80         assertSuccess("Appending to sb5", status);
81         sb5.insert(2, str, kUndefinedField, status);
82         assertSuccess("Inserting into sb5", status);
83         assertEqualsImpl(sb4, sb5);
84 
85         int start = uprv_min(1, str.length());
86         int end = uprv_min(10, str.length());
87         sb4.insert(3, str, start, end - start); // UnicodeString uses length instead of end index
88         sb5.insert(3, str, start, end, kUndefinedField, status);
89         assertSuccess("Inserting into sb5 again", status);
90         assertEqualsImpl(sb4, sb5);
91 
92         UnicodeString sb4cp(sb4);
93         FormattedStringBuilder sb5cp(sb5);
94         sb4.append(sb4cp);
95         sb5.append(sb5cp, status);
96         assertSuccess("Appending again to sb5", status);
97         assertEqualsImpl(sb4, sb5);
98     }
99 }
100 
testSplice()101 void FormattedStringBuilderTest::testSplice() {
102     static const struct TestCase {
103         const char16_t* input;
104         const int32_t startThis;
105         const int32_t endThis;
106     } cases[] = {
107             { u"", 0, 0 },
108             { u"abc", 0, 0 },
109             { u"abc", 1, 1 },
110             { u"abc", 1, 2 },
111             { u"abc", 0, 2 },
112             { u"abc", 0, 3 },
113             { u"lorem ipsum dolor sit amet", 8, 8 },
114             { u"lorem ipsum dolor sit amet", 8, 11 }, // 3 chars, equal to replacement "xyz"
115             { u"lorem ipsum dolor sit amet", 8, 18 } }; // 10 chars, larger than several replacements
116 
117     UErrorCode status = U_ZERO_ERROR;
118     UnicodeString sb1;
119     FormattedStringBuilder sb2;
120     for (auto cas : cases) {
121         for (const char16_t* replacementPtr : EXAMPLE_STRINGS) {
122             UnicodeString replacement(replacementPtr);
123 
124             // Test replacement with full string
125             sb1.remove();
126             sb1.append(cas.input);
127             sb1.replace(cas.startThis, cas.endThis - cas.startThis, replacement);
128             sb2.clear();
129             sb2.append(cas.input, kUndefinedField, status);
130             sb2.splice(cas.startThis, cas.endThis, replacement, 0, replacement.length(), kUndefinedField, status);
131             assertSuccess("Splicing into sb2 first time", status);
132             assertEqualsImpl(sb1, sb2);
133 
134             // Test replacement with partial string
135             if (replacement.length() <= 2) {
136                 continue;
137             }
138             sb1.remove();
139             sb1.append(cas.input);
140             sb1.replace(cas.startThis, cas.endThis - cas.startThis, UnicodeString(replacement, 1, 2));
141             sb2.clear();
142             sb2.append(cas.input, kUndefinedField, status);
143             sb2.splice(cas.startThis, cas.endThis, replacement, 1, 3, kUndefinedField, status);
144             assertSuccess("Splicing into sb2 second time", status);
145             assertEqualsImpl(sb1, sb2);
146         }
147     }
148 }
149 
testInsertAppendCodePoint()150 void FormattedStringBuilderTest::testInsertAppendCodePoint() {
151     static const UChar32 cases[] = {
152             0, 1, 60, 127, 128, 0x7fff, 0x8000, 0xffff, 0x10000, 0x1f000, 0x10ffff};
153     UErrorCode status = U_ZERO_ERROR;
154     UnicodeString sb1;
155     FormattedStringBuilder sb2;
156     for (UChar32 cas : cases) {
157         FormattedStringBuilder sb3;
158         sb1.append(cas);
159         sb2.appendCodePoint(cas, kUndefinedField, status);
160         assertSuccess("Appending to sb2", status);
161         sb3.appendCodePoint(cas, kUndefinedField, status);
162         assertSuccess("Appending to sb3", status);
163         assertEqualsImpl(sb1, sb2);
164         assertEquals("Length of sb3", U16_LENGTH(cas), sb3.length());
165         assertEquals("Code point count of sb3", 1, sb3.codePointCount());
166         assertEquals(
167                 "First code unit in sb3",
168                 !U_IS_SUPPLEMENTARY(cas) ? (char16_t) cas : U16_LEAD(cas),
169                 sb3.charAt(0));
170 
171         UnicodeString sb4;
172         FormattedStringBuilder sb5;
173         sb4.append(u"��xx");
174         sb4.insert(2, cas);
175         sb5.append(u"��xx", kUndefinedField, status);
176         assertSuccess("Appending to sb5", status);
177         sb5.insertCodePoint(2, cas, kUndefinedField, status);
178         assertSuccess("Inserting into sb5", status);
179         assertEqualsImpl(sb4, sb5);
180 
181         UnicodeString sb6;
182         FormattedStringBuilder sb7;
183         sb6.append(cas);
184         if (U_IS_SUPPLEMENTARY(cas)) {
185             sb7.appendChar16(U16_TRAIL(cas), kUndefinedField, status);
186             sb7.insertChar16(0, U16_LEAD(cas), kUndefinedField, status);
187         } else {
188             sb7.insertChar16(0, cas, kUndefinedField, status);
189         }
190         assertSuccess("Insert/append into sb7", status);
191         assertEqualsImpl(sb6, sb7);
192     }
193 }
194 
testCopy()195 void FormattedStringBuilderTest::testCopy() {
196     UErrorCode status = U_ZERO_ERROR;
197     for (UnicodeString str : EXAMPLE_STRINGS) {
198         FormattedStringBuilder sb1;
199         sb1.append(str, kUndefinedField, status);
200         assertSuccess("Appending to sb1 first time", status);
201         FormattedStringBuilder sb2(sb1);
202         assertTrue("Content should equal itself", sb1.contentEquals(sb2));
203 
204         sb1.append("12345", kUndefinedField, status);
205         assertSuccess("Appending to sb1 second time", status);
206         assertFalse("Content should no longer equal itself", sb1.contentEquals(sb2));
207     }
208 }
209 
testFields()210 void FormattedStringBuilderTest::testFields() {
211     typedef FormattedStringBuilder::Field Field;
212     UErrorCode status = U_ZERO_ERROR;
213     // Note: This is a C++11 for loop that calls the UnicodeString constructor on each iteration.
214     for (UnicodeString str : EXAMPLE_STRINGS) {
215         FormattedValueStringBuilderImpl sbi(kUndefinedField);
216         FormattedStringBuilder& sb = sbi.getStringRef();
217         sb.append(str, kUndefinedField, status);
218         assertSuccess("Appending to sb", status);
219         sb.append(str, {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD}, status);
220         assertSuccess("Appending to sb", status);
221         assertEquals("Reference string copied twice", str.length() * 2, sb.length());
222         for (int32_t i = 0; i < str.length(); i++) {
223             assertEquals("Null field first",
224                 kUndefinedField.bits, sb.fieldAt(i).bits);
225             assertEquals("Currency field second",
226                 Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD).bits,
227                 sb.fieldAt(i + str.length()).bits);
228         }
229 
230         // Very basic FieldPosition test. More robust tests happen in NumberFormatTest.
231         // Let NumberFormatTest also take care of FieldPositionIterator material.
232         FieldPosition fp(UNUM_CURRENCY_FIELD);
233         sbi.nextFieldPosition(fp, status);
234         assertSuccess("Populating the FieldPosition", status);
235         assertEquals("Currency start position", str.length(), fp.getBeginIndex());
236         assertEquals("Currency end position", str.length() * 2, fp.getEndIndex());
237 
238         if (str.length() > 0) {
239             sb.insertCodePoint(2, 100, {UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD}, status);
240             assertSuccess("Inserting code point into sb", status);
241             assertEquals("New length", str.length() * 2 + 1, sb.length());
242             assertEquals("Integer field",
243                 Field(UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD).bits,
244                 sb.fieldAt(2).bits);
245         }
246 
247         FormattedStringBuilder old(sb);
248         sb.append(old, status);
249         assertSuccess("Appending to myself", status);
250         int32_t numNull = 0;
251         int32_t numCurr = 0;
252         int32_t numInt = 0;
253         for (int32_t i = 0; i < sb.length(); i++) {
254             auto field = sb.fieldAt(i);
255             assertEquals("Field should equal location in old",
256                 old.fieldAt(i % old.length()).bits, field.bits);
257             if (field == kUndefinedField) {
258                 numNull++;
259             } else if (field == Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD)) {
260                 numCurr++;
261             } else if (field == Field(UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD)) {
262                 numInt++;
263             } else {
264                 errln("Encountered unknown field");
265             }
266         }
267         assertEquals("Number of null fields", str.length() * 2, numNull);
268         assertEquals("Number of currency fields", numNull, numCurr);
269         assertEquals("Number of integer fields", str.length() > 0 ? 2 : 0, numInt);
270     }
271 }
272 
testUnlimitedCapacity()273 void FormattedStringBuilderTest::testUnlimitedCapacity() {
274     UErrorCode status = U_ZERO_ERROR;
275     FormattedStringBuilder builder;
276     // The builder should never fail upon repeated appends.
277     for (int i = 0; i < 1000; i++) {
278         UnicodeString message("Iteration #");
279         message += Int64ToUnicodeString(i);
280         assertEquals(message, builder.length(), i);
281         builder.appendCodePoint(u'x', kUndefinedField, status);
282         assertSuccess(message, status);
283         assertEquals(message, builder.length(), i + 1);
284     }
285 }
286 
testCodePoints()287 void FormattedStringBuilderTest::testCodePoints() {
288     UErrorCode status = U_ZERO_ERROR;
289     FormattedStringBuilder nsb;
290     assertEquals("First is -1 on empty string", -1, nsb.getFirstCodePoint());
291     assertEquals("Last is -1 on empty string", -1, nsb.getLastCodePoint());
292     assertEquals("Length is 0 on empty string", 0, nsb.codePointCount());
293 
294     nsb.append(u"q", kUndefinedField, status);
295     assertSuccess("Spot 1", status);
296     assertEquals("First is q", u'q', nsb.getFirstCodePoint());
297     assertEquals("Last is q", u'q', nsb.getLastCodePoint());
298     assertEquals("0th is q", u'q', nsb.codePointAt(0));
299     assertEquals("Before 1st is q", u'q', nsb.codePointBefore(1));
300     assertEquals("Code point count is 1", 1, nsb.codePointCount());
301 
302     // �� is two char16s
303     nsb.append(u"��", kUndefinedField, status);
304     assertSuccess("Spot 2" ,status);
305     assertEquals("First is still q", u'q', nsb.getFirstCodePoint());
306     assertEquals("Last is space ship", 128640, nsb.getLastCodePoint());
307     assertEquals("1st is space ship", 128640, nsb.codePointAt(1));
308     assertEquals("Before 1st is q", u'q', nsb.codePointBefore(1));
309     assertEquals("Before 3rd is space ship", 128640, nsb.codePointBefore(3));
310     assertEquals("Code point count is 2", 2, nsb.codePointCount());
311 }
312 
testInsertOverflow()313 void FormattedStringBuilderTest::testInsertOverflow() {
314     if (quick || logKnownIssue("22047", "FormattedStringBuilder with long length crashes in toUnicodeString in CI Linux tests")) return;
315 
316     // Setup the test fixture in sb, sb2, ustr.
317     UErrorCode status = U_ZERO_ERROR;
318     FormattedStringBuilder sb;
319     int32_t data_length = INT32_MAX / 2;
320     infoln("# log: setup start, data_length %d", data_length);
321     UnicodeString ustr(data_length, u'a', data_length); // set ustr to length 1073741823
322     sb.append(ustr, kUndefinedField, status); // set sb to length 1073741823
323     infoln("# log: setup 1 done, ustr len %d, sb len %d, status %s", ustr.length(), sb.length(), u_errorName(status));
324     assertSuccess("Setup the first FormattedStringBuilder", status);
325 
326     FormattedStringBuilder sb2;
327     sb2.append(ustr, kUndefinedField, status);
328     sb2.insert(0, ustr, 0, data_length / 2, kUndefinedField, status); // set sb2 to length 1610612734
329     sb2.writeTerminator(status);
330     infoln("# log: setup 2 done, sb2 len %d, status %s", sb2.length(), u_errorName(status));
331     assertSuccess("Setup the second FormattedStringBuilder", status);
332 
333     // The following should set ustr to have length 1610612734, but is currently crashing
334     // in the CI test "C: Linux Clang Exhaustive Tests (Ubuntu 18.04)", though not
335     // crashing when running exhaustive tests locally on e.g. macOS 12.4 on Intel).
336     // Hence the logKnownIssue skip above.
337     ustr = sb2.toUnicodeString();
338     // Note that trying the following alternative approach which sets ustr to length 1073741871
339     // (still long enough to test the expected behavior for the remainder of the code here)
340     // also crashed in "C: Linux Clang Exhaustive Tests (Ubuntu 18.04)":
341     // ustr.append(u"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",-1);
342 
343     // Complete setting up the test fixture in sb, sb2 and ustr.
344     infoln("# log: setup 3 done, ustr len %d", ustr.length());
345 
346     // Test splice() of the second UnicodeString
347     sb.splice(0, 1, ustr, 1, ustr.length(),
348               kUndefinedField, status);
349     infoln("# log: sb.splice 1 done, sb len %d, status %s", sb.length(), u_errorName(status));
350     assertEquals(
351         "splice() long text should not crash but return U_INPUT_TOO_LONG_ERROR",
352         U_INPUT_TOO_LONG_ERROR, status);
353 
354     // Test sb.insert() of the first FormattedStringBuilder with the second one.
355     status = U_ZERO_ERROR;
356     sb.insert(0, sb2, status);
357     infoln("# log: sb.insert 1 done, sb len %d, status %s", sb.length(), u_errorName(status));
358     assertEquals(
359         "insert() long FormattedStringBuilder should not crash but return "
360         "U_INPUT_TOO_LONG_ERROR", U_INPUT_TOO_LONG_ERROR, status);
361 
362     // Test sb.insert() of the first FormattedStringBuilder with UnicodeString.
363     status = U_ZERO_ERROR;
364     sb.insert(0, ustr, 0, ustr.length(), kUndefinedField, status);
365     infoln("# log: sb.insert 2 done, sb len %d, status %s", sb.length(), u_errorName(status));
366     assertEquals(
367         "insert() long UnicodeString should not crash but return "
368         "U_INPUT_TOO_LONG_ERROR", U_INPUT_TOO_LONG_ERROR, status);
369 }
370 
assertEqualsImpl(const UnicodeString & a,const FormattedStringBuilder & b)371 void FormattedStringBuilderTest::assertEqualsImpl(const UnicodeString &a, const FormattedStringBuilder &b) {
372     // TODO: Why won't this compile without the IntlTest:: qualifier?
373     IntlTest::assertEquals("Lengths should be the same", a.length(), b.length());
374     IntlTest::assertEquals("Code point counts should be the same", a.countChar32(), b.codePointCount());
375 
376     if (a.length() != b.length()) {
377         return;
378     }
379 
380     for (int32_t i = 0; i < a.length(); i++) {
381         IntlTest::assertEquals(
382                 UnicodeString(u"Char at position ") + Int64ToUnicodeString(i) +
383                 UnicodeString(u" in \"") + a + UnicodeString("\" versus \"") +
384                 b.toUnicodeString() + UnicodeString("\""), a.charAt(i), b.charAt(i));
385     }
386 }
387 
388 
createFormattedStringBuilderTest()389 extern IntlTest *createFormattedStringBuilderTest() {
390     return new FormattedStringBuilderTest();
391 }
392 
393 #endif /* #if !UCONFIG_NO_FORMATTING */
394