1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include "unicode/utypes.h"
5
6 #if !UCONFIG_NO_FORMATTING
7
8 #include "unicode/utf16.h"
9 #include "putilimp.h"
10 #include "intltest.h"
11 #include "formatted_string_builder.h"
12 #include "formattedval_impl.h"
13 #include "unicode/unum.h"
14
15
16 class FormattedStringBuilderTest : public IntlTest {
17 public:
18 void testInsertAppendUnicodeString();
19 void testSplice();
20 void testInsertAppendCodePoint();
21 void testCopy();
22 void testFields();
23 void testUnlimitedCapacity();
24 void testCodePoints();
25 void testInsertOverflow();
26
27 void runIndexedTest(int32_t index, UBool exec, const char *&name, char *par = 0) override;
28
29 private:
30 void assertEqualsImpl(const UnicodeString &a, const FormattedStringBuilder &b);
31 };
32
// Sample inputs shared by several tests in this file.
//
// The set mixes an empty string, plain ASCII, and strings containing
// supplementary code points (each of which occupies two char16_t units), plus
// one long string. Supplementary characters are written with \U escapes so the
// literals do not depend on the encoding of this source file.
static const char16_t *EXAMPLE_STRINGS[] = {
    u"",
    u"xyz",
    u"The quick brown fox jumps over the lazy dog",
    u"\U0001F601",                    // a single supplementary code point
    u"mixed \U0001F607 and ASCII",    // supplementary code point surrounded by ASCII
    // four consecutive supplementary code points (regional indicator symbols)
    u"with combining characters like \U0001F1E6\U0001F1E7\U0001F1E8\U0001F1E9",
    u"A very very very very very very very very very very long string to force heap"};
41
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)42 void FormattedStringBuilderTest::runIndexedTest(int32_t index, UBool exec, const char *&name, char *) {
43 if (exec) {
44 logln("TestSuite FormattedStringBuilderTest: ");
45 }
46 TESTCASE_AUTO_BEGIN;
47 TESTCASE_AUTO(testInsertAppendUnicodeString);
48 TESTCASE_AUTO(testSplice);
49 TESTCASE_AUTO(testInsertAppendCodePoint);
50 TESTCASE_AUTO(testCopy);
51 TESTCASE_AUTO(testFields);
52 TESTCASE_AUTO(testUnlimitedCapacity);
53 TESTCASE_AUTO(testCodePoints);
54 TESTCASE_AUTO(testInsertOverflow);
55 TESTCASE_AUTO_END;
56 }
57
testInsertAppendUnicodeString()58 void FormattedStringBuilderTest::testInsertAppendUnicodeString() {
59 UErrorCode status = U_ZERO_ERROR;
60 UnicodeString sb1;
61 FormattedStringBuilder sb2;
62 for (const char16_t* strPtr : EXAMPLE_STRINGS) {
63 UnicodeString str(strPtr);
64
65 FormattedStringBuilder sb3;
66 sb1.append(str);
67 sb2.append(str, kUndefinedField, status);
68 assertSuccess("Appending to sb2", status);
69 sb3.append(str, kUndefinedField, status);
70 assertSuccess("Appending to sb3", status);
71 assertEqualsImpl(sb1, sb2);
72 assertEqualsImpl(str, sb3);
73
74 UnicodeString sb4;
75 FormattedStringBuilder sb5;
76 sb4.append(u"");
77 sb4.append(str);
78 sb4.append(u"xx");
79 sb5.append(u"xx", kUndefinedField, status);
80 assertSuccess("Appending to sb5", status);
81 sb5.insert(2, str, kUndefinedField, status);
82 assertSuccess("Inserting into sb5", status);
83 assertEqualsImpl(sb4, sb5);
84
85 int start = uprv_min(1, str.length());
86 int end = uprv_min(10, str.length());
87 sb4.insert(3, str, start, end - start); // UnicodeString uses length instead of end index
88 sb5.insert(3, str, start, end, kUndefinedField, status);
89 assertSuccess("Inserting into sb5 again", status);
90 assertEqualsImpl(sb4, sb5);
91
92 UnicodeString sb4cp(sb4);
93 FormattedStringBuilder sb5cp(sb5);
94 sb4.append(sb4cp);
95 sb5.append(sb5cp, status);
96 assertSuccess("Appending again to sb5", status);
97 assertEqualsImpl(sb4, sb5);
98 }
99 }
100
testSplice()101 void FormattedStringBuilderTest::testSplice() {
102 static const struct TestCase {
103 const char16_t* input;
104 const int32_t startThis;
105 const int32_t endThis;
106 } cases[] = {
107 { u"", 0, 0 },
108 { u"abc", 0, 0 },
109 { u"abc", 1, 1 },
110 { u"abc", 1, 2 },
111 { u"abc", 0, 2 },
112 { u"abc", 0, 3 },
113 { u"lorem ipsum dolor sit amet", 8, 8 },
114 { u"lorem ipsum dolor sit amet", 8, 11 }, // 3 chars, equal to replacement "xyz"
115 { u"lorem ipsum dolor sit amet", 8, 18 } }; // 10 chars, larger than several replacements
116
117 UErrorCode status = U_ZERO_ERROR;
118 UnicodeString sb1;
119 FormattedStringBuilder sb2;
120 for (auto cas : cases) {
121 for (const char16_t* replacementPtr : EXAMPLE_STRINGS) {
122 UnicodeString replacement(replacementPtr);
123
124 // Test replacement with full string
125 sb1.remove();
126 sb1.append(cas.input);
127 sb1.replace(cas.startThis, cas.endThis - cas.startThis, replacement);
128 sb2.clear();
129 sb2.append(cas.input, kUndefinedField, status);
130 sb2.splice(cas.startThis, cas.endThis, replacement, 0, replacement.length(), kUndefinedField, status);
131 assertSuccess("Splicing into sb2 first time", status);
132 assertEqualsImpl(sb1, sb2);
133
134 // Test replacement with partial string
135 if (replacement.length() <= 2) {
136 continue;
137 }
138 sb1.remove();
139 sb1.append(cas.input);
140 sb1.replace(cas.startThis, cas.endThis - cas.startThis, UnicodeString(replacement, 1, 2));
141 sb2.clear();
142 sb2.append(cas.input, kUndefinedField, status);
143 sb2.splice(cas.startThis, cas.endThis, replacement, 1, 3, kUndefinedField, status);
144 assertSuccess("Splicing into sb2 second time", status);
145 assertEqualsImpl(sb1, sb2);
146 }
147 }
148 }
149
testInsertAppendCodePoint()150 void FormattedStringBuilderTest::testInsertAppendCodePoint() {
151 static const UChar32 cases[] = {
152 0, 1, 60, 127, 128, 0x7fff, 0x8000, 0xffff, 0x10000, 0x1f000, 0x10ffff};
153 UErrorCode status = U_ZERO_ERROR;
154 UnicodeString sb1;
155 FormattedStringBuilder sb2;
156 for (UChar32 cas : cases) {
157 FormattedStringBuilder sb3;
158 sb1.append(cas);
159 sb2.appendCodePoint(cas, kUndefinedField, status);
160 assertSuccess("Appending to sb2", status);
161 sb3.appendCodePoint(cas, kUndefinedField, status);
162 assertSuccess("Appending to sb3", status);
163 assertEqualsImpl(sb1, sb2);
164 assertEquals("Length of sb3", U16_LENGTH(cas), sb3.length());
165 assertEquals("Code point count of sb3", 1, sb3.codePointCount());
166 assertEquals(
167 "First code unit in sb3",
168 !U_IS_SUPPLEMENTARY(cas) ? (char16_t) cas : U16_LEAD(cas),
169 sb3.charAt(0));
170
171 UnicodeString sb4;
172 FormattedStringBuilder sb5;
173 sb4.append(u"xx");
174 sb4.insert(2, cas);
175 sb5.append(u"xx", kUndefinedField, status);
176 assertSuccess("Appending to sb5", status);
177 sb5.insertCodePoint(2, cas, kUndefinedField, status);
178 assertSuccess("Inserting into sb5", status);
179 assertEqualsImpl(sb4, sb5);
180
181 UnicodeString sb6;
182 FormattedStringBuilder sb7;
183 sb6.append(cas);
184 if (U_IS_SUPPLEMENTARY(cas)) {
185 sb7.appendChar16(U16_TRAIL(cas), kUndefinedField, status);
186 sb7.insertChar16(0, U16_LEAD(cas), kUndefinedField, status);
187 } else {
188 sb7.insertChar16(0, cas, kUndefinedField, status);
189 }
190 assertSuccess("Insert/append into sb7", status);
191 assertEqualsImpl(sb6, sb7);
192 }
193 }
194
testCopy()195 void FormattedStringBuilderTest::testCopy() {
196 UErrorCode status = U_ZERO_ERROR;
197 for (UnicodeString str : EXAMPLE_STRINGS) {
198 FormattedStringBuilder sb1;
199 sb1.append(str, kUndefinedField, status);
200 assertSuccess("Appending to sb1 first time", status);
201 FormattedStringBuilder sb2(sb1);
202 assertTrue("Content should equal itself", sb1.contentEquals(sb2));
203
204 sb1.append("12345", kUndefinedField, status);
205 assertSuccess("Appending to sb1 second time", status);
206 assertFalse("Content should no longer equal itself", sb1.contentEquals(sb2));
207 }
208 }
209
testFields()210 void FormattedStringBuilderTest::testFields() {
211 typedef FormattedStringBuilder::Field Field;
212 UErrorCode status = U_ZERO_ERROR;
213 // Note: This is a C++11 for loop that calls the UnicodeString constructor on each iteration.
214 for (UnicodeString str : EXAMPLE_STRINGS) {
215 FormattedValueStringBuilderImpl sbi(kUndefinedField);
216 FormattedStringBuilder& sb = sbi.getStringRef();
217 sb.append(str, kUndefinedField, status);
218 assertSuccess("Appending to sb", status);
219 sb.append(str, {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD}, status);
220 assertSuccess("Appending to sb", status);
221 assertEquals("Reference string copied twice", str.length() * 2, sb.length());
222 for (int32_t i = 0; i < str.length(); i++) {
223 assertEquals("Null field first",
224 kUndefinedField.bits, sb.fieldAt(i).bits);
225 assertEquals("Currency field second",
226 Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD).bits,
227 sb.fieldAt(i + str.length()).bits);
228 }
229
230 // Very basic FieldPosition test. More robust tests happen in NumberFormatTest.
231 // Let NumberFormatTest also take care of FieldPositionIterator material.
232 FieldPosition fp(UNUM_CURRENCY_FIELD);
233 sbi.nextFieldPosition(fp, status);
234 assertSuccess("Populating the FieldPosition", status);
235 assertEquals("Currency start position", str.length(), fp.getBeginIndex());
236 assertEquals("Currency end position", str.length() * 2, fp.getEndIndex());
237
238 if (str.length() > 0) {
239 sb.insertCodePoint(2, 100, {UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD}, status);
240 assertSuccess("Inserting code point into sb", status);
241 assertEquals("New length", str.length() * 2 + 1, sb.length());
242 assertEquals("Integer field",
243 Field(UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD).bits,
244 sb.fieldAt(2).bits);
245 }
246
247 FormattedStringBuilder old(sb);
248 sb.append(old, status);
249 assertSuccess("Appending to myself", status);
250 int32_t numNull = 0;
251 int32_t numCurr = 0;
252 int32_t numInt = 0;
253 for (int32_t i = 0; i < sb.length(); i++) {
254 auto field = sb.fieldAt(i);
255 assertEquals("Field should equal location in old",
256 old.fieldAt(i % old.length()).bits, field.bits);
257 if (field == kUndefinedField) {
258 numNull++;
259 } else if (field == Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD)) {
260 numCurr++;
261 } else if (field == Field(UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD)) {
262 numInt++;
263 } else {
264 errln("Encountered unknown field");
265 }
266 }
267 assertEquals("Number of null fields", str.length() * 2, numNull);
268 assertEquals("Number of currency fields", numNull, numCurr);
269 assertEquals("Number of integer fields", str.length() > 0 ? 2 : 0, numInt);
270 }
271 }
272
testUnlimitedCapacity()273 void FormattedStringBuilderTest::testUnlimitedCapacity() {
274 UErrorCode status = U_ZERO_ERROR;
275 FormattedStringBuilder builder;
276 // The builder should never fail upon repeated appends.
277 for (int i = 0; i < 1000; i++) {
278 UnicodeString message("Iteration #");
279 message += Int64ToUnicodeString(i);
280 assertEquals(message, builder.length(), i);
281 builder.appendCodePoint(u'x', kUndefinedField, status);
282 assertSuccess(message, status);
283 assertEquals(message, builder.length(), i + 1);
284 }
285 }
286
testCodePoints()287 void FormattedStringBuilderTest::testCodePoints() {
288 UErrorCode status = U_ZERO_ERROR;
289 FormattedStringBuilder nsb;
290 assertEquals("First is -1 on empty string", -1, nsb.getFirstCodePoint());
291 assertEquals("Last is -1 on empty string", -1, nsb.getLastCodePoint());
292 assertEquals("Length is 0 on empty string", 0, nsb.codePointCount());
293
294 nsb.append(u"q", kUndefinedField, status);
295 assertSuccess("Spot 1", status);
296 assertEquals("First is q", u'q', nsb.getFirstCodePoint());
297 assertEquals("Last is q", u'q', nsb.getLastCodePoint());
298 assertEquals("0th is q", u'q', nsb.codePointAt(0));
299 assertEquals("Before 1st is q", u'q', nsb.codePointBefore(1));
300 assertEquals("Code point count is 1", 1, nsb.codePointCount());
301
302 // is two char16s
303 nsb.append(u"", kUndefinedField, status);
304 assertSuccess("Spot 2" ,status);
305 assertEquals("First is still q", u'q', nsb.getFirstCodePoint());
306 assertEquals("Last is space ship", 128640, nsb.getLastCodePoint());
307 assertEquals("1st is space ship", 128640, nsb.codePointAt(1));
308 assertEquals("Before 1st is q", u'q', nsb.codePointBefore(1));
309 assertEquals("Before 3rd is space ship", 128640, nsb.codePointBefore(3));
310 assertEquals("Code point count is 2", 2, nsb.codePointCount());
311 }
312
testInsertOverflow()313 void FormattedStringBuilderTest::testInsertOverflow() {
314 if (quick || logKnownIssue("22047", "FormattedStringBuilder with long length crashes in toUnicodeString in CI Linux tests")) return;
315
316 // Setup the test fixture in sb, sb2, ustr.
317 UErrorCode status = U_ZERO_ERROR;
318 FormattedStringBuilder sb;
319 int32_t data_length = INT32_MAX / 2;
320 infoln("# log: setup start, data_length %d", data_length);
321 UnicodeString ustr(data_length, u'a', data_length); // set ustr to length 1073741823
322 sb.append(ustr, kUndefinedField, status); // set sb to length 1073741823
323 infoln("# log: setup 1 done, ustr len %d, sb len %d, status %s", ustr.length(), sb.length(), u_errorName(status));
324 assertSuccess("Setup the first FormattedStringBuilder", status);
325
326 FormattedStringBuilder sb2;
327 sb2.append(ustr, kUndefinedField, status);
328 sb2.insert(0, ustr, 0, data_length / 2, kUndefinedField, status); // set sb2 to length 1610612734
329 sb2.writeTerminator(status);
330 infoln("# log: setup 2 done, sb2 len %d, status %s", sb2.length(), u_errorName(status));
331 assertSuccess("Setup the second FormattedStringBuilder", status);
332
333 // The following should set ustr to have length 1610612734, but is currently crashing
334 // in the CI test "C: Linux Clang Exhaustive Tests (Ubuntu 18.04)", though not
335 // crashing when running exhaustive tests locally on e.g. macOS 12.4 on Intel).
336 // Hence the logKnownIssue skip above.
337 ustr = sb2.toUnicodeString();
338 // Note that trying the following alternative approach which sets ustr to length 1073741871
339 // (still long enough to test the expected behavior for the remainder of the code here)
340 // also crashed in "C: Linux Clang Exhaustive Tests (Ubuntu 18.04)":
341 // ustr.append(u"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",-1);
342
343 // Complete setting up the test fixture in sb, sb2 and ustr.
344 infoln("# log: setup 3 done, ustr len %d", ustr.length());
345
346 // Test splice() of the second UnicodeString
347 sb.splice(0, 1, ustr, 1, ustr.length(),
348 kUndefinedField, status);
349 infoln("# log: sb.splice 1 done, sb len %d, status %s", sb.length(), u_errorName(status));
350 assertEquals(
351 "splice() long text should not crash but return U_INPUT_TOO_LONG_ERROR",
352 U_INPUT_TOO_LONG_ERROR, status);
353
354 // Test sb.insert() of the first FormattedStringBuilder with the second one.
355 status = U_ZERO_ERROR;
356 sb.insert(0, sb2, status);
357 infoln("# log: sb.insert 1 done, sb len %d, status %s", sb.length(), u_errorName(status));
358 assertEquals(
359 "insert() long FormattedStringBuilder should not crash but return "
360 "U_INPUT_TOO_LONG_ERROR", U_INPUT_TOO_LONG_ERROR, status);
361
362 // Test sb.insert() of the first FormattedStringBuilder with UnicodeString.
363 status = U_ZERO_ERROR;
364 sb.insert(0, ustr, 0, ustr.length(), kUndefinedField, status);
365 infoln("# log: sb.insert 2 done, sb len %d, status %s", sb.length(), u_errorName(status));
366 assertEquals(
367 "insert() long UnicodeString should not crash but return "
368 "U_INPUT_TOO_LONG_ERROR", U_INPUT_TOO_LONG_ERROR, status);
369 }
370
assertEqualsImpl(const UnicodeString & a,const FormattedStringBuilder & b)371 void FormattedStringBuilderTest::assertEqualsImpl(const UnicodeString &a, const FormattedStringBuilder &b) {
372 // TODO: Why won't this compile without the IntlTest:: qualifier?
373 IntlTest::assertEquals("Lengths should be the same", a.length(), b.length());
374 IntlTest::assertEquals("Code point counts should be the same", a.countChar32(), b.codePointCount());
375
376 if (a.length() != b.length()) {
377 return;
378 }
379
380 for (int32_t i = 0; i < a.length(); i++) {
381 IntlTest::assertEquals(
382 UnicodeString(u"Char at position ") + Int64ToUnicodeString(i) +
383 UnicodeString(u" in \"") + a + UnicodeString("\" versus \"") +
384 b.toUnicodeString() + UnicodeString("\""), a.charAt(i), b.charAt(i));
385 }
386 }
387
388
createFormattedStringBuilderTest()389 extern IntlTest *createFormattedStringBuilderTest() {
390 return new FormattedStringBuilderTest();
391 }
392
393 #endif /* #if !UCONFIG_NO_FORMATTING */
394