1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include "unicode/utypes.h"
5
6 #if !UCONFIG_NO_FORMATTING
7
8 #include "numbertest.h"
9 #include "numparse_impl.h"
10 #include "static_unicode_sets.h"
11 #include "unicode/dcfmtsym.h"
12 #include "unicode/testlog.h"
13
14 #include <cmath>
15 #include <numparse_affixes.h>
16
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)17 void NumberParserTest::runIndexedTest(int32_t index, UBool exec, const char*& name, char*) {
18 if (exec) {
19 logln("TestSuite NumberParserTest: ");
20 }
21 TESTCASE_AUTO_BEGIN;
22 TESTCASE_AUTO(testBasic);
23 TESTCASE_AUTO(testSeriesMatcher);
24 TESTCASE_AUTO(testCombinedCurrencyMatcher);
25 TESTCASE_AUTO(testAffixPatternMatcher);
26 TESTCASE_AUTO(test20360_BidiOverflow);
27 TESTCASE_AUTO(testInfiniteRecursion);
28 TESTCASE_AUTO_END;
29 }
30
testBasic()31 void NumberParserTest::testBasic() {
32 IcuTestErrorCode status(*this, "testBasic");
33
34 static const struct TestCase {
35 int32_t flags;
36 const char16_t* inputString;
37 const char16_t* patternString;
38 int32_t expectedCharsConsumed;
39 double expectedResultDouble;
40 } cases[] = {{3, u"51423", u"0", 5, 51423.},
41 {3, u"51423x", u"0", 5, 51423.},
42 {3, u" 51423", u"0", 6, 51423.},
43 {3, u"51423 ", u"0", 5, 51423.},
44 {3, u"", u"0", 10, 51423.},
45 {3, u"x", u"0", 10, 51423.},
46 {3, u" ", u"0", 11, 51423.},
47 {3, u" ", u"0", 10, 51423.},
48 {7, u"51,423", u"#,##,##0", 6, 51423.},
49 {7, u" 51,423", u"#,##,##0", 7, 51423.},
50 {7, u"51,423 ", u"#,##,##0", 6, 51423.},
51 {7, u"51,423,", u"#,##,##0", 6, 51423.},
52 {7, u"51,423,,", u"#,##,##0", 6, 51423.},
53 {7, u"51,423.5", u"#,##,##0", 8, 51423.5},
54 {7, u"51,423.5,", u"#,##,##0", 8, 51423.5},
55 {7, u"51,423.5,,", u"#,##,##0", 8, 51423.5},
56 {7, u"51,423.5.", u"#,##,##0", 8, 51423.5},
57 {7, u"51,423.5..", u"#,##,##0", 8, 51423.5},
58 {7, u",", u"#,##,##0", 11, 51423.},
59 {7, u",,,", u"#,##,##0", 19, 78951423.},
60 {7, u",.", u"#,##,##0", 18, 78951.423},
61 {7, u",", u"#,##,##0", 11, 78000.},
62 {7, u",.", u"#,##,##0", 18, 78000.},
63 {7, u",.", u"#,##,##0", 18, 78000.023},
64 {7, u"..", u"#,##,##0", 11, 78.},
65 {7, u"1,", u"#,##,##0", 1, 1.},
66 {7, u"1,,", u"#,##,##0", 1, 1.},
67 {7, u"1.,", u"#,##,##0", 2, 1.},
68 {3, u"1,.", u"#,##,##0", 3, 1.},
69 {7, u"1..", u"#,##,##0", 2, 1.},
70 {3, u",1", u"#,##,##0", 2, 1.},
71 {3, u"1,1", u"#,##,##0", 1, 1.},
72 {3, u"1,1,", u"#,##,##0", 1, 1.},
73 {3, u"1,1,,", u"#,##,##0", 1, 1.},
74 {3, u"1,1,1", u"#,##,##0", 1, 1.},
75 {3, u"1,1,1,", u"#,##,##0", 1, 1.},
76 {3, u"1,1,1,,", u"#,##,##0", 1, 1.},
77 {3, u"-51423", u"0", 6, -51423.},
78 {3, u"51423-", u"0", 5, 51423.}, // plus and minus sign by default do NOT match after
79 {3, u"+51423", u"0", 6, 51423.},
80 {3, u"51423+", u"0", 5, 51423.}, // plus and minus sign by default do NOT match after
81 {3, u"%51423", u"0", 6, 51423.},
82 {3, u"51423%", u"0", 6, 51423.},
83 {3, u"51423%%", u"0", 6, 51423.},
84 {3, u"‰51423", u"0", 6, 51423.},
85 {3, u"51423‰", u"0", 6, 51423.},
86 {3, u"51423‰‰", u"0", 6, 51423.},
87 {3, u"∞", u"0", 1, INFINITY},
88 {3, u"-∞", u"0", 2, -INFINITY},
89 {3, u"@@@123 @@", u"0", 6, 123.}, // TODO: Should padding be strong instead of weak?
90 {3, u"@@@123@@ ", u"0", 6, 123.}, // TODO: Should padding be strong instead of weak?
91 {3, u"a51423US dollars", u"a0¤¤¤", 16, 51423.},
92 {3, u"a 51423 US dollars", u"a0¤¤¤", 18, 51423.},
93 {3, u"514.23 USD", u"¤0", 10, 514.23},
94 {3, u"514.23 GBP", u"¤0", 10, 514.23},
95 {3, u"a b", u"a0b", 14, 51423.},
96 {3, u"-a b", u"a0b", 15, -51423.},
97 {3, u"a - b", u"a0b", 15, -51423.},
98 {3, u"", u"[0];(0)", 10, 51423.},
99 {3, u"[", u"[0];(0)", 11, 51423.},
100 {3, u"]", u"[0];(0)", 11, 51423.},
101 {3, u"[]", u"[0];(0)", 12, 51423.},
102 {3, u"(", u"[0];(0)", 11, -51423.},
103 {3, u")", u"[0];(0)", 11, -51423.},
104 {3, u"()", u"[0];(0)", 12, -51423.},
105 {3, u"", u"{0};{0}", 10, 51423.},
106 {3, u"{", u"{0};{0}", 11, 51423.},
107 {3, u"}", u"{0};{0}", 11, 51423.},
108 {3, u"{}", u"{0};{0}", 12, 51423.},
109 {1, u"a40b", u"a0'0b'", 3, 40.}, // greedy code path thinks "40" is the number
110 {2, u"a40b", u"a0'0b'", 4, 4.}, // slow code path finds the suffix "0b"
111 {3, u".E", u"0", 12, 5142.},
112 {3, u".E-", u"0", 13, 0.005142},
113 {3, u".e-", u"0", 13, 0.005142},
114 {3, u"5.142e+3", u"0", 8, 5142.0 },
115 {3, u"5.142\u200Ee+3", u"0", 9, 5142.0},
116 {3, u"5.142e\u200E+3", u"0", 9, 5142.0},
117 {3, u"5.142e+\u200E3", u"0", 9, 5142.0},
118 {7, u"5,142.50 Canadian dollars", u"#,##,##0 ¤¤¤", 25, 5142.5},
119 {3, u"a$ b5", u"a ¤ b0", 5, 5.0},
120 {3, u"1.23", u"0;0", 6, 1.23},
121 {3, u"1.23", u"0;0", 6, -1.23},
122 {3, u".00", u"0", 3, 0.0},
123 {3, u" 1,234", u"a0", 35, 1234.}, // should not hang
124 {3, u"NaN", u"0", 3, NAN},
125 {3, u"NaN E5", u"0", 6, NAN},
126 {3, u"0", u"0", 1, 0.0}};
127
128 parse_flags_t parseFlags = PARSE_FLAG_IGNORE_CASE | PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES;
129 for (auto& cas : cases) {
130 UnicodeString inputString(cas.inputString);
131 UnicodeString patternString(cas.patternString);
132 LocalPointer<const NumberParserImpl> parser(
133 NumberParserImpl::createSimpleParser(
134 Locale("en"), patternString, parseFlags, status));
135 if (status.errDataIfFailureAndReset("createSimpleParser() failed")) {
136 continue;
137 }
138 UnicodeString message =
139 UnicodeString("Input <") + inputString + UnicodeString("> Parser ") + parser->toString();
140
141 if (0 != (cas.flags & 0x01)) {
142 // Test greedy code path
143 ParsedNumber resultObject;
144 parser->parse(inputString, true, resultObject, status);
145 assertTrue("Greedy Parse failed: " + message, resultObject.success());
146 assertEquals("Greedy Parse failed: " + message,
147 cas.expectedCharsConsumed, resultObject.charEnd);
148 assertEquals("Greedy Parse failed: " + message,
149 cas.expectedResultDouble, resultObject.getDouble(status));
150 }
151
152 if (0 != (cas.flags & 0x02)) {
153 // Test slow code path
154 ParsedNumber resultObject;
155 parser->parse(inputString, false, resultObject, status);
156 assertTrue("Non-Greedy Parse failed: " + message, resultObject.success());
157 assertEquals(
158 "Non-Greedy Parse failed: " + message,
159 cas.expectedCharsConsumed,
160 resultObject.charEnd);
161 assertEquals(
162 "Non-Greedy Parse failed: " + message,
163 cas.expectedResultDouble,
164 resultObject.getDouble(status));
165 }
166
167 if (0 != (cas.flags & 0x04)) {
168 // Test with strict separators
169 parser.adoptInstead(
170 NumberParserImpl::createSimpleParser(
171 Locale("en"),
172 patternString,
173 parseFlags | PARSE_FLAG_STRICT_GROUPING_SIZE,
174 status));
175 ParsedNumber resultObject;
176 parser->parse(inputString, true, resultObject, status);
177 assertTrue("Strict Parse failed: " + message, resultObject.success());
178 assertEquals("Strict Parse failed: " + message,
179 cas.expectedCharsConsumed, resultObject.charEnd);
180 assertEquals("Strict Parse failed: " + message,
181 cas.expectedResultDouble, resultObject.getDouble(status));
182 }
183 }
184 }
185
testSeriesMatcher()186 void NumberParserTest::testSeriesMatcher() {
187 IcuTestErrorCode status(*this, "testSeriesMatcher");
188
189 DecimalFormatSymbols symbols("en", status);
190 if (status.errDataIfFailureAndReset("Failure in DecimalFormtSymbols constructor")) {
191 return;
192 }
193 PlusSignMatcher m0(symbols, false);
194 MinusSignMatcher m1(symbols, false);
195 IgnorablesMatcher m2(0);
196 PercentMatcher m3(symbols);
197 IgnorablesMatcher m4(0);
198
199 ArraySeriesMatcher::MatcherArray matchers(5, status);
200 status.assertSuccess();
201 matchers[0] = &m0;
202 matchers[1] = &m1;
203 matchers[2] = &m2;
204 matchers[3] = &m3;
205 matchers[4] = &m4;
206 ArraySeriesMatcher series(matchers, 5);
207
208 assertFalse("", series.smokeTest(StringSegment(u"x", false)));
209 assertFalse("", series.smokeTest(StringSegment(u"-", false)));
210 assertTrue("", series.smokeTest(StringSegment(u"+", false)));
211
212 static const struct TestCase {
213 const char16_t* input;
214 int32_t expectedOffset;
215 bool expectedMaybeMore;
216 } cases[] = {{u"", 0, true},
217 {u" ", 0, false},
218 {u"$", 0, false},
219 {u"+", 0, true},
220 {u" +", 0, false},
221 {u"+-", 0, true},
222 {u"+ -", 0, false},
223 {u"+- ", 0, true},
224 {u"+- $", 0, false},
225 {u"+-%", 3, true},
226 {u" +- % ", 0, false},
227 {u"+- % ", 7, true},
228 {u"+-%$", 3, false}};
229
230 for (auto& cas : cases) {
231 UnicodeString input(cas.input);
232
233 StringSegment segment(input, false);
234 ParsedNumber result;
235 bool actualMaybeMore = series.match(segment, result, status);
236 int actualOffset = segment.getOffset();
237
238 assertEquals("'" + input + "'", cas.expectedOffset, actualOffset);
239 assertEquals("'" + input + "'", cas.expectedMaybeMore, actualMaybeMore);
240 }
241 }
242
testCombinedCurrencyMatcher()243 void NumberParserTest::testCombinedCurrencyMatcher() {
244 IcuTestErrorCode status(*this, "testCombinedCurrencyMatcher");
245
246 IgnorablesMatcher ignorables(0);
247 Locale locale = Locale::getEnglish();
248
249 DecimalFormatSymbols dfs(locale, status);
250 if (status.errDataIfFailureAndReset("Failure in DecimalFormtSymbols constructor")) {
251 return;
252 }
253 dfs.setSymbol(DecimalFormatSymbols::kCurrencySymbol, u"IU$", status);
254 dfs.setSymbol(DecimalFormatSymbols::kIntlCurrencySymbol, u"ICU", status);
255 CurrencySymbols currencySymbols({u"ICU", status}, locale, dfs, status);
256
257 AffixTokenMatcherSetupData affixSetupData = {
258 currencySymbols, {"en", status}, ignorables, "en", 0};
259 AffixTokenMatcherWarehouse warehouse(&affixSetupData);
260 NumberParseMatcher& matcher = warehouse.currency(status);
261 affixSetupData.parseFlags = PARSE_FLAG_NO_FOREIGN_CURRENCY;
262 AffixTokenMatcherWarehouse warehouseNoForeign(&affixSetupData);
263 NumberParseMatcher& matcherNoForeign = warehouseNoForeign.currency(status);
264
265 static const struct TestCase {
266 const char16_t* input;
267 const char16_t* expectedCurrencyCode;
268 const char16_t* expectedNoForeignCurrencyCode;
269 } cases[]{{u"", u"", u""},
270 {u"FOO", u"", u""},
271 {u"USD", u"USD", u""},
272 {u"$", u"USD", u""},
273 {u"US dollars", u"USD", u""},
274 {u"eu", u"", u""},
275 {u"euros", u"EUR", u""},
276 {u"ICU", u"ICU", u"ICU"},
277 {u"IU$", u"ICU", u"ICU"}};
278 for (auto& cas : cases) {
279 UnicodeString input(cas.input);
280
281 {
282 StringSegment segment(input, false);
283 ParsedNumber result;
284 matcher.match(segment, result, status);
285 assertEquals(
286 "Parsing " + input,
287 cas.expectedCurrencyCode,
288 result.currencyCode);
289 assertEquals(
290 "Whole string on " + input,
291 cas.expectedCurrencyCode[0] == 0 ? 0 : input.length(),
292 result.charEnd);
293 }
294 {
295 StringSegment segment(input, false);
296 ParsedNumber result;
297 matcherNoForeign.match(segment, result, status);
298 assertEquals(
299 "[no foreign] Parsing " + input,
300 cas.expectedNoForeignCurrencyCode,
301 result.currencyCode);
302 assertEquals(
303 "[no foreign] Whole string on " + input,
304 cas.expectedNoForeignCurrencyCode[0] == 0 ? 0 : input.length(),
305 result.charEnd);
306 }
307 }
308 }
309
testAffixPatternMatcher()310 void NumberParserTest::testAffixPatternMatcher() {
311 IcuTestErrorCode status(*this, "testAffixPatternMatcher");
312 Locale locale = Locale::getEnglish();
313 IgnorablesMatcher ignorables(0);
314
315 DecimalFormatSymbols dfs(locale, status);
316 dfs.setSymbol(DecimalFormatSymbols::kCurrencySymbol, u"IU$", status);
317 dfs.setSymbol(DecimalFormatSymbols::kIntlCurrencySymbol, u"ICU", status);
318 CurrencySymbols currencySymbols({u"ICU", status}, locale, dfs, status);
319
320 AffixTokenMatcherSetupData affixSetupData = {
321 currencySymbols, {"en", status}, ignorables, "en", 0};
322 AffixTokenMatcherWarehouse warehouse(&affixSetupData);
323
324 static const struct TestCase {
325 bool exactMatch;
326 const char16_t* affixPattern;
327 int32_t expectedMatcherLength;
328 const char16_t* sampleParseableString;
329 } cases[] = {{false, u"-", 1, u"-"},
330 {false, u"+-%", 5, u"+-%"},
331 {true, u"+-%", 3, u"+-%"},
332 {false, u"ab c", 5, u"a bc"},
333 {true, u"abc", 3, u"abc"},
334 {false, u"hello-to+this%very¤long‰string", 59, u"hello-to+this%very USD long‰string"}};
335
336 for (auto& cas : cases) {
337 UnicodeString affixPattern(cas.affixPattern);
338 UnicodeString sampleParseableString(cas.sampleParseableString);
339 int parseFlags = cas.exactMatch ? PARSE_FLAG_EXACT_AFFIX : 0;
340
341 bool success;
342 AffixPatternMatcher matcher = AffixPatternMatcher::fromAffixPattern(
343 affixPattern, warehouse, parseFlags, &success, status);
344 if (!status.errDataIfFailureAndReset("Creation should be successful")) {
345
346 // Check that the matcher has the expected number of children
347 assertEquals(affixPattern + " " + cas.exactMatch, cas.expectedMatcherLength, matcher.length());
348
349 // Check that the matcher works on a sample string
350 StringSegment segment(sampleParseableString, false);
351 ParsedNumber result;
352 matcher.match(segment, result, status);
353 assertEquals(affixPattern + " " + cas.exactMatch, sampleParseableString.length(), result.charEnd);
354 }
355 }
356 }
357
test20360_BidiOverflow()358 void NumberParserTest::test20360_BidiOverflow() {
359 IcuTestErrorCode status(*this, "test20360_BidiOverflow");
360 UnicodeString inputString;
361 inputString.append(u'-');
362 for (int32_t i=0; i<100000; i++) {
363 inputString.append(u'\u061C');
364 }
365 inputString.append(u'5');
366
367 LocalPointer<const NumberParserImpl> parser(NumberParserImpl::createSimpleParser("en", u"0", 0, status));
368 if (status.errDataIfFailureAndReset("createSimpleParser() failed")) {
369 return;
370 }
371
372 ParsedNumber resultObject;
373 parser->parse(inputString, true, resultObject, status);
374 assertTrue("Greedy Parse, success", resultObject.success());
375 assertEquals("Greedy Parse, chars consumed", 100002, resultObject.charEnd);
376 assertEquals("Greedy Parse, expected double", -5.0, resultObject.getDouble(status));
377
378 resultObject.clear();
379 parser->parse(inputString, false, resultObject, status);
380 assertFalse("Non-Greedy Parse, success", resultObject.success());
381 assertEquals("Non-Greedy Parse, chars consumed", 1, resultObject.charEnd);
382 }
383
testInfiniteRecursion()384 void NumberParserTest::testInfiniteRecursion() {
385 IcuTestErrorCode status(*this, "testInfiniteRecursion");
386 UnicodeString inputString;
387 inputString.append(u'-');
388 for (int32_t i=0; i<200; i++) {
389 inputString.append(u'\u061C');
390 }
391 inputString.append(u'5');
392
393 LocalPointer<const NumberParserImpl> parser(NumberParserImpl::createSimpleParser("en", u"0", 0, status));
394 if (status.errDataIfFailureAndReset("createSimpleParser() failed")) {
395 return;
396 }
397
398 ParsedNumber resultObject;
399 parser->parse(inputString, false, resultObject, status);
400 assertFalse("Default recursion limit, success", resultObject.success());
401 assertEquals("Default recursion limit, chars consumed", 1, resultObject.charEnd);
402
403 parser.adoptInstead(NumberParserImpl::createSimpleParser(
404 "en", u"0", PARSE_FLAG_ALLOW_INFINITE_RECURSION, status));
405 resultObject.clear();
406 parser->parse(inputString, false, resultObject, status);
407 assertTrue("Unlimited recursion, success", resultObject.success());
408 assertEquals("Unlimited recursion, chars consumed", 202, resultObject.charEnd);
409 assertEquals("Unlimited recursion, expected double", -5.0, resultObject.getDouble(status));
410 }
411
412
413 #endif
414