• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 
8 #include "numbertest.h"
9 #include "numparse_impl.h"
10 #include "static_unicode_sets.h"
11 #include "unicode/dcfmtsym.h"
12 #include "unicode/testlog.h"
13 
14 #include <cmath>
15 #include <numparse_affixes.h>
16 
runIndexedTest(int32_t index,UBool exec,const char * & name,char *)17 void NumberParserTest::runIndexedTest(int32_t index, UBool exec, const char*& name, char*) {
18     if (exec) {
19         logln("TestSuite NumberParserTest: ");
20     }
21     TESTCASE_AUTO_BEGIN;
22         TESTCASE_AUTO(testBasic);
23         TESTCASE_AUTO(testSeriesMatcher);
24         TESTCASE_AUTO(testCombinedCurrencyMatcher);
25         TESTCASE_AUTO(testAffixPatternMatcher);
26         TESTCASE_AUTO(test20360_BidiOverflow);
27         TESTCASE_AUTO(testInfiniteRecursion);
28     TESTCASE_AUTO_END;
29 }
30 
testBasic()31 void NumberParserTest::testBasic() {
32     IcuTestErrorCode status(*this, "testBasic");
33 
34     static const struct TestCase {
35         int32_t flags;
36         const char16_t* inputString;
37         const char16_t* patternString;
38         int32_t expectedCharsConsumed;
39         double expectedResultDouble;
40     } cases[] = {{3, u"51423", u"0", 5, 51423.},
41                  {3, u"51423x", u"0", 5, 51423.},
42                  {3, u" 51423", u"0", 6, 51423.},
43                  {3, u"51423 ", u"0", 5, 51423.},
44                  {3, u"����������", u"0", 10, 51423.},
45                  {3, u"����������x", u"0", 10, 51423.},
46                  {3, u" ����������", u"0", 11, 51423.},
47                  {3, u"���������� ", u"0", 10, 51423.},
48                  {7, u"51,423", u"#,##,##0", 6, 51423.},
49                  {7, u" 51,423", u"#,##,##0", 7, 51423.},
50                  {7, u"51,423 ", u"#,##,##0", 6, 51423.},
51                  {7, u"51,423,", u"#,##,##0", 6, 51423.},
52                  {7, u"51,423,,", u"#,##,##0", 6, 51423.},
53                  {7, u"51,423.5", u"#,##,##0", 8, 51423.5},
54                  {7, u"51,423.5,", u"#,##,##0", 8, 51423.5},
55                  {7, u"51,423.5,,", u"#,##,##0", 8, 51423.5},
56                  {7, u"51,423.5.", u"#,##,##0", 8, 51423.5},
57                  {7, u"51,423.5..", u"#,##,##0", 8, 51423.5},
58                  {7, u"����,������", u"#,##,##0", 11, 51423.},
59                  {7, u"��,����,����,������", u"#,##,##0", 19, 78951423.},
60                  {7, u"����,������.������", u"#,##,##0", 18, 78951.423},
61                  {7, u"����,������", u"#,##,##0", 11, 78000.},
62                  {7, u"����,������.������", u"#,##,##0", 18, 78000.},
63                  {7, u"����,������.������", u"#,##,##0", 18, 78000.023},
64                  {7, u"����.������.������", u"#,##,##0", 11, 78.},
65                  {7, u"1,", u"#,##,##0", 1, 1.},
66                  {7, u"1,,", u"#,##,##0", 1, 1.},
67                  {7, u"1.,", u"#,##,##0", 2, 1.},
68                  {3, u"1,.", u"#,##,##0", 3, 1.},
69                  {7, u"1..", u"#,##,##0", 2, 1.},
70                  {3, u",1", u"#,##,##0", 2, 1.},
71                  {3, u"1,1", u"#,##,##0", 1, 1.},
72                  {3, u"1,1,", u"#,##,##0", 1, 1.},
73                  {3, u"1,1,,", u"#,##,##0", 1, 1.},
74                  {3, u"1,1,1", u"#,##,##0", 1, 1.},
75                  {3, u"1,1,1,", u"#,##,##0", 1, 1.},
76                  {3, u"1,1,1,,", u"#,##,##0", 1, 1.},
77                  {3, u"-51423", u"0", 6, -51423.},
78                  {3, u"51423-", u"0", 5, 51423.}, // plus and minus sign by default do NOT match after
79                  {3, u"+51423", u"0", 6, 51423.},
80                  {3, u"51423+", u"0", 5, 51423.}, // plus and minus sign by default do NOT match after
81                  {3, u"%51423", u"0", 6, 51423.},
82                  {3, u"51423%", u"0", 6, 51423.},
83                  {3, u"51423%%", u"0", 6, 51423.},
84                  {3, u"‰51423", u"0", 6, 51423.},
85                  {3, u"51423‰", u"0", 6, 51423.},
86                  {3, u"51423‰‰", u"0", 6, 51423.},
87                  {3, u"∞", u"0", 1, INFINITY},
88                  {3, u"-∞", u"0", 2, -INFINITY},
89                  {3, u"@@@123  @@", u"0", 6, 123.}, // TODO: Should padding be strong instead of weak?
90                  {3, u"@@@123@@  ", u"0", 6, 123.}, // TODO: Should padding be strong instead of weak?
91                  {3, u"a51423US dollars", u"a0¤¤¤", 16, 51423.},
92                  {3, u"a 51423 US dollars", u"a0¤¤¤", 18, 51423.},
93                  {3, u"514.23 USD", u"¤0", 10, 514.23},
94                  {3, u"514.23 GBP", u"¤0", 10, 514.23},
95                  {3, u"a ���������� b", u"a0b", 14, 51423.},
96                  {3, u"-a ���������� b", u"a0b", 15, -51423.},
97                  {3, u"a -���������� b", u"a0b", 15, -51423.},
98                  {3, u"����������", u"[0];(0)", 10, 51423.},
99                  {3, u"[����������", u"[0];(0)", 11, 51423.},
100                  {3, u"����������]", u"[0];(0)", 11, 51423.},
101                  {3, u"[����������]", u"[0];(0)", 12, 51423.},
102                  {3, u"(����������", u"[0];(0)", 11, -51423.},
103                  {3, u"����������)", u"[0];(0)", 11, -51423.},
104                  {3, u"(����������)", u"[0];(0)", 12, -51423.},
105                  {3, u"����������", u"{0};{0}", 10, 51423.},
106                  {3, u"{����������", u"{0};{0}", 11, 51423.},
107                  {3, u"����������}", u"{0};{0}", 11, 51423.},
108                  {3, u"{����������}", u"{0};{0}", 12, 51423.},
109                  {1, u"a40b", u"a0'0b'", 3, 40.}, // greedy code path thinks "40" is the number
110                  {2, u"a40b", u"a0'0b'", 4, 4.}, // slow code path finds the suffix "0b"
111                  {3, u"��.������E��", u"0", 12, 5142.},
112                  {3, u"��.������E-��", u"0", 13, 0.005142},
113                  {3, u"��.������e-��", u"0", 13, 0.005142},
114                  {3, u"5.142e+3", u"0", 8, 5142.0 },
115                  {3, u"5.142\u200Ee+3", u"0", 9, 5142.0},
116                  {3, u"5.142e\u200E+3", u"0", 9, 5142.0},
117                  {3, u"5.142e+\u200E3", u"0", 9, 5142.0},
118                  {7, u"5,142.50 Canadian dollars", u"#,##,##0 ¤¤¤", 25, 5142.5},
119                  {3, u"a$ b5", u"a ¤ b0", 5, 5.0},
120                  {3, u"��1.23", u"��0;��0", 6, 1.23},
121                  {3, u"��1.23", u"��0;��0", 6, -1.23},
122                  {3, u".00", u"0", 3, 0.0},
123                  {3, u"                              1,234", u"a0", 35, 1234.}, // should not hang
124                  {3, u"NaN", u"0", 3, NAN},
125                  {3, u"NaN E5", u"0", 6, NAN},
126                  {3, u"0", u"0", 1, 0.0}};
127 
128     parse_flags_t parseFlags = PARSE_FLAG_IGNORE_CASE | PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES;
129     for (auto& cas : cases) {
130         UnicodeString inputString(cas.inputString);
131         UnicodeString patternString(cas.patternString);
132         LocalPointer<const NumberParserImpl> parser(
133                 NumberParserImpl::createSimpleParser(
134                         Locale("en"), patternString, parseFlags, status));
135         if (status.errDataIfFailureAndReset("createSimpleParser() failed")) {
136             continue;
137         }
138         UnicodeString message =
139                 UnicodeString("Input <") + inputString + UnicodeString("> Parser ") + parser->toString();
140 
141         if (0 != (cas.flags & 0x01)) {
142             // Test greedy code path
143             ParsedNumber resultObject;
144             parser->parse(inputString, true, resultObject, status);
145             assertTrue("Greedy Parse failed: " + message, resultObject.success());
146             assertEquals("Greedy Parse failed: " + message,
147                 cas.expectedCharsConsumed, resultObject.charEnd);
148             assertEquals("Greedy Parse failed: " + message,
149                 cas.expectedResultDouble, resultObject.getDouble(status));
150         }
151 
152         if (0 != (cas.flags & 0x02)) {
153             // Test slow code path
154             ParsedNumber resultObject;
155             parser->parse(inputString, false, resultObject, status);
156             assertTrue("Non-Greedy Parse failed: " + message, resultObject.success());
157             assertEquals(
158                     "Non-Greedy Parse failed: " + message,
159                     cas.expectedCharsConsumed,
160                     resultObject.charEnd);
161             assertEquals(
162                     "Non-Greedy Parse failed: " + message,
163                     cas.expectedResultDouble,
164                     resultObject.getDouble(status));
165         }
166 
167         if (0 != (cas.flags & 0x04)) {
168             // Test with strict separators
169             parser.adoptInstead(
170                     NumberParserImpl::createSimpleParser(
171                             Locale("en"),
172                             patternString,
173                             parseFlags | PARSE_FLAG_STRICT_GROUPING_SIZE,
174                             status));
175             ParsedNumber resultObject;
176             parser->parse(inputString, true, resultObject, status);
177             assertTrue("Strict Parse failed: " + message, resultObject.success());
178             assertEquals("Strict Parse failed: " + message,
179                 cas.expectedCharsConsumed, resultObject.charEnd);
180             assertEquals("Strict Parse failed: " + message,
181                 cas.expectedResultDouble, resultObject.getDouble(status));
182         }
183     }
184 }
185 
testSeriesMatcher()186 void NumberParserTest::testSeriesMatcher() {
187     IcuTestErrorCode status(*this, "testSeriesMatcher");
188 
189     DecimalFormatSymbols symbols("en", status);
190     if (status.errDataIfFailureAndReset("Failure in DecimalFormtSymbols constructor")) {
191         return;
192     }
193     PlusSignMatcher m0(symbols, false);
194     MinusSignMatcher m1(symbols, false);
195     IgnorablesMatcher m2(0);
196     PercentMatcher m3(symbols);
197     IgnorablesMatcher m4(0);
198 
199     ArraySeriesMatcher::MatcherArray matchers(5);
200     matchers[0] = &m0;
201     matchers[1] = &m1;
202     matchers[2] = &m2;
203     matchers[3] = &m3;
204     matchers[4] = &m4;
205     ArraySeriesMatcher series(matchers, 5);
206 
207     assertFalse("", series.smokeTest(StringSegment(u"x", false)));
208     assertFalse("", series.smokeTest(StringSegment(u"-", false)));
209     assertTrue("", series.smokeTest(StringSegment(u"+", false)));
210 
211     static const struct TestCase {
212         const char16_t* input;
213         int32_t expectedOffset;
214         bool expectedMaybeMore;
215     } cases[] = {{u"", 0, true},
216                  {u" ", 0, false},
217                  {u"$", 0, false},
218                  {u"+", 0, true},
219                  {u" +", 0, false},
220                  {u"+-", 0, true},
221                  {u"+ -", 0, false},
222                  {u"+-  ", 0, true},
223                  {u"+-  $", 0, false},
224                  {u"+-%", 3, true},
225                  {u"  +-  %  ", 0, false},
226                  {u"+-  %  ", 7, true},
227                  {u"+-%$", 3, false}};
228 
229     for (auto& cas : cases) {
230         UnicodeString input(cas.input);
231 
232         StringSegment segment(input, false);
233         ParsedNumber result;
234         bool actualMaybeMore = series.match(segment, result, status);
235         int actualOffset = segment.getOffset();
236 
237         assertEquals("'" + input + "'", cas.expectedOffset, actualOffset);
238         assertEquals("'" + input + "'", cas.expectedMaybeMore, actualMaybeMore);
239     }
240 }
241 
testCombinedCurrencyMatcher()242 void NumberParserTest::testCombinedCurrencyMatcher() {
243     IcuTestErrorCode status(*this, "testCombinedCurrencyMatcher");
244 
245     IgnorablesMatcher ignorables(0);
246     Locale locale = Locale::getEnglish();
247 
248     DecimalFormatSymbols dfs(locale, status);
249     if (status.errDataIfFailureAndReset("Failure in DecimalFormtSymbols constructor")) {
250         return;
251     }
252     dfs.setSymbol(DecimalFormatSymbols::kCurrencySymbol, u"IU$", status);
253     dfs.setSymbol(DecimalFormatSymbols::kIntlCurrencySymbol, u"ICU", status);
254     CurrencySymbols currencySymbols({u"ICU", status}, locale, dfs, status);
255 
256     AffixTokenMatcherSetupData affixSetupData = {
257             currencySymbols, {"en", status}, ignorables, "en", 0};
258     AffixTokenMatcherWarehouse warehouse(&affixSetupData);
259     NumberParseMatcher& matcher = warehouse.currency(status);
260     affixSetupData.parseFlags = PARSE_FLAG_NO_FOREIGN_CURRENCY;
261     AffixTokenMatcherWarehouse warehouseNoForeign(&affixSetupData);
262     NumberParseMatcher& matcherNoForeign = warehouseNoForeign.currency(status);
263 
264     static const struct TestCase {
265         const char16_t* input;
266         const char16_t* expectedCurrencyCode;
267         const char16_t* expectedNoForeignCurrencyCode;
268     } cases[]{{u"", u"", u""},
269               {u"FOO", u"", u""},
270               {u"USD", u"USD", u""},
271               {u"$", u"USD", u""},
272               {u"US dollars", u"USD", u""},
273               {u"eu", u"", u""},
274               {u"euros", u"EUR", u""},
275               {u"ICU", u"ICU", u"ICU"},
276               {u"IU$", u"ICU", u"ICU"}};
277     for (auto& cas : cases) {
278         UnicodeString input(cas.input);
279 
280         {
281             StringSegment segment(input, false);
282             ParsedNumber result;
283             matcher.match(segment, result, status);
284             assertEquals(
285                     "Parsing " + input,
286                     cas.expectedCurrencyCode,
287                     result.currencyCode);
288             assertEquals(
289                     "Whole string on " + input,
290                     cas.expectedCurrencyCode[0] == 0 ? 0 : input.length(),
291                     result.charEnd);
292         }
293         {
294             StringSegment segment(input, false);
295             ParsedNumber result;
296             matcherNoForeign.match(segment, result, status);
297             assertEquals(
298                     "[no foreign] Parsing " + input,
299                     cas.expectedNoForeignCurrencyCode,
300                     result.currencyCode);
301             assertEquals(
302                     "[no foreign] Whole string on " + input,
303                     cas.expectedNoForeignCurrencyCode[0] == 0 ? 0 : input.length(),
304                     result.charEnd);
305         }
306     }
307 }
308 
testAffixPatternMatcher()309 void NumberParserTest::testAffixPatternMatcher() {
310     IcuTestErrorCode status(*this, "testAffixPatternMatcher");
311     Locale locale = Locale::getEnglish();
312     IgnorablesMatcher ignorables(0);
313 
314     DecimalFormatSymbols dfs(locale, status);
315     dfs.setSymbol(DecimalFormatSymbols::kCurrencySymbol, u"IU$", status);
316     dfs.setSymbol(DecimalFormatSymbols::kIntlCurrencySymbol, u"ICU", status);
317     CurrencySymbols currencySymbols({u"ICU", status}, locale, dfs, status);
318 
319     AffixTokenMatcherSetupData affixSetupData = {
320             currencySymbols, {"en", status}, ignorables, "en", 0};
321     AffixTokenMatcherWarehouse warehouse(&affixSetupData);
322 
323     static const struct TestCase {
324         bool exactMatch;
325         const char16_t* affixPattern;
326         int32_t expectedMatcherLength;
327         const char16_t* sampleParseableString;
328     } cases[] = {{false, u"-", 1, u"-"},
329                  {false, u"+-%", 5, u"+-%"},
330                  {true, u"+-%", 3, u"+-%"},
331                  {false, u"ab c", 5, u"a    bc"},
332                  {true, u"abc", 3, u"abc"},
333                  {false, u"hello-to+this%very¤long‰string", 59, u"hello-to+this%very USD long‰string"}};
334 
335     for (auto& cas : cases) {
336         UnicodeString affixPattern(cas.affixPattern);
337         UnicodeString sampleParseableString(cas.sampleParseableString);
338         int parseFlags = cas.exactMatch ? PARSE_FLAG_EXACT_AFFIX : 0;
339 
340         bool success;
341         AffixPatternMatcher matcher = AffixPatternMatcher::fromAffixPattern(
342                 affixPattern, warehouse, parseFlags, &success, status);
343         if (!status.errDataIfFailureAndReset("Creation should be successful")) {
344 
345             // Check that the matcher has the expected number of children
346             assertEquals(affixPattern + " " + cas.exactMatch, cas.expectedMatcherLength, matcher.length());
347 
348             // Check that the matcher works on a sample string
349             StringSegment segment(sampleParseableString, false);
350             ParsedNumber result;
351             matcher.match(segment, result, status);
352             assertEquals(affixPattern + " " + cas.exactMatch, sampleParseableString.length(), result.charEnd);
353         }
354     }
355 }
356 
test20360_BidiOverflow()357 void NumberParserTest::test20360_BidiOverflow() {
358     IcuTestErrorCode status(*this, "test20360_BidiOverflow");
359     UnicodeString inputString;
360     inputString.append(u'-');
361     for (int32_t i=0; i<100000; i++) {
362         inputString.append(u'\u061C');
363     }
364     inputString.append(u'5');
365 
366     LocalPointer<const NumberParserImpl> parser(NumberParserImpl::createSimpleParser("en", u"0", 0, status));
367     if (status.errDataIfFailureAndReset("createSimpleParser() failed")) {
368         return;
369     }
370 
371     ParsedNumber resultObject;
372     parser->parse(inputString, true, resultObject, status);
373     assertTrue("Greedy Parse, success", resultObject.success());
374     assertEquals("Greedy Parse, chars consumed", 100002, resultObject.charEnd);
375     assertEquals("Greedy Parse, expected double", -5.0, resultObject.getDouble(status));
376 
377     resultObject.clear();
378     parser->parse(inputString, false, resultObject, status);
379     assertFalse("Non-Greedy Parse, success", resultObject.success());
380     assertEquals("Non-Greedy Parse, chars consumed", 1, resultObject.charEnd);
381 }
382 
testInfiniteRecursion()383 void NumberParserTest::testInfiniteRecursion() {
384     IcuTestErrorCode status(*this, "testInfiniteRecursion");
385     UnicodeString inputString;
386     inputString.append(u'-');
387     for (int32_t i=0; i<200; i++) {
388         inputString.append(u'\u061C');
389     }
390     inputString.append(u'5');
391 
392     LocalPointer<const NumberParserImpl> parser(NumberParserImpl::createSimpleParser("en", u"0", 0, status));
393     if (status.errDataIfFailureAndReset("createSimpleParser() failed")) {
394         return;
395     }
396 
397     ParsedNumber resultObject;
398     parser->parse(inputString, false, resultObject, status);
399     assertFalse("Default recursion limit, success", resultObject.success());
400     assertEquals("Default recursion limit, chars consumed", 1, resultObject.charEnd);
401 
402     parser.adoptInstead(NumberParserImpl::createSimpleParser(
403         "en", u"0", PARSE_FLAG_ALLOW_INFINITE_RECURSION, status));
404     resultObject.clear();
405     parser->parse(inputString, false, resultObject, status);
406     assertTrue("Unlimited recursion, success", resultObject.success());
407     assertEquals("Unlimited recursion, chars consumed", 202, resultObject.charEnd);
408     assertEquals("Unlimited recursion, expected double", -5.0, resultObject.getDouble(status));
409 }
410 
411 
412 #endif
413