• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 
8 // Allow implicit conversion from char16_t* to UnicodeString for this file:
9 // Helpful in toString methods and elsewhere.
10 #define UNISTR_FROM_STRING_EXPLICIT
11 
12 #include <typeinfo>
13 #include <array>
14 #include "number_types.h"
15 #include "number_patternstring.h"
16 #include "numparse_types.h"
17 #include "numparse_impl.h"
18 #include "numparse_symbols.h"
19 #include "numparse_decimal.h"
20 #include "unicode/numberformatter.h"
21 #include "cstr.h"
22 #include "number_mapper.h"
23 #include "static_unicode_sets.h"
24 
25 using namespace icu;
26 using namespace icu::number;
27 using namespace icu::number::impl;
28 using namespace icu::numparse;
29 using namespace icu::numparse::impl;
30 
31 
// Out-of-line definition of the NumberParseMatcher destructor; the compiler-generated
// (defaulted) implementation is used.
32 NumberParseMatcher::~NumberParseMatcher() = default;
33 
34 
35 NumberParserImpl*
createSimpleParser(const Locale & locale,const UnicodeString & patternString,parse_flags_t parseFlags,UErrorCode & status)36 NumberParserImpl::createSimpleParser(const Locale& locale, const UnicodeString& patternString,
37                                      parse_flags_t parseFlags, UErrorCode& status) {
38 
39     LocalPointer<NumberParserImpl> parser(new NumberParserImpl(parseFlags));
40     DecimalFormatSymbols symbols(locale, status);
41 
42     parser->fLocalMatchers.ignorables = {unisets::DEFAULT_IGNORABLES};
43     IgnorablesMatcher& ignorables = parser->fLocalMatchers.ignorables;
44 
45     DecimalFormatSymbols dfs(locale, status);
46     dfs.setSymbol(DecimalFormatSymbols::kCurrencySymbol, u"IU$");
47     dfs.setSymbol(DecimalFormatSymbols::kIntlCurrencySymbol, u"ICU");
48     CurrencySymbols currencySymbols({u"ICU", status}, locale, dfs, status);
49 
50     ParsedPatternInfo patternInfo;
51     PatternParser::parseToPatternInfo(patternString, patternInfo, status);
52 
53     // The following statements set up the affix matchers.
54     AffixTokenMatcherSetupData affixSetupData = {
55             currencySymbols, symbols, ignorables, locale, parseFlags};
56     parser->fLocalMatchers.affixTokenMatcherWarehouse = {&affixSetupData};
57     parser->fLocalMatchers.affixMatcherWarehouse = {&parser->fLocalMatchers.affixTokenMatcherWarehouse};
58     parser->fLocalMatchers.affixMatcherWarehouse.createAffixMatchers(
59             patternInfo, *parser, ignorables, parseFlags, status);
60 
61     Grouper grouper = Grouper::forStrategy(UNUM_GROUPING_AUTO);
62     grouper.setLocaleData(patternInfo, locale);
63 
64     parser->addMatcher(parser->fLocalMatchers.ignorables);
65     parser->addMatcher(parser->fLocalMatchers.decimal = {symbols, grouper, parseFlags});
66     parser->addMatcher(parser->fLocalMatchers.minusSign = {symbols, false});
67     parser->addMatcher(parser->fLocalMatchers.plusSign = {symbols, false});
68     parser->addMatcher(parser->fLocalMatchers.percent = {symbols});
69     parser->addMatcher(parser->fLocalMatchers.permille = {symbols});
70     parser->addMatcher(parser->fLocalMatchers.nan = {symbols});
71     parser->addMatcher(parser->fLocalMatchers.infinity = {symbols});
72     parser->addMatcher(parser->fLocalMatchers.padding = {u"@"});
73     parser->addMatcher(parser->fLocalMatchers.scientific = {symbols, grouper});
74     parser->addMatcher(parser->fLocalMatchers.currency = {currencySymbols, symbols, parseFlags, status});
75 //    parser.addMatcher(new RequireNumberMatcher());
76 
77     parser->freeze();
78     return parser.orphan();
79 }
80 
81 NumberParserImpl*
createParserFromProperties(const number::impl::DecimalFormatProperties & properties,const DecimalFormatSymbols & symbols,bool parseCurrency,UErrorCode & status)82 NumberParserImpl::createParserFromProperties(const number::impl::DecimalFormatProperties& properties,
83                                              const DecimalFormatSymbols& symbols, bool parseCurrency,
84                                              UErrorCode& status) {
85     Locale locale = symbols.getLocale();
86     PropertiesAffixPatternProvider localPAPP;
87     CurrencyPluralInfoAffixProvider localCPIAP;
88     AffixPatternProvider* affixProvider;
89     if (properties.currencyPluralInfo.fPtr.isNull()) {
90         localPAPP.setTo(properties, status);
91         affixProvider = &localPAPP;
92     } else {
93         localCPIAP.setTo(*properties.currencyPluralInfo.fPtr, properties, status);
94         affixProvider = &localCPIAP;
95     }
96     if (affixProvider == nullptr || U_FAILURE(status)) { return nullptr; }
97     CurrencyUnit currency = resolveCurrency(properties, locale, status);
98     CurrencySymbols currencySymbols(currency, locale, symbols, status);
99     bool isStrict = properties.parseMode.getOrDefault(PARSE_MODE_STRICT) == PARSE_MODE_STRICT;
100     Grouper grouper = Grouper::forProperties(properties);
101     int parseFlags = 0;
102     if (affixProvider == nullptr || U_FAILURE(status)) { return nullptr; }
103     if (!properties.parseCaseSensitive) {
104         parseFlags |= PARSE_FLAG_IGNORE_CASE;
105     }
106     if (properties.parseIntegerOnly) {
107         parseFlags |= PARSE_FLAG_INTEGER_ONLY;
108     }
109     if (properties.signAlwaysShown) {
110         parseFlags |= PARSE_FLAG_PLUS_SIGN_ALLOWED;
111     }
112     if (isStrict) {
113         parseFlags |= PARSE_FLAG_STRICT_GROUPING_SIZE;
114         parseFlags |= PARSE_FLAG_STRICT_SEPARATORS;
115         parseFlags |= PARSE_FLAG_USE_FULL_AFFIXES;
116         parseFlags |= PARSE_FLAG_EXACT_AFFIX;
117     } else {
118         parseFlags |= PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES;
119     }
120     if (grouper.getPrimary() <= 0) {
121         parseFlags |= PARSE_FLAG_GROUPING_DISABLED;
122     }
123     if (parseCurrency || affixProvider->hasCurrencySign()) {
124         parseFlags |= PARSE_FLAG_MONETARY_SEPARATORS;
125     }
126     if (!parseCurrency) {
127         parseFlags |= PARSE_FLAG_NO_FOREIGN_CURRENCY;
128     }
129 
130     LocalPointer<NumberParserImpl> parser(new NumberParserImpl(parseFlags));
131 
132     parser->fLocalMatchers.ignorables = {
133             isStrict ? unisets::STRICT_IGNORABLES : unisets::DEFAULT_IGNORABLES};
134     IgnorablesMatcher& ignorables = parser->fLocalMatchers.ignorables;
135 
136     //////////////////////
137     /// AFFIX MATCHERS ///
138     //////////////////////
139 
140     // The following statements set up the affix matchers.
141     AffixTokenMatcherSetupData affixSetupData = {
142             currencySymbols, symbols, ignorables, locale, parseFlags};
143     parser->fLocalMatchers.affixTokenMatcherWarehouse = {&affixSetupData};
144     parser->fLocalMatchers.affixMatcherWarehouse = {&parser->fLocalMatchers.affixTokenMatcherWarehouse};
145     parser->fLocalMatchers.affixMatcherWarehouse.createAffixMatchers(
146             *affixProvider, *parser, ignorables, parseFlags, status);
147 
148     ////////////////////////
149     /// CURRENCY MATCHER ///
150     ////////////////////////
151 
152     if (parseCurrency || affixProvider->hasCurrencySign()) {
153         parser->addMatcher(parser->fLocalMatchers.currency = {currencySymbols, symbols, parseFlags, status});
154     }
155 
156     ///////////////
157     /// PERCENT ///
158     ///////////////
159 
160     // ICU-TC meeting, April 11, 2018: accept percent/permille only if it is in the pattern,
161     // and to maintain regressive behavior, divide by 100 even if no percent sign is present.
162     if (!isStrict && affixProvider->containsSymbolType(AffixPatternType::TYPE_PERCENT, status)) {
163         parser->addMatcher(parser->fLocalMatchers.percent = {symbols});
164     }
165     if (!isStrict && affixProvider->containsSymbolType(AffixPatternType::TYPE_PERMILLE, status)) {
166         parser->addMatcher(parser->fLocalMatchers.permille = {symbols});
167     }
168 
169     ///////////////////////////////
170     /// OTHER STANDARD MATCHERS ///
171     ///////////////////////////////
172 
173     if (!isStrict) {
174         parser->addMatcher(parser->fLocalMatchers.plusSign = {symbols, false});
175         parser->addMatcher(parser->fLocalMatchers.minusSign = {symbols, false});
176     }
177     parser->addMatcher(parser->fLocalMatchers.nan = {symbols});
178     parser->addMatcher(parser->fLocalMatchers.infinity = {symbols});
179     UnicodeString padString = properties.padString;
180     if (!padString.isBogus() && !ignorables.getSet()->contains(padString)) {
181         parser->addMatcher(parser->fLocalMatchers.padding = {padString});
182     }
183     parser->addMatcher(parser->fLocalMatchers.ignorables);
184     parser->addMatcher(parser->fLocalMatchers.decimal = {symbols, grouper, parseFlags});
185     // NOTE: parseNoExponent doesn't disable scientific parsing if we have a scientific formatter
186     if (!properties.parseNoExponent || properties.minimumExponentDigits > 0) {
187         parser->addMatcher(parser->fLocalMatchers.scientific = {symbols, grouper});
188     }
189 
190     //////////////////
191     /// VALIDATORS ///
192     //////////////////
193 
194     parser->addMatcher(parser->fLocalValidators.number = {});
195     if (isStrict) {
196         parser->addMatcher(parser->fLocalValidators.affix = {});
197     }
198     if (parseCurrency) {
199         parser->addMatcher(parser->fLocalValidators.currency = {});
200     }
201     if (properties.decimalPatternMatchRequired) {
202         bool patternHasDecimalSeparator =
203                 properties.decimalSeparatorAlwaysShown || properties.maximumFractionDigits != 0;
204         parser->addMatcher(parser->fLocalValidators.decimalSeparator = {patternHasDecimalSeparator});
205     }
206     // The multiplier takes care of scaling percentages.
207     Scale multiplier = scaleFromProperties(properties);
208     if (multiplier.isValid()) {
209         parser->addMatcher(parser->fLocalValidators.multiplier = {multiplier});
210     }
211 
212     parser->freeze();
213     return parser.orphan();
214 }
215 
NumberParserImpl(parse_flags_t parseFlags)216 NumberParserImpl::NumberParserImpl(parse_flags_t parseFlags)
217         : fParseFlags(parseFlags) {
218 }
219 
~NumberParserImpl()220 NumberParserImpl::~NumberParserImpl() {
221     fNumMatchers = 0;
222 }
223 
addMatcher(NumberParseMatcher & matcher)224 void NumberParserImpl::addMatcher(NumberParseMatcher& matcher) {
225     if (fNumMatchers + 1 > fMatchers.getCapacity()) {
226         fMatchers.resize(fNumMatchers * 2, fNumMatchers);
227     }
228     fMatchers[fNumMatchers] = &matcher;
229     fNumMatchers++;
230 }
231 
freeze()232 void NumberParserImpl::freeze() {
233     fFrozen = true;
234 }
235 
getParseFlags() const236 parse_flags_t NumberParserImpl::getParseFlags() const {
237     return fParseFlags;
238 }
239 
parse(const UnicodeString & input,bool greedy,ParsedNumber & result,UErrorCode & status) const240 void NumberParserImpl::parse(const UnicodeString& input, bool greedy, ParsedNumber& result,
241                              UErrorCode& status) const {
242     return parse(input, 0, greedy, result, status);
243 }
244 
parse(const UnicodeString & input,int32_t start,bool greedy,ParsedNumber & result,UErrorCode & status) const245 void NumberParserImpl::parse(const UnicodeString& input, int32_t start, bool greedy, ParsedNumber& result,
246                              UErrorCode& status) const {
247     if (U_FAILURE(status)) {
248         return;
249     }
250     U_ASSERT(fFrozen);
251     // TODO: Check start >= 0 and start < input.length()
252     StringSegment segment(input, 0 != (fParseFlags & PARSE_FLAG_IGNORE_CASE));
253     segment.adjustOffset(start);
254     if (greedy) {
255         parseGreedyRecursive(segment, result, status);
256     } else {
257         parseLongestRecursive(segment, result, status);
258     }
259     for (int32_t i = 0; i < fNumMatchers; i++) {
260         fMatchers[i]->postProcess(result);
261     }
262     result.postProcess();
263 }
264 
parseGreedyRecursive(StringSegment & segment,ParsedNumber & result,UErrorCode & status) const265 void NumberParserImpl::parseGreedyRecursive(StringSegment& segment, ParsedNumber& result,
266                                             UErrorCode& status) const {
267     // Base Case
268     if (segment.length() == 0) {
269         return;
270     }
271 
272     int initialOffset = segment.getOffset();
273     for (int32_t i = 0; i < fNumMatchers; i++) {
274         const NumberParseMatcher* matcher = fMatchers[i];
275         if (!matcher->smokeTest(segment)) {
276             continue;
277         }
278         matcher->match(segment, result, status);
279         if (U_FAILURE(status)) {
280             return;
281         }
282         if (segment.getOffset() != initialOffset) {
283             // In a greedy parse, recurse on only the first match.
284             parseGreedyRecursive(segment, result, status);
285             // The following line resets the offset so that the StringSegment says the same across
286             // the function
287             // call boundary. Since we recurse only once, this line is not strictly necessary.
288             segment.setOffset(initialOffset);
289             return;
290         }
291     }
292 
293     // NOTE: If we get here, the greedy parse completed without consuming the entire string.
294 }
295 
parseLongestRecursive(StringSegment & segment,ParsedNumber & result,UErrorCode & status) const296 void NumberParserImpl::parseLongestRecursive(StringSegment& segment, ParsedNumber& result,
297                                              UErrorCode& status) const {
298     // Base Case
299     if (segment.length() == 0) {
300         return;
301     }
302 
303     // TODO: Give a nice way for the matcher to reset the ParsedNumber?
304     ParsedNumber initial(result);
305     ParsedNumber candidate;
306 
307     int initialOffset = segment.getOffset();
308     for (int32_t i = 0; i < fNumMatchers; i++) {
309         const NumberParseMatcher* matcher = fMatchers[i];
310         if (!matcher->smokeTest(segment)) {
311             continue;
312         }
313 
314         // In a non-greedy parse, we attempt all possible matches and pick the best.
315         for (int32_t charsToConsume = 0; charsToConsume < segment.length();) {
316             charsToConsume += U16_LENGTH(segment.codePointAt(charsToConsume));
317 
318             // Run the matcher on a segment of the current length.
319             candidate = initial;
320             segment.setLength(charsToConsume);
321             bool maybeMore = matcher->match(segment, candidate, status);
322             segment.resetLength();
323             if (U_FAILURE(status)) {
324                 return;
325             }
326 
327             // If the entire segment was consumed, recurse.
328             if (segment.getOffset() - initialOffset == charsToConsume) {
329                 parseLongestRecursive(segment, candidate, status);
330                 if (U_FAILURE(status)) {
331                     return;
332                 }
333                 if (candidate.isBetterThan(result)) {
334                     result = candidate;
335                 }
336             }
337 
338             // Since the segment can be re-used, reset the offset.
339             // This does not have an effect if the matcher did not consume any chars.
340             segment.setOffset(initialOffset);
341 
342             // Unless the matcher wants to see the next char, continue to the next matcher.
343             if (!maybeMore) {
344                 break;
345             }
346         }
347     }
348 }
349 
toString() const350 UnicodeString NumberParserImpl::toString() const {
351     UnicodeString result(u"<NumberParserImpl matchers:[");
352     for (int32_t i = 0; i < fNumMatchers; i++) {
353         result.append(u' ');
354         result.append(fMatchers[i]->toString());
355     }
356     result.append(u" ]>", -1);
357     return result;
358 }
359 
360 
361 #endif /* #if !UCONFIG_NO_FORMATTING */
362