• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2017 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 package ohos.global.icu.impl.number.parse;
5 
6 import ohos.global.icu.impl.StaticUnicodeSets;
7 import ohos.global.icu.impl.StaticUnicodeSets.Key;
8 import ohos.global.icu.impl.StringSegment;
9 import ohos.global.icu.impl.number.DecimalQuantity_DualStorageBCD;
10 import ohos.global.icu.impl.number.Grouper;
11 import ohos.global.icu.lang.UCharacter;
12 import ohos.global.icu.text.DecimalFormatSymbols;
13 import ohos.global.icu.text.UnicodeSet;
14 
15 /**
16  * @author sffc
17  * @hide exposed on OHOS
18  *
19  */
20 public class DecimalMatcher implements NumberParseMatcher {
21 
22     /** If true, only accept strings whose grouping sizes match the locale */
23     private final boolean requireGroupingMatch;
24 
25     /** If true, do not accept grouping separators at all */
26     private final boolean groupingDisabled;
27 
28     // Fraction grouping parsing is disabled for now but could be enabled later.
29     // See http://bugs.icu-project.org/trac/ticket/10794
30     // private final boolean fractionGrouping;
31 
32     /** If true, do not accept numbers in the fraction */
33     private final boolean integerOnly;
34 
35     private final int grouping1;
36     private final int grouping2;
37 
38     private final String groupingSeparator;
39     private final String decimalSeparator;
40 
41     // Assumption: these sets all consist of single code points. If this assumption needs to be broken,
42     // fix getLeadCodePoints() as well as matching logic. Be careful of the performance impact.
43     private final UnicodeSet groupingUniSet;
44     private final UnicodeSet decimalUniSet;
45     private final UnicodeSet separatorSet;
46     private final UnicodeSet leadSet;
47     private final String[] digitStrings;
48 
getInstance( DecimalFormatSymbols symbols, Grouper grouper, int parseFlags)49     public static DecimalMatcher getInstance(
50             DecimalFormatSymbols symbols,
51             Grouper grouper,
52             int parseFlags) {
53         // TODO: Cache popular instances?
54         return new DecimalMatcher(symbols, grouper, parseFlags);
55     }
56 
DecimalMatcher(DecimalFormatSymbols symbols, Grouper grouper, int parseFlags)57     private DecimalMatcher(DecimalFormatSymbols symbols, Grouper grouper, int parseFlags) {
58         if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_MONETARY_SEPARATORS)) {
59             groupingSeparator = symbols.getMonetaryGroupingSeparatorString();
60             decimalSeparator = symbols.getMonetaryDecimalSeparatorString();
61         } else {
62             groupingSeparator = symbols.getGroupingSeparatorString();
63             decimalSeparator = symbols.getDecimalSeparatorString();
64         }
65         boolean strictSeparators = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_STRICT_SEPARATORS);
66         Key groupingKey = strictSeparators ? Key.STRICT_ALL_SEPARATORS : Key.ALL_SEPARATORS;
67 
68         // Attempt to find separators in the static cache
69 
70         groupingUniSet = StaticUnicodeSets.get(groupingKey);
71         Key decimalKey = StaticUnicodeSets.chooseFrom(decimalSeparator,
72                 strictSeparators ? Key.STRICT_COMMA : Key.COMMA,
73                 strictSeparators ? Key.STRICT_PERIOD : Key.PERIOD);
74         if (decimalKey != null) {
75             decimalUniSet = StaticUnicodeSets.get(decimalKey);
76         } else if (!decimalSeparator.isEmpty()) {
77             decimalUniSet = new UnicodeSet().add(decimalSeparator.codePointAt(0)).freeze();
78         } else {
79             decimalUniSet = UnicodeSet.EMPTY;
80         }
81 
82         if (groupingKey != null && decimalKey != null) {
83             // Everything is available in the static cache
84             separatorSet = groupingUniSet;
85             leadSet = StaticUnicodeSets.get(strictSeparators ? Key.DIGITS_OR_ALL_SEPARATORS
86                     : Key.DIGITS_OR_STRICT_ALL_SEPARATORS);
87         } else {
88             separatorSet = new UnicodeSet().addAll(groupingUniSet).addAll(decimalUniSet).freeze();
89             leadSet = null;
90         }
91 
92         int cpZero = symbols.getCodePointZero();
93         if (cpZero == -1 || !UCharacter.isDigit(cpZero) || UCharacter.digit(cpZero) != 0) {
94             digitStrings = symbols.getDigitStringsLocal();
95         } else {
96             digitStrings = null;
97         }
98 
99         requireGroupingMatch = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_STRICT_GROUPING_SIZE);
100         groupingDisabled = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_GROUPING_DISABLED);
101         integerOnly = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_INTEGER_ONLY);
102         grouping1 = grouper.getPrimary();
103         grouping2 = grouper.getSecondary();
104 
105         // Fraction grouping parsing is disabled for now but could be enabled later.
106         // See http://bugs.icu-project.org/trac/ticket/10794
107         // fractionGrouping = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_FRACTION_GROUPING_ENABLED);
108     }
109 
110     @Override
match(StringSegment segment, ParsedNumber result)111     public boolean match(StringSegment segment, ParsedNumber result) {
112         return match(segment, result, 0);
113     }
114 
115     /**
116      * @param exponentSign
117      *            -1 means a negative exponent; +1 means a positive exponent; 0 means NO exponent. If -1
118      *            or +1, the number will be saved by scaling the pre-existing DecimalQuantity in the
119      *            ParsedNumber. If 0, a new DecimalQuantity will be created to store the number.
120      */
match(StringSegment segment, ParsedNumber result, int exponentSign)121     public boolean match(StringSegment segment, ParsedNumber result, int exponentSign) {
122         if (result.seenNumber() && exponentSign == 0) {
123             // A number has already been consumed.
124             return false;
125         } else if (exponentSign != 0) {
126             // scientific notation always comes after the number
127             assert result.quantity != null;
128         }
129 
130         // Initial offset before any character consumption.
131         int initialOffset = segment.getOffset();
132 
133         // Return value: whether to ask for more characters.
134         boolean maybeMore = false;
135 
136         // All digits consumed so far.
137         DecimalQuantity_DualStorageBCD digitsConsumed = null;
138 
139         // The total number of digits after the decimal place, used for scaling the result.
140         int digitsAfterDecimalPlace = 0;
141 
142         // The actual grouping and decimal separators used in the string.
143         // If non-null, we have seen that token.
144         String actualGroupingString = null;
145         String actualDecimalString = null;
146 
147         // Information for two groups: the previous group and the current group.
148         //
149         // Each group has three pieces of information:
150         //
151         // Offset: the string position of the beginning of the group, including a leading separator
152         // if there was a leading separator. This is needed in case we need to rewind the parse to
153         // that position.
154         //
155         // Separator type:
156         // 0 => beginning of string
157         // 1 => lead separator is a grouping separator
158         // 2 => lead separator is a decimal separator
159         //
160         // Count: the number of digits in the group. If -1, the group has been validated.
161         int currGroupOffset = 0;
162         int currGroupSepType = 0;
163         int currGroupCount = 0;
164         int prevGroupOffset = -1;
165         int prevGroupSepType = -1;
166         int prevGroupCount = -1;
167 
168         while (segment.length() > 0) {
169             maybeMore = false;
170 
171             // Attempt to match a digit.
172             byte digit = -1;
173 
174             // Try by code point digit value.
175             int cp = segment.getCodePoint();
176             if (UCharacter.isDigit(cp)) {
177                 segment.adjustOffset(Character.charCount(cp));
178                 digit = (byte) UCharacter.digit(cp);
179             }
180 
181             // Try by digit string.
182             if (digit == -1 && digitStrings != null) {
183                 for (int i = 0; i < digitStrings.length; i++) {
184                     String str = digitStrings[i];
185                     if (str.isEmpty()) {
186                         continue;
187                     }
188                     int overlap = segment.getCommonPrefixLength(str);
189                     if (overlap == str.length()) {
190                         segment.adjustOffset(overlap);
191                         digit = (byte) i;
192                         break;
193                     }
194                     maybeMore = maybeMore || (overlap == segment.length());
195                 }
196             }
197 
198             if (digit >= 0) {
199                 // Digit was found.
200                 if (digitsConsumed == null) {
201                     digitsConsumed = new DecimalQuantity_DualStorageBCD();
202                 }
203                 digitsConsumed.appendDigit(digit, 0, true);
204                 currGroupCount++;
205                 if (actualDecimalString != null) {
206                     digitsAfterDecimalPlace++;
207                 }
208                 continue;
209             }
210 
211             // Attempt to match a literal grouping or decimal separator.
212             boolean isDecimal = false;
213             boolean isGrouping = false;
214 
215             // 1) Attempt the decimal separator string literal.
216             // if (we have not seen a decimal separator yet) { ... }
217             if (actualDecimalString == null && !decimalSeparator.isEmpty()) {
218                 int overlap = segment.getCommonPrefixLength(decimalSeparator);
219                 maybeMore = maybeMore || (overlap == segment.length());
220                 if (overlap == decimalSeparator.length()) {
221                     isDecimal = true;
222                     actualDecimalString = decimalSeparator;
223                 }
224             }
225 
226             // 2) Attempt to match the actual grouping string literal.
227             if (actualGroupingString != null) {
228                 int overlap = segment.getCommonPrefixLength(actualGroupingString);
229                 maybeMore = maybeMore || (overlap == segment.length());
230                 if (overlap == actualGroupingString.length()) {
231                     isGrouping = true;
232                 }
233             }
234 
235             // 2.5) Attempt to match a new the grouping separator string literal.
236             // if (we have not seen a grouping or decimal separator yet) { ... }
237             if (!groupingDisabled
238                     && actualGroupingString == null
239                     && actualDecimalString == null
240                     && !groupingSeparator.isEmpty()) {
241                 int overlap = segment.getCommonPrefixLength(groupingSeparator);
242                 maybeMore = maybeMore || (overlap == segment.length());
243                 if (overlap == groupingSeparator.length()) {
244                     isGrouping = true;
245                     actualGroupingString = groupingSeparator;
246                 }
247             }
248 
249             // 3) Attempt to match a decimal separator from the equivalence set.
250             // if (we have not seen a decimal separator yet) { ... }
251             // The !isGrouping is to confirm that we haven't yet matched the current character.
252             if (!isGrouping && actualDecimalString == null) {
253                 if (decimalUniSet.contains(cp)) {
254                     isDecimal = true;
255                     actualDecimalString = UCharacter.toString(cp);
256                 }
257             }
258 
259             // 4) Attempt to match a grouping separator from the equivalence set.
260             // if (we have not seen a grouping or decimal separator yet) { ... }
261             if (!groupingDisabled && actualGroupingString == null && actualDecimalString == null) {
262                 if (groupingUniSet.contains(cp)) {
263                     isGrouping = true;
264                     actualGroupingString = UCharacter.toString(cp);
265                 }
266             }
267 
268             // Leave if we failed to match this as a separator.
269             if (!isDecimal && !isGrouping) {
270                 break;
271             }
272 
273             // Check for conditions when we don't want to accept the separator.
274             if (isDecimal && integerOnly) {
275                 break;
276             } else if (currGroupSepType == 2 && isGrouping) {
277                 // Fraction grouping
278                 break;
279             }
280 
281             // Validate intermediate grouping sizes.
282             boolean prevValidSecondary = validateGroup(prevGroupSepType, prevGroupCount, false);
283             boolean currValidPrimary = validateGroup(currGroupSepType, currGroupCount, true);
284             if (!prevValidSecondary || (isDecimal && !currValidPrimary)) {
285                 // Invalid grouping sizes.
286                 if (isGrouping && currGroupCount == 0) {
287                     // Trailing grouping separators: these are taken care of below
288                     assert currGroupSepType == 1;
289                 } else if (requireGroupingMatch) {
290                     // Strict mode: reject the parse
291                     digitsConsumed = null;
292                 }
293                 break;
294             } else if (requireGroupingMatch && currGroupCount == 0 && currGroupSepType == 1) {
295                 break;
296             } else {
297                 // Grouping sizes OK so far.
298                 prevGroupOffset = currGroupOffset;
299                 prevGroupCount = currGroupCount;
300                 if (isDecimal) {
301                     // Do not validate this group any more.
302                     prevGroupSepType = -1;
303                 } else {
304                     prevGroupSepType = currGroupSepType;
305                 }
306             }
307 
308             // OK to accept the separator.
309             // Special case: don't update currGroup if it is empty. This is to allow
310             // adjacent grouping separators in lenient mode: "1,,234"
311             if (currGroupCount != 0) {
312                 currGroupOffset = segment.getOffset();
313             }
314             currGroupSepType = isGrouping ? 1 : 2;
315             currGroupCount = 0;
316             if (isGrouping) {
317                 segment.adjustOffset(actualGroupingString.length());
318             } else {
319                 segment.adjustOffset(actualDecimalString.length());
320             }
321         }
322 
323         // End of main loop.
324         // Back up if there was a trailing grouping separator.
325         // Shift prev -> curr so we can check it as a final group.
326         if (currGroupSepType != 2 && currGroupCount == 0) {
327             maybeMore = true;
328             segment.setOffset(currGroupOffset);
329             currGroupOffset = prevGroupOffset;
330             currGroupSepType = prevGroupSepType;
331             currGroupCount = prevGroupCount;
332             prevGroupOffset = -1;
333             prevGroupSepType = 0;
334             prevGroupCount = 1;
335         }
336 
337         // Validate final grouping sizes.
338         boolean prevValidSecondary = validateGroup(prevGroupSepType, prevGroupCount, false);
339         boolean currValidPrimary = validateGroup(currGroupSepType, currGroupCount, true);
340         if (!requireGroupingMatch) {
341             // The cases we need to handle here are lone digits.
342             // Examples: "1,1"  "1,1,"  "1,1,1"  "1,1,1,"  ",1" (all parse as 1)
343             // See more examples in numberformattestspecification.txt
344             int digitsToRemove = 0;
345             if (!prevValidSecondary) {
346                 segment.setOffset(prevGroupOffset);
347                 digitsToRemove += prevGroupCount;
348                 digitsToRemove += currGroupCount;
349             } else if (!currValidPrimary && (prevGroupSepType != 0 || prevGroupCount != 0)) {
350                 maybeMore = true;
351                 segment.setOffset(currGroupOffset);
352                 digitsToRemove += currGroupCount;
353             }
354             if (digitsToRemove != 0) {
355                 digitsConsumed.adjustMagnitude(-digitsToRemove);
356                 digitsConsumed.truncate();
357             }
358             prevValidSecondary = true;
359             currValidPrimary = true;
360         }
361         if (currGroupSepType != 2 && (!prevValidSecondary || !currValidPrimary)) {
362             // Grouping failure.
363             digitsConsumed = null;
364         }
365 
366         // Strings that start with a separator but have no digits,
367         // or strings that failed a grouping size check.
368         if (digitsConsumed == null) {
369             maybeMore = maybeMore || (segment.length() == 0);
370             segment.setOffset(initialOffset);
371             return maybeMore;
372         }
373 
374         // We passed all inspections. Start post-processing.
375 
376         // Adjust for fraction part.
377         digitsConsumed.adjustMagnitude(-digitsAfterDecimalPlace);
378 
379         // Set the digits, either normal or exponent.
380         if (exponentSign != 0 && segment.getOffset() != initialOffset) {
381             boolean overflow = false;
382             if (digitsConsumed.fitsInLong()) {
383                 long exponentLong = digitsConsumed.toLong(false);
384                 assert exponentLong >= 0;
385                 if (exponentLong <= Integer.MAX_VALUE) {
386                     int exponentInt = (int) exponentLong;
387                     try {
388                         result.quantity.adjustMagnitude(exponentSign * exponentInt);
389                     } catch (ArithmeticException e) {
390                         overflow = true;
391                     }
392                 } else {
393                     overflow = true;
394                 }
395             } else {
396                 overflow = true;
397             }
398             if (overflow) {
399                 if (exponentSign == -1) {
400                     // Set to zero
401                     result.quantity.clear();
402                 } else {
403                     // Set to infinity
404                     result.quantity = null;
405                     result.flags |= ParsedNumber.FLAG_INFINITY;
406                 }
407             }
408         } else {
409             result.quantity = digitsConsumed;
410         }
411 
412         // Set other information into the result and return.
413         if (actualDecimalString != null) {
414             result.flags |= ParsedNumber.FLAG_HAS_DECIMAL_SEPARATOR;
415         }
416         result.setCharsConsumed(segment);
417         return segment.length() == 0 || maybeMore;
418     }
419 
validateGroup(int sepType, int count, boolean isPrimary)420     private boolean validateGroup(int sepType, int count, boolean isPrimary) {
421         if (requireGroupingMatch) {
422             if (sepType == -1) {
423                 // No such group (prevGroup before first shift).
424                 return true;
425             } else if (sepType == 0) {
426                 // First group.
427                 if (isPrimary) {
428                     // No grouping separators is OK.
429                     return true;
430                 } else {
431                     return count != 0 && count <= grouping2;
432                 }
433             } else if (sepType == 1) {
434                 // Middle group.
435                 if (isPrimary) {
436                     return count == grouping1;
437                 } else {
438                     return count == grouping2;
439                 }
440             } else {
441                 assert sepType == 2;
442                 // After the decimal separator.
443                 return true;
444             }
445         } else {
446             if (sepType == 1) {
447                 // #11230: don't accept middle groups with only 1 digit.
448                 return count != 1;
449             } else {
450                 return true;
451             }
452         }
453     }
454 
455     @Override
smokeTest(StringSegment segment)456     public boolean smokeTest(StringSegment segment) {
457         // The common case uses a static leadSet for efficiency.
458         if (digitStrings == null && leadSet != null) {
459             return segment.startsWith(leadSet);
460         }
461         if (segment.startsWith(separatorSet) || UCharacter.isDigit(segment.getCodePoint())) {
462             return true;
463         }
464         if (digitStrings == null) {
465             return false;
466         }
467         for (int i = 0; i < digitStrings.length; i++) {
468             if (segment.startsWith(digitStrings[i])) {
469                 return true;
470             }
471         }
472         return false;
473     }
474 
475     @Override
postProcess(ParsedNumber result)476     public void postProcess(ParsedNumber result) {
477         // No-op
478     }
479 
480     @Override
toString()481     public String toString() {
482         return "<DecimalMatcher>";
483     }
484 }
485