1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2017 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 package ohos.global.icu.impl.number.parse; 5 6 import ohos.global.icu.impl.StaticUnicodeSets; 7 import ohos.global.icu.impl.StaticUnicodeSets.Key; 8 import ohos.global.icu.impl.StringSegment; 9 import ohos.global.icu.impl.number.DecimalQuantity_DualStorageBCD; 10 import ohos.global.icu.impl.number.Grouper; 11 import ohos.global.icu.lang.UCharacter; 12 import ohos.global.icu.text.DecimalFormatSymbols; 13 import ohos.global.icu.text.UnicodeSet; 14 15 /** 16 * @author sffc 17 * @hide exposed on OHOS 18 * 19 */ 20 public class DecimalMatcher implements NumberParseMatcher { 21 22 /** If true, only accept strings whose grouping sizes match the locale */ 23 private final boolean requireGroupingMatch; 24 25 /** If true, do not accept grouping separators at all */ 26 private final boolean groupingDisabled; 27 28 // Fraction grouping parsing is disabled for now but could be enabled later. 29 // See http://bugs.icu-project.org/trac/ticket/10794 30 // private final boolean fractionGrouping; 31 32 /** If true, do not accept numbers in the fraction */ 33 private final boolean integerOnly; 34 35 private final int grouping1; 36 private final int grouping2; 37 38 private final String groupingSeparator; 39 private final String decimalSeparator; 40 41 // Assumption: these sets all consist of single code points. If this assumption needs to be broken, 42 // fix getLeadCodePoints() as well as matching logic. Be careful of the performance impact. 43 private final UnicodeSet groupingUniSet; 44 private final UnicodeSet decimalUniSet; 45 private final UnicodeSet separatorSet; 46 private final UnicodeSet leadSet; 47 private final String[] digitStrings; 48 getInstance( DecimalFormatSymbols symbols, Grouper grouper, int parseFlags)49 public static DecimalMatcher getInstance( 50 DecimalFormatSymbols symbols, 51 Grouper grouper, 52 int parseFlags) { 53 // TODO: Cache popular instances? 54 return new DecimalMatcher(symbols, grouper, parseFlags); 55 } 56 DecimalMatcher(DecimalFormatSymbols symbols, Grouper grouper, int parseFlags)57 private DecimalMatcher(DecimalFormatSymbols symbols, Grouper grouper, int parseFlags) { 58 if (0 != (parseFlags & ParsingUtils.PARSE_FLAG_MONETARY_SEPARATORS)) { 59 groupingSeparator = symbols.getMonetaryGroupingSeparatorString(); 60 decimalSeparator = symbols.getMonetaryDecimalSeparatorString(); 61 } else { 62 groupingSeparator = symbols.getGroupingSeparatorString(); 63 decimalSeparator = symbols.getDecimalSeparatorString(); 64 } 65 boolean strictSeparators = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_STRICT_SEPARATORS); 66 Key groupingKey = strictSeparators ? Key.STRICT_ALL_SEPARATORS : Key.ALL_SEPARATORS; 67 68 // Attempt to find separators in the static cache 69 70 groupingUniSet = StaticUnicodeSets.get(groupingKey); 71 Key decimalKey = StaticUnicodeSets.chooseFrom(decimalSeparator, 72 strictSeparators ? Key.STRICT_COMMA : Key.COMMA, 73 strictSeparators ? Key.STRICT_PERIOD : Key.PERIOD); 74 if (decimalKey != null) { 75 decimalUniSet = StaticUnicodeSets.get(decimalKey); 76 } else if (!decimalSeparator.isEmpty()) { 77 decimalUniSet = new UnicodeSet().add(decimalSeparator.codePointAt(0)).freeze(); 78 } else { 79 decimalUniSet = UnicodeSet.EMPTY; 80 } 81 82 if (groupingKey != null && decimalKey != null) { 83 // Everything is available in the static cache 84 separatorSet = groupingUniSet; 85 leadSet = StaticUnicodeSets.get(strictSeparators ? Key.DIGITS_OR_ALL_SEPARATORS 86 : Key.DIGITS_OR_STRICT_ALL_SEPARATORS); 87 } else { 88 separatorSet = new UnicodeSet().addAll(groupingUniSet).addAll(decimalUniSet).freeze(); 89 leadSet = null; 90 } 91 92 int cpZero = symbols.getCodePointZero(); 93 if (cpZero == -1 || !UCharacter.isDigit(cpZero) || UCharacter.digit(cpZero) != 0) { 94 digitStrings = symbols.getDigitStringsLocal(); 95 } else { 96 digitStrings = null; 97 } 98 99 requireGroupingMatch = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_STRICT_GROUPING_SIZE); 100 groupingDisabled = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_GROUPING_DISABLED); 101 integerOnly = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_INTEGER_ONLY); 102 grouping1 = grouper.getPrimary(); 103 grouping2 = grouper.getSecondary(); 104 105 // Fraction grouping parsing is disabled for now but could be enabled later. 106 // See http://bugs.icu-project.org/trac/ticket/10794 107 // fractionGrouping = 0 != (parseFlags & ParsingUtils.PARSE_FLAG_FRACTION_GROUPING_ENABLED); 108 } 109 110 @Override match(StringSegment segment, ParsedNumber result)111 public boolean match(StringSegment segment, ParsedNumber result) { 112 return match(segment, result, 0); 113 } 114 115 /** 116 * @param exponentSign 117 * -1 means a negative exponent; +1 means a positive exponent; 0 means NO exponent. If -1 118 * or +1, the number will be saved by scaling the pre-existing DecimalQuantity in the 119 * ParsedNumber. If 0, a new DecimalQuantity will be created to store the number. 120 */ match(StringSegment segment, ParsedNumber result, int exponentSign)121 public boolean match(StringSegment segment, ParsedNumber result, int exponentSign) { 122 if (result.seenNumber() && exponentSign == 0) { 123 // A number has already been consumed. 124 return false; 125 } else if (exponentSign != 0) { 126 // scientific notation always comes after the number 127 assert result.quantity != null; 128 } 129 130 // Initial offset before any character consumption. 131 int initialOffset = segment.getOffset(); 132 133 // Return value: whether to ask for more characters. 134 boolean maybeMore = false; 135 136 // All digits consumed so far. 137 DecimalQuantity_DualStorageBCD digitsConsumed = null; 138 139 // The total number of digits after the decimal place, used for scaling the result. 140 int digitsAfterDecimalPlace = 0; 141 142 // The actual grouping and decimal separators used in the string. 143 // If non-null, we have seen that token. 144 String actualGroupingString = null; 145 String actualDecimalString = null; 146 147 // Information for two groups: the previous group and the current group. 148 // 149 // Each group has three pieces of information: 150 // 151 // Offset: the string position of the beginning of the group, including a leading separator 152 // if there was a leading separator. This is needed in case we need to rewind the parse to 153 // that position. 154 // 155 // Separator type: 156 // 0 => beginning of string 157 // 1 => lead separator is a grouping separator 158 // 2 => lead separator is a decimal separator 159 // 160 // Count: the number of digits in the group. If -1, the group has been validated. 161 int currGroupOffset = 0; 162 int currGroupSepType = 0; 163 int currGroupCount = 0; 164 int prevGroupOffset = -1; 165 int prevGroupSepType = -1; 166 int prevGroupCount = -1; 167 168 while (segment.length() > 0) { 169 maybeMore = false; 170 171 // Attempt to match a digit. 172 byte digit = -1; 173 174 // Try by code point digit value. 175 int cp = segment.getCodePoint(); 176 if (UCharacter.isDigit(cp)) { 177 segment.adjustOffset(Character.charCount(cp)); 178 digit = (byte) UCharacter.digit(cp); 179 } 180 181 // Try by digit string. 182 if (digit == -1 && digitStrings != null) { 183 for (int i = 0; i < digitStrings.length; i++) { 184 String str = digitStrings[i]; 185 if (str.isEmpty()) { 186 continue; 187 } 188 int overlap = segment.getCommonPrefixLength(str); 189 if (overlap == str.length()) { 190 segment.adjustOffset(overlap); 191 digit = (byte) i; 192 break; 193 } 194 maybeMore = maybeMore || (overlap == segment.length()); 195 } 196 } 197 198 if (digit >= 0) { 199 // Digit was found. 200 if (digitsConsumed == null) { 201 digitsConsumed = new DecimalQuantity_DualStorageBCD(); 202 } 203 digitsConsumed.appendDigit(digit, 0, true); 204 currGroupCount++; 205 if (actualDecimalString != null) { 206 digitsAfterDecimalPlace++; 207 } 208 continue; 209 } 210 211 // Attempt to match a literal grouping or decimal separator. 212 boolean isDecimal = false; 213 boolean isGrouping = false; 214 215 // 1) Attempt the decimal separator string literal. 216 // if (we have not seen a decimal separator yet) { ... } 217 if (actualDecimalString == null && !decimalSeparator.isEmpty()) { 218 int overlap = segment.getCommonPrefixLength(decimalSeparator); 219 maybeMore = maybeMore || (overlap == segment.length()); 220 if (overlap == decimalSeparator.length()) { 221 isDecimal = true; 222 actualDecimalString = decimalSeparator; 223 } 224 } 225 226 // 2) Attempt to match the actual grouping string literal. 227 if (actualGroupingString != null) { 228 int overlap = segment.getCommonPrefixLength(actualGroupingString); 229 maybeMore = maybeMore || (overlap == segment.length()); 230 if (overlap == actualGroupingString.length()) { 231 isGrouping = true; 232 } 233 } 234 235 // 2.5) Attempt to match a new the grouping separator string literal. 236 // if (we have not seen a grouping or decimal separator yet) { ... } 237 if (!groupingDisabled 238 && actualGroupingString == null 239 && actualDecimalString == null 240 && !groupingSeparator.isEmpty()) { 241 int overlap = segment.getCommonPrefixLength(groupingSeparator); 242 maybeMore = maybeMore || (overlap == segment.length()); 243 if (overlap == groupingSeparator.length()) { 244 isGrouping = true; 245 actualGroupingString = groupingSeparator; 246 } 247 } 248 249 // 3) Attempt to match a decimal separator from the equivalence set. 250 // if (we have not seen a decimal separator yet) { ... } 251 // The !isGrouping is to confirm that we haven't yet matched the current character. 252 if (!isGrouping && actualDecimalString == null) { 253 if (decimalUniSet.contains(cp)) { 254 isDecimal = true; 255 actualDecimalString = UCharacter.toString(cp); 256 } 257 } 258 259 // 4) Attempt to match a grouping separator from the equivalence set. 260 // if (we have not seen a grouping or decimal separator yet) { ... } 261 if (!groupingDisabled && actualGroupingString == null && actualDecimalString == null) { 262 if (groupingUniSet.contains(cp)) { 263 isGrouping = true; 264 actualGroupingString = UCharacter.toString(cp); 265 } 266 } 267 268 // Leave if we failed to match this as a separator. 269 if (!isDecimal && !isGrouping) { 270 break; 271 } 272 273 // Check for conditions when we don't want to accept the separator. 274 if (isDecimal && integerOnly) { 275 break; 276 } else if (currGroupSepType == 2 && isGrouping) { 277 // Fraction grouping 278 break; 279 } 280 281 // Validate intermediate grouping sizes. 282 boolean prevValidSecondary = validateGroup(prevGroupSepType, prevGroupCount, false); 283 boolean currValidPrimary = validateGroup(currGroupSepType, currGroupCount, true); 284 if (!prevValidSecondary || (isDecimal && !currValidPrimary)) { 285 // Invalid grouping sizes. 286 if (isGrouping && currGroupCount == 0) { 287 // Trailing grouping separators: these are taken care of below 288 assert currGroupSepType == 1; 289 } else if (requireGroupingMatch) { 290 // Strict mode: reject the parse 291 digitsConsumed = null; 292 } 293 break; 294 } else if (requireGroupingMatch && currGroupCount == 0 && currGroupSepType == 1) { 295 break; 296 } else { 297 // Grouping sizes OK so far. 298 prevGroupOffset = currGroupOffset; 299 prevGroupCount = currGroupCount; 300 if (isDecimal) { 301 // Do not validate this group any more. 302 prevGroupSepType = -1; 303 } else { 304 prevGroupSepType = currGroupSepType; 305 } 306 } 307 308 // OK to accept the separator. 309 // Special case: don't update currGroup if it is empty. This is to allow 310 // adjacent grouping separators in lenient mode: "1,,234" 311 if (currGroupCount != 0) { 312 currGroupOffset = segment.getOffset(); 313 } 314 currGroupSepType = isGrouping ? 1 : 2; 315 currGroupCount = 0; 316 if (isGrouping) { 317 segment.adjustOffset(actualGroupingString.length()); 318 } else { 319 segment.adjustOffset(actualDecimalString.length()); 320 } 321 } 322 323 // End of main loop. 324 // Back up if there was a trailing grouping separator. 325 // Shift prev -> curr so we can check it as a final group. 326 if (currGroupSepType != 2 && currGroupCount == 0) { 327 maybeMore = true; 328 segment.setOffset(currGroupOffset); 329 currGroupOffset = prevGroupOffset; 330 currGroupSepType = prevGroupSepType; 331 currGroupCount = prevGroupCount; 332 prevGroupOffset = -1; 333 prevGroupSepType = 0; 334 prevGroupCount = 1; 335 } 336 337 // Validate final grouping sizes. 338 boolean prevValidSecondary = validateGroup(prevGroupSepType, prevGroupCount, false); 339 boolean currValidPrimary = validateGroup(currGroupSepType, currGroupCount, true); 340 if (!requireGroupingMatch) { 341 // The cases we need to handle here are lone digits. 342 // Examples: "1,1" "1,1," "1,1,1" "1,1,1," ",1" (all parse as 1) 343 // See more examples in numberformattestspecification.txt 344 int digitsToRemove = 0; 345 if (!prevValidSecondary) { 346 segment.setOffset(prevGroupOffset); 347 digitsToRemove += prevGroupCount; 348 digitsToRemove += currGroupCount; 349 } else if (!currValidPrimary && (prevGroupSepType != 0 || prevGroupCount != 0)) { 350 maybeMore = true; 351 segment.setOffset(currGroupOffset); 352 digitsToRemove += currGroupCount; 353 } 354 if (digitsToRemove != 0) { 355 digitsConsumed.adjustMagnitude(-digitsToRemove); 356 digitsConsumed.truncate(); 357 } 358 prevValidSecondary = true; 359 currValidPrimary = true; 360 } 361 if (currGroupSepType != 2 && (!prevValidSecondary || !currValidPrimary)) { 362 // Grouping failure. 363 digitsConsumed = null; 364 } 365 366 // Strings that start with a separator but have no digits, 367 // or strings that failed a grouping size check. 368 if (digitsConsumed == null) { 369 maybeMore = maybeMore || (segment.length() == 0); 370 segment.setOffset(initialOffset); 371 return maybeMore; 372 } 373 374 // We passed all inspections. Start post-processing. 375 376 // Adjust for fraction part. 377 digitsConsumed.adjustMagnitude(-digitsAfterDecimalPlace); 378 379 // Set the digits, either normal or exponent. 380 if (exponentSign != 0 && segment.getOffset() != initialOffset) { 381 boolean overflow = false; 382 if (digitsConsumed.fitsInLong()) { 383 long exponentLong = digitsConsumed.toLong(false); 384 assert exponentLong >= 0; 385 if (exponentLong <= Integer.MAX_VALUE) { 386 int exponentInt = (int) exponentLong; 387 try { 388 result.quantity.adjustMagnitude(exponentSign * exponentInt); 389 } catch (ArithmeticException e) { 390 overflow = true; 391 } 392 } else { 393 overflow = true; 394 } 395 } else { 396 overflow = true; 397 } 398 if (overflow) { 399 if (exponentSign == -1) { 400 // Set to zero 401 result.quantity.clear(); 402 } else { 403 // Set to infinity 404 result.quantity = null; 405 result.flags |= ParsedNumber.FLAG_INFINITY; 406 } 407 } 408 } else { 409 result.quantity = digitsConsumed; 410 } 411 412 // Set other information into the result and return. 413 if (actualDecimalString != null) { 414 result.flags |= ParsedNumber.FLAG_HAS_DECIMAL_SEPARATOR; 415 } 416 result.setCharsConsumed(segment); 417 return segment.length() == 0 || maybeMore; 418 } 419 validateGroup(int sepType, int count, boolean isPrimary)420 private boolean validateGroup(int sepType, int count, boolean isPrimary) { 421 if (requireGroupingMatch) { 422 if (sepType == -1) { 423 // No such group (prevGroup before first shift). 424 return true; 425 } else if (sepType == 0) { 426 // First group. 427 if (isPrimary) { 428 // No grouping separators is OK. 429 return true; 430 } else { 431 return count != 0 && count <= grouping2; 432 } 433 } else if (sepType == 1) { 434 // Middle group. 435 if (isPrimary) { 436 return count == grouping1; 437 } else { 438 return count == grouping2; 439 } 440 } else { 441 assert sepType == 2; 442 // After the decimal separator. 443 return true; 444 } 445 } else { 446 if (sepType == 1) { 447 // #11230: don't accept middle groups with only 1 digit. 448 return count != 1; 449 } else { 450 return true; 451 } 452 } 453 } 454 455 @Override smokeTest(StringSegment segment)456 public boolean smokeTest(StringSegment segment) { 457 // The common case uses a static leadSet for efficiency. 458 if (digitStrings == null && leadSet != null) { 459 return segment.startsWith(leadSet); 460 } 461 if (segment.startsWith(separatorSet) || UCharacter.isDigit(segment.getCodePoint())) { 462 return true; 463 } 464 if (digitStrings == null) { 465 return false; 466 } 467 for (int i = 0; i < digitStrings.length; i++) { 468 if (segment.startsWith(digitStrings[i])) { 469 return true; 470 } 471 } 472 return false; 473 } 474 475 @Override postProcess(ParsedNumber result)476 public void postProcess(ParsedNumber result) { 477 // No-op 478 } 479 480 @Override toString()481 public String toString() { 482 return "<DecimalMatcher>"; 483 } 484 } 485