1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include "unicode/utypes.h"
5
6 #if !UCONFIG_NO_FORMATTING
7
8 // Allow implicit conversion from char16_t* to UnicodeString for this file:
9 // Helpful in toString methods and elsewhere.
10 #define UNISTR_FROM_STRING_EXPLICIT
11
12 #include "numparse_types.h"
13 #include "numparse_scientific.h"
14 #include "static_unicode_sets.h"
15
16 using namespace icu;
17 using namespace icu::numparse;
18 using namespace icu::numparse::impl;
19
20
21 namespace {
22
minusSignSet()23 inline const UnicodeSet& minusSignSet() {
24 return *unisets::get(unisets::MINUS_SIGN);
25 }
26
plusSignSet()27 inline const UnicodeSet& plusSignSet() {
28 return *unisets::get(unisets::PLUS_SIGN);
29 }
30
31 } // namespace
32
33
ScientificMatcher(const DecimalFormatSymbols & dfs,const Grouper & grouper)34 ScientificMatcher::ScientificMatcher(const DecimalFormatSymbols& dfs, const Grouper& grouper)
35 : fExponentSeparatorString(dfs.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol)),
36 fExponentMatcher(dfs, grouper, PARSE_FLAG_INTEGER_ONLY | PARSE_FLAG_GROUPING_DISABLED) {
37
38 const UnicodeString& minusSign = dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol);
39 if (minusSignSet().contains(minusSign)) {
40 fCustomMinusSign.setToBogus();
41 } else {
42 fCustomMinusSign = minusSign;
43 }
44
45 const UnicodeString& plusSign = dfs.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol);
46 if (plusSignSet().contains(plusSign)) {
47 fCustomPlusSign.setToBogus();
48 } else {
49 fCustomPlusSign = plusSign;
50 }
51 }
52
match(StringSegment & segment,ParsedNumber & result,UErrorCode & status) const53 bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
54 // Only accept scientific notation after the mantissa.
55 if (!result.seenNumber()) {
56 return false;
57 }
58
59 // Only accept one exponent per string.
60 if (0 != (result.flags & FLAG_HAS_EXPONENT)) {
61 return false;
62 }
63
64 // First match the scientific separator, and then match another number after it.
65 // NOTE: This is guarded by the smoke test; no need to check fExponentSeparatorString length again.
66 int overlap1 = segment.getCommonPrefixLength(fExponentSeparatorString);
67 if (overlap1 == fExponentSeparatorString.length()) {
68 // Full exponent separator match.
69
70 // First attempt to get a code point, returning true if we can't get one.
71 if (segment.length() == overlap1) {
72 return true;
73 }
74 segment.adjustOffset(overlap1);
75
76 // Allow a sign, and then try to match digits.
77 int8_t exponentSign = 1;
78 if (segment.startsWith(minusSignSet())) {
79 exponentSign = -1;
80 segment.adjustOffsetByCodePoint();
81 } else if (segment.startsWith(plusSignSet())) {
82 segment.adjustOffsetByCodePoint();
83 } else if (segment.startsWith(fCustomMinusSign)) {
84 // Note: call site is guarded with startsWith, which returns false on empty string
85 int32_t overlap2 = segment.getCommonPrefixLength(fCustomMinusSign);
86 if (overlap2 != fCustomMinusSign.length()) {
87 // Partial custom sign match; un-match the exponent separator.
88 segment.adjustOffset(-overlap1);
89 return true;
90 }
91 exponentSign = -1;
92 segment.adjustOffset(overlap2);
93 } else if (segment.startsWith(fCustomPlusSign)) {
94 // Note: call site is guarded with startsWith, which returns false on empty string
95 int32_t overlap2 = segment.getCommonPrefixLength(fCustomPlusSign);
96 if (overlap2 != fCustomPlusSign.length()) {
97 // Partial custom sign match; un-match the exponent separator.
98 segment.adjustOffset(-overlap1);
99 return true;
100 }
101 segment.adjustOffset(overlap2);
102 }
103
104 // We are supposed to accept E0 after NaN, so we need to make sure result.quantity is available.
105 bool wasBogus = result.quantity.bogus;
106 result.quantity.bogus = false;
107 int digitsOffset = segment.getOffset();
108 bool digitsReturnValue = fExponentMatcher.match(segment, result, exponentSign, status);
109 result.quantity.bogus = wasBogus;
110
111 if (segment.getOffset() != digitsOffset) {
112 // At least one exponent digit was matched.
113 result.flags |= FLAG_HAS_EXPONENT;
114 } else {
115 // No exponent digits were matched; un-match the exponent separator.
116 segment.adjustOffset(-overlap1);
117 }
118 return digitsReturnValue;
119
120 } else if (overlap1 == segment.length()) {
121 // Partial exponent separator match
122 return true;
123 }
124
125 // No match
126 return false;
127 }
128
smokeTest(const StringSegment & segment) const129 bool ScientificMatcher::smokeTest(const StringSegment& segment) const {
130 return segment.startsWith(fExponentSeparatorString);
131 }
132
toString() const133 UnicodeString ScientificMatcher::toString() const {
134 return u"<Scientific>";
135 }
136
137
138 #endif /* #if !UCONFIG_NO_FORMATTING */
139