1 // © 2018 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 #include "unicode/utypes.h" 5 6 #if !UCONFIG_NO_FORMATTING 7 #ifndef __NUMPARSE_AFFIXES_H__ 8 #define __NUMPARSE_AFFIXES_H__ 9 10 #include "numparse_types.h" 11 #include "numparse_symbols.h" 12 #include "numparse_currency.h" 13 #include "number_affixutils.h" 14 #include "number_currencysymbols.h" 15 16 #include <array> 17 18 U_NAMESPACE_BEGIN 19 namespace numparse { 20 namespace impl { 21 22 // Forward-declaration of implementation classes for friending 23 class AffixPatternMatcherBuilder; 24 class AffixPatternMatcher; 25 26 using ::icu::number::impl::AffixPatternProvider; 27 using ::icu::number::impl::TokenConsumer; 28 using ::icu::number::impl::CurrencySymbols; 29 30 31 class CodePointMatcher : public NumberParseMatcher, public UMemory { 32 public: 33 CodePointMatcher() = default; // WARNING: Leaves the object in an unusable state 34 35 CodePointMatcher(UChar32 cp); 36 37 bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override; 38 39 bool smokeTest(const StringSegment& segment) const override; 40 41 UnicodeString toString() const override; 42 43 private: 44 UChar32 fCp; 45 }; 46 47 } // namespace impl 48 } // namespace numparse 49 50 // Export a explicit template instantiations of MaybeStackArray and CompactUnicodeString. 51 // When building DLLs for Windows this is required even though no direct access leaks out of the i18n library. 52 // (See digitlst.h, pluralaffix.h, datefmt.h, and others for similar examples.) 53 // Note: These need to be outside of the impl::numparse namespace, or Clang will generate a compile error. 54 #if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN 55 template class U_I18N_API MaybeStackArray<UChar, 4>; 56 template class U_I18N_API MaybeStackArray<numparse::impl::CodePointMatcher*, 3>; 57 template class U_I18N_API numparse::impl::CompactUnicodeString<4>; 58 #endif 59 60 namespace numparse { 61 namespace impl { 62 63 /** 64 * A warehouse to retain ownership of CodePointMatchers. 65 */ 66 // Exported as U_I18N_API for tests 67 class U_I18N_API CodePointMatcherWarehouse : public UMemory { 68 private: 69 static constexpr int32_t CODE_POINT_STACK_CAPACITY = 5; // Number of entries directly on the stack 70 static constexpr int32_t CODE_POINT_BATCH_SIZE = 10; // Number of entries per heap allocation 71 72 public: 73 CodePointMatcherWarehouse(); 74 75 // A custom destructor is needed to free the memory from MaybeStackArray. 76 // A custom move constructor and move assignment seem to be needed because of the custom destructor. 77 78 ~CodePointMatcherWarehouse(); 79 80 CodePointMatcherWarehouse(CodePointMatcherWarehouse&& src) U_NOEXCEPT; 81 82 CodePointMatcherWarehouse& operator=(CodePointMatcherWarehouse&& src) U_NOEXCEPT; 83 84 NumberParseMatcher& nextCodePointMatcher(UChar32 cp); 85 86 private: 87 std::array<CodePointMatcher, CODE_POINT_STACK_CAPACITY> codePoints; // By value 88 MaybeStackArray<CodePointMatcher*, 3> codePointsOverflow; // On heap in "batches" 89 int32_t codePointCount; // Total for both the ones by value and on heap 90 int32_t codePointNumBatches; // Number of batches in codePointsOverflow 91 }; 92 93 94 struct AffixTokenMatcherSetupData { 95 const CurrencySymbols& currencySymbols; 96 const DecimalFormatSymbols& dfs; 97 IgnorablesMatcher& ignorables; 98 const Locale& locale; 99 parse_flags_t parseFlags; 100 }; 101 102 103 /** 104 * Small helper class that generates matchers for individual tokens for AffixPatternMatcher. 105 * 106 * In Java, this is called AffixTokenMatcherFactory (a "factory"). However, in C++, it is called a 107 * "warehouse", because in addition to generating the matchers, it also retains ownership of them. The 108 * warehouse must stay in scope for the whole lifespan of the AffixPatternMatcher that uses matchers from 109 * the warehouse. 110 * 111 * @author sffc 112 */ 113 // Exported as U_I18N_API for tests 114 class U_I18N_API AffixTokenMatcherWarehouse : public UMemory { 115 public: 116 AffixTokenMatcherWarehouse() = default; // WARNING: Leaves the object in an unusable state 117 118 AffixTokenMatcherWarehouse(const AffixTokenMatcherSetupData* setupData); 119 120 NumberParseMatcher& minusSign(); 121 122 NumberParseMatcher& plusSign(); 123 124 NumberParseMatcher& percent(); 125 126 NumberParseMatcher& permille(); 127 128 NumberParseMatcher& currency(UErrorCode& status); 129 130 IgnorablesMatcher& ignorables(); 131 132 NumberParseMatcher& nextCodePointMatcher(UChar32 cp); 133 134 private: 135 // NOTE: The following field may be unsafe to access after construction is done! 136 const AffixTokenMatcherSetupData* fSetupData; 137 138 // NOTE: These are default-constructed and should not be used until initialized. 139 MinusSignMatcher fMinusSign; 140 PlusSignMatcher fPlusSign; 141 PercentMatcher fPercent; 142 PermilleMatcher fPermille; 143 CombinedCurrencyMatcher fCurrency; 144 145 // Use a child class for code point matchers, since it requires non-default operators. 146 CodePointMatcherWarehouse fCodePoints; 147 148 friend class AffixPatternMatcherBuilder; 149 friend class AffixPatternMatcher; 150 }; 151 152 153 class AffixPatternMatcherBuilder : public TokenConsumer, public MutableMatcherCollection { 154 public: 155 AffixPatternMatcherBuilder(const UnicodeString& pattern, AffixTokenMatcherWarehouse& warehouse, 156 IgnorablesMatcher* ignorables); 157 158 void consumeToken(::icu::number::impl::AffixPatternType type, UChar32 cp, UErrorCode& status) override; 159 160 /** NOTE: You can build only once! */ 161 AffixPatternMatcher build(); 162 163 private: 164 ArraySeriesMatcher::MatcherArray fMatchers; 165 int32_t fMatchersLen; 166 int32_t fLastTypeOrCp; 167 168 const UnicodeString& fPattern; 169 AffixTokenMatcherWarehouse& fWarehouse; 170 IgnorablesMatcher* fIgnorables; 171 172 void addMatcher(NumberParseMatcher& matcher) override; 173 }; 174 175 176 // Exported as U_I18N_API for tests 177 class U_I18N_API AffixPatternMatcher : public ArraySeriesMatcher { 178 public: 179 AffixPatternMatcher() = default; // WARNING: Leaves the object in an unusable state 180 181 static AffixPatternMatcher fromAffixPattern(const UnicodeString& affixPattern, 182 AffixTokenMatcherWarehouse& warehouse, 183 parse_flags_t parseFlags, bool* success, 184 UErrorCode& status); 185 186 UnicodeString getPattern() const; 187 188 bool operator==(const AffixPatternMatcher& other) const; 189 190 private: 191 CompactUnicodeString<4> fPattern; 192 193 AffixPatternMatcher(MatcherArray& matchers, int32_t matchersLen, const UnicodeString& pattern); 194 195 friend class AffixPatternMatcherBuilder; 196 }; 197 198 199 class AffixMatcher : public NumberParseMatcher, public UMemory { 200 public: 201 AffixMatcher() = default; // WARNING: Leaves the object in an unusable state 202 203 AffixMatcher(AffixPatternMatcher* prefix, AffixPatternMatcher* suffix, result_flags_t flags); 204 205 bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override; 206 207 void postProcess(ParsedNumber& result) const override; 208 209 bool smokeTest(const StringSegment& segment) const override; 210 211 int8_t compareTo(const AffixMatcher& rhs) const; 212 213 UnicodeString toString() const override; 214 215 private: 216 AffixPatternMatcher* fPrefix; 217 AffixPatternMatcher* fSuffix; 218 result_flags_t fFlags; 219 }; 220 221 222 /** 223 * A C++-only class to retain ownership of the AffixMatchers needed for parsing. 224 */ 225 class AffixMatcherWarehouse { 226 public: 227 AffixMatcherWarehouse() = default; // WARNING: Leaves the object in an unusable state 228 229 AffixMatcherWarehouse(AffixTokenMatcherWarehouse* tokenWarehouse); 230 231 void createAffixMatchers(const AffixPatternProvider& patternInfo, MutableMatcherCollection& output, 232 const IgnorablesMatcher& ignorables, parse_flags_t parseFlags, 233 UErrorCode& status); 234 235 private: 236 // 9 is the limit: positive, zero, and negative, each with prefix, suffix, and prefix+suffix 237 AffixMatcher fAffixMatchers[9]; 238 // 6 is the limit: positive, zero, and negative, a prefix and a suffix for each 239 AffixPatternMatcher fAffixPatternMatchers[6]; 240 // Reference to the warehouse for tokens used by the AffixPatternMatchers 241 AffixTokenMatcherWarehouse* fTokenWarehouse; 242 243 friend class AffixMatcher; 244 245 static bool isInteresting(const AffixPatternProvider& patternInfo, const IgnorablesMatcher& ignorables, 246 parse_flags_t parseFlags, UErrorCode& status); 247 }; 248 249 250 } // namespace impl 251 } // namespace numparse 252 U_NAMESPACE_END 253 254 #endif //__NUMPARSE_AFFIXES_H__ 255 #endif /* #if !UCONFIG_NO_FORMATTING */ 256