1 // © 2018 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 #include "unicode/utypes.h" 5 6 #if !UCONFIG_NO_FORMATTING 7 #ifndef __NUMPARSE_AFFIXES_H__ 8 #define __NUMPARSE_AFFIXES_H__ 9 10 #include "cmemory.h" 11 12 #include "numparse_types.h" 13 #include "numparse_symbols.h" 14 #include "numparse_currency.h" 15 #include "number_affixutils.h" 16 #include "number_currencysymbols.h" 17 18 U_NAMESPACE_BEGIN 19 namespace numparse { 20 namespace impl { 21 22 // Forward-declaration of implementation classes for friending 23 class AffixPatternMatcherBuilder; 24 class AffixPatternMatcher; 25 26 using ::icu::number::impl::AffixPatternProvider; 27 using ::icu::number::impl::TokenConsumer; 28 using ::icu::number::impl::CurrencySymbols; 29 30 31 class U_I18N_API CodePointMatcher : public NumberParseMatcher, public UMemory { 32 public: 33 CodePointMatcher() = default; // WARNING: Leaves the object in an unusable state 34 35 CodePointMatcher(UChar32 cp); 36 37 bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override; 38 39 bool smokeTest(const StringSegment& segment) const override; 40 41 UnicodeString toString() const override; 42 43 private: 44 UChar32 fCp; 45 }; 46 47 } // namespace impl 48 } // namespace numparse 49 50 // Export a explicit template instantiations of MaybeStackArray, MemoryPool and CompactUnicodeString. 51 // When building DLLs for Windows this is required even though no direct access leaks out of the i18n library. 52 // (See digitlst.h, pluralaffix.h, datefmt.h, and others for similar examples.) 53 // Note: These need to be outside of the numparse::impl namespace, or Clang will generate a compile error. 54 #if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN 55 template class U_I18N_API MaybeStackArray<numparse::impl::CodePointMatcher*, 8>; 56 template class U_I18N_API MaybeStackArray<UChar, 4>; 57 template class U_I18N_API MemoryPool<numparse::impl::CodePointMatcher, 8>; 58 template class U_I18N_API numparse::impl::CompactUnicodeString<4>; 59 #endif 60 61 namespace numparse { 62 namespace impl { 63 64 struct AffixTokenMatcherSetupData { 65 const CurrencySymbols& currencySymbols; 66 const DecimalFormatSymbols& dfs; 67 IgnorablesMatcher& ignorables; 68 const Locale& locale; 69 parse_flags_t parseFlags; 70 }; 71 72 73 /** 74 * Small helper class that generates matchers for individual tokens for AffixPatternMatcher. 75 * 76 * In Java, this is called AffixTokenMatcherFactory (a "factory"). However, in C++, it is called a 77 * "warehouse", because in addition to generating the matchers, it also retains ownership of them. The 78 * warehouse must stay in scope for the whole lifespan of the AffixPatternMatcher that uses matchers from 79 * the warehouse. 80 * 81 * @author sffc 82 */ 83 // Exported as U_I18N_API for tests 84 class U_I18N_API AffixTokenMatcherWarehouse : public UMemory { 85 public: 86 AffixTokenMatcherWarehouse() = default; // WARNING: Leaves the object in an unusable state 87 88 AffixTokenMatcherWarehouse(const AffixTokenMatcherSetupData* setupData); 89 90 NumberParseMatcher& minusSign(); 91 92 NumberParseMatcher& plusSign(); 93 94 NumberParseMatcher& percent(); 95 96 NumberParseMatcher& permille(); 97 98 NumberParseMatcher& currency(UErrorCode& status); 99 100 IgnorablesMatcher& ignorables(); 101 102 NumberParseMatcher* nextCodePointMatcher(UChar32 cp, UErrorCode& status); 103 104 bool hasEmptyCurrencySymbol() const; 105 106 private: 107 // NOTE: The following field may be unsafe to access after construction is done! 108 const AffixTokenMatcherSetupData* fSetupData; 109 110 // NOTE: These are default-constructed and should not be used until initialized. 111 MinusSignMatcher fMinusSign; 112 PlusSignMatcher fPlusSign; 113 PercentMatcher fPercent; 114 PermilleMatcher fPermille; 115 CombinedCurrencyMatcher fCurrency; 116 117 // Use a child class for code point matchers, since it requires non-default operators. 118 MemoryPool<CodePointMatcher> fCodePoints; 119 120 friend class AffixPatternMatcherBuilder; 121 friend class AffixPatternMatcher; 122 }; 123 124 125 class AffixPatternMatcherBuilder : public TokenConsumer, public MutableMatcherCollection { 126 public: 127 AffixPatternMatcherBuilder(const UnicodeString& pattern, AffixTokenMatcherWarehouse& warehouse, 128 IgnorablesMatcher* ignorables); 129 130 void consumeToken(::icu::number::impl::AffixPatternType type, UChar32 cp, UErrorCode& status) override; 131 132 /** NOTE: You can build only once! */ 133 AffixPatternMatcher build(UErrorCode& status); 134 135 private: 136 ArraySeriesMatcher::MatcherArray fMatchers; 137 int32_t fMatchersLen; 138 int32_t fLastTypeOrCp; 139 140 const UnicodeString& fPattern; 141 AffixTokenMatcherWarehouse& fWarehouse; 142 IgnorablesMatcher* fIgnorables; 143 144 void addMatcher(NumberParseMatcher& matcher) override; 145 }; 146 147 148 // Exported as U_I18N_API for tests 149 class U_I18N_API AffixPatternMatcher : public ArraySeriesMatcher { 150 public: 151 AffixPatternMatcher() = default; // WARNING: Leaves the object in an unusable state 152 153 static AffixPatternMatcher fromAffixPattern(const UnicodeString& affixPattern, 154 AffixTokenMatcherWarehouse& warehouse, 155 parse_flags_t parseFlags, bool* success, 156 UErrorCode& status); 157 158 UnicodeString getPattern() const; 159 160 bool operator==(const AffixPatternMatcher& other) const; 161 162 private: 163 CompactUnicodeString<4> fPattern; 164 165 AffixPatternMatcher(MatcherArray& matchers, int32_t matchersLen, const UnicodeString& pattern, 166 UErrorCode& status); 167 168 friend class AffixPatternMatcherBuilder; 169 }; 170 171 172 class AffixMatcher : public NumberParseMatcher, public UMemory { 173 public: 174 AffixMatcher() = default; // WARNING: Leaves the object in an unusable state 175 176 AffixMatcher(AffixPatternMatcher* prefix, AffixPatternMatcher* suffix, result_flags_t flags); 177 178 bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override; 179 180 void postProcess(ParsedNumber& result) const override; 181 182 bool smokeTest(const StringSegment& segment) const override; 183 184 int8_t compareTo(const AffixMatcher& rhs) const; 185 186 UnicodeString toString() const override; 187 188 private: 189 AffixPatternMatcher* fPrefix; 190 AffixPatternMatcher* fSuffix; 191 result_flags_t fFlags; 192 }; 193 194 195 /** 196 * A C++-only class to retain ownership of the AffixMatchers needed for parsing. 197 */ 198 class AffixMatcherWarehouse { 199 public: 200 AffixMatcherWarehouse() = default; // WARNING: Leaves the object in an unusable state 201 202 AffixMatcherWarehouse(AffixTokenMatcherWarehouse* tokenWarehouse); 203 204 void createAffixMatchers(const AffixPatternProvider& patternInfo, MutableMatcherCollection& output, 205 const IgnorablesMatcher& ignorables, parse_flags_t parseFlags, 206 UErrorCode& status); 207 208 private: 209 // 18 is the limit: positive, zero, and negative, each with prefix, suffix, and prefix+suffix, 210 // and doubled since there may be an empty currency symbol 211 AffixMatcher fAffixMatchers[18]; 212 // 6 is the limit: positive, zero, and negative, a prefix and a suffix for each, 213 // and doubled since there may be an empty currency symbol 214 AffixPatternMatcher fAffixPatternMatchers[12]; 215 // Reference to the warehouse for tokens used by the AffixPatternMatchers 216 AffixTokenMatcherWarehouse* fTokenWarehouse; 217 218 friend class AffixMatcher; 219 220 static bool isInteresting(const AffixPatternProvider& patternInfo, const IgnorablesMatcher& ignorables, 221 parse_flags_t parseFlags, UErrorCode& status); 222 }; 223 224 225 } // namespace impl 226 } // namespace numparse 227 U_NAMESPACE_END 228 229 #endif //__NUMPARSE_AFFIXES_H__ 230 #endif /* #if !UCONFIG_NO_FORMATTING */ 231