1 // © 2018 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 #include "unicode/utypes.h" 5 6 #if !UCONFIG_NO_FORMATTING 7 #ifndef __NUMPARSE_AFFIXES_H__ 8 #define __NUMPARSE_AFFIXES_H__ 9 10 #include "cmemory.h" 11 12 #include "numparse_types.h" 13 #include "numparse_symbols.h" 14 #include "numparse_currency.h" 15 #include "number_affixutils.h" 16 #include "number_currencysymbols.h" 17 18 U_NAMESPACE_BEGIN 19 namespace numparse { 20 namespace impl { 21 22 // Forward-declaration of implementation classes for friending 23 class AffixPatternMatcherBuilder; 24 class AffixPatternMatcher; 25 26 using ::icu::number::impl::AffixPatternProvider; 27 using ::icu::number::impl::TokenConsumer; 28 using ::icu::number::impl::CurrencySymbols; 29 30 31 class CodePointMatcher : public NumberParseMatcher, public UMemory { 32 public: 33 CodePointMatcher() = default; // WARNING: Leaves the object in an unusable state 34 35 CodePointMatcher(UChar32 cp); 36 37 bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override; 38 39 bool smokeTest(const StringSegment& segment) const override; 40 41 UnicodeString toString() const override; 42 43 private: 44 UChar32 fCp; 45 }; 46 47 } // namespace impl 48 } // namespace numparse 49 50 // Export a explicit template instantiations of MaybeStackArray, MemoryPool and CompactUnicodeString. 51 // When building DLLs for Windows this is required even though no direct access leaks out of the i18n library. 52 // (See digitlst.h, pluralaffix.h, datefmt.h, and others for similar examples.) 53 // Note: These need to be outside of the numparse::impl namespace, or Clang will generate a compile error. 54 #if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN 55 template class U_I18N_API MaybeStackArray<numparse::impl::CodePointMatcher*, 8>; 56 template class U_I18N_API MaybeStackArray<UChar, 4>; 57 template class U_I18N_API MemoryPool<numparse::impl::CodePointMatcher, 8>; 58 template class U_I18N_API numparse::impl::CompactUnicodeString<4>; 59 #endif 60 61 namespace numparse { 62 namespace impl { 63 64 struct AffixTokenMatcherSetupData { 65 const CurrencySymbols& currencySymbols; 66 const DecimalFormatSymbols& dfs; 67 IgnorablesMatcher& ignorables; 68 const Locale& locale; 69 parse_flags_t parseFlags; 70 }; 71 72 73 /** 74 * Small helper class that generates matchers for individual tokens for AffixPatternMatcher. 75 * 76 * In Java, this is called AffixTokenMatcherFactory (a "factory"). However, in C++, it is called a 77 * "warehouse", because in addition to generating the matchers, it also retains ownership of them. The 78 * warehouse must stay in scope for the whole lifespan of the AffixPatternMatcher that uses matchers from 79 * the warehouse. 80 * 81 * @author sffc 82 */ 83 // Exported as U_I18N_API for tests 84 class U_I18N_API AffixTokenMatcherWarehouse : public UMemory { 85 public: 86 AffixTokenMatcherWarehouse() = default; // WARNING: Leaves the object in an unusable state 87 88 AffixTokenMatcherWarehouse(const AffixTokenMatcherSetupData* setupData); 89 90 NumberParseMatcher& minusSign(); 91 92 NumberParseMatcher& plusSign(); 93 94 NumberParseMatcher& percent(); 95 96 NumberParseMatcher& permille(); 97 98 NumberParseMatcher& currency(UErrorCode& status); 99 100 IgnorablesMatcher& ignorables(); 101 102 NumberParseMatcher* nextCodePointMatcher(UChar32 cp, UErrorCode& status); 103 104 private: 105 // NOTE: The following field may be unsafe to access after construction is done! 106 const AffixTokenMatcherSetupData* fSetupData; 107 108 // NOTE: These are default-constructed and should not be used until initialized. 109 MinusSignMatcher fMinusSign; 110 PlusSignMatcher fPlusSign; 111 PercentMatcher fPercent; 112 PermilleMatcher fPermille; 113 CombinedCurrencyMatcher fCurrency; 114 115 // Use a child class for code point matchers, since it requires non-default operators. 116 MemoryPool<CodePointMatcher> fCodePoints; 117 118 friend class AffixPatternMatcherBuilder; 119 friend class AffixPatternMatcher; 120 }; 121 122 123 class AffixPatternMatcherBuilder : public TokenConsumer, public MutableMatcherCollection { 124 public: 125 AffixPatternMatcherBuilder(const UnicodeString& pattern, AffixTokenMatcherWarehouse& warehouse, 126 IgnorablesMatcher* ignorables); 127 128 void consumeToken(::icu::number::impl::AffixPatternType type, UChar32 cp, UErrorCode& status) override; 129 130 /** NOTE: You can build only once! */ 131 AffixPatternMatcher build(); 132 133 private: 134 ArraySeriesMatcher::MatcherArray fMatchers; 135 int32_t fMatchersLen; 136 int32_t fLastTypeOrCp; 137 138 const UnicodeString& fPattern; 139 AffixTokenMatcherWarehouse& fWarehouse; 140 IgnorablesMatcher* fIgnorables; 141 142 void addMatcher(NumberParseMatcher& matcher) override; 143 }; 144 145 146 // Exported as U_I18N_API for tests 147 class U_I18N_API AffixPatternMatcher : public ArraySeriesMatcher { 148 public: 149 AffixPatternMatcher() = default; // WARNING: Leaves the object in an unusable state 150 151 static AffixPatternMatcher fromAffixPattern(const UnicodeString& affixPattern, 152 AffixTokenMatcherWarehouse& warehouse, 153 parse_flags_t parseFlags, bool* success, 154 UErrorCode& status); 155 156 UnicodeString getPattern() const; 157 158 bool operator==(const AffixPatternMatcher& other) const; 159 160 private: 161 CompactUnicodeString<4> fPattern; 162 163 AffixPatternMatcher(MatcherArray& matchers, int32_t matchersLen, const UnicodeString& pattern); 164 165 friend class AffixPatternMatcherBuilder; 166 }; 167 168 169 class AffixMatcher : public NumberParseMatcher, public UMemory { 170 public: 171 AffixMatcher() = default; // WARNING: Leaves the object in an unusable state 172 173 AffixMatcher(AffixPatternMatcher* prefix, AffixPatternMatcher* suffix, result_flags_t flags); 174 175 bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override; 176 177 void postProcess(ParsedNumber& result) const override; 178 179 bool smokeTest(const StringSegment& segment) const override; 180 181 int8_t compareTo(const AffixMatcher& rhs) const; 182 183 UnicodeString toString() const override; 184 185 private: 186 AffixPatternMatcher* fPrefix; 187 AffixPatternMatcher* fSuffix; 188 result_flags_t fFlags; 189 }; 190 191 192 /** 193 * A C++-only class to retain ownership of the AffixMatchers needed for parsing. 194 */ 195 class AffixMatcherWarehouse { 196 public: 197 AffixMatcherWarehouse() = default; // WARNING: Leaves the object in an unusable state 198 199 AffixMatcherWarehouse(AffixTokenMatcherWarehouse* tokenWarehouse); 200 201 void createAffixMatchers(const AffixPatternProvider& patternInfo, MutableMatcherCollection& output, 202 const IgnorablesMatcher& ignorables, parse_flags_t parseFlags, 203 UErrorCode& status); 204 205 private: 206 // 9 is the limit: positive, zero, and negative, each with prefix, suffix, and prefix+suffix 207 AffixMatcher fAffixMatchers[9]; 208 // 6 is the limit: positive, zero, and negative, a prefix and a suffix for each 209 AffixPatternMatcher fAffixPatternMatchers[6]; 210 // Reference to the warehouse for tokens used by the AffixPatternMatchers 211 AffixTokenMatcherWarehouse* fTokenWarehouse; 212 213 friend class AffixMatcher; 214 215 static bool isInteresting(const AffixPatternProvider& patternInfo, const IgnorablesMatcher& ignorables, 216 parse_flags_t parseFlags, UErrorCode& status); 217 }; 218 219 220 } // namespace impl 221 } // namespace numparse 222 U_NAMESPACE_END 223 224 #endif //__NUMPARSE_AFFIXES_H__ 225 #endif /* #if !UCONFIG_NO_FORMATTING */ 226