1 // © 2018 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 #include "unicode/utypes.h" 5 6 #if !UCONFIG_NO_FORMATTING 7 #ifndef __NUMPARSE_AFFIXES_H__ 8 #define __NUMPARSE_AFFIXES_H__ 9 10 #include "cmemory.h" 11 12 #include "numparse_types.h" 13 #include "numparse_symbols.h" 14 #include "numparse_currency.h" 15 #include "number_affixutils.h" 16 #include "number_currencysymbols.h" 17 18 U_NAMESPACE_BEGIN 19 namespace numparse { 20 namespace impl { 21 22 // Forward-declaration of implementation classes for friending 23 class AffixPatternMatcherBuilder; 24 class AffixPatternMatcher; 25 26 using ::icu::number::impl::AffixPatternProvider; 27 using ::icu::number::impl::TokenConsumer; 28 using ::icu::number::impl::CurrencySymbols; 29 30 31 class U_I18N_API CodePointMatcher : public NumberParseMatcher, public UMemory { 32 public: 33 CodePointMatcher() = default; // WARNING: Leaves the object in an unusable state 34 35 CodePointMatcher(UChar32 cp); 36 37 bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override; 38 39 bool smokeTest(const StringSegment& segment) const override; 40 41 UnicodeString toString() const override; 42 43 private: 44 UChar32 fCp; 45 }; 46 47 } // namespace impl 48 } // namespace numparse 49 50 // Export a explicit template instantiations of MaybeStackArray, MemoryPool and CompactUnicodeString. 51 // When building DLLs for Windows this is required even though no direct access leaks out of the i18n library. 52 // (See digitlst.h, pluralaffix.h, datefmt.h, and others for similar examples.) 53 // Note: These need to be outside of the numparse::impl namespace, or Clang will generate a compile error. 54 #if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN 55 template class U_I18N_API MaybeStackArray<numparse::impl::CodePointMatcher*, 8>; 56 template class U_I18N_API MaybeStackArray<UChar, 4>; 57 template class U_I18N_API MemoryPool<numparse::impl::CodePointMatcher, 8>; 58 template class U_I18N_API numparse::impl::CompactUnicodeString<4>; 59 #endif 60 61 namespace numparse { 62 namespace impl { 63 64 struct AffixTokenMatcherSetupData { 65 const CurrencySymbols& currencySymbols; 66 const DecimalFormatSymbols& dfs; 67 IgnorablesMatcher& ignorables; 68 const Locale& locale; 69 parse_flags_t parseFlags; 70 }; 71 72 73 /** 74 * Small helper class that generates matchers for individual tokens for AffixPatternMatcher. 75 * 76 * In Java, this is called AffixTokenMatcherFactory (a "factory"). However, in C++, it is called a 77 * "warehouse", because in addition to generating the matchers, it also retains ownership of them. The 78 * warehouse must stay in scope for the whole lifespan of the AffixPatternMatcher that uses matchers from 79 * the warehouse. 80 * 81 * @author sffc 82 */ 83 // Exported as U_I18N_API for tests 84 class U_I18N_API AffixTokenMatcherWarehouse : public UMemory { 85 public: 86 AffixTokenMatcherWarehouse() = default; // WARNING: Leaves the object in an unusable state 87 88 AffixTokenMatcherWarehouse(const AffixTokenMatcherSetupData* setupData); 89 90 NumberParseMatcher& minusSign(); 91 92 NumberParseMatcher& plusSign(); 93 94 NumberParseMatcher& percent(); 95 96 NumberParseMatcher& permille(); 97 98 NumberParseMatcher& currency(UErrorCode& status); 99 100 IgnorablesMatcher& ignorables(); 101 102 NumberParseMatcher* nextCodePointMatcher(UChar32 cp, UErrorCode& status); 103 104 private: 105 // NOTE: The following field may be unsafe to access after construction is done! 106 const AffixTokenMatcherSetupData* fSetupData; 107 108 // NOTE: These are default-constructed and should not be used until initialized. 109 MinusSignMatcher fMinusSign; 110 PlusSignMatcher fPlusSign; 111 PercentMatcher fPercent; 112 PermilleMatcher fPermille; 113 CombinedCurrencyMatcher fCurrency; 114 115 // Use a child class for code point matchers, since it requires non-default operators. 116 MemoryPool<CodePointMatcher> fCodePoints; 117 118 friend class AffixPatternMatcherBuilder; 119 friend class AffixPatternMatcher; 120 }; 121 122 123 class AffixPatternMatcherBuilder : public TokenConsumer, public MutableMatcherCollection { 124 public: 125 AffixPatternMatcherBuilder(const UnicodeString& pattern, AffixTokenMatcherWarehouse& warehouse, 126 IgnorablesMatcher* ignorables); 127 128 void consumeToken(::icu::number::impl::AffixPatternType type, UChar32 cp, UErrorCode& status) override; 129 130 /** NOTE: You can build only once! */ 131 AffixPatternMatcher build(UErrorCode& status); 132 133 private: 134 ArraySeriesMatcher::MatcherArray fMatchers; 135 int32_t fMatchersLen; 136 int32_t fLastTypeOrCp; 137 138 const UnicodeString& fPattern; 139 AffixTokenMatcherWarehouse& fWarehouse; 140 IgnorablesMatcher* fIgnorables; 141 142 void addMatcher(NumberParseMatcher& matcher) override; 143 }; 144 145 146 // Exported as U_I18N_API for tests 147 class U_I18N_API AffixPatternMatcher : public ArraySeriesMatcher { 148 public: 149 AffixPatternMatcher() = default; // WARNING: Leaves the object in an unusable state 150 151 static AffixPatternMatcher fromAffixPattern(const UnicodeString& affixPattern, 152 AffixTokenMatcherWarehouse& warehouse, 153 parse_flags_t parseFlags, bool* success, 154 UErrorCode& status); 155 156 UnicodeString getPattern() const; 157 158 bool operator==(const AffixPatternMatcher& other) const; 159 160 private: 161 CompactUnicodeString<4> fPattern; 162 163 AffixPatternMatcher(MatcherArray& matchers, int32_t matchersLen, const UnicodeString& pattern, 164 UErrorCode& status); 165 166 friend class AffixPatternMatcherBuilder; 167 }; 168 169 170 class AffixMatcher : public NumberParseMatcher, public UMemory { 171 public: 172 AffixMatcher() = default; // WARNING: Leaves the object in an unusable state 173 174 AffixMatcher(AffixPatternMatcher* prefix, AffixPatternMatcher* suffix, result_flags_t flags); 175 176 bool match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const override; 177 178 void postProcess(ParsedNumber& result) const override; 179 180 bool smokeTest(const StringSegment& segment) const override; 181 182 int8_t compareTo(const AffixMatcher& rhs) const; 183 184 UnicodeString toString() const override; 185 186 private: 187 AffixPatternMatcher* fPrefix; 188 AffixPatternMatcher* fSuffix; 189 result_flags_t fFlags; 190 }; 191 192 193 /** 194 * A C++-only class to retain ownership of the AffixMatchers needed for parsing. 195 */ 196 class AffixMatcherWarehouse { 197 public: 198 AffixMatcherWarehouse() = default; // WARNING: Leaves the object in an unusable state 199 200 AffixMatcherWarehouse(AffixTokenMatcherWarehouse* tokenWarehouse); 201 202 void createAffixMatchers(const AffixPatternProvider& patternInfo, MutableMatcherCollection& output, 203 const IgnorablesMatcher& ignorables, parse_flags_t parseFlags, 204 UErrorCode& status); 205 206 private: 207 // 9 is the limit: positive, zero, and negative, each with prefix, suffix, and prefix+suffix 208 AffixMatcher fAffixMatchers[9]; 209 // 6 is the limit: positive, zero, and negative, a prefix and a suffix for each 210 AffixPatternMatcher fAffixPatternMatchers[6]; 211 // Reference to the warehouse for tokens used by the AffixPatternMatchers 212 AffixTokenMatcherWarehouse* fTokenWarehouse; 213 214 friend class AffixMatcher; 215 216 static bool isInteresting(const AffixPatternProvider& patternInfo, const IgnorablesMatcher& ignorables, 217 parse_flags_t parseFlags, UErrorCode& status); 218 }; 219 220 221 } // namespace impl 222 } // namespace numparse 223 U_NAMESPACE_END 224 225 #endif //__NUMPARSE_AFFIXES_H__ 226 #endif /* #if !UCONFIG_NO_FORMATTING */ 227