1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2007-2016, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ******************************************************************************* 8 * 9 * File PLURRULE_IMPL.H 10 * 11 ******************************************************************************* 12 */ 13 14 15 #ifndef PLURRULE_IMPL 16 #define PLURRULE_IMPL 17 18 // Internal definitions for the PluralRules implementation. 19 20 #include "unicode/utypes.h" 21 22 #if !UCONFIG_NO_FORMATTING 23 24 #include "unicode/format.h" 25 #include "unicode/locid.h" 26 #include "unicode/parseerr.h" 27 #include "unicode/ures.h" 28 #include "uvector.h" 29 #include "hash.h" 30 31 class PluralRulesTest; 32 33 U_NAMESPACE_BEGIN 34 35 class AndConstraint; 36 class RuleChain; 37 class DigitInterval; 38 class PluralRules; 39 class VisibleDigits; 40 41 static const UChar DOT = ((UChar)0x002E); 42 static const UChar SINGLE_QUOTE = ((UChar)0x0027); 43 static const UChar SLASH = ((UChar)0x002F); 44 static const UChar BACKSLASH = ((UChar)0x005C); 45 static const UChar SPACE = ((UChar)0x0020); 46 static const UChar EXCLAMATION = ((UChar)0x0021); 47 static const UChar QUOTATION_MARK = ((UChar)0x0022); 48 static const UChar NUMBER_SIGN = ((UChar)0x0023); 49 static const UChar PERCENT_SIGN = ((UChar)0x0025); 50 static const UChar ASTERISK = ((UChar)0x002A); 51 static const UChar COMMA = ((UChar)0x002C); 52 static const UChar HYPHEN = ((UChar)0x002D); 53 static const UChar U_ZERO = ((UChar)0x0030); 54 static const UChar U_ONE = ((UChar)0x0031); 55 static const UChar U_TWO = ((UChar)0x0032); 56 static const UChar U_THREE = ((UChar)0x0033); 57 static const UChar U_FOUR = ((UChar)0x0034); 58 static const UChar U_FIVE = ((UChar)0x0035); 59 static const UChar U_SIX = ((UChar)0x0036); 60 static const UChar U_SEVEN = ((UChar)0x0037); 61 static const UChar U_EIGHT = ((UChar)0x0038); 62 static const UChar U_NINE = ((UChar)0x0039); 63 static const UChar COLON = ((UChar)0x003A); 64 static const UChar SEMI_COLON = ((UChar)0x003B); 65 static const UChar EQUALS = ((UChar)0x003D); 66 static const UChar AT = ((UChar)0x0040); 67 static const UChar CAP_A = ((UChar)0x0041); 68 static const UChar CAP_B = ((UChar)0x0042); 69 static const UChar CAP_R = ((UChar)0x0052); 70 static const UChar CAP_Z = ((UChar)0x005A); 71 static const UChar LOWLINE = ((UChar)0x005F); 72 static const UChar LEFTBRACE = ((UChar)0x007B); 73 static const UChar RIGHTBRACE = ((UChar)0x007D); 74 static const UChar TILDE = ((UChar)0x007E); 75 static const UChar ELLIPSIS = ((UChar)0x2026); 76 77 static const UChar LOW_A = ((UChar)0x0061); 78 static const UChar LOW_B = ((UChar)0x0062); 79 static const UChar LOW_C = ((UChar)0x0063); 80 static const UChar LOW_D = ((UChar)0x0064); 81 static const UChar LOW_E = ((UChar)0x0065); 82 static const UChar LOW_F = ((UChar)0x0066); 83 static const UChar LOW_G = ((UChar)0x0067); 84 static const UChar LOW_H = ((UChar)0x0068); 85 static const UChar LOW_I = ((UChar)0x0069); 86 static const UChar LOW_J = ((UChar)0x006a); 87 static const UChar LOW_K = ((UChar)0x006B); 88 static const UChar LOW_L = ((UChar)0x006C); 89 static const UChar LOW_M = ((UChar)0x006D); 90 static const UChar LOW_N = ((UChar)0x006E); 91 static const UChar LOW_O = ((UChar)0x006F); 92 static const UChar LOW_P = ((UChar)0x0070); 93 static const UChar LOW_Q = ((UChar)0x0071); 94 static const UChar LOW_R = ((UChar)0x0072); 95 static const UChar LOW_S = ((UChar)0x0073); 96 static const UChar LOW_T = ((UChar)0x0074); 97 static const UChar LOW_U = ((UChar)0x0075); 98 static const UChar LOW_V = ((UChar)0x0076); 99 static const UChar LOW_W = ((UChar)0x0077); 100 static const UChar LOW_Y = ((UChar)0x0079); 101 static const UChar LOW_Z = ((UChar)0x007A); 102 103 104 static const int32_t PLURAL_RANGE_HIGH = 0x7fffffff; 105 106 enum tokenType { 107 none, 108 tNumber, 109 tComma, 110 tSemiColon, 111 tSpace, 112 tColon, 113 tAt, // '@' 114 tDot, 115 tDot2, 116 tEllipsis, 117 tKeyword, 118 tAnd, 119 tOr, 120 tMod, // 'mod' or '%' 121 tNot, // 'not' only. 122 tIn, // 'in' only. 123 tEqual, // '=' only. 124 tNotEqual, // '!=' 125 tTilde, 126 tWithin, 127 tIs, 128 tVariableN, 129 tVariableI, 130 tVariableF, 131 tVariableV, 132 tVariableT, 133 tDecimal, 134 tInteger, 135 tEOF 136 }; 137 138 139 class PluralRuleParser: public UMemory { 140 public: 141 PluralRuleParser(); 142 virtual ~PluralRuleParser(); 143 144 void parse(const UnicodeString &rules, PluralRules *dest, UErrorCode &status); 145 void getNextToken(UErrorCode &status); 146 void checkSyntax(UErrorCode &status); 147 static int32_t getNumberValue(const UnicodeString &token); 148 149 private: 150 static tokenType getKeyType(const UnicodeString& token, tokenType type); 151 static tokenType charType(UChar ch); 152 static UBool isValidKeyword(const UnicodeString& token); 153 154 const UnicodeString *ruleSrc; // The rules string. 155 int32_t ruleIndex; // String index in the input rules, the current parse position. 156 UnicodeString token; // Token most recently scanned. 157 tokenType type; 158 tokenType prevType; 159 160 // The items currently being parsed & built. 161 // Note: currentChain may not be the last RuleChain in the 162 // list because the "other" chain is forced to the end. 163 AndConstraint *curAndConstraint; 164 RuleChain *currentChain; 165 166 int32_t rangeLowIdx; // Indices in the UVector of ranges of the 167 int32_t rangeHiIdx; // low and hi values currently being parsed. 168 169 enum EParseState { 170 kKeyword, 171 kExpr, 172 kValue, 173 kRangeList, 174 kSamples 175 }; 176 177 }; 178 179 /** 180 * class FixedDecimal serves to communicate the properties 181 * of a formatted number from a decimal formatter to PluralRules::select() 182 * 183 * see DecimalFormat::getFixedDecimal() 184 * @internal 185 */ 186 class U_I18N_API FixedDecimal: public UMemory { 187 public: 188 /** 189 * @param n the number, e.g. 12.345 190 * @param v The number of visible fraction digits, e.g. 3 191 * @param f The fraction digits, e.g. 345 192 */ 193 FixedDecimal(double n, int32_t v, int64_t f); 194 FixedDecimal(double n, int32_t); 195 explicit FixedDecimal(double n); 196 explicit FixedDecimal(const VisibleDigits &n); 197 FixedDecimal(); 198 FixedDecimal(const UnicodeString &s, UErrorCode &ec); 199 FixedDecimal(const FixedDecimal &other); 200 201 double get(tokenType operand) const; 202 int32_t getVisibleFractionDigitCount() const; 203 204 void init(double n, int32_t v, int64_t f); 205 void init(double n); 206 UBool quickInit(double n); // Try a fast-path only initialization, 207 // return TRUE if successful. 208 void adjustForMinFractionDigits(int32_t min); 209 static int64_t getFractionalDigits(double n, int32_t v); 210 static int32_t decimals(double n); 211 212 double source; 213 int32_t visibleDecimalDigitCount; 214 int64_t decimalDigits; 215 int64_t decimalDigitsWithoutTrailingZeros; 216 int64_t intValue; 217 UBool hasIntegerValue; 218 UBool isNegative; 219 UBool isNanOrInfinity; 220 }; 221 222 class AndConstraint : public UMemory { 223 public: 224 typedef enum RuleOp { 225 NONE, 226 MOD 227 } RuleOp; 228 RuleOp op; 229 int32_t opNum; // for mod expressions, the right operand of the mod. 230 int32_t value; // valid for 'is' rules only. 231 UVector32 *rangeList; // for 'in', 'within' rules. Null otherwise. 232 UBool negated; // TRUE for negated rules. 233 UBool integerOnly; // TRUE for 'within' rules. 234 tokenType digitsType; // n | i | v | f constraint. 235 AndConstraint *next; 236 237 AndConstraint(); 238 AndConstraint(const AndConstraint& other); 239 virtual ~AndConstraint(); 240 AndConstraint* add(); 241 // UBool isFulfilled(double number); 242 UBool isFulfilled(const FixedDecimal &number); 243 }; 244 245 class OrConstraint : public UMemory { 246 public: 247 AndConstraint *childNode; 248 OrConstraint *next; 249 OrConstraint(); 250 251 OrConstraint(const OrConstraint& other); 252 virtual ~OrConstraint(); 253 AndConstraint* add(); 254 // UBool isFulfilled(double number); 255 UBool isFulfilled(const FixedDecimal &number); 256 }; 257 258 class RuleChain : public UMemory { 259 public: 260 UnicodeString fKeyword; 261 RuleChain *fNext; 262 OrConstraint *ruleHeader; 263 UnicodeString fDecimalSamples; // Samples strings from rule source 264 UnicodeString fIntegerSamples; // without @decimal or @integer, otherwise unprocessed. 265 UBool fDecimalSamplesUnbounded; 266 UBool fIntegerSamplesUnbounded; 267 268 269 RuleChain(); 270 RuleChain(const RuleChain& other); 271 virtual ~RuleChain(); 272 273 UnicodeString select(const FixedDecimal &number) const; 274 void dumpRules(UnicodeString& result); 275 UErrorCode getKeywords(int32_t maxArraySize, UnicodeString *keywords, int32_t& arraySize) const; 276 UBool isKeyword(const UnicodeString& keyword) const; 277 }; 278 279 class PluralKeywordEnumeration : public StringEnumeration { 280 public: 281 PluralKeywordEnumeration(RuleChain *header, UErrorCode& status); 282 virtual ~PluralKeywordEnumeration(); 283 static UClassID U_EXPORT2 getStaticClassID(void); 284 virtual UClassID getDynamicClassID(void) const; 285 virtual const UnicodeString* snext(UErrorCode& status); 286 virtual void reset(UErrorCode& status); 287 virtual int32_t count(UErrorCode& status) const; 288 private: 289 int32_t pos; 290 UVector fKeywordNames; 291 }; 292 293 294 class U_I18N_API PluralAvailableLocalesEnumeration: public StringEnumeration { 295 public: 296 PluralAvailableLocalesEnumeration(UErrorCode &status); 297 virtual ~PluralAvailableLocalesEnumeration(); 298 virtual const char* next(int32_t *resultLength, UErrorCode& status); 299 virtual void reset(UErrorCode& status); 300 virtual int32_t count(UErrorCode& status) const; 301 private: 302 UErrorCode fOpenStatus; 303 UResourceBundle *fLocales; 304 UResourceBundle *fRes; 305 }; 306 307 U_NAMESPACE_END 308 309 #endif /* #if !UCONFIG_NO_FORMATTING */ 310 311 #endif // _PLURRULE_IMPL 312 //eof 313