1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2007-2016, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ******************************************************************************* 8 * 9 * File PLURRULE_IMPL.H 10 * 11 ******************************************************************************* 12 */ 13 14 15 #ifndef PLURRULE_IMPL 16 #define PLURRULE_IMPL 17 18 // Internal definitions for the PluralRules implementation. 19 20 #include "unicode/utypes.h" 21 22 #if !UCONFIG_NO_FORMATTING 23 24 #include "unicode/format.h" 25 #include "unicode/locid.h" 26 #include "unicode/parseerr.h" 27 #include "unicode/strenum.h" 28 #include "unicode/ures.h" 29 #include "uvector.h" 30 #include "hash.h" 31 #include "uassert.h" 32 33 class PluralRulesTest; 34 35 U_NAMESPACE_BEGIN 36 37 class AndConstraint; 38 class RuleChain; 39 class DigitInterval; 40 class PluralRules; 41 class VisibleDigits; 42 43 namespace pluralimpl { 44 45 // TODO: Remove this and replace with u"" literals. Was for EBCDIC compatibility. 46 47 static const UChar DOT = ((UChar) 0x002E); 48 static const UChar SINGLE_QUOTE = ((UChar) 0x0027); 49 static const UChar SLASH = ((UChar) 0x002F); 50 static const UChar BACKSLASH = ((UChar) 0x005C); 51 static const UChar SPACE = ((UChar) 0x0020); 52 static const UChar EXCLAMATION = ((UChar) 0x0021); 53 static const UChar QUOTATION_MARK = ((UChar) 0x0022); 54 static const UChar NUMBER_SIGN = ((UChar) 0x0023); 55 static const UChar PERCENT_SIGN = ((UChar) 0x0025); 56 static const UChar ASTERISK = ((UChar) 0x002A); 57 static const UChar COMMA = ((UChar) 0x002C); 58 static const UChar HYPHEN = ((UChar) 0x002D); 59 static const UChar U_ZERO = ((UChar) 0x0030); 60 static const UChar U_ONE = ((UChar) 0x0031); 61 static const UChar U_TWO = ((UChar) 0x0032); 62 static const UChar U_THREE = ((UChar) 0x0033); 63 static const UChar U_FOUR = ((UChar) 0x0034); 64 static const UChar U_FIVE = ((UChar) 0x0035); 65 static const UChar U_SIX = ((UChar) 0x0036); 66 static const UChar U_SEVEN = ((UChar) 0x0037); 67 static const UChar U_EIGHT = ((UChar) 0x0038); 68 static const UChar U_NINE = ((UChar) 0x0039); 69 static const UChar COLON = ((UChar) 0x003A); 70 static const UChar SEMI_COLON = ((UChar) 0x003B); 71 static const UChar EQUALS = ((UChar) 0x003D); 72 static const UChar AT = ((UChar) 0x0040); 73 static const UChar CAP_A = ((UChar) 0x0041); 74 static const UChar CAP_B = ((UChar) 0x0042); 75 static const UChar CAP_R = ((UChar) 0x0052); 76 static const UChar CAP_Z = ((UChar) 0x005A); 77 static const UChar LOWLINE = ((UChar) 0x005F); 78 static const UChar LEFTBRACE = ((UChar) 0x007B); 79 static const UChar RIGHTBRACE = ((UChar) 0x007D); 80 static const UChar TILDE = ((UChar) 0x007E); 81 static const UChar ELLIPSIS = ((UChar) 0x2026); 82 83 static const UChar LOW_A = ((UChar) 0x0061); 84 static const UChar LOW_B = ((UChar) 0x0062); 85 static const UChar LOW_C = ((UChar) 0x0063); 86 static const UChar LOW_D = ((UChar) 0x0064); 87 static const UChar LOW_E = ((UChar) 0x0065); 88 static const UChar LOW_F = ((UChar) 0x0066); 89 static const UChar LOW_G = ((UChar) 0x0067); 90 static const UChar LOW_H = ((UChar) 0x0068); 91 static const UChar LOW_I = ((UChar) 0x0069); 92 static const UChar LOW_J = ((UChar) 0x006a); 93 static const UChar LOW_K = ((UChar) 0x006B); 94 static const UChar LOW_L = ((UChar) 0x006C); 95 static const UChar LOW_M = ((UChar) 0x006D); 96 static const UChar LOW_N = ((UChar) 0x006E); 97 static const UChar LOW_O = ((UChar) 0x006F); 98 static const UChar LOW_P = ((UChar) 0x0070); 99 static const UChar LOW_Q = ((UChar) 0x0071); 100 static const UChar LOW_R = ((UChar) 0x0072); 101 static const UChar LOW_S = ((UChar) 0x0073); 102 static const UChar LOW_T = ((UChar) 0x0074); 103 static const UChar LOW_U = ((UChar) 0x0075); 104 static const UChar LOW_V = ((UChar) 0x0076); 105 static const UChar LOW_W = ((UChar) 0x0077); 106 static const UChar LOW_Y = ((UChar) 0x0079); 107 static const UChar LOW_Z = ((UChar) 0x007A); 108 109 } 110 111 112 static const int32_t PLURAL_RANGE_HIGH = 0x7fffffff; 113 114 enum tokenType { 115 none, 116 tNumber, 117 tComma, 118 tSemiColon, 119 tSpace, 120 tColon, 121 tAt, // '@' 122 tDot, 123 tDot2, 124 tEllipsis, 125 tKeyword, 126 tAnd, 127 tOr, 128 tMod, // 'mod' or '%' 129 tNot, // 'not' only. 130 tIn, // 'in' only. 131 tEqual, // '=' only. 132 tNotEqual, // '!=' 133 tTilde, 134 tWithin, 135 tIs, 136 tVariableN, 137 tVariableI, 138 tVariableF, 139 tVariableV, 140 tVariableT, 141 tDecimal, 142 tInteger, 143 tEOF 144 }; 145 146 147 class PluralRuleParser: public UMemory { 148 public: 149 PluralRuleParser(); 150 virtual ~PluralRuleParser(); 151 152 void parse(const UnicodeString &rules, PluralRules *dest, UErrorCode &status); 153 void getNextToken(UErrorCode &status); 154 void checkSyntax(UErrorCode &status); 155 static int32_t getNumberValue(const UnicodeString &token); 156 157 private: 158 static tokenType getKeyType(const UnicodeString& token, tokenType type); 159 static tokenType charType(UChar ch); 160 static UBool isValidKeyword(const UnicodeString& token); 161 162 const UnicodeString *ruleSrc; // The rules string. 163 int32_t ruleIndex; // String index in the input rules, the current parse position. 164 UnicodeString token; // Token most recently scanned. 165 tokenType type; 166 tokenType prevType; 167 168 // The items currently being parsed & built. 169 // Note: currentChain may not be the last RuleChain in the 170 // list because the "other" chain is forced to the end. 171 AndConstraint *curAndConstraint; 172 RuleChain *currentChain; 173 174 int32_t rangeLowIdx; // Indices in the UVector of ranges of the 175 int32_t rangeHiIdx; // low and hi values currently being parsed. 176 177 enum EParseState { 178 kKeyword, 179 kExpr, 180 kValue, 181 kRangeList, 182 kSamples 183 }; 184 }; 185 186 enum PluralOperand { 187 /** 188 * The double value of the entire number. 189 */ 190 PLURAL_OPERAND_N, 191 192 /** 193 * The integer value, with the fraction digits truncated off. 194 */ 195 PLURAL_OPERAND_I, 196 197 /** 198 * All visible fraction digits as an integer, including trailing zeros. 199 */ 200 PLURAL_OPERAND_F, 201 202 /** 203 * Visible fraction digits as an integer, not including trailing zeros. 204 */ 205 PLURAL_OPERAND_T, 206 207 /** 208 * Number of visible fraction digits. 209 */ 210 PLURAL_OPERAND_V, 211 212 /** 213 * Number of visible fraction digits, not including trailing zeros. 214 */ 215 PLURAL_OPERAND_W, 216 217 /** 218 * THIS OPERAND IS DEPRECATED AND HAS BEEN REMOVED FROM THE SPEC. 219 * 220 * <p>Returns the integer value, but will fail if the number has fraction digits. 221 * That is, using "j" instead of "i" is like implicitly adding "v is 0". 222 * 223 * <p>For example, "j is 3" is equivalent to "i is 3 and v is 0": it matches 224 * "3" but not "3.1" or "3.0". 225 */ 226 PLURAL_OPERAND_J 227 }; 228 229 /** 230 * Converts from the tokenType enum to PluralOperand. Asserts that the given 231 * tokenType can be mapped to a PluralOperand. 232 */ 233 PluralOperand tokenTypeToPluralOperand(tokenType tt); 234 235 /** 236 * An interface to FixedDecimal, allowing for other implementations. 237 * @internal 238 */ 239 class U_I18N_API IFixedDecimal { 240 public: 241 virtual ~IFixedDecimal(); 242 243 /** 244 * Returns the value corresponding to the specified operand (n, i, f, t, v, or w). 245 * If the operand is 'n', returns a double; otherwise, returns an integer. 246 */ 247 virtual double getPluralOperand(PluralOperand operand) const = 0; 248 249 virtual bool isNaN() const = 0; 250 251 virtual bool isInfinite() const = 0; 252 253 /** Whether the number has no nonzero fraction digits. */ 254 virtual bool hasIntegerValue() const = 0; 255 }; 256 257 /** 258 * class FixedDecimal serves to communicate the properties 259 * of a formatted number from a decimal formatter to PluralRules::select() 260 * 261 * see DecimalFormat::getFixedDecimal() 262 * @internal 263 */ 264 class U_I18N_API FixedDecimal: public IFixedDecimal, public UObject { 265 public: 266 /** 267 * @param n the number, e.g. 12.345 268 * @param v The number of visible fraction digits, e.g. 3 269 * @param f The fraction digits, e.g. 345 270 */ 271 FixedDecimal(double n, int32_t v, int64_t f); 272 FixedDecimal(double n, int32_t); 273 explicit FixedDecimal(double n); 274 FixedDecimal(); 275 ~FixedDecimal() U_OVERRIDE; 276 FixedDecimal(const UnicodeString &s, UErrorCode &ec); 277 FixedDecimal(const FixedDecimal &other); 278 279 double getPluralOperand(PluralOperand operand) const U_OVERRIDE; 280 bool isNaN() const U_OVERRIDE; 281 bool isInfinite() const U_OVERRIDE; 282 bool hasIntegerValue() const U_OVERRIDE; 283 284 bool isNanOrInfinity() const; // used in decimfmtimpl.cpp 285 286 int32_t getVisibleFractionDigitCount() const; 287 288 void init(double n, int32_t v, int64_t f); 289 void init(double n); 290 UBool quickInit(double n); // Try a fast-path only initialization, 291 // return TRUE if successful. 292 void adjustForMinFractionDigits(int32_t min); 293 static int64_t getFractionalDigits(double n, int32_t v); 294 static int32_t decimals(double n); 295 296 double source; 297 int32_t visibleDecimalDigitCount; 298 int64_t decimalDigits; 299 int64_t decimalDigitsWithoutTrailingZeros; 300 int64_t intValue; 301 UBool _hasIntegerValue; 302 UBool isNegative; 303 UBool _isNaN; 304 UBool _isInfinite; 305 }; 306 307 class AndConstraint : public UMemory { 308 public: 309 typedef enum RuleOp { 310 NONE, 311 MOD 312 } RuleOp; 313 RuleOp op = AndConstraint::NONE; 314 int32_t opNum = -1; // for mod expressions, the right operand of the mod. 315 int32_t value = -1; // valid for 'is' rules only. 316 UVector32 *rangeList = nullptr; // for 'in', 'within' rules. Null otherwise. 317 UBool negated = FALSE; // TRUE for negated rules. 318 UBool integerOnly = FALSE; // TRUE for 'within' rules. 319 tokenType digitsType = none; // n | i | v | f constraint. 320 AndConstraint *next = nullptr; 321 // Internal error status, used for errors that occur during the copy constructor. 322 UErrorCode fInternalStatus = U_ZERO_ERROR; 323 324 AndConstraint() = default; 325 AndConstraint(const AndConstraint& other); 326 virtual ~AndConstraint(); 327 AndConstraint* add(UErrorCode& status); 328 // UBool isFulfilled(double number); 329 UBool isFulfilled(const IFixedDecimal &number); 330 }; 331 332 class OrConstraint : public UMemory { 333 public: 334 AndConstraint *childNode = nullptr; 335 OrConstraint *next = nullptr; 336 // Internal error status, used for errors that occur during the copy constructor. 337 UErrorCode fInternalStatus = U_ZERO_ERROR; 338 339 OrConstraint() = default; 340 OrConstraint(const OrConstraint& other); 341 virtual ~OrConstraint(); 342 AndConstraint* add(UErrorCode& status); 343 // UBool isFulfilled(double number); 344 UBool isFulfilled(const IFixedDecimal &number); 345 }; 346 347 class RuleChain : public UMemory { 348 public: 349 UnicodeString fKeyword; 350 RuleChain *fNext = nullptr; 351 OrConstraint *ruleHeader = nullptr; 352 UnicodeString fDecimalSamples; // Samples strings from rule source 353 UnicodeString fIntegerSamples; // without @decimal or @integer, otherwise unprocessed. 354 UBool fDecimalSamplesUnbounded = FALSE; 355 UBool fIntegerSamplesUnbounded = FALSE; 356 // Internal error status, used for errors that occur during the copy constructor. 357 UErrorCode fInternalStatus = U_ZERO_ERROR; 358 359 RuleChain() = default; 360 RuleChain(const RuleChain& other); 361 virtual ~RuleChain(); 362 363 UnicodeString select(const IFixedDecimal &number) const; 364 void dumpRules(UnicodeString& result); 365 UErrorCode getKeywords(int32_t maxArraySize, UnicodeString *keywords, int32_t& arraySize) const; 366 UBool isKeyword(const UnicodeString& keyword) const; 367 }; 368 369 class PluralKeywordEnumeration : public StringEnumeration { 370 public: 371 PluralKeywordEnumeration(RuleChain *header, UErrorCode& status); 372 virtual ~PluralKeywordEnumeration(); 373 static UClassID U_EXPORT2 getStaticClassID(void); 374 virtual UClassID getDynamicClassID(void) const; 375 virtual const UnicodeString* snext(UErrorCode& status); 376 virtual void reset(UErrorCode& status); 377 virtual int32_t count(UErrorCode& status) const; 378 private: 379 int32_t pos; 380 UVector fKeywordNames; 381 }; 382 383 384 class U_I18N_API PluralAvailableLocalesEnumeration: public StringEnumeration { 385 public: 386 PluralAvailableLocalesEnumeration(UErrorCode &status); 387 virtual ~PluralAvailableLocalesEnumeration(); 388 virtual const char* next(int32_t *resultLength, UErrorCode& status); 389 virtual void reset(UErrorCode& status); 390 virtual int32_t count(UErrorCode& status) const; 391 private: 392 UErrorCode fOpenStatus; 393 UResourceBundle *fLocales = nullptr; 394 UResourceBundle *fRes = nullptr; 395 }; 396 397 U_NAMESPACE_END 398 399 #endif /* #if !UCONFIG_NO_FORMATTING */ 400 401 #endif // _PLURRULE_IMPL 402 //eof 403