1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2007-2016, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ******************************************************************************* 8 * 9 * File PLURRULE_IMPL.H 10 * 11 ******************************************************************************* 12 */ 13 14 15 #ifndef PLURRULE_IMPL 16 #define PLURRULE_IMPL 17 18 // Internal definitions for the PluralRules implementation. 19 20 #include "unicode/utypes.h" 21 22 #if !UCONFIG_NO_FORMATTING 23 24 #include "unicode/format.h" 25 #include "unicode/locid.h" 26 #include "unicode/parseerr.h" 27 #include "unicode/strenum.h" 28 #include "unicode/ures.h" 29 #include "uvector.h" 30 #include "hash.h" 31 #include "uassert.h" 32 33 /** 34 * A FixedDecimal version of UPLRULES_NO_UNIQUE_VALUE used in PluralRulesTest 35 * for parsing of samples. 36 */ 37 #define UPLRULES_NO_UNIQUE_VALUE_DECIMAL (FixedDecimal((double)-0.00123456777)) 38 39 class PluralRulesTest; 40 41 U_NAMESPACE_BEGIN 42 43 class AndConstraint; 44 class RuleChain; 45 class DigitInterval; 46 class PluralRules; 47 class VisibleDigits; 48 49 namespace pluralimpl { 50 51 // TODO: Remove this and replace with u"" literals. Was for EBCDIC compatibility. 52 53 static const UChar DOT = ((UChar) 0x002E); 54 static const UChar SINGLE_QUOTE = ((UChar) 0x0027); 55 static const UChar SLASH = ((UChar) 0x002F); 56 static const UChar BACKSLASH = ((UChar) 0x005C); 57 static const UChar SPACE = ((UChar) 0x0020); 58 static const UChar EXCLAMATION = ((UChar) 0x0021); 59 static const UChar QUOTATION_MARK = ((UChar) 0x0022); 60 static const UChar NUMBER_SIGN = ((UChar) 0x0023); 61 static const UChar PERCENT_SIGN = ((UChar) 0x0025); 62 static const UChar ASTERISK = ((UChar) 0x002A); 63 static const UChar COMMA = ((UChar) 0x002C); 64 static const UChar HYPHEN = ((UChar) 0x002D); 65 static const UChar U_ZERO = ((UChar) 0x0030); 66 static const UChar U_ONE = ((UChar) 0x0031); 67 static const UChar U_TWO = ((UChar) 0x0032); 68 static const UChar U_THREE = ((UChar) 0x0033); 69 static const UChar U_FOUR = ((UChar) 0x0034); 70 static const UChar U_FIVE = ((UChar) 0x0035); 71 static const UChar U_SIX = ((UChar) 0x0036); 72 static const UChar U_SEVEN = ((UChar) 0x0037); 73 static const UChar U_EIGHT = ((UChar) 0x0038); 74 static const UChar U_NINE = ((UChar) 0x0039); 75 static const UChar COLON = ((UChar) 0x003A); 76 static const UChar SEMI_COLON = ((UChar) 0x003B); 77 static const UChar EQUALS = ((UChar) 0x003D); 78 static const UChar AT = ((UChar) 0x0040); 79 static const UChar CAP_A = ((UChar) 0x0041); 80 static const UChar CAP_B = ((UChar) 0x0042); 81 static const UChar CAP_R = ((UChar) 0x0052); 82 static const UChar CAP_Z = ((UChar) 0x005A); 83 static const UChar LOWLINE = ((UChar) 0x005F); 84 static const UChar LEFTBRACE = ((UChar) 0x007B); 85 static const UChar RIGHTBRACE = ((UChar) 0x007D); 86 static const UChar TILDE = ((UChar) 0x007E); 87 static const UChar ELLIPSIS = ((UChar) 0x2026); 88 89 static const UChar LOW_A = ((UChar) 0x0061); 90 static const UChar LOW_B = ((UChar) 0x0062); 91 static const UChar LOW_C = ((UChar) 0x0063); 92 static const UChar LOW_D = ((UChar) 0x0064); 93 static const UChar LOW_E = ((UChar) 0x0065); 94 static const UChar LOW_F = ((UChar) 0x0066); 95 static const UChar LOW_G = ((UChar) 0x0067); 96 static const UChar LOW_H = ((UChar) 0x0068); 97 static const UChar LOW_I = ((UChar) 0x0069); 98 static const UChar LOW_J = ((UChar) 0x006a); 99 static const UChar LOW_K = ((UChar) 0x006B); 100 static const UChar LOW_L = ((UChar) 0x006C); 101 static const UChar LOW_M = ((UChar) 0x006D); 102 static const UChar LOW_N = ((UChar) 0x006E); 103 static const UChar LOW_O = ((UChar) 0x006F); 104 static const UChar LOW_P = ((UChar) 0x0070); 105 static const UChar LOW_Q = ((UChar) 0x0071); 106 static const UChar LOW_R = ((UChar) 0x0072); 107 static const UChar LOW_S = ((UChar) 0x0073); 108 static const UChar LOW_T = ((UChar) 0x0074); 109 static const UChar LOW_U = ((UChar) 0x0075); 110 static const UChar LOW_V = ((UChar) 0x0076); 111 static const UChar LOW_W = ((UChar) 0x0077); 112 static const UChar LOW_Y = ((UChar) 0x0079); 113 static const UChar LOW_Z = ((UChar) 0x007A); 114 115 } 116 117 118 static const int32_t PLURAL_RANGE_HIGH = 0x7fffffff; 119 120 enum tokenType { 121 none, 122 tNumber, 123 tComma, 124 tSemiColon, 125 tSpace, 126 tColon, 127 tAt, // '@' 128 tDot, 129 tDot2, 130 tEllipsis, 131 tKeyword, 132 tAnd, 133 tOr, 134 tMod, // 'mod' or '%' 135 tNot, // 'not' only. 136 tIn, // 'in' only. 137 tEqual, // '=' only. 138 tNotEqual, // '!=' 139 tTilde, 140 tWithin, 141 tIs, 142 tVariableN, 143 tVariableI, 144 tVariableF, 145 tVariableV, 146 tVariableT, 147 tVariableE, 148 tVariableC, 149 tDecimal, 150 tInteger, 151 tEOF 152 }; 153 154 155 class PluralRuleParser: public UMemory { 156 public: 157 PluralRuleParser(); 158 virtual ~PluralRuleParser(); 159 160 void parse(const UnicodeString &rules, PluralRules *dest, UErrorCode &status); 161 void getNextToken(UErrorCode &status); 162 void checkSyntax(UErrorCode &status); 163 static int32_t getNumberValue(const UnicodeString &token); 164 165 private: 166 static tokenType getKeyType(const UnicodeString& token, tokenType type); 167 static tokenType charType(UChar ch); 168 static UBool isValidKeyword(const UnicodeString& token); 169 170 const UnicodeString *ruleSrc; // The rules string. 171 int32_t ruleIndex; // String index in the input rules, the current parse position. 172 UnicodeString token; // Token most recently scanned. 173 tokenType type; 174 tokenType prevType; 175 176 // The items currently being parsed & built. 177 // Note: currentChain may not be the last RuleChain in the 178 // list because the "other" chain is forced to the end. 179 AndConstraint *curAndConstraint; 180 RuleChain *currentChain; 181 182 int32_t rangeLowIdx; // Indices in the UVector of ranges of the 183 int32_t rangeHiIdx; // low and hi values currently being parsed. 184 185 enum EParseState { 186 kKeyword, 187 kExpr, 188 kValue, 189 kRangeList, 190 kSamples 191 }; 192 }; 193 194 enum PluralOperand { 195 /** 196 * The double value of the entire number. 197 */ 198 PLURAL_OPERAND_N, 199 200 /** 201 * The integer value, with the fraction digits truncated off. 202 */ 203 PLURAL_OPERAND_I, 204 205 /** 206 * All visible fraction digits as an integer, including trailing zeros. 207 */ 208 PLURAL_OPERAND_F, 209 210 /** 211 * Visible fraction digits as an integer, not including trailing zeros. 212 */ 213 PLURAL_OPERAND_T, 214 215 /** 216 * Number of visible fraction digits. 217 */ 218 PLURAL_OPERAND_V, 219 220 /** 221 * Number of visible fraction digits, not including trailing zeros. 222 */ 223 PLURAL_OPERAND_W, 224 225 /** 226 * Suppressed exponent for scientific notation (exponent needed in 227 * scientific notation to approximate i). 228 */ 229 PLURAL_OPERAND_E, 230 231 /** 232 * This operand is currently treated as an alias for `PLURAL_OPERAND_E`. 233 * In the future, it will represent: 234 * 235 * Suppressed exponent for compact notation (exponent needed in 236 * compact notation to approximate i). 237 */ 238 PLURAL_OPERAND_C, 239 240 /** 241 * THIS OPERAND IS DEPRECATED AND HAS BEEN REMOVED FROM THE SPEC. 242 * 243 * <p>Returns the integer value, but will fail if the number has fraction digits. 244 * That is, using "j" instead of "i" is like implicitly adding "v is 0". 245 * 246 * <p>For example, "j is 3" is equivalent to "i is 3 and v is 0": it matches 247 * "3" but not "3.1" or "3.0". 248 */ 249 PLURAL_OPERAND_J 250 }; 251 252 /** 253 * Converts from the tokenType enum to PluralOperand. Asserts that the given 254 * tokenType can be mapped to a PluralOperand. 255 */ 256 PluralOperand tokenTypeToPluralOperand(tokenType tt); 257 258 /** 259 * An interface to FixedDecimal, allowing for other implementations. 260 * @internal 261 */ 262 class U_I18N_API IFixedDecimal { 263 public: 264 virtual ~IFixedDecimal(); 265 266 /** 267 * Returns the value corresponding to the specified operand (n, i, f, t, v, or w). 268 * If the operand is 'n', returns a double; otherwise, returns an integer. 269 */ 270 virtual double getPluralOperand(PluralOperand operand) const = 0; 271 272 virtual bool isNaN() const = 0; 273 274 virtual bool isInfinite() const = 0; 275 276 /** Whether the number has no nonzero fraction digits. */ 277 virtual bool hasIntegerValue() const = 0; 278 }; 279 280 /** 281 * class FixedDecimal serves to communicate the properties 282 * of a formatted number from a decimal formatter to PluralRules::select() 283 * 284 * see DecimalFormat::getFixedDecimal() 285 * @internal 286 */ 287 class U_I18N_API FixedDecimal: public IFixedDecimal, public UObject { 288 public: 289 /** 290 * @param n the number, e.g. 12.345 291 * @param v The number of visible fraction digits, e.g. 3 292 * @param f The fraction digits, e.g. 345 293 * @param e The exponent, e.g. 7 in 1.2e7, for scientific notation 294 * @param c Currently: an alias for param `e`. 295 */ 296 FixedDecimal(double n, int32_t v, int64_t f, int32_t e, int32_t c); 297 FixedDecimal(double n, int32_t v, int64_t f, int32_t e); 298 FixedDecimal(double n, int32_t v, int64_t f); 299 FixedDecimal(double n, int32_t); 300 explicit FixedDecimal(double n); 301 FixedDecimal(); 302 ~FixedDecimal() U_OVERRIDE; 303 FixedDecimal(const UnicodeString &s, UErrorCode &ec); 304 FixedDecimal(const FixedDecimal &other); 305 306 static FixedDecimal createWithExponent(double n, int32_t v, int32_t e); 307 308 double getPluralOperand(PluralOperand operand) const U_OVERRIDE; 309 bool isNaN() const U_OVERRIDE; 310 bool isInfinite() const U_OVERRIDE; 311 bool hasIntegerValue() const U_OVERRIDE; 312 313 bool isNanOrInfinity() const; // used in decimfmtimpl.cpp 314 315 int32_t getVisibleFractionDigitCount() const; 316 317 void init(double n, int32_t v, int64_t f, int32_t e, int32_t c); 318 void init(double n, int32_t v, int64_t f, int32_t e); 319 void init(double n, int32_t v, int64_t f); 320 void init(double n); 321 UBool quickInit(double n); // Try a fast-path only initialization, 322 // return true if successful. 323 void adjustForMinFractionDigits(int32_t min); 324 static int64_t getFractionalDigits(double n, int32_t v); 325 static int32_t decimals(double n); 326 327 FixedDecimal& operator=(const FixedDecimal& other) = default; 328 bool operator==(const FixedDecimal &other) const; 329 330 UnicodeString toString() const; 331 332 double doubleValue() const; 333 int64_t longValue() const; 334 335 double source; 336 int32_t visibleDecimalDigitCount; 337 int64_t decimalDigits; 338 int64_t decimalDigitsWithoutTrailingZeros; 339 int64_t intValue; 340 int32_t exponent; 341 UBool _hasIntegerValue; 342 UBool isNegative; 343 UBool _isNaN; 344 UBool _isInfinite; 345 }; 346 347 class AndConstraint : public UMemory { 348 public: 349 typedef enum RuleOp { 350 NONE, 351 MOD 352 } RuleOp; 353 RuleOp op = AndConstraint::NONE; 354 int32_t opNum = -1; // for mod expressions, the right operand of the mod. 355 int32_t value = -1; // valid for 'is' rules only. 356 UVector32 *rangeList = nullptr; // for 'in', 'within' rules. Null otherwise. 357 UBool negated = false; // true for negated rules. 358 UBool integerOnly = false; // true for 'within' rules. 359 tokenType digitsType = none; // n | i | v | f constraint. 360 AndConstraint *next = nullptr; 361 // Internal error status, used for errors that occur during the copy constructor. 362 UErrorCode fInternalStatus = U_ZERO_ERROR; 363 364 AndConstraint() = default; 365 AndConstraint(const AndConstraint& other); 366 virtual ~AndConstraint(); 367 AndConstraint* add(UErrorCode& status); 368 // UBool isFulfilled(double number); 369 UBool isFulfilled(const IFixedDecimal &number); 370 }; 371 372 class OrConstraint : public UMemory { 373 public: 374 AndConstraint *childNode = nullptr; 375 OrConstraint *next = nullptr; 376 // Internal error status, used for errors that occur during the copy constructor. 377 UErrorCode fInternalStatus = U_ZERO_ERROR; 378 379 OrConstraint() = default; 380 OrConstraint(const OrConstraint& other); 381 virtual ~OrConstraint(); 382 AndConstraint* add(UErrorCode& status); 383 // UBool isFulfilled(double number); 384 UBool isFulfilled(const IFixedDecimal &number); 385 }; 386 387 class RuleChain : public UMemory { 388 public: 389 UnicodeString fKeyword; 390 RuleChain *fNext = nullptr; 391 OrConstraint *ruleHeader = nullptr; 392 UnicodeString fDecimalSamples; // Samples strings from rule source 393 UnicodeString fIntegerSamples; // without @decimal or @integer, otherwise unprocessed. 394 UBool fDecimalSamplesUnbounded = false; 395 UBool fIntegerSamplesUnbounded = false; 396 // Internal error status, used for errors that occur during the copy constructor. 397 UErrorCode fInternalStatus = U_ZERO_ERROR; 398 399 RuleChain() = default; 400 RuleChain(const RuleChain& other); 401 virtual ~RuleChain(); 402 403 UnicodeString select(const IFixedDecimal &number) const; 404 void dumpRules(UnicodeString& result); 405 UErrorCode getKeywords(int32_t maxArraySize, UnicodeString *keywords, int32_t& arraySize) const; 406 UBool isKeyword(const UnicodeString& keyword) const; 407 }; 408 409 class PluralKeywordEnumeration : public StringEnumeration { 410 public: 411 PluralKeywordEnumeration(RuleChain *header, UErrorCode& status); 412 virtual ~PluralKeywordEnumeration(); 413 static UClassID U_EXPORT2 getStaticClassID(void); 414 virtual UClassID getDynamicClassID(void) const override; 415 virtual const UnicodeString* snext(UErrorCode& status) override; 416 virtual void reset(UErrorCode& status) override; 417 virtual int32_t count(UErrorCode& status) const override; 418 private: 419 int32_t pos; 420 UVector fKeywordNames; 421 }; 422 423 424 class U_I18N_API PluralAvailableLocalesEnumeration: public StringEnumeration { 425 public: 426 PluralAvailableLocalesEnumeration(UErrorCode &status); 427 virtual ~PluralAvailableLocalesEnumeration(); 428 virtual const char* next(int32_t *resultLength, UErrorCode& status) override; 429 virtual void reset(UErrorCode& status) override; 430 virtual int32_t count(UErrorCode& status) const override; 431 private: 432 UErrorCode fOpenStatus; 433 UResourceBundle *fLocales = nullptr; 434 UResourceBundle *fRes = nullptr; 435 }; 436 437 U_NAMESPACE_END 438 439 #endif /* #if !UCONFIG_NO_FORMATTING */ 440 441 #endif // _PLURRULE_IMPL 442 //eof 443