1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2007-2016, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ******************************************************************************* 8 * 9 * File PLURRULE_IMPL.H 10 * 11 ******************************************************************************* 12 */ 13 14 15 #ifndef PLURRULE_IMPL 16 #define PLURRULE_IMPL 17 18 // Internal definitions for the PluralRules implementation. 19 20 #include "unicode/utypes.h" 21 22 #if !UCONFIG_NO_FORMATTING 23 24 #include "unicode/format.h" 25 #include "unicode/locid.h" 26 #include "unicode/parseerr.h" 27 #include "unicode/strenum.h" 28 #include "unicode/ures.h" 29 #include "uvector.h" 30 #include "hash.h" 31 #include "uassert.h" 32 33 /** 34 * A FixedDecimal version of UPLRULES_NO_UNIQUE_VALUE used in PluralRulesTest 35 * for parsing of samples. 36 */ 37 #define UPLRULES_NO_UNIQUE_VALUE_DECIMAL (FixedDecimal((double)-0.00123456777)) 38 39 class PluralRulesTest; 40 41 U_NAMESPACE_BEGIN 42 43 class AndConstraint; 44 class RuleChain; 45 class DigitInterval; 46 class PluralRules; 47 class VisibleDigits; 48 49 namespace pluralimpl { 50 51 // TODO: Remove this and replace with u"" literals. Was for EBCDIC compatibility. 52 53 static const UChar DOT = ((UChar) 0x002E); 54 static const UChar SINGLE_QUOTE = ((UChar) 0x0027); 55 static const UChar SLASH = ((UChar) 0x002F); 56 static const UChar BACKSLASH = ((UChar) 0x005C); 57 static const UChar SPACE = ((UChar) 0x0020); 58 static const UChar EXCLAMATION = ((UChar) 0x0021); 59 static const UChar QUOTATION_MARK = ((UChar) 0x0022); 60 static const UChar NUMBER_SIGN = ((UChar) 0x0023); 61 static const UChar PERCENT_SIGN = ((UChar) 0x0025); 62 static const UChar ASTERISK = ((UChar) 0x002A); 63 static const UChar COMMA = ((UChar) 0x002C); 64 static const UChar HYPHEN = ((UChar) 0x002D); 65 static const UChar U_ZERO = ((UChar) 0x0030); 66 static const UChar U_ONE = ((UChar) 0x0031); 67 static const UChar U_TWO = ((UChar) 0x0032); 68 static const UChar U_THREE = ((UChar) 0x0033); 69 static const UChar U_FOUR = ((UChar) 0x0034); 70 static const UChar U_FIVE = ((UChar) 0x0035); 71 static const UChar U_SIX = ((UChar) 0x0036); 72 static const UChar U_SEVEN = ((UChar) 0x0037); 73 static const UChar U_EIGHT = ((UChar) 0x0038); 74 static const UChar U_NINE = ((UChar) 0x0039); 75 static const UChar COLON = ((UChar) 0x003A); 76 static const UChar SEMI_COLON = ((UChar) 0x003B); 77 static const UChar EQUALS = ((UChar) 0x003D); 78 static const UChar AT = ((UChar) 0x0040); 79 static const UChar CAP_A = ((UChar) 0x0041); 80 static const UChar CAP_B = ((UChar) 0x0042); 81 static const UChar CAP_R = ((UChar) 0x0052); 82 static const UChar CAP_Z = ((UChar) 0x005A); 83 static const UChar LOWLINE = ((UChar) 0x005F); 84 static const UChar LEFTBRACE = ((UChar) 0x007B); 85 static const UChar RIGHTBRACE = ((UChar) 0x007D); 86 static const UChar TILDE = ((UChar) 0x007E); 87 static const UChar ELLIPSIS = ((UChar) 0x2026); 88 89 static const UChar LOW_A = ((UChar) 0x0061); 90 static const UChar LOW_B = ((UChar) 0x0062); 91 static const UChar LOW_C = ((UChar) 0x0063); 92 static const UChar LOW_D = ((UChar) 0x0064); 93 static const UChar LOW_E = ((UChar) 0x0065); 94 static const UChar LOW_F = ((UChar) 0x0066); 95 static const UChar LOW_G = ((UChar) 0x0067); 96 static const UChar LOW_H = ((UChar) 0x0068); 97 static const UChar LOW_I = ((UChar) 0x0069); 98 static const UChar LOW_J = ((UChar) 0x006a); 99 static const UChar LOW_K = ((UChar) 0x006B); 100 static const UChar LOW_L = ((UChar) 0x006C); 101 static const UChar LOW_M = ((UChar) 0x006D); 102 static const UChar LOW_N = ((UChar) 0x006E); 103 static const UChar LOW_O = ((UChar) 0x006F); 104 static const UChar LOW_P = ((UChar) 0x0070); 105 static const UChar LOW_Q = ((UChar) 0x0071); 106 static const UChar LOW_R = ((UChar) 0x0072); 107 static const UChar LOW_S = ((UChar) 0x0073); 108 static const UChar LOW_T = ((UChar) 0x0074); 109 static const UChar LOW_U = ((UChar) 0x0075); 110 static const UChar LOW_V = ((UChar) 0x0076); 111 static const UChar LOW_W = ((UChar) 0x0077); 112 static const UChar LOW_Y = ((UChar) 0x0079); 113 static const UChar LOW_Z = ((UChar) 0x007A); 114 115 } 116 117 118 static const int32_t PLURAL_RANGE_HIGH = 0x7fffffff; 119 120 enum tokenType { 121 none, 122 tNumber, 123 tComma, 124 tSemiColon, 125 tSpace, 126 tColon, 127 tAt, // '@' 128 tDot, 129 tDot2, 130 tEllipsis, 131 tKeyword, 132 tAnd, 133 tOr, 134 tMod, // 'mod' or '%' 135 tNot, // 'not' only. 136 tIn, // 'in' only. 137 tEqual, // '=' only. 138 tNotEqual, // '!=' 139 tTilde, 140 tWithin, 141 tIs, 142 tVariableN, 143 tVariableI, 144 tVariableF, 145 tVariableV, 146 tVariableT, 147 tVariableE, 148 tDecimal, 149 tInteger, 150 tEOF 151 }; 152 153 154 class PluralRuleParser: public UMemory { 155 public: 156 PluralRuleParser(); 157 virtual ~PluralRuleParser(); 158 159 void parse(const UnicodeString &rules, PluralRules *dest, UErrorCode &status); 160 void getNextToken(UErrorCode &status); 161 void checkSyntax(UErrorCode &status); 162 static int32_t getNumberValue(const UnicodeString &token); 163 164 private: 165 static tokenType getKeyType(const UnicodeString& token, tokenType type); 166 static tokenType charType(UChar ch); 167 static UBool isValidKeyword(const UnicodeString& token); 168 169 const UnicodeString *ruleSrc; // The rules string. 170 int32_t ruleIndex; // String index in the input rules, the current parse position. 171 UnicodeString token; // Token most recently scanned. 172 tokenType type; 173 tokenType prevType; 174 175 // The items currently being parsed & built. 176 // Note: currentChain may not be the last RuleChain in the 177 // list because the "other" chain is forced to the end. 178 AndConstraint *curAndConstraint; 179 RuleChain *currentChain; 180 181 int32_t rangeLowIdx; // Indices in the UVector of ranges of the 182 int32_t rangeHiIdx; // low and hi values currently being parsed. 183 184 enum EParseState { 185 kKeyword, 186 kExpr, 187 kValue, 188 kRangeList, 189 kSamples 190 }; 191 }; 192 193 enum PluralOperand { 194 /** 195 * The double value of the entire number. 196 */ 197 PLURAL_OPERAND_N, 198 199 /** 200 * The integer value, with the fraction digits truncated off. 201 */ 202 PLURAL_OPERAND_I, 203 204 /** 205 * All visible fraction digits as an integer, including trailing zeros. 206 */ 207 PLURAL_OPERAND_F, 208 209 /** 210 * Visible fraction digits as an integer, not including trailing zeros. 211 */ 212 PLURAL_OPERAND_T, 213 214 /** 215 * Number of visible fraction digits. 216 */ 217 PLURAL_OPERAND_V, 218 219 /** 220 * Number of visible fraction digits, not including trailing zeros. 221 */ 222 PLURAL_OPERAND_W, 223 224 /** 225 * Suppressed exponent for compact notation (exponent needed in 226 * scientific notation with compact notation to approximate i). 227 */ 228 PLURAL_OPERAND_E, 229 230 /** 231 * THIS OPERAND IS DEPRECATED AND HAS BEEN REMOVED FROM THE SPEC. 232 * 233 * <p>Returns the integer value, but will fail if the number has fraction digits. 234 * That is, using "j" instead of "i" is like implicitly adding "v is 0". 235 * 236 * <p>For example, "j is 3" is equivalent to "i is 3 and v is 0": it matches 237 * "3" but not "3.1" or "3.0". 238 */ 239 PLURAL_OPERAND_J 240 }; 241 242 /** 243 * Converts from the tokenType enum to PluralOperand. Asserts that the given 244 * tokenType can be mapped to a PluralOperand. 245 */ 246 PluralOperand tokenTypeToPluralOperand(tokenType tt); 247 248 /** 249 * An interface to FixedDecimal, allowing for other implementations. 250 * @internal 251 */ 252 class U_I18N_API IFixedDecimal { 253 public: 254 virtual ~IFixedDecimal(); 255 256 /** 257 * Returns the value corresponding to the specified operand (n, i, f, t, v, or w). 258 * If the operand is 'n', returns a double; otherwise, returns an integer. 259 */ 260 virtual double getPluralOperand(PluralOperand operand) const = 0; 261 262 virtual bool isNaN() const = 0; 263 264 virtual bool isInfinite() const = 0; 265 266 /** Whether the number has no nonzero fraction digits. */ 267 virtual bool hasIntegerValue() const = 0; 268 }; 269 270 /** 271 * class FixedDecimal serves to communicate the properties 272 * of a formatted number from a decimal formatter to PluralRules::select() 273 * 274 * see DecimalFormat::getFixedDecimal() 275 * @internal 276 */ 277 class U_I18N_API FixedDecimal: public IFixedDecimal, public UObject { 278 public: 279 /** 280 * @param n the number, e.g. 12.345 281 * @param v The number of visible fraction digits, e.g. 3 282 * @param f The fraction digits, e.g. 345 283 * @param e The exponent, e.g. 7 in 1.2e7 (for compact/scientific) 284 */ 285 FixedDecimal(double n, int32_t v, int64_t f, int32_t e); 286 FixedDecimal(double n, int32_t v, int64_t f); 287 FixedDecimal(double n, int32_t); 288 explicit FixedDecimal(double n); 289 FixedDecimal(); 290 ~FixedDecimal() U_OVERRIDE; 291 FixedDecimal(const UnicodeString &s, UErrorCode &ec); 292 FixedDecimal(const FixedDecimal &other); 293 294 static FixedDecimal createWithExponent(double n, int32_t v, int32_t e); 295 296 double getPluralOperand(PluralOperand operand) const U_OVERRIDE; 297 bool isNaN() const U_OVERRIDE; 298 bool isInfinite() const U_OVERRIDE; 299 bool hasIntegerValue() const U_OVERRIDE; 300 301 bool isNanOrInfinity() const; // used in decimfmtimpl.cpp 302 303 int32_t getVisibleFractionDigitCount() const; 304 305 void init(double n, int32_t v, int64_t f, int32_t e); 306 void init(double n, int32_t v, int64_t f); 307 void init(double n); 308 UBool quickInit(double n); // Try a fast-path only initialization, 309 // return true if successful. 310 void adjustForMinFractionDigits(int32_t min); 311 static int64_t getFractionalDigits(double n, int32_t v); 312 static int32_t decimals(double n); 313 314 bool operator==(const FixedDecimal &other) const; 315 316 UnicodeString toString() const; 317 318 double source; 319 int32_t visibleDecimalDigitCount; 320 int64_t decimalDigits; 321 int64_t decimalDigitsWithoutTrailingZeros; 322 int64_t intValue; 323 int32_t exponent; 324 UBool _hasIntegerValue; 325 UBool isNegative; 326 UBool _isNaN; 327 UBool _isInfinite; 328 }; 329 330 class AndConstraint : public UMemory { 331 public: 332 typedef enum RuleOp { 333 NONE, 334 MOD 335 } RuleOp; 336 RuleOp op = AndConstraint::NONE; 337 int32_t opNum = -1; // for mod expressions, the right operand of the mod. 338 int32_t value = -1; // valid for 'is' rules only. 339 UVector32 *rangeList = nullptr; // for 'in', 'within' rules. Null otherwise. 340 UBool negated = false; // true for negated rules. 341 UBool integerOnly = false; // true for 'within' rules. 342 tokenType digitsType = none; // n | i | v | f constraint. 343 AndConstraint *next = nullptr; 344 // Internal error status, used for errors that occur during the copy constructor. 345 UErrorCode fInternalStatus = U_ZERO_ERROR; 346 347 AndConstraint() = default; 348 AndConstraint(const AndConstraint& other); 349 virtual ~AndConstraint(); 350 AndConstraint* add(UErrorCode& status); 351 // UBool isFulfilled(double number); 352 UBool isFulfilled(const IFixedDecimal &number); 353 }; 354 355 class OrConstraint : public UMemory { 356 public: 357 AndConstraint *childNode = nullptr; 358 OrConstraint *next = nullptr; 359 // Internal error status, used for errors that occur during the copy constructor. 360 UErrorCode fInternalStatus = U_ZERO_ERROR; 361 362 OrConstraint() = default; 363 OrConstraint(const OrConstraint& other); 364 virtual ~OrConstraint(); 365 AndConstraint* add(UErrorCode& status); 366 // UBool isFulfilled(double number); 367 UBool isFulfilled(const IFixedDecimal &number); 368 }; 369 370 class RuleChain : public UMemory { 371 public: 372 UnicodeString fKeyword; 373 RuleChain *fNext = nullptr; 374 OrConstraint *ruleHeader = nullptr; 375 UnicodeString fDecimalSamples; // Samples strings from rule source 376 UnicodeString fIntegerSamples; // without @decimal or @integer, otherwise unprocessed. 377 UBool fDecimalSamplesUnbounded = false; 378 UBool fIntegerSamplesUnbounded = false; 379 // Internal error status, used for errors that occur during the copy constructor. 380 UErrorCode fInternalStatus = U_ZERO_ERROR; 381 382 RuleChain() = default; 383 RuleChain(const RuleChain& other); 384 virtual ~RuleChain(); 385 386 UnicodeString select(const IFixedDecimal &number) const; 387 void dumpRules(UnicodeString& result); 388 UErrorCode getKeywords(int32_t maxArraySize, UnicodeString *keywords, int32_t& arraySize) const; 389 UBool isKeyword(const UnicodeString& keyword) const; 390 }; 391 392 class PluralKeywordEnumeration : public StringEnumeration { 393 public: 394 PluralKeywordEnumeration(RuleChain *header, UErrorCode& status); 395 virtual ~PluralKeywordEnumeration(); 396 static UClassID U_EXPORT2 getStaticClassID(void); 397 virtual UClassID getDynamicClassID(void) const; 398 virtual const UnicodeString* snext(UErrorCode& status); 399 virtual void reset(UErrorCode& status); 400 virtual int32_t count(UErrorCode& status) const; 401 private: 402 int32_t pos; 403 UVector fKeywordNames; 404 }; 405 406 407 class U_I18N_API PluralAvailableLocalesEnumeration: public StringEnumeration { 408 public: 409 PluralAvailableLocalesEnumeration(UErrorCode &status); 410 virtual ~PluralAvailableLocalesEnumeration(); 411 virtual const char* next(int32_t *resultLength, UErrorCode& status); 412 virtual void reset(UErrorCode& status); 413 virtual int32_t count(UErrorCode& status) const; 414 private: 415 UErrorCode fOpenStatus; 416 UResourceBundle *fLocales = nullptr; 417 UResourceBundle *fRes = nullptr; 418 }; 419 420 U_NAMESPACE_END 421 422 #endif /* #if !UCONFIG_NO_FORMATTING */ 423 424 #endif // _PLURRULE_IMPL 425 //eof 426