1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2007-2016, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ******************************************************************************* 8 * 9 * File PLURRULE_IMPL.H 10 * 11 ******************************************************************************* 12 */ 13 14 15 #ifndef PLURRULE_IMPL 16 #define PLURRULE_IMPL 17 18 // Internal definitions for the PluralRules implementation. 19 20 #include "unicode/utypes.h" 21 22 #if !UCONFIG_NO_FORMATTING 23 24 #include "unicode/format.h" 25 #include "unicode/locid.h" 26 #include "unicode/parseerr.h" 27 #include "unicode/strenum.h" 28 #include "unicode/ures.h" 29 #include "uvector.h" 30 #include "hash.h" 31 #include "uassert.h" 32 33 class PluralRulesTest; 34 35 U_NAMESPACE_BEGIN 36 37 class AndConstraint; 38 class RuleChain; 39 class DigitInterval; 40 class PluralRules; 41 class VisibleDigits; 42 43 namespace pluralimpl { 44 45 // TODO: Remove this and replace with u"" literals. Was for EBCDIC compatibility. 46 47 static const UChar DOT = ((UChar) 0x002E); 48 static const UChar SINGLE_QUOTE = ((UChar) 0x0027); 49 static const UChar SLASH = ((UChar) 0x002F); 50 static const UChar BACKSLASH = ((UChar) 0x005C); 51 static const UChar SPACE = ((UChar) 0x0020); 52 static const UChar EXCLAMATION = ((UChar) 0x0021); 53 static const UChar QUOTATION_MARK = ((UChar) 0x0022); 54 static const UChar NUMBER_SIGN = ((UChar) 0x0023); 55 static const UChar PERCENT_SIGN = ((UChar) 0x0025); 56 static const UChar ASTERISK = ((UChar) 0x002A); 57 static const UChar COMMA = ((UChar) 0x002C); 58 static const UChar HYPHEN = ((UChar) 0x002D); 59 static const UChar U_ZERO = ((UChar) 0x0030); 60 static const UChar U_ONE = ((UChar) 0x0031); 61 static const UChar U_TWO = ((UChar) 0x0032); 62 static const UChar U_THREE = ((UChar) 0x0033); 63 static const UChar U_FOUR = ((UChar) 0x0034); 64 static const UChar U_FIVE = ((UChar) 0x0035); 65 static const UChar U_SIX = ((UChar) 0x0036); 66 static const UChar U_SEVEN = ((UChar) 0x0037); 67 static const UChar U_EIGHT = ((UChar) 0x0038); 68 static const UChar U_NINE = ((UChar) 0x0039); 69 static const UChar COLON = ((UChar) 0x003A); 70 static const UChar SEMI_COLON = ((UChar) 0x003B); 71 static const UChar EQUALS = ((UChar) 0x003D); 72 static const UChar AT = ((UChar) 0x0040); 73 static const UChar CAP_A = ((UChar) 0x0041); 74 static const UChar CAP_B = ((UChar) 0x0042); 75 static const UChar CAP_R = ((UChar) 0x0052); 76 static const UChar CAP_Z = ((UChar) 0x005A); 77 static const UChar LOWLINE = ((UChar) 0x005F); 78 static const UChar LEFTBRACE = ((UChar) 0x007B); 79 static const UChar RIGHTBRACE = ((UChar) 0x007D); 80 static const UChar TILDE = ((UChar) 0x007E); 81 static const UChar ELLIPSIS = ((UChar) 0x2026); 82 83 static const UChar LOW_A = ((UChar) 0x0061); 84 static const UChar LOW_B = ((UChar) 0x0062); 85 static const UChar LOW_C = ((UChar) 0x0063); 86 static const UChar LOW_D = ((UChar) 0x0064); 87 static const UChar LOW_E = ((UChar) 0x0065); 88 static const UChar LOW_F = ((UChar) 0x0066); 89 static const UChar LOW_G = ((UChar) 0x0067); 90 static const UChar LOW_H = ((UChar) 0x0068); 91 static const UChar LOW_I = ((UChar) 0x0069); 92 static const UChar LOW_J = ((UChar) 0x006a); 93 static const UChar LOW_K = ((UChar) 0x006B); 94 static const UChar LOW_L = ((UChar) 0x006C); 95 static const UChar LOW_M = ((UChar) 0x006D); 96 static const UChar LOW_N = ((UChar) 0x006E); 97 static const UChar LOW_O = ((UChar) 0x006F); 98 static const UChar LOW_P = ((UChar) 0x0070); 99 static const UChar LOW_Q = ((UChar) 0x0071); 100 static const UChar LOW_R = ((UChar) 0x0072); 101 static const UChar LOW_S = ((UChar) 0x0073); 102 static const UChar LOW_T = ((UChar) 0x0074); 103 static const UChar LOW_U = ((UChar) 0x0075); 104 static const UChar LOW_V = ((UChar) 0x0076); 105 static const UChar LOW_W = ((UChar) 0x0077); 106 static const UChar LOW_Y = ((UChar) 0x0079); 107 static const UChar LOW_Z = ((UChar) 0x007A); 108 109 } 110 111 112 static const int32_t PLURAL_RANGE_HIGH = 0x7fffffff; 113 114 enum tokenType { 115 none, 116 tNumber, 117 tComma, 118 tSemiColon, 119 tSpace, 120 tColon, 121 tAt, // '@' 122 tDot, 123 tDot2, 124 tEllipsis, 125 tKeyword, 126 tAnd, 127 tOr, 128 tMod, // 'mod' or '%' 129 tNot, // 'not' only. 130 tIn, // 'in' only. 131 tEqual, // '=' only. 132 tNotEqual, // '!=' 133 tTilde, 134 tWithin, 135 tIs, 136 tVariableN, 137 tVariableI, 138 tVariableF, 139 tVariableV, 140 tVariableT, 141 tDecimal, 142 tInteger, 143 tEOF 144 }; 145 146 147 class PluralRuleParser: public UMemory { 148 public: 149 PluralRuleParser(); 150 virtual ~PluralRuleParser(); 151 152 void parse(const UnicodeString &rules, PluralRules *dest, UErrorCode &status); 153 void getNextToken(UErrorCode &status); 154 void checkSyntax(UErrorCode &status); 155 static int32_t getNumberValue(const UnicodeString &token); 156 157 private: 158 static tokenType getKeyType(const UnicodeString& token, tokenType type); 159 static tokenType charType(UChar ch); 160 static UBool isValidKeyword(const UnicodeString& token); 161 162 const UnicodeString *ruleSrc; // The rules string. 163 int32_t ruleIndex; // String index in the input rules, the current parse position. 164 UnicodeString token; // Token most recently scanned. 165 tokenType type; 166 tokenType prevType; 167 168 // The items currently being parsed & built. 169 // Note: currentChain may not be the last RuleChain in the 170 // list because the "other" chain is forced to the end. 171 AndConstraint *curAndConstraint; 172 RuleChain *currentChain; 173 174 int32_t rangeLowIdx; // Indices in the UVector of ranges of the 175 int32_t rangeHiIdx; // low and hi values currently being parsed. 176 177 enum EParseState { 178 kKeyword, 179 kExpr, 180 kValue, 181 kRangeList, 182 kSamples 183 }; 184 }; 185 186 enum PluralOperand { 187 /** 188 * The double value of the entire number. 189 */ 190 PLURAL_OPERAND_N, 191 192 /** 193 * The integer value, with the fraction digits truncated off. 194 */ 195 PLURAL_OPERAND_I, 196 197 /** 198 * All visible fraction digits as an integer, including trailing zeros. 199 */ 200 PLURAL_OPERAND_F, 201 202 /** 203 * Visible fraction digits as an integer, not including trailing zeros. 204 */ 205 PLURAL_OPERAND_T, 206 207 /** 208 * Number of visible fraction digits. 209 */ 210 PLURAL_OPERAND_V, 211 212 /** 213 * Number of visible fraction digits, not including trailing zeros. 214 */ 215 PLURAL_OPERAND_W, 216 217 /** 218 * Suppressed exponent for compact notation (exponent needed in 219 * scientific notation with compact notation to approximate i). 220 */ 221 PLURAL_OPERAND_E, 222 223 /** 224 * THIS OPERAND IS DEPRECATED AND HAS BEEN REMOVED FROM THE SPEC. 225 * 226 * <p>Returns the integer value, but will fail if the number has fraction digits. 227 * That is, using "j" instead of "i" is like implicitly adding "v is 0". 228 * 229 * <p>For example, "j is 3" is equivalent to "i is 3 and v is 0": it matches 230 * "3" but not "3.1" or "3.0". 231 */ 232 PLURAL_OPERAND_J 233 }; 234 235 /** 236 * Converts from the tokenType enum to PluralOperand. Asserts that the given 237 * tokenType can be mapped to a PluralOperand. 238 */ 239 PluralOperand tokenTypeToPluralOperand(tokenType tt); 240 241 /** 242 * An interface to FixedDecimal, allowing for other implementations. 243 * @internal 244 */ 245 class U_I18N_API IFixedDecimal { 246 public: 247 virtual ~IFixedDecimal(); 248 249 /** 250 * Returns the value corresponding to the specified operand (n, i, f, t, v, or w). 251 * If the operand is 'n', returns a double; otherwise, returns an integer. 252 */ 253 virtual double getPluralOperand(PluralOperand operand) const = 0; 254 255 virtual bool isNaN() const = 0; 256 257 virtual bool isInfinite() const = 0; 258 259 /** Whether the number has no nonzero fraction digits. */ 260 virtual bool hasIntegerValue() const = 0; 261 }; 262 263 /** 264 * class FixedDecimal serves to communicate the properties 265 * of a formatted number from a decimal formatter to PluralRules::select() 266 * 267 * see DecimalFormat::getFixedDecimal() 268 * @internal 269 */ 270 class U_I18N_API FixedDecimal: public IFixedDecimal, public UObject { 271 public: 272 /** 273 * @param n the number, e.g. 12.345 274 * @param v The number of visible fraction digits, e.g. 3 275 * @param f The fraction digits, e.g. 345 276 */ 277 FixedDecimal(double n, int32_t v, int64_t f); 278 FixedDecimal(double n, int32_t); 279 explicit FixedDecimal(double n); 280 FixedDecimal(); 281 ~FixedDecimal() U_OVERRIDE; 282 FixedDecimal(const UnicodeString &s, UErrorCode &ec); 283 FixedDecimal(const FixedDecimal &other); 284 285 double getPluralOperand(PluralOperand operand) const U_OVERRIDE; 286 bool isNaN() const U_OVERRIDE; 287 bool isInfinite() const U_OVERRIDE; 288 bool hasIntegerValue() const U_OVERRIDE; 289 290 bool isNanOrInfinity() const; // used in decimfmtimpl.cpp 291 292 int32_t getVisibleFractionDigitCount() const; 293 294 void init(double n, int32_t v, int64_t f); 295 void init(double n); 296 UBool quickInit(double n); // Try a fast-path only initialization, 297 // return TRUE if successful. 298 void adjustForMinFractionDigits(int32_t min); 299 static int64_t getFractionalDigits(double n, int32_t v); 300 static int32_t decimals(double n); 301 302 double source; 303 int32_t visibleDecimalDigitCount; 304 int64_t decimalDigits; 305 int64_t decimalDigitsWithoutTrailingZeros; 306 int64_t intValue; 307 UBool _hasIntegerValue; 308 UBool isNegative; 309 UBool _isNaN; 310 UBool _isInfinite; 311 }; 312 313 class AndConstraint : public UMemory { 314 public: 315 typedef enum RuleOp { 316 NONE, 317 MOD 318 } RuleOp; 319 RuleOp op = AndConstraint::NONE; 320 int32_t opNum = -1; // for mod expressions, the right operand of the mod. 321 int32_t value = -1; // valid for 'is' rules only. 322 UVector32 *rangeList = nullptr; // for 'in', 'within' rules. Null otherwise. 323 UBool negated = FALSE; // TRUE for negated rules. 324 UBool integerOnly = FALSE; // TRUE for 'within' rules. 325 tokenType digitsType = none; // n | i | v | f constraint. 326 AndConstraint *next = nullptr; 327 // Internal error status, used for errors that occur during the copy constructor. 328 UErrorCode fInternalStatus = U_ZERO_ERROR; 329 330 AndConstraint() = default; 331 AndConstraint(const AndConstraint& other); 332 virtual ~AndConstraint(); 333 AndConstraint* add(UErrorCode& status); 334 // UBool isFulfilled(double number); 335 UBool isFulfilled(const IFixedDecimal &number); 336 }; 337 338 class OrConstraint : public UMemory { 339 public: 340 AndConstraint *childNode = nullptr; 341 OrConstraint *next = nullptr; 342 // Internal error status, used for errors that occur during the copy constructor. 343 UErrorCode fInternalStatus = U_ZERO_ERROR; 344 345 OrConstraint() = default; 346 OrConstraint(const OrConstraint& other); 347 virtual ~OrConstraint(); 348 AndConstraint* add(UErrorCode& status); 349 // UBool isFulfilled(double number); 350 UBool isFulfilled(const IFixedDecimal &number); 351 }; 352 353 class RuleChain : public UMemory { 354 public: 355 UnicodeString fKeyword; 356 RuleChain *fNext = nullptr; 357 OrConstraint *ruleHeader = nullptr; 358 UnicodeString fDecimalSamples; // Samples strings from rule source 359 UnicodeString fIntegerSamples; // without @decimal or @integer, otherwise unprocessed. 360 UBool fDecimalSamplesUnbounded = FALSE; 361 UBool fIntegerSamplesUnbounded = FALSE; 362 // Internal error status, used for errors that occur during the copy constructor. 363 UErrorCode fInternalStatus = U_ZERO_ERROR; 364 365 RuleChain() = default; 366 RuleChain(const RuleChain& other); 367 virtual ~RuleChain(); 368 369 UnicodeString select(const IFixedDecimal &number) const; 370 void dumpRules(UnicodeString& result); 371 UErrorCode getKeywords(int32_t maxArraySize, UnicodeString *keywords, int32_t& arraySize) const; 372 UBool isKeyword(const UnicodeString& keyword) const; 373 }; 374 375 class PluralKeywordEnumeration : public StringEnumeration { 376 public: 377 PluralKeywordEnumeration(RuleChain *header, UErrorCode& status); 378 virtual ~PluralKeywordEnumeration(); 379 static UClassID U_EXPORT2 getStaticClassID(void); 380 virtual UClassID getDynamicClassID(void) const; 381 virtual const UnicodeString* snext(UErrorCode& status); 382 virtual void reset(UErrorCode& status); 383 virtual int32_t count(UErrorCode& status) const; 384 private: 385 int32_t pos; 386 UVector fKeywordNames; 387 }; 388 389 390 class U_I18N_API PluralAvailableLocalesEnumeration: public StringEnumeration { 391 public: 392 PluralAvailableLocalesEnumeration(UErrorCode &status); 393 virtual ~PluralAvailableLocalesEnumeration(); 394 virtual const char* next(int32_t *resultLength, UErrorCode& status); 395 virtual void reset(UErrorCode& status); 396 virtual int32_t count(UErrorCode& status) const; 397 private: 398 UErrorCode fOpenStatus; 399 UResourceBundle *fLocales = nullptr; 400 UResourceBundle *fRes = nullptr; 401 }; 402 403 U_NAMESPACE_END 404 405 #endif /* #if !UCONFIG_NO_FORMATTING */ 406 407 #endif // _PLURRULE_IMPL 408 //eof 409