• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  // © 2016 and later: Unicode, Inc. and others.
2  // License & terms of use: http://www.unicode.org/copyright.html
3  /*
4  *******************************************************************************
5  * Copyright (C) 2007-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  *******************************************************************************
8  *
9  * File PLURRULE_IMPL.H
10  *
11  *******************************************************************************
12  */
13  
14  
15  #ifndef PLURRULE_IMPL
16  #define PLURRULE_IMPL
17  
18  // Internal definitions for the PluralRules implementation.
19  
20  #include "unicode/utypes.h"
21  
22  #if !UCONFIG_NO_FORMATTING
23  
24  #include "unicode/format.h"
25  #include "unicode/locid.h"
26  #include "unicode/parseerr.h"
27  #include "unicode/strenum.h"
28  #include "unicode/ures.h"
29  #include "uvector.h"
30  #include "hash.h"
31  #include "uassert.h"
32  
33  class PluralRulesTest;
34  
35  U_NAMESPACE_BEGIN
36  
37  class AndConstraint;
38  class RuleChain;
39  class DigitInterval;
40  class PluralRules;
41  class VisibleDigits;
42  
43  namespace pluralimpl {
44  
45  // TODO: Remove this and replace with u"" literals. Was for EBCDIC compatibility.
46  
47  static const UChar DOT = ((UChar) 0x002E);
48  static const UChar SINGLE_QUOTE = ((UChar) 0x0027);
49  static const UChar SLASH = ((UChar) 0x002F);
50  static const UChar BACKSLASH = ((UChar) 0x005C);
51  static const UChar SPACE = ((UChar) 0x0020);
52  static const UChar EXCLAMATION = ((UChar) 0x0021);
53  static const UChar QUOTATION_MARK = ((UChar) 0x0022);
54  static const UChar NUMBER_SIGN = ((UChar) 0x0023);
55  static const UChar PERCENT_SIGN = ((UChar) 0x0025);
56  static const UChar ASTERISK = ((UChar) 0x002A);
57  static const UChar COMMA = ((UChar) 0x002C);
58  static const UChar HYPHEN = ((UChar) 0x002D);
59  static const UChar U_ZERO = ((UChar) 0x0030);
60  static const UChar U_ONE = ((UChar) 0x0031);
61  static const UChar U_TWO = ((UChar) 0x0032);
62  static const UChar U_THREE = ((UChar) 0x0033);
63  static const UChar U_FOUR = ((UChar) 0x0034);
64  static const UChar U_FIVE = ((UChar) 0x0035);
65  static const UChar U_SIX = ((UChar) 0x0036);
66  static const UChar U_SEVEN = ((UChar) 0x0037);
67  static const UChar U_EIGHT = ((UChar) 0x0038);
68  static const UChar U_NINE = ((UChar) 0x0039);
69  static const UChar COLON = ((UChar) 0x003A);
70  static const UChar SEMI_COLON = ((UChar) 0x003B);
71  static const UChar EQUALS = ((UChar) 0x003D);
72  static const UChar AT = ((UChar) 0x0040);
73  static const UChar CAP_A = ((UChar) 0x0041);
74  static const UChar CAP_B = ((UChar) 0x0042);
75  static const UChar CAP_R = ((UChar) 0x0052);
76  static const UChar CAP_Z = ((UChar) 0x005A);
77  static const UChar LOWLINE = ((UChar) 0x005F);
78  static const UChar LEFTBRACE = ((UChar) 0x007B);
79  static const UChar RIGHTBRACE = ((UChar) 0x007D);
80  static const UChar TILDE = ((UChar) 0x007E);
81  static const UChar ELLIPSIS = ((UChar) 0x2026);
82  
83  static const UChar LOW_A = ((UChar) 0x0061);
84  static const UChar LOW_B = ((UChar) 0x0062);
85  static const UChar LOW_C = ((UChar) 0x0063);
86  static const UChar LOW_D = ((UChar) 0x0064);
87  static const UChar LOW_E = ((UChar) 0x0065);
88  static const UChar LOW_F = ((UChar) 0x0066);
89  static const UChar LOW_G = ((UChar) 0x0067);
90  static const UChar LOW_H = ((UChar) 0x0068);
91  static const UChar LOW_I = ((UChar) 0x0069);
92  static const UChar LOW_J = ((UChar) 0x006a);
93  static const UChar LOW_K = ((UChar) 0x006B);
94  static const UChar LOW_L = ((UChar) 0x006C);
95  static const UChar LOW_M = ((UChar) 0x006D);
96  static const UChar LOW_N = ((UChar) 0x006E);
97  static const UChar LOW_O = ((UChar) 0x006F);
98  static const UChar LOW_P = ((UChar) 0x0070);
99  static const UChar LOW_Q = ((UChar) 0x0071);
100  static const UChar LOW_R = ((UChar) 0x0072);
101  static const UChar LOW_S = ((UChar) 0x0073);
102  static const UChar LOW_T = ((UChar) 0x0074);
103  static const UChar LOW_U = ((UChar) 0x0075);
104  static const UChar LOW_V = ((UChar) 0x0076);
105  static const UChar LOW_W = ((UChar) 0x0077);
106  static const UChar LOW_Y = ((UChar) 0x0079);
107  static const UChar LOW_Z = ((UChar) 0x007A);
108  
109  }
110  
111  
// Largest value an int32_t can hold (2^31 - 1).
// NOTE(review): presumably used as the upper bound of an open-ended range -- confirm against the users of this constant.
static const int32_t PLURAL_RANGE_HIGH = 0x7FFFFFFF;
113  
// Token categories used by the plural rule parser.
// The enumerators keep their original order, so their numeric values
// (none == 0 ... tEOF == 28) are unchanged.
enum tokenType {
    none,

    // Numbers, separators and punctuation.
    tNumber,
    tComma,
    tSemiColon,
    tSpace,
    tColon,
    tAt,            // '@'
    tDot,
    tDot2,
    tEllipsis,

    // Keywords and operators.
    tKeyword,
    tAnd,
    tOr,
    tMod,           // 'mod' or '%'
    tNot,           // 'not' only.
    tIn,            // 'in' only.
    tEqual,         // '=' only.
    tNotEqual,      // '!='
    tTilde,
    tWithin,
    tIs,

    // Operand variables.
    tVariableN,
    tVariableI,
    tVariableF,
    tVariableV,
    tVariableT,

    // Sample-section markers and end of input.
    tDecimal,
    tInteger,
    tEOF
};
145  
146  
147  class PluralRuleParser: public UMemory {
148  public:
149      PluralRuleParser();
150      virtual ~PluralRuleParser();
151  
152      void parse(const UnicodeString &rules, PluralRules *dest, UErrorCode &status);
153      void getNextToken(UErrorCode &status);
154      void checkSyntax(UErrorCode &status);
155      static int32_t getNumberValue(const UnicodeString &token);
156  
157  private:
158      static tokenType getKeyType(const UnicodeString& token, tokenType type);
159      static tokenType charType(UChar ch);
160      static UBool isValidKeyword(const UnicodeString& token);
161  
162      const UnicodeString  *ruleSrc;  // The rules string.
163      int32_t        ruleIndex;       // String index in the input rules, the current parse position.
164      UnicodeString  token;           // Token most recently scanned.
165      tokenType      type;
166      tokenType      prevType;
167  
168                                      // The items currently being parsed & built.
169                                      // Note: currentChain may not be the last RuleChain in the
170                                      //       list because the "other" chain is forced to the end.
171      AndConstraint *curAndConstraint;
172      RuleChain     *currentChain;
173  
174      int32_t        rangeLowIdx;     // Indices in the UVector of ranges of the
175      int32_t        rangeHiIdx;      //    low and hi values currently being parsed.
176  
177      enum EParseState {
178         kKeyword,
179         kExpr,
180         kValue,
181         kRangeList,
182         kSamples
183      };
184  };
185  
// Operands that can be requested from a number when evaluating plural rules.
// Enumerator order is unchanged (PLURAL_OPERAND_N == 0 ... PLURAL_OPERAND_J == 6).
enum PluralOperand {
    /** n: the double value of the entire number. */
    PLURAL_OPERAND_N,

    /** i: the integer value, with the fraction digits truncated off. */
    PLURAL_OPERAND_I,

    /** f: all visible fraction digits as an integer, including trailing zeros. */
    PLURAL_OPERAND_F,

    /** t: visible fraction digits as an integer, not including trailing zeros. */
    PLURAL_OPERAND_T,

    /** v: number of visible fraction digits. */
    PLURAL_OPERAND_V,

    /** w: number of visible fraction digits, not including trailing zeros. */
    PLURAL_OPERAND_W,

    /**
     * j: THIS OPERAND IS DEPRECATED AND HAS BEEN REMOVED FROM THE SPEC.
     *
     * <p>Returns the integer value, but fails if the number has fraction
     * digits. In other words, using "j" in place of "i" implicitly adds
     * "v is 0".
     *
     * <p>Example: "j is 3" is equivalent to "i is 3 and v is 0"; it matches
     * "3" but neither "3.1" nor "3.0".
     */
    PLURAL_OPERAND_J
};
228  
229  /**
230   * Converts from the tokenType enum to PluralOperand. Asserts that the given
231   * tokenType can be mapped to a PluralOperand.
232   */
233  PluralOperand tokenTypeToPluralOperand(tokenType tt);
234  
235  /**
236   * An interface to FixedDecimal, allowing for other implementations.
237   * @internal
238   */
239  class U_I18N_API IFixedDecimal {
240    public:
241      virtual ~IFixedDecimal();
242  
243      /**
244       * Returns the value corresponding to the specified operand (n, i, f, t, v, or w).
245       * If the operand is 'n', returns a double; otherwise, returns an integer.
246       */
247      virtual double getPluralOperand(PluralOperand operand) const = 0;
248  
249      virtual bool isNaN() const = 0;
250  
251      virtual bool isInfinite() const = 0;
252  
253      /** Whether the number has no nonzero fraction digits. */
254      virtual bool hasIntegerValue() const = 0;
255  };
256  
257  /**
258   * class FixedDecimal serves to communicate the properties
259   * of a formatted number from a decimal formatter to PluralRules::select()
260   *
261   * see DecimalFormat::getFixedDecimal()
262   * @internal
263   */
264  class U_I18N_API FixedDecimal: public IFixedDecimal, public UObject {
265    public:
266      /**
267        * @param n   the number, e.g. 12.345
268        * @param v   The number of visible fraction digits, e.g. 3
269        * @param f   The fraction digits, e.g. 345
270        */
271      FixedDecimal(double  n, int32_t v, int64_t f);
272      FixedDecimal(double n, int32_t);
273      explicit FixedDecimal(double n);
274      FixedDecimal();
275      ~FixedDecimal() U_OVERRIDE;
276      FixedDecimal(const UnicodeString &s, UErrorCode &ec);
277      FixedDecimal(const FixedDecimal &other);
278  
279      double getPluralOperand(PluralOperand operand) const U_OVERRIDE;
280      bool isNaN() const U_OVERRIDE;
281      bool isInfinite() const U_OVERRIDE;
282      bool hasIntegerValue() const U_OVERRIDE;
283  
284      bool isNanOrInfinity() const;  // used in decimfmtimpl.cpp
285  
286      int32_t getVisibleFractionDigitCount() const;
287  
288      void init(double n, int32_t v, int64_t f);
289      void init(double n);
290      UBool quickInit(double n);  // Try a fast-path only initialization,
291                                  //    return TRUE if successful.
292      void adjustForMinFractionDigits(int32_t min);
293      static int64_t getFractionalDigits(double n, int32_t v);
294      static int32_t decimals(double n);
295  
296      double      source;
297      int32_t     visibleDecimalDigitCount;
298      int64_t     decimalDigits;
299      int64_t     decimalDigitsWithoutTrailingZeros;
300      int64_t     intValue;
301      UBool       _hasIntegerValue;
302      UBool       isNegative;
303      UBool       _isNaN;
304      UBool       _isInfinite;
305  };
306  
307  class AndConstraint : public UMemory  {
308  public:
309      typedef enum RuleOp {
310          NONE,
311          MOD
312      } RuleOp;
313      RuleOp op = AndConstraint::NONE;
314      int32_t opNum = -1;             // for mod expressions, the right operand of the mod.
315      int32_t value = -1;             // valid for 'is' rules only.
316      UVector32 *rangeList = nullptr; // for 'in', 'within' rules. Null otherwise.
317      UBool negated = FALSE;          // TRUE for negated rules.
318      UBool integerOnly = FALSE;      // TRUE for 'within' rules.
319      tokenType digitsType = none;    // n | i | v | f constraint.
320      AndConstraint *next = nullptr;
321      // Internal error status, used for errors that occur during the copy constructor.
322      UErrorCode fInternalStatus = U_ZERO_ERROR;
323  
324      AndConstraint() = default;
325      AndConstraint(const AndConstraint& other);
326      virtual ~AndConstraint();
327      AndConstraint* add(UErrorCode& status);
328      // UBool isFulfilled(double number);
329      UBool isFulfilled(const IFixedDecimal &number);
330  };
331  
332  class OrConstraint : public UMemory  {
333  public:
334      AndConstraint *childNode = nullptr;
335      OrConstraint *next = nullptr;
336      // Internal error status, used for errors that occur during the copy constructor.
337      UErrorCode fInternalStatus = U_ZERO_ERROR;
338  
339      OrConstraint() = default;
340      OrConstraint(const OrConstraint& other);
341      virtual ~OrConstraint();
342      AndConstraint* add(UErrorCode& status);
343      // UBool isFulfilled(double number);
344      UBool isFulfilled(const IFixedDecimal &number);
345  };
346  
347  class RuleChain : public UMemory  {
348  public:
349      UnicodeString   fKeyword;
350      RuleChain      *fNext = nullptr;
351      OrConstraint   *ruleHeader = nullptr;
352      UnicodeString   fDecimalSamples;  // Samples strings from rule source
353      UnicodeString   fIntegerSamples;  //   without @decimal or @integer, otherwise unprocessed.
354      UBool           fDecimalSamplesUnbounded = FALSE;
355      UBool           fIntegerSamplesUnbounded = FALSE;
356      // Internal error status, used for errors that occur during the copy constructor.
357      UErrorCode      fInternalStatus = U_ZERO_ERROR;
358  
359      RuleChain() = default;
360      RuleChain(const RuleChain& other);
361      virtual ~RuleChain();
362  
363      UnicodeString select(const IFixedDecimal &number) const;
364      void          dumpRules(UnicodeString& result);
365      UErrorCode    getKeywords(int32_t maxArraySize, UnicodeString *keywords, int32_t& arraySize) const;
366      UBool         isKeyword(const UnicodeString& keyword) const;
367  };
368  
369  class PluralKeywordEnumeration : public StringEnumeration {
370  public:
371      PluralKeywordEnumeration(RuleChain *header, UErrorCode& status);
372      virtual ~PluralKeywordEnumeration();
373      static UClassID U_EXPORT2 getStaticClassID(void);
374      virtual UClassID getDynamicClassID(void) const;
375      virtual const UnicodeString* snext(UErrorCode& status);
376      virtual void reset(UErrorCode& status);
377      virtual int32_t count(UErrorCode& status) const;
378  private:
379      int32_t         pos;
380      UVector         fKeywordNames;
381  };
382  
383  
384  class U_I18N_API PluralAvailableLocalesEnumeration: public StringEnumeration {
385    public:
386      PluralAvailableLocalesEnumeration(UErrorCode &status);
387      virtual ~PluralAvailableLocalesEnumeration();
388      virtual const char* next(int32_t *resultLength, UErrorCode& status);
389      virtual void reset(UErrorCode& status);
390      virtual int32_t count(UErrorCode& status) const;
391    private:
392      UErrorCode      fOpenStatus;
393      UResourceBundle *fLocales = nullptr;
394      UResourceBundle *fRes = nullptr;
395  };
396  
397  U_NAMESPACE_END
398  
399  #endif /* #if !UCONFIG_NO_FORMATTING */
400  
#endif // PLURRULE_IMPL
402  //eof
403