1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2015, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * affixpatternparser.h
9 *
10 * created on: 2015jan06
11 * created by: Travis Keep
12 */
13
14 #ifndef __AFFIX_PATTERN_PARSER_H__
15 #define __AFFIX_PATTERN_PARSER_H__
16
17 #include "unicode/utypes.h"
18
19 #if !UCONFIG_NO_FORMATTING
20
21 #include "unicode/unistr.h"
22 #include "unicode/uobject.h"
23 #include "pluralaffix.h"
24
25 U_NAMESPACE_BEGIN
26
27 class PluralRules;
28 class FixedPrecision;
29 class DecimalFormatSymbols;
30
31 /**
32 * A representation of the various forms of a particular currency according
33 * to some locale and usage context.
34 *
35 * Includes the symbol, ISO code form, and long form(s) of the currency name
36 * for each plural variation.
37 */
38 class U_I18N_API CurrencyAffixInfo : public UMemory {
39 public:
40 /**
41 * Symbol is \u00a4; ISO form is \u00a4\u00a4;
42 * long form is \u00a4\u00a4\u00a4.
43 */
44 CurrencyAffixInfo();
45
getSymbol()46 const UnicodeString &getSymbol() const { return fSymbol; }
getISO()47 const UnicodeString &getISO() const { return fISO; }
getLong()48 const PluralAffix &getLong() const { return fLong; }
setSymbol(const UnicodeString & symbol)49 void setSymbol(const UnicodeString &symbol) {
50 fSymbol = symbol;
51 fIsDefault = FALSE;
52 }
setISO(const UnicodeString & iso)53 void setISO(const UnicodeString &iso) {
54 fISO = iso;
55 fIsDefault = FALSE;
56 }
57 UBool
equals(const CurrencyAffixInfo & other)58 equals(const CurrencyAffixInfo &other) const {
59 return (fSymbol == other.fSymbol)
60 && (fISO == other.fISO)
61 && (fLong.equals(other.fLong))
62 && (fIsDefault == other.fIsDefault);
63 }
64
65 /**
66 * Intializes this instance.
67 *
68 * @param locale the locale for the currency forms.
69 * @param rules The plural rules for the locale.
70 * @param currency the null terminated, 3 character ISO code of the
71 * currency. If NULL, resets this instance as if it were just created.
72 * In this case, the first 2 parameters may be NULL as well.
73 * @param status any error returned here.
74 */
75 void set(
76 const char *locale, const PluralRules *rules,
77 const UChar *currency, UErrorCode &status);
78
79 /**
80 * Returns true if this instance is the default. That is has no real
81 * currency. For instance never initialized with set()
82 * or reset with set(NULL, NULL, NULL, status).
83 */
isDefault()84 UBool isDefault() const { return fIsDefault; }
85
86 /**
87 * Adjusts the precision used for a particular currency.
88 * @param currency the null terminated, 3 character ISO code of the
89 * currency.
90 * @param usage the usage of the currency
91 * @param precision min/max fraction digits and rounding increment
92 * adjusted.
93 * @params status any error reported here.
94 */
95 static void adjustPrecision(
96 const UChar *currency, const UCurrencyUsage usage,
97 FixedPrecision &precision, UErrorCode &status);
98
99 private:
100 /**
101 * The symbol form of the currency.
102 */
103 UnicodeString fSymbol;
104
105 /**
106 * The ISO form of the currency, usually three letter abbreviation.
107 */
108 UnicodeString fISO;
109
110 /**
111 * The long forms of the currency keyed by plural variation.
112 */
113 PluralAffix fLong;
114
115 UBool fIsDefault;
116
117 };
118
119 class AffixPatternIterator;
120
121 /**
122 * A locale agnostic representation of an affix pattern.
123 */
124 class U_I18N_API AffixPattern : public UMemory {
125 public:
126
127 /**
128 * The token types that can appear in an affix pattern.
129 */
130 enum ETokenType {
131 kLiteral,
132 kPercent,
133 kPerMill,
134 kCurrency,
135 kNegative,
136 kPositive
137 };
138
139 /**
140 * An empty affix pattern.
141 */
AffixPattern()142 AffixPattern()
143 : tokens(), literals(), hasCurrencyToken(FALSE),
144 hasPercentToken(FALSE), hasPermillToken(FALSE), char32Count(0) {
145 }
146
147 /**
148 * Adds a string literal to this affix pattern.
149 */
150 void addLiteral(const UChar *, int32_t start, int32_t len);
151
152 /**
153 * Adds a token to this affix pattern. t must not be kLiteral as
154 * the addLiteral() method adds literals.
155 * @param t the token type to add
156 */
157 void add(ETokenType t);
158
159 /**
160 * Adds a currency token with specific count to this affix pattern.
161 * @param count the token count. Used to distinguish between
162 * one, two, or three currency symbols. Note that adding a currency
163 * token with count=2 (Use ISO code) is different than adding two
164 * currency tokens each with count=1 (two currency symbols).
165 */
166 void addCurrency(uint8_t count);
167
168 /**
169 * Makes this instance be an empty affix pattern.
170 */
171 void remove();
172
173 /**
174 * Provides an iterator over the tokens in this instance.
175 * @param result this is initialized to point just before the
176 * first token of this instance. Caller must call nextToken()
177 * on the iterator once it is set up to have it actually point
178 * to the first token. This first call to nextToken() will return
179 * FALSE if the AffixPattern being iterated over is empty.
180 * @return result
181 */
182 AffixPatternIterator &iterator(AffixPatternIterator &result) const;
183
184 /**
185 * Returns TRUE if this instance has currency tokens in it.
186 */
usesCurrency()187 UBool usesCurrency() const {
188 return hasCurrencyToken;
189 }
190
usesPercent()191 UBool usesPercent() const {
192 return hasPercentToken;
193 }
194
usesPermill()195 UBool usesPermill() const {
196 return hasPermillToken;
197 }
198
199 /**
200 * Returns the number of code points a string of this instance
201 * would have if none of the special tokens were escaped.
202 * Used to compute the padding size.
203 */
countChar32()204 int32_t countChar32() const {
205 return char32Count;
206 }
207
208 /**
209 * Appends other to this instance mutating this instance in place.
210 * @param other The pattern appended to the end of this one.
211 * @return a reference to this instance for chaining.
212 */
213 AffixPattern &append(const AffixPattern &other);
214
215 /**
216 * Converts this AffixPattern back into a user string.
217 * It is the inverse of parseUserAffixString.
218 */
219 UnicodeString &toUserString(UnicodeString &appendTo) const;
220
221 /**
222 * Converts this AffixPattern back into a string.
223 * It is the inverse of parseAffixString.
224 */
225 UnicodeString &toString(UnicodeString &appendTo) const;
226
227 /**
228 * Parses an affix pattern string appending it to an AffixPattern.
229 * Parses affix pattern strings produced from using
230 * DecimalFormatPatternParser to parse a format pattern. Affix patterns
231 * include the positive prefix and suffix and the negative prefix
232 * and suffix. This method expects affix patterns strings to be in the
233 * same format that DecimalFormatPatternParser produces. Namely special
234 * characters in the affix that correspond to a field type must be
235 * prefixed with an apostrophe ('). These special character sequences
236 * inluce minus (-), percent (%), permile (U+2030), plus (+),
237 * short currency (U+00a4), medium currency (u+00a4 * 2),
238 * long currency (u+a4 * 3), and apostrophe (')
239 * (apostrophe does not correspond to a field type but has to be escaped
240 * because it itself is the escape character).
241 * Since the expansion of these special character
242 * sequences is locale dependent, these sequences are not expanded in
243 * an AffixPattern instance.
244 * If these special characters are not prefixed with an apostrophe in
245 * the affix pattern string, then they are treated verbatim just as
246 * any other character. If an apostrophe prefixes a non special
247 * character in the affix pattern, the apostrophe is simply ignored.
248 *
249 * @param affixStr the string from DecimalFormatPatternParser
250 * @param appendTo parsed result appended here.
251 * @param status any error parsing returned here.
252 */
253 static AffixPattern &parseAffixString(
254 const UnicodeString &affixStr,
255 AffixPattern &appendTo,
256 UErrorCode &status);
257
258 /**
259 * Parses an affix pattern string appending it to an AffixPattern.
260 * Parses affix pattern strings as the user would supply them.
261 * In this function, quoting makes special characters like normal
262 * characters whereas in parseAffixString, quoting makes special
263 * characters special.
264 *
265 * @param affixStr the string from the user
266 * @param appendTo parsed result appended here.
267 * @param status any error parsing returned here.
268 */
269 static AffixPattern &parseUserAffixString(
270 const UnicodeString &affixStr,
271 AffixPattern &appendTo,
272 UErrorCode &status);
273
equals(const AffixPattern & other)274 UBool equals(const AffixPattern &other) const {
275 return (tokens == other.tokens)
276 && (literals == other.literals)
277 && (hasCurrencyToken == other.hasCurrencyToken)
278 && (hasPercentToken == other.hasPercentToken)
279 && (hasPermillToken == other.hasPermillToken)
280 && (char32Count == other.char32Count);
281 }
282
283 private:
284 /*
285 * Tokens stored here. Each UChar generally stands for one token. A
286 * Each token is of form 'etttttttllllllll' llllllll is the length of
287 * the token and ranges from 0-255. ttttttt is the token type and ranges
288 * from 0-127. If e is set it means this is an extendo token (to be
289 * described later). To accomodate token lengths above 255, each normal
290 * token (e=0) can be followed by 0 or more extendo tokens (e=1) with
291 * the same type. Right now only kLiteral Tokens have extendo tokens.
292 * Each extendo token provides the next 8 higher bits for the length.
293 * If a kLiteral token is followed by 2 extendo tokens then, then the
294 * llllllll of the next extendo token contains bits 8-15 of the length
295 * and the last extendo token contains bits 16-23 of the length.
296 */
297 UnicodeString tokens;
298
299 /*
300 * The characters of the kLiteral tokens are concatenated together here.
301 * The first characters go with the first kLiteral token, the next
302 * characters go with the next kLiteral token etc.
303 */
304 UnicodeString literals;
305 UBool hasCurrencyToken;
306 UBool hasPercentToken;
307 UBool hasPermillToken;
308 int32_t char32Count;
309 void add(ETokenType t, uint8_t count);
310
311 };
312
313 /**
314 * An iterator over the tokens in an AffixPattern instance.
315 */
316 class U_I18N_API AffixPatternIterator : public UMemory {
317 public:
318
319 /**
320 * Using an iterator without first calling iterator on an AffixPattern
321 * instance to initialize the iterator results in
322 * undefined behavior.
323 */
AffixPatternIterator()324 AffixPatternIterator() : nextLiteralIndex(0), lastLiteralLength(0), nextTokenIndex(0), tokens(NULL), literals(NULL) { }
325 /**
326 * Advances this iterator to the next token. Returns FALSE when there
327 * are no more tokens. Calling the other methods after nextToken()
328 * returns FALSE results in undefined behavior.
329 */
330 UBool nextToken();
331
332 /**
333 * Returns the type of token.
334 */
335 AffixPattern::ETokenType getTokenType() const;
336
337 /**
338 * For literal tokens, returns the literal string. Calling this for
339 * other token types results in undefined behavior.
340 * @param result replaced with a read-only alias to the literal string.
341 * @return result
342 */
343 UnicodeString &getLiteral(UnicodeString &result) const;
344
345 /**
346 * Returns the token length. Usually 1, but for currency tokens may
347 * be 2 for ISO code and 3 for long form.
348 */
349 int32_t getTokenLength() const;
350 private:
351 int32_t nextLiteralIndex;
352 int32_t lastLiteralLength;
353 int32_t nextTokenIndex;
354 const UnicodeString *tokens;
355 const UnicodeString *literals;
356 friend class AffixPattern;
357 AffixPatternIterator(const AffixPatternIterator &);
358 AffixPatternIterator &operator=(const AffixPatternIterator &);
359 };
360
361 /**
362 * A locale aware class that converts locale independent AffixPattern
363 * instances into locale dependent PluralAffix instances.
364 */
365 class U_I18N_API AffixPatternParser : public UMemory {
366 public:
367 AffixPatternParser();
368 AffixPatternParser(const DecimalFormatSymbols &symbols);
369 void setDecimalFormatSymbols(const DecimalFormatSymbols &symbols);
370
371 /**
372 * Parses affixPattern appending the result to appendTo.
373 * @param affixPattern The affix pattern.
374 * @param currencyAffixInfo contains the currency forms.
375 * @param appendTo The result of parsing affixPattern is appended here.
376 * @param status any error returned here.
377 * @return appendTo.
378 */
379 PluralAffix &parse(
380 const AffixPattern &affixPattern,
381 const CurrencyAffixInfo ¤cyAffixInfo,
382 PluralAffix &appendTo,
383 UErrorCode &status) const;
384
equals(const AffixPatternParser & other)385 UBool equals(const AffixPatternParser &other) const {
386 return (fPercent == other.fPercent)
387 && (fPermill == other.fPermill)
388 && (fNegative == other.fNegative)
389 && (fPositive == other.fPositive);
390 }
391
392 private:
393 UnicodeString fPercent;
394 UnicodeString fPermill;
395 UnicodeString fNegative;
396 UnicodeString fPositive;
397 };
398
399
400 U_NAMESPACE_END
401 #endif /* #if !UCONFIG_NO_FORMATTING */
402 #endif // __AFFIX_PATTERN_PARSER_H__
403