• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 *   Copyright (C) 2011-2012, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 *******************************************************************************
6 *   file name:  messagepattern.h
7 *   encoding:   US-ASCII
8 *   tab size:   8 (not used)
9 *   indentation:4
10 *
11 *   created on: 2011mar14
12 *   created by: Markus W. Scherer
13 */
14 
15 #ifndef __MESSAGEPATTERN_H__
16 #define __MESSAGEPATTERN_H__
17 
18 /**
19  * \file
20  * \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns.
21  */
22 
23 #include "unicode/utypes.h"
24 
25 #if !UCONFIG_NO_FORMATTING
26 
27 #include "unicode/parseerr.h"
28 #include "unicode/unistr.h"
29 
30 /**
31  * Mode for when an apostrophe starts quoted literal text for MessageFormat output.
32  * The default is DOUBLE_OPTIONAL unless overridden via uconfig.h
33  * (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE).
34  * <p>
35  * A pair of adjacent apostrophes always results in a single apostrophe in the output,
36  * even when the pair is between two single, text-quoting apostrophes.
37  * <p>
38  * The following table shows examples of desired MessageFormat.format() output
39  * with the pattern strings that yield that output.
40  * <p>
41  * <table>
42  *   <tr>
43  *     <th>Desired output</th>
44  *     <th>DOUBLE_OPTIONAL</th>
45  *     <th>DOUBLE_REQUIRED</th>
46  *   </tr>
47  *   <tr>
48  *     <td>I see {many}</td>
49  *     <td>I see '{many}'</td>
50  *     <td>(same)</td>
51  *   </tr>
52  *   <tr>
53  *     <td>I said {'Wow!'}</td>
54  *     <td>I said '{''Wow!''}'</td>
55  *     <td>(same)</td>
56  *   </tr>
57  *   <tr>
58  *     <td>I don't know</td>
59  *     <td>I don't know OR<br> I don''t know</td>
60  *     <td>I don''t know</td>
61  *   </tr>
62  * </table>
63  * @stable ICU 4.8
64  * @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
65  */
enum UMessagePatternApostropheMode {
    /**
     * A literal apostrophe is represented by
     * either a single or a double apostrophe pattern character.
     * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text
     * if it immediately precedes a curly brace {},
     * or a pipe symbol | if inside a choice format,
     * or a pound symbol # if inside a plural format.
     * <p>
     * This is the default behavior starting with ICU 4.8.
     * @stable ICU 4.8
     */
    UMSGPAT_APOS_DOUBLE_OPTIONAL,
    /**
     * A literal apostrophe must be represented by
     * a double apostrophe pattern character.
     * A single apostrophe always starts quoted literal text.
     * <p>
     * This is the behavior of ICU 4.6 and earlier, and of the JDK.
     * @stable ICU 4.8
     */
    UMSGPAT_APOS_DOUBLE_REQUIRED
};
/**
 * C-compatible alias so the enum can be named without the "enum" keyword.
 * @stable ICU 4.8
 */
typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode;
93 
94 /**
95  * MessagePattern::Part type constants.
96  * @stable ICU 4.8
97  */
enum UMessagePatternPartType {
    /**
     * Start of a message pattern (main or nested).
     * The length is 0 for the top-level message
     * and for a choice argument sub-message, otherwise 1 for the '{'.
     * The value indicates the nesting level, starting with 0 for the main message.
     * <p>
     * There is always a later MSG_LIMIT part.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_MSG_START,
    /**
     * End of a message pattern (main or nested).
     * The length is 0 for the top-level message and
     * the last sub-message of a choice argument,
     * otherwise 1 for the '}' or (in a choice argument style) the '|'.
     * The value indicates the nesting level, starting with 0 for the main message.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_MSG_LIMIT,
    /**
     * Indicates a substring of the pattern string which is to be skipped when formatting.
     * For example, an apostrophe that begins or ends quoted text
     * would be indicated with such a part.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_SKIP_SYNTAX,
    /**
     * Indicates that a syntax character needs to be inserted for auto-quoting.
     * The length is 0.
     * The value is the character code of the insertion character. (U+0027=APOSTROPHE)
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_INSERT_CHAR,
    /**
     * Indicates a syntactic (non-escaped) # symbol in a plural variant.
     * When formatting, replace this part's substring with the
     * (value-offset) for the plural argument value.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_REPLACE_NUMBER,
    /**
     * Start of an argument.
     * The length is 1 for the '{'.
     * The value is the ordinal value of the ArgType. Use getArgType().
     * <p>
     * This part is followed by either an ARG_NUMBER or ARG_NAME,
     * followed by optional argument sub-parts (see UMessagePatternArgType constants)
     * and finally an ARG_LIMIT part.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_START,
    /**
     * End of an argument.
     * The length is 1 for the '}'.
     * The value is the ordinal value of the ArgType. Use getArgType().
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_LIMIT,
    /**
     * The argument number, provided by the value.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_NUMBER,
    /**
     * The argument name.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_NAME,
    /**
     * The argument type.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_TYPE,
    /**
     * The argument style text.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_STYLE,
    /**
     * A selector substring in a "complex" argument style.
     * The value is undefined and currently always 0.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_SELECTOR,
    /**
     * An integer value, for example the offset or an explicit selector value
     * in a PluralFormat style.
     * The part value is the integer value.
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_INT,
    /**
     * A numeric value, for example the offset or an explicit selector value
     * in a PluralFormat style.
     * The part value is an index into an internal array of numeric values;
     * use getNumericValue().
     * @stable ICU 4.8
     */
    UMSGPAT_PART_TYPE_ARG_DOUBLE
};
/**
 * C-compatible alias so the enum can be named without the "enum" keyword.
 * @stable ICU 4.8
 */
typedef enum UMessagePatternPartType UMessagePatternPartType;
208 
209 /**
210  * Argument type constants.
211  * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts.
212  *
213  * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT,
214  * with a nesting level one greater than the surrounding message.
215  * @stable ICU 4.8
216  */
enum UMessagePatternArgType {
    /**
     * The argument has no specified type.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_NONE,
    /**
     * The argument has a "simple" type which is provided by the ARG_TYPE part.
     * An ARG_STYLE part might follow that.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_SIMPLE,
    /**
     * The argument is a ChoiceFormat with one or more
     * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_CHOICE,
    /**
     * The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset
     * (e.g., offset:1)
     * and one or more (ARG_SELECTOR [explicit-value] message) tuples.
     * If the selector has an explicit value (e.g., =2), then
     * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message.
     * Otherwise the message immediately follows the ARG_SELECTOR.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_PLURAL,
    /**
     * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_TYPE_SELECT,
    /**
     * The argument is an ordinal-number PluralFormat
     * with the same style parts sequence and semantics as UMSGPAT_ARG_TYPE_PLURAL.
     * @draft ICU 50
     */
    UMSGPAT_ARG_TYPE_SELECTORDINAL
};
/**
 * C-compatible alias so the enum can be named without the "enum" keyword.
 * @stable ICU 4.8
 */
typedef enum UMessagePatternArgType UMessagePatternArgType;
261 
262 /**
263  * Returns TRUE if the argument type has a plural style part sequence and semantics,
264  * for example UMSGPAT_ARG_TYPE_PLURAL and UMSGPAT_ARG_TYPE_SELECTORDINAL.
265  * @draft ICU 50
266  */
/* Evaluates argType twice, so pass an expression without side effects. */
#define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \
    ((argType)==UMSGPAT_ARG_TYPE_PLURAL || (argType)==UMSGPAT_ARG_TYPE_SELECTORDINAL)
269 
enum {
    /**
     * Return value from MessagePattern.validateArgumentName() for when
     * the string is a valid "pattern identifier" but not a number.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_NAME_NOT_NUMBER=-1,

    /**
     * Return value from MessagePattern.validateArgumentName() for when
     * the string is invalid.
     * It might not be a valid "pattern identifier",
     * or it has only ASCII digits but there is a leading zero or the number is too large.
     * @stable ICU 4.8
     */
    UMSGPAT_ARG_NAME_NOT_VALID=-2
};
287 
288 /**
289  * Special value that is returned by getNumericValue(Part) when no
290  * numeric value is defined for a part.
291  * @see MessagePattern.getNumericValue()
292  * @stable ICU 4.8
293  */
#define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789))
295 
296 U_NAMESPACE_BEGIN
297 
// Forward declarations only; neither class is defined in the visible part of this header.
class MessagePatternDoubleList;
class MessagePatternPartsList;
300 
301 /**
302  * Parses and represents ICU MessageFormat patterns.
303  * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat.
304  * Used in the implementations of those classes as well as in tools
305  * for message validation, translation and format conversion.
306  * <p>
307  * The parser handles all syntax relevant for identifying message arguments.
308  * This includes "complex" arguments whose style strings contain
309  * nested MessageFormat pattern substrings.
310  * For "simple" arguments (with no nested MessageFormat pattern substrings),
311  * the argument style is not parsed any further.
312  * <p>
313  * The parser handles named and numbered message arguments and allows both in one message.
314  * <p>
315  * Once a pattern has been parsed successfully, iterate through the parsed data
316  * with countParts(), getPart() and related methods.
317  * <p>
318  * The data logically represents a parse tree, but is stored and accessed
319  * as a list of "parts" for fast and simple parsing and to minimize object allocations.
320  * Arguments and nested messages are best handled via recursion.
321  * For every _START "part", MessagePattern.getLimitPartIndex() efficiently returns
322  * the index of the corresponding _LIMIT "part".
323  * <p>
324  * List of "parts":
325  * <pre>
326  * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT
327  * argument = noneArg | simpleArg | complexArg
328  * complexArg = choiceArg | pluralArg | selectArg
329  *
330  * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE
331  * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE
332  * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE
333  * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL
334  * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT
335  *
336  * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+
337  * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+
338  * selectStyle = (ARG_SELECTOR message)+
339  * </pre>
340  * <ul>
341  *   <li>Literal output text is not represented directly by "parts" but accessed
342  *       between parts of a message, from one part's getLimit() to the next part's getIndex().
343  *   <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE.
344  *   <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or
345  *       the less-than-or-equal-to sign (U+2264).
346  *   <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value.
347  *       The optional numeric Part between each (ARG_SELECTOR, message) pair
348  *       is the value of an explicit-number selector like "=2",
349  *       otherwise the selector is a non-numeric identifier.
350  *   <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle.
351  * </ul>
352  * <p>
353  * This class is not intended for public subclassing.
354  *
355  * @stable ICU 4.8
356  */
357 class U_COMMON_API MessagePattern : public UObject {
358 public:
359     /**
360      * Constructs an empty MessagePattern with default UMessagePatternApostropheMode.
361      * @param errorCode Standard ICU error code. Its input value must
362      *                  pass the U_SUCCESS() test, or else the function returns
363      *                  immediately. Check for U_FAILURE() on output or use with
364      *                  function chaining. (See User Guide for details.)
365      * @stable ICU 4.8
366      */
367     MessagePattern(UErrorCode &errorCode);
368 
369     /**
370      * Constructs an empty MessagePattern.
371      * @param mode Explicit UMessagePatternApostropheMode.
372      * @param errorCode Standard ICU error code. Its input value must
373      *                  pass the U_SUCCESS() test, or else the function returns
374      *                  immediately. Check for U_FAILURE() on output or use with
375      *                  function chaining. (See User Guide for details.)
376      * @stable ICU 4.8
377      */
378     MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode);
379 
380     /**
381      * Constructs a MessagePattern with default UMessagePatternApostropheMode and
382      * parses the MessageFormat pattern string.
383      * @param pattern a MessageFormat pattern string
384      * @param parseError Struct to receive information on the position
385      *                   of an error within the pattern.
386      *                   Can be NULL.
387      * @param errorCode Standard ICU error code. Its input value must
388      *                  pass the U_SUCCESS() test, or else the function returns
389      *                  immediately. Check for U_FAILURE() on output or use with
390      *                  function chaining. (See User Guide for details.)
391      * TODO: turn @throws into UErrorCode specifics?
392      * @throws IllegalArgumentException for syntax errors in the pattern string
393      * @throws IndexOutOfBoundsException if certain limits are exceeded
394      *         (e.g., argument number too high, argument name too long, etc.)
395      * @throws NumberFormatException if a number could not be parsed
396      * @stable ICU 4.8
397      */
398     MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
399 
400     /**
401      * Copy constructor.
402      * @param other Object to copy.
403      * @stable ICU 4.8
404      */
405     MessagePattern(const MessagePattern &other);
406 
407     /**
408      * Assignment operator.
409      * @param other Object to copy.
410      * @return *this=other
411      * @stable ICU 4.8
412      */
413     MessagePattern &operator=(const MessagePattern &other);
414 
415     /**
416      * Destructor.
417      * @stable ICU 4.8
418      */
419     virtual ~MessagePattern();
420 
421     /**
422      * Parses a MessageFormat pattern string.
423      * @param pattern a MessageFormat pattern string
424      * @param parseError Struct to receive information on the position
425      *                   of an error within the pattern.
426      *                   Can be NULL.
427      * @param errorCode Standard ICU error code. Its input value must
428      *                  pass the U_SUCCESS() test, or else the function returns
429      *                  immediately. Check for U_FAILURE() on output or use with
430      *                  function chaining. (See User Guide for details.)
431      * @return *this
432      * @throws IllegalArgumentException for syntax errors in the pattern string
433      * @throws IndexOutOfBoundsException if certain limits are exceeded
434      *         (e.g., argument number too high, argument name too long, etc.)
435      * @throws NumberFormatException if a number could not be parsed
436      * @stable ICU 4.8
437      */
438     MessagePattern &parse(const UnicodeString &pattern,
439                           UParseError *parseError, UErrorCode &errorCode);
440 
441     /**
442      * Parses a ChoiceFormat pattern string.
443      * @param pattern a ChoiceFormat pattern string
444      * @param parseError Struct to receive information on the position
445      *                   of an error within the pattern.
446      *                   Can be NULL.
447      * @param errorCode Standard ICU error code. Its input value must
448      *                  pass the U_SUCCESS() test, or else the function returns
449      *                  immediately. Check for U_FAILURE() on output or use with
450      *                  function chaining. (See User Guide for details.)
451      * @return *this
452      * @throws IllegalArgumentException for syntax errors in the pattern string
453      * @throws IndexOutOfBoundsException if certain limits are exceeded
454      *         (e.g., argument number too high, argument name too long, etc.)
455      * @throws NumberFormatException if a number could not be parsed
456      * @stable ICU 4.8
457      */
458     MessagePattern &parseChoiceStyle(const UnicodeString &pattern,
459                                      UParseError *parseError, UErrorCode &errorCode);
460 
461     /**
462      * Parses a PluralFormat pattern string.
463      * @param pattern a PluralFormat pattern string
464      * @param parseError Struct to receive information on the position
465      *                   of an error within the pattern.
466      *                   Can be NULL.
467      * @param errorCode Standard ICU error code. Its input value must
468      *                  pass the U_SUCCESS() test, or else the function returns
469      *                  immediately. Check for U_FAILURE() on output or use with
470      *                  function chaining. (See User Guide for details.)
471      * @return *this
472      * @throws IllegalArgumentException for syntax errors in the pattern string
473      * @throws IndexOutOfBoundsException if certain limits are exceeded
474      *         (e.g., argument number too high, argument name too long, etc.)
475      * @throws NumberFormatException if a number could not be parsed
476      * @stable ICU 4.8
477      */
478     MessagePattern &parsePluralStyle(const UnicodeString &pattern,
479                                      UParseError *parseError, UErrorCode &errorCode);
480 
481     /**
482      * Parses a SelectFormat pattern string.
483      * @param pattern a SelectFormat pattern string
484      * @param parseError Struct to receive information on the position
485      *                   of an error within the pattern.
486      *                   Can be NULL.
487      * @param errorCode Standard ICU error code. Its input value must
488      *                  pass the U_SUCCESS() test, or else the function returns
489      *                  immediately. Check for U_FAILURE() on output or use with
490      *                  function chaining. (See User Guide for details.)
491      * @return *this
492      * @throws IllegalArgumentException for syntax errors in the pattern string
493      * @throws IndexOutOfBoundsException if certain limits are exceeded
494      *         (e.g., argument number too high, argument name too long, etc.)
495      * @throws NumberFormatException if a number could not be parsed
496      * @stable ICU 4.8
497      */
498     MessagePattern &parseSelectStyle(const UnicodeString &pattern,
499                                      UParseError *parseError, UErrorCode &errorCode);
500 
501     /**
502      * Clears this MessagePattern.
503      * countParts() will return 0.
504      * @stable ICU 4.8
505      */
506     void clear();
507 
508     /**
509      * Clears this MessagePattern and sets the UMessagePatternApostropheMode.
510      * countParts() will return 0.
511      * @param mode The new UMessagePatternApostropheMode.
512      * @stable ICU 4.8
513      */
clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode)514     void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) {
515         clear();
516         aposMode=mode;
517     }
518 
519     /**
520      * @param other another object to compare with.
521      * @return TRUE if this object is equivalent to the other one.
522      * @stable ICU 4.8
523      */
524     UBool operator==(const MessagePattern &other) const;
525 
526     /**
527      * @param other another object to compare with.
528      * @return FALSE if this object is equivalent to the other one.
529      * @stable ICU 4.8
530      */
531     inline UBool operator!=(const MessagePattern &other) const {
532         return !operator==(other);
533     }
534 
535     /**
536      * @return A hash code for this object.
537      * @stable ICU 4.8
538      */
539     int32_t hashCode() const;
540 
541     /**
542      * @return this instance's UMessagePatternApostropheMode.
543      * @stable ICU 4.8
544      */
getApostropheMode()545     UMessagePatternApostropheMode getApostropheMode() const {
546         return aposMode;
547     }
548 
549     // Java has package-private jdkAposMode() here.
550     // In C++, this is declared in the MessageImpl class.
551 
552     /**
553      * @return the parsed pattern string (null if none was parsed).
554      * @stable ICU 4.8
555      */
getPatternString()556     const UnicodeString &getPatternString() const {
557         return msg;
558     }
559 
560     /**
561      * Does the parsed pattern have named arguments like {first_name}?
562      * @return TRUE if the parsed pattern has at least one named argument.
563      * @stable ICU 4.8
564      */
hasNamedArguments()565     UBool hasNamedArguments() const {
566         return hasArgNames;
567     }
568 
569     /**
570      * Does the parsed pattern have numbered arguments like {2}?
571      * @return TRUE if the parsed pattern has at least one numbered argument.
572      * @stable ICU 4.8
573      */
hasNumberedArguments()574     UBool hasNumberedArguments() const {
575         return hasArgNumbers;
576     }
577 
578     /**
579      * Validates and parses an argument name or argument number string.
580      * An argument name must be a "pattern identifier", that is, it must contain
581      * no Unicode Pattern_Syntax or Pattern_White_Space characters.
582      * If it only contains ASCII digits, then it must be a small integer with no leading zero.
583      * @param name Input string.
584      * @return &gt;=0 if the name is a valid number,
585      *         ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
586      *         ARG_NAME_NOT_VALID (-2) if it is neither.
587      * @stable ICU 4.8
588      */
589     static int32_t validateArgumentName(const UnicodeString &name);
590 
591     /**
592      * Returns a version of the parsed pattern string where each ASCII apostrophe
593      * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax.
594      * <p>
595      * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}."
596      * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}."
597      * @return the deep-auto-quoted version of the parsed pattern string.
598      * @see MessageFormat.autoQuoteApostrophe()
599      * @stable ICU 4.8
600      */
601     UnicodeString autoQuoteApostropheDeep() const;
602 
603     class Part;
604 
605     /**
606      * Returns the number of "parts" created by parsing the pattern string.
607      * Returns 0 if no pattern has been parsed or clear() was called.
608      * @return the number of pattern parts.
609      * @stable ICU 4.8
610      */
countParts()611     int32_t countParts() const {
612         return partsLength;
613     }
614 
615     /**
616      * Gets the i-th pattern "part".
617      * @param i The index of the Part data. (0..countParts()-1)
618      * @return the i-th pattern "part".
619      * @stable ICU 4.8
620      */
getPart(int32_t i)621     const Part &getPart(int32_t i) const {
622         return parts[i];
623     }
624 
625     /**
626      * Returns the UMessagePatternPartType of the i-th pattern "part".
627      * Convenience method for getPart(i).getType().
628      * @param i The index of the Part data. (0..countParts()-1)
629      * @return The UMessagePatternPartType of the i-th Part.
630      * @stable ICU 4.8
631      */
getPartType(int32_t i)632     UMessagePatternPartType getPartType(int32_t i) const {
633         return getPart(i).type;
634     }
635 
636     /**
637      * Returns the pattern index of the specified pattern "part".
638      * Convenience method for getPart(partIndex).getIndex().
639      * @param partIndex The index of the Part data. (0..countParts()-1)
640      * @return The pattern index of this Part.
641      * @stable ICU 4.8
642      */
getPatternIndex(int32_t partIndex)643     int32_t getPatternIndex(int32_t partIndex) const {
644         return getPart(partIndex).index;
645     }
646 
647     /**
648      * Returns the substring of the pattern string indicated by the Part.
649      * Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()).
650      * @param part a part of this MessagePattern.
651      * @return the substring associated with part.
652      * @stable ICU 4.8
653      */
getSubstring(const Part & part)654     UnicodeString getSubstring(const Part &part) const {
655         return msg.tempSubString(part.index, part.length);
656     }
657 
658     /**
659      * Compares the part's substring with the input string s.
660      * @param part a part of this MessagePattern.
661      * @param s a string.
662      * @return TRUE if getSubstring(part).equals(s).
663      * @stable ICU 4.8
664      */
partSubstringMatches(const Part & part,const UnicodeString & s)665     UBool partSubstringMatches(const Part &part, const UnicodeString &s) const {
666         return 0==msg.compare(part.index, part.length, s);
667     }
668 
669     /**
670      * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE.
671      * @param part a part of this MessagePattern.
672      * @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part.
673      * @stable ICU 4.8
674      */
675     double getNumericValue(const Part &part) const;
676 
677     /**
678      * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
679      * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
680      * @return the "offset:" value.
681      * @stable ICU 4.8
682      */
683     double getPluralOffset(int32_t pluralStart) const;
684 
685     /**
686      * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start.
687      * @param start The index of some Part data (0..countParts()-1);
688      *        this Part should be of Type ARG_START or MSG_START.
689      * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level,
690      *         or start itself if getPartType(start)!=ARG|MSG_START.
691      * @stable ICU 4.8
692      */
getLimitPartIndex(int32_t start)693     int32_t getLimitPartIndex(int32_t start) const {
694         int32_t limit=getPart(start).limitPartIndex;
695         if(limit<start) {
696             return start;
697         }
698         return limit;
699     }
700 
701     /**
702      * A message pattern "part", representing a pattern parsing event.
703      * There is a part for the start and end of a message or argument,
704      * for quoting and escaping of and with ASCII apostrophes,
705      * and for syntax elements of "complex" arguments.
706      * @stable ICU 4.8
707      */
708     class Part : public UMemory {
709     public:
710         /**
711          * Default constructor, do not use.
712          * @internal
713          */
Part()714         Part() {}
715 
716         /**
717          * Returns the type of this part.
718          * @return the part type.
719          * @stable ICU 4.8
720          */
getType()721         UMessagePatternPartType getType() const {
722             return type;
723         }
724 
725         /**
726          * Returns the pattern string index associated with this Part.
727          * @return this part's pattern string index.
728          * @stable ICU 4.8
729          */
getIndex()730         int32_t getIndex() const {
731             return index;
732         }
733 
734         /**
735          * Returns the length of the pattern substring associated with this Part.
736          * This is 0 for some parts.
737          * @return this part's pattern substring length.
738          * @stable ICU 4.8
739          */
getLength()740         int32_t getLength() const {
741             return length;
742         }
743 
744         /**
745          * Returns the pattern string limit (exclusive-end) index associated with this Part.
746          * Convenience method for getIndex()+getLength().
747          * @return this part's pattern string limit index, same as getIndex()+getLength().
748          * @stable ICU 4.8
749          */
getLimit()750         int32_t getLimit() const {
751             return index+length;
752         }
753 
754         /**
755          * Returns a value associated with this part.
756          * See the documentation of each part type for details.
757          * @return the part value.
758          * @stable ICU 4.8
759          */
getValue()760         int32_t getValue() const {
761             return value;
762         }
763 
764         /**
765          * Returns the argument type if this part is of type ARG_START or ARG_LIMIT,
766          * otherwise UMSGPAT_ARG_TYPE_NONE.
767          * @return the argument type for this part.
768          * @stable ICU 4.8
769          */
getArgType()770         UMessagePatternArgType getArgType() const {
771             UMessagePatternPartType type=getType();
772             if(type==UMSGPAT_PART_TYPE_ARG_START || type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
773                 return (UMessagePatternArgType)value;
774             } else {
775                 return UMSGPAT_ARG_TYPE_NONE;
776             }
777         }
778 
779         /**
780          * Indicates whether the Part type has a numeric value.
781          * If so, then that numeric value can be retrieved via MessagePattern.getNumericValue().
782          * @param type The Part type to be tested.
783          * @return TRUE if the Part type has a numeric value.
784          * @stable ICU 4.8
785          */
hasNumericValue(UMessagePatternPartType type)786         static UBool hasNumericValue(UMessagePatternPartType type) {
787             return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE;
788         }
789 
790         /**
791          * @param other another object to compare with.
792          * @return TRUE if this object is equivalent to the other one.
793          * @stable ICU 4.8
794          */
795         UBool operator==(const Part &other) const;  // Declared only; the body is not in this header section. operator!= is defined as its negation.
796 
797         /**
798          * @param other another object to compare with.
799          * @return FALSE if this object is equivalent to the other one.
800          * @stable ICU 4.8
801          */
802         inline UBool operator!=(const Part &other) const {
803             return !operator==(other);
804         }
805 
806         /**
807          * @return A hash code for this object.
808          * @stable ICU 4.8
809          */
hashCode()810         int32_t hashCode() const {
811             return ((type*37+index)*37+length)*37+value;
812         }
813 
814     private:
            // MessagePattern is a friend, so it can read and write the fields below directly.
815         friend class MessagePattern;
816 
            // 0xffff is the largest value representable in the uint16_t `length` field and
            // 0x7fff the largest in the int16_t `value` field.
            // NOTE(review): presumably the limits enforced when parts are created -- the
            // enforcing code is not in this header section; confirm.
817         static const int32_t MAX_LENGTH=0xffff;
818         static const int32_t MAX_VALUE=0x7fff;
819 
820         // Some fields are not final because they are modified during pattern parsing.
821         // After pattern parsing, the parts are effectively immutable.
822         UMessagePatternPartType type;  // Part type; mixed into hashCode().
823         int32_t index;  // Pattern string index; returned by getIndex().
824         uint16_t length;  // Pattern substring length; returned by getLength().
825         int16_t value;  // Returned by getValue(); getArgType() casts it to UMessagePatternArgType.
            // Not read by any method in this section and not part of hashCode().
            // NOTE(review): presumably the index of the matching limit part -- confirm.
826         int32_t limitPartIndex;
827     };
828 
829 private:
        // Internal parsing helpers. Only the declarations are visible in this header
        // section; the notes below are inferred from the signatures alone.
        // NOTE(review): parseError/errorCode look like the usual ICU out-parameters
        // (error location and status code) -- confirm against the implementation.
830     void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
831 
832     void postParse();
833 
        // NOTE(review): the int32_t results of the parse*() helpers below are presumably
        // the pattern index at which parsing continues -- confirm against the implementation.
834     int32_t parseMessage(int32_t index, int32_t msgStartLength,
835                          int32_t nestingLevel, UMessagePatternArgType parentType,
836                          UParseError *parseError, UErrorCode &errorCode);
837 
838     int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
839                      UParseError *parseError, UErrorCode &errorCode);
840 
841     int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode);
842 
843     int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel,
844                              UParseError *parseError, UErrorCode &errorCode);
845 
        // Takes the argument type explicitly, unlike parseChoiceStyle().
846     int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel,
847                                      UParseError *parseError, UErrorCode &errorCode);
848 
849     /**
850      * Validates and parses an argument name or argument number string.
851      * This internal method assumes that the input substring is a "pattern identifier".
852      * @return &gt;=0 if the name is a valid number,
853      *         ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
854      *         ARG_NAME_NOT_VALID (-2) if it is neither.
855      * @see #validateArgumentName(String)
856      */
857     static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit);
858 
parseArgNumber(int32_t start,int32_t limit)859     int32_t parseArgNumber(int32_t start, int32_t limit) {
860         return parseArgNumber(msg, start, limit);
861     }
862 
863     /**
864      * Parses a number from the specified message substring.
865      * @param start start index into the message string
866      * @param limit limit index into the message string, must be start<limit
867      * @param allowInfinity TRUE if U+221E is allowed (for ChoiceFormat)
868      * @param parseError
869      * @param errorCode
870      */
871     void parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
872                      UParseError *parseError, UErrorCode &errorCode);
873 
874     // Java has package-private appendReducedApostrophes() here.
875     // In C++, this is declared in the MessageImpl class.
876 
        // NOTE(review): the skip*() helpers take a pattern index and presumably return
        // the index just past the skipped text -- confirm against the implementation.
877     int32_t skipWhiteSpace(int32_t index);
878 
879     int32_t skipIdentifier(int32_t index);
880 
881     /**
882      * Skips a sequence of characters that could occur in a double value.
883      * Does not fully parse or validate the value.
884      */
885     int32_t skipDouble(int32_t index);
886 
        // Static, so it tests the code point c without access to this object's state.
887     static UBool isArgTypeChar(UChar32 c);
888 
        // NOTE(review): the four predicates below presumably test whether the text at
        // the given pattern index spells the corresponding argument type keyword -- confirm.
889     UBool isChoice(int32_t index);
890 
891     UBool isPlural(int32_t index);
892 
893     UBool isSelect(int32_t index);
894 
895     UBool isOrdinal(int32_t index);
896 
897     /**
898      * @return TRUE if we are inside a MessageFormat (sub-)pattern,
899      *         as opposed to inside a top-level choice/plural/select pattern.
900      */
901     UBool inMessageFormatPattern(int32_t nestingLevel);
902 
903     /**
904      * @return TRUE if we are in a MessageFormat sub-pattern
905      *         of a top-level ChoiceFormat pattern.
906      */
907     UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType);
908 
        // Part-building helpers; the parameters mirror Part's type/index/length/value fields.
        // Note that length and value are int32_t here, while Part stores them as
        // uint16_t and int16_t respectively.
        // NOTE(review): presumably these append a Part to the parts list and report
        // failures through errorCode -- confirm against the implementation.
909     void addPart(UMessagePatternPartType type, int32_t index, int32_t length,
910                  int32_t value, UErrorCode &errorCode);
911 
912     void addLimitPart(int32_t start,
913                       UMessagePatternPartType type, int32_t index, int32_t length,
914                       int32_t value, UErrorCode &errorCode);
915 
916     void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode);
917 
        // NOTE(review): presumably fills *parseError for the given pattern index -- confirm.
918     void setParseError(UParseError *parseError, int32_t index);
919 
920     // No ICU "poor man's RTTI" for this class nor its subclasses.
        // Declared virtual inside the private section; the definition is not in this header section.
921     virtual UClassID getDynamicClassID() const;
922 
        // Storage helpers. NOTE(review): presumably they return FALSE and set errorCode
        // when allocation fails -- confirm against the implementation.
923     UBool init(UErrorCode &errorCode);
924     UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode);
925 
926     UMessagePatternApostropheMode aposMode;  // Apostrophe-handling mode for this pattern.
927     UnicodeString msg;  // Message string that parseArgNumber(start, limit) indexes into.
928     // ArrayList<Part> parts=new ArrayList<Part>();
        // C++ counterpart of the Java list above: a list object, a raw Part pointer and a count.
        // NOTE(review): parts presumably points into storage owned by partsList -- confirm.
929     MessagePatternPartsList *partsList;
930     Part *parts;
931     int32_t partsLength;
932     // ArrayList<Double> numericValues;
        // Same three-member layout for the numeric values.
        // NOTE(review): numericValues presumably points into storage owned by numericValuesList -- confirm.
933     MessagePatternDoubleList *numericValuesList;
934     double *numericValues;
935     int32_t numericValuesLength;
        // NOTE(review): flags presumably set during parsing; their setters are not in this header section.
936     UBool hasArgNames;
937     UBool hasArgNumbers;
938     UBool needsAutoQuoting;
939 };
940 
941 U_NAMESPACE_END
942 
943 #endif  // !UCONFIG_NO_FORMATTING
944 
945 #endif  // __MESSAGEPATTERN_H__
946