• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 *   Copyright (C) 2011, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 *******************************************************************************
6 *   file name:  messagepattern.h
7 *   encoding:   US-ASCII
8 *   tab size:   8 (not used)
9 *   indentation:4
10 *
11 *   created on: 2011mar14
12 *   created by: Markus W. Scherer
13 */
14 
15 #ifndef __MESSAGEPATTERN_H__
16 #define __MESSAGEPATTERN_H__
17 
18 /**
19  * \file
20  * \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns.
21  */
22 
23 #include "unicode/utypes.h"
24 
25 #if !UCONFIG_NO_FORMATTING
26 
27 #include "unicode/parseerr.h"
28 #include "unicode/unistr.h"
29 
30 /**
31  * Mode for when an apostrophe starts quoted literal text for MessageFormat output.
32  * The default is DOUBLE_OPTIONAL unless overridden via uconfig.h
33  * (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE).
34  * <p>
35  * A pair of adjacent apostrophes always results in a single apostrophe in the output,
36  * even when the pair is between two single, text-quoting apostrophes.
37  * <p>
38  * The following table shows examples of desired MessageFormat.format() output
39  * with the pattern strings that yield that output.
40  * <p>
41  * <table>
42  *   <tr>
43  *     <th>Desired output</th>
44  *     <th>DOUBLE_OPTIONAL</th>
45  *     <th>DOUBLE_REQUIRED</th>
46  *   </tr>
47  *   <tr>
48  *     <td>I see {many}</td>
49  *     <td>I see '{many}'</td>
50  *     <td>(same)</td>
51  *   </tr>
52  *   <tr>
53  *     <td>I said {'Wow!'}</td>
54  *     <td>I said '{''Wow!''}'</td>
55  *     <td>(same)</td>
56  *   </tr>
57  *   <tr>
58  *     <td>I don't know</td>
59  *     <td>I don't know OR<br> I don''t know</td>
60  *     <td>I don''t know</td>
61  *   </tr>
62  * </table>
63  * @draft ICU 4.8
64  * @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
65  */
66 enum UMessagePatternApostropheMode {
67     /**
68      * A literal apostrophe is represented by
69      * either a single or a double apostrophe pattern character.
70      * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text
71      * if it immediately precedes a curly brace ({ or }),
72      * or a pipe symbol | if inside a choice format,
73      * or a pound symbol # if inside a plural format.
        * For example, both "I don't know" and "I don''t know" yield the output "I don't know".
74      * <p>
75      * This is the default behavior starting with ICU 4.8.
76      * @draft ICU 4.8
77      */
78     UMSGPAT_APOS_DOUBLE_OPTIONAL,
79     /**
80      * A literal apostrophe must be represented by
81      * a double apostrophe pattern character.
82      * A single apostrophe always starts quoted literal text.
        * For example, the output "I don't know" requires the pattern "I don''t know".
83      * <p>
84      * This is the behavior of ICU 4.6 and earlier, and of the JDK.
85      * @draft ICU 4.8
86      */
87     UMSGPAT_APOS_DOUBLE_REQUIRED
88 };
89 /**
90  * @draft ICU 4.8
91  */
92 typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode;
93 
94 /**
95  * MessagePattern::Part type constants.
96  * @draft ICU 4.8
97  */
98 enum UMessagePatternPartType {
99     /**
100      * Start of a message pattern (main or nested).
101      * The length is 0 for the top-level message
102      * and for a choice argument sub-message, otherwise 1 for the '{'.
103      * The value indicates the nesting level, starting with 0 for the main message.
104      * <p>
105      * There is always a later MSG_LIMIT part.
106      * @draft ICU 4.8
107      */
108     UMSGPAT_PART_TYPE_MSG_START,
109     /**
110      * End of a message pattern (main or nested).
111      * The length is 0 for the top-level message and
112      * the last sub-message of a choice argument,
113      * otherwise 1 for the '}' or (in a choice argument style) the '|'.
114      * The value indicates the nesting level, starting with 0 for the main message.
115      * @draft ICU 4.8
116      */
117     UMSGPAT_PART_TYPE_MSG_LIMIT,
118     /**
119      * Indicates a substring of the pattern string which is to be skipped when formatting.
120      * For example, an apostrophe that begins or ends quoted text
121      * would be indicated with such a part.
122      * The value is undefined and currently always 0.
123      * @draft ICU 4.8
124      */
125     UMSGPAT_PART_TYPE_SKIP_SYNTAX,
126     /**
127      * Indicates that a syntax character needs to be inserted for auto-quoting.
128      * The length is 0.
129      * The value is the character code of the insertion character. (U+0027=APOSTROPHE)
130      * @draft ICU 4.8
131      */
132     UMSGPAT_PART_TYPE_INSERT_CHAR,
133     /**
134      * Indicates a syntactic (non-escaped) # symbol in a plural variant.
135      * When formatting, replace this part's substring with the
136      * (value-offset) for the plural argument value.
137      * The value is undefined and currently always 0.
138      * @draft ICU 4.8
139      */
140     UMSGPAT_PART_TYPE_REPLACE_NUMBER,
141     /**
142      * Start of an argument.
143      * The length is 1 for the '{'.
144      * The value is the ordinal value of the ArgType. Use getArgType().
         * <p>
         * This part is followed by either an ARG_NUMBER or ARG_NAME,
         * followed by optional argument sub-parts (see UMessagePatternArgType constants)
         * and finally an ARG_LIMIT part.
145      * @draft ICU 4.8
146      */
147     UMSGPAT_PART_TYPE_ARG_START,
148     /**
149      * End of an argument.
150      * The length is 1 for the '}'.
151      * The value is the ordinal value of the ArgType. Use getArgType().
156      * @draft ICU 4.8
157      */
158     UMSGPAT_PART_TYPE_ARG_LIMIT,
159     /**
160      * The argument number, provided by the value.
161      * @draft ICU 4.8
162      */
163     UMSGPAT_PART_TYPE_ARG_NUMBER,
164     /**
165      * The argument name.
166      * The value is undefined and currently always 0.
167      * @draft ICU 4.8
168      */
169     UMSGPAT_PART_TYPE_ARG_NAME,
170     /**
171      * The argument type.
172      * The value is undefined and currently always 0.
173      * @draft ICU 4.8
174      */
175     UMSGPAT_PART_TYPE_ARG_TYPE,
176     /**
177      * The argument style text.
178      * The value is undefined and currently always 0.
179      * @draft ICU 4.8
180      */
181     UMSGPAT_PART_TYPE_ARG_STYLE,
182     /**
183      * A selector substring in a "complex" argument style.
184      * The value is undefined and currently always 0.
185      * @draft ICU 4.8
186      */
187     UMSGPAT_PART_TYPE_ARG_SELECTOR,
188     /**
189      * An integer value, for example the offset or an explicit selector value
190      * in a PluralFormat style.
191      * The part value is the integer value.
192      * @draft ICU 4.8
193      */
194     UMSGPAT_PART_TYPE_ARG_INT,
195     /**
196      * A numeric value, for example the offset or an explicit selector value
197      * in a PluralFormat style.
198      * The part value is an index into an internal array of numeric values;
199      * use getNumericValue().
200      * @draft ICU 4.8
201      */
202     UMSGPAT_PART_TYPE_ARG_DOUBLE
203 };
204 /**
205  * @draft ICU 4.8
206  */
207 typedef enum UMessagePatternPartType UMessagePatternPartType;
208 
209 /**
210  * Argument type constants.
211  * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts.
212  *
213  * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT,
214  * with a nesting level one greater than the surrounding message.
215  * @draft ICU 4.8
216  */
217 enum UMessagePatternArgType {
218     /**
219      * The argument has no specified type.
         * Between its ARG_START and ARG_LIMIT parts there is only
         * an ARG_NAME or ARG_NUMBER part.
220      * @draft ICU 4.8
221      */
222     UMSGPAT_ARG_TYPE_NONE,
223     /**
224      * The argument has a "simple" type which is provided by the ARG_TYPE part.
225      * An ARG_STYLE part might follow that.
226      * @draft ICU 4.8
227      */
228     UMSGPAT_ARG_TYPE_SIMPLE,
229     /**
230      * The argument is a ChoiceFormat with one or more
231      * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
232      * @draft ICU 4.8
233      */
234     UMSGPAT_ARG_TYPE_CHOICE,
235     /**
236      * The argument is a PluralFormat with an optional ARG_INT or ARG_DOUBLE offset
237      * (e.g., offset:1)
238      * and one or more (ARG_SELECTOR [explicit-value] message) tuples.
239      * If the selector has an explicit value (e.g., =2), then
240      * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message.
241      * Otherwise the message immediately follows the ARG_SELECTOR.
242      * @draft ICU 4.8
243      */
244     UMSGPAT_ARG_TYPE_PLURAL,
245     /**
246      * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs.
247      * @draft ICU 4.8
248      */
249     UMSGPAT_ARG_TYPE_SELECT
250 };
251 /**
252  * @draft ICU 4.8
253  */
254 typedef enum UMessagePatternArgType UMessagePatternArgType;
255 
256 /**
257  * @draft ICU 4.8
258  */
259 enum {
260     /**
261      * Return value from MessagePattern::validateArgumentName() for when
262      * the string is a valid "pattern identifier" but not a number.
263      * @draft ICU 4.8
264      */
265     UMSGPAT_ARG_NAME_NOT_NUMBER=-1,
266 
267     /**
268      * Return value from MessagePattern::validateArgumentName() for when
269      * the string is invalid.
270      * It might not be a valid "pattern identifier",
271      * or it has only ASCII digits but there is a leading zero or the number is too large.
272      * @draft ICU 4.8
273      */
274     UMSGPAT_ARG_NAME_NOT_VALID=-2
275 };
276 
277 /**
278  * Special value that is returned by getNumericValue(Part) when no
279  * numeric value is defined for a part.
280  * @see MessagePattern.getNumericValue()
281  * @draft ICU 4.8
282  */
/* Sentinel double: compare the result of getNumericValue() against this to detect "no numeric value". */
283 #define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789))
284 
285 U_NAMESPACE_BEGIN
286 
287 class MessagePatternDoubleList;
288 class MessagePatternPartsList;
289 
290 /**
291  * Parses and represents ICU MessageFormat patterns.
292  * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat.
293  * Used in the implementations of those classes as well as in tools
294  * for message validation, translation and format conversion.
295  * <p>
296  * The parser handles all syntax relevant for identifying message arguments.
297  * This includes "complex" arguments whose style strings contain
298  * nested MessageFormat pattern substrings.
299  * For "simple" arguments (with no nested MessageFormat pattern substrings),
300  * the argument style is not parsed any further.
301  * <p>
302  * The parser handles named and numbered message arguments and allows both in one message.
303  * <p>
304  * Once a pattern has been parsed successfully, iterate through the parsed data
305  * with countParts(), getPart() and related methods.
306  * <p>
307  * The data logically represents a parse tree, but is stored and accessed
308  * as a list of "parts" for fast and simple parsing and to minimize object allocations.
309  * Arguments and nested messages are best handled via recursion.
310  * For every _START "part", MessagePattern.getLimitPartIndex() efficiently returns
311  * the index of the corresponding _LIMIT "part".
312  * <p>
313  * List of "parts":
314  * <pre>
315  * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT
316  * argument = noneArg | simpleArg | complexArg
317  * complexArg = choiceArg | pluralArg | selectArg
318  *
319  * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE
320  * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE
321  * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE
322  * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL
323  * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT
324  *
325  * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+
326  * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+
327  * selectStyle = (ARG_SELECTOR message)+
328  * </pre>
329  * <ul>
330  *   <li>Literal output text is not represented directly by "parts" but accessed
331  *       between parts of a message, from one part's getLimit() to the next part's getIndex().
332  *   <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE.
333  *   <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or
334  *       the less-than-or-equal-to sign (U+2264).
335  *   <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value.
336  *       The optional numeric Part between each (ARG_SELECTOR, message) pair
337  *       is the value of an explicit-number selector like "=2",
338  *       otherwise the selector is a non-numeric identifier.
339  *   <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle.
340  * </ul>
341  * <p>
342  * This class is not intended for public subclassing.
343  *
344  * @draft ICU 4.8
345  */
346 class U_COMMON_API MessagePattern : public UObject {
347 public:
348     /**
349      * Constructs an empty MessagePattern with default UMessagePatternApostropheMode.
350      * @param errorCode Standard ICU error code. Its input value must
351      *                  pass the U_SUCCESS() test, or else the function returns
352      *                  immediately. Check for U_FAILURE() on output or use with
353      *                  function chaining. (See User Guide for details.)
354      * @draft ICU 4.8
355      */
356     MessagePattern(UErrorCode &errorCode);
357 
358     /**
359      * Constructs an empty MessagePattern.
360      * @param mode Explicit UMessagePatternApostropheMode.
361      * @param errorCode Standard ICU error code. Its input value must
362      *                  pass the U_SUCCESS() test, or else the function returns
363      *                  immediately. Check for U_FAILURE() on output or use with
364      *                  function chaining. (See User Guide for details.)
365      * @draft ICU 4.8
366      */
367     MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode);
368 
369     /**
370      * Constructs a MessagePattern with default UMessagePatternApostropheMode and
371      * parses the MessageFormat pattern string.
372      * @param pattern a MessageFormat pattern string
373      * @param parseError Struct to receive information on the position
374      *                   of an error within the pattern.
375      *                   Can be NULL.
376      * @param errorCode Standard ICU error code. Its input value must
377      *                  pass the U_SUCCESS() test, or else the function returns
378      *                  immediately. Check for U_FAILURE() on output or use with
379      *                  function chaining. (See User Guide for details.)
380      * TODO: turn @throws into UErrorCode specifics?
381      * @throws IllegalArgumentException for syntax errors in the pattern string
382      * @throws IndexOutOfBoundsException if certain limits are exceeded
383      *         (e.g., argument number too high, argument name too long, etc.)
384      * @throws NumberFormatException if a number could not be parsed
385      * @draft ICU 4.8
386      */
387     MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
388 
389     /**
390      * Copy constructor.
391      * @param other Object to copy.
392      * @draft ICU 4.8
393      */
394     MessagePattern(const MessagePattern &other);
395 
396     /**
397      * Assignment operator.
398      * @param other Object to copy.
399      * @return *this=other
400      * @draft ICU 4.8
401      */
402     MessagePattern &operator=(const MessagePattern &other);
403 
404     /**
405      * Destructor.
406      * @draft ICU 4.8
407      */
408     virtual ~MessagePattern();
409 
410     /**
411      * Parses a MessageFormat pattern string.
412      * @param pattern a MessageFormat pattern string
413      * @param parseError Struct to receive information on the position
414      *                   of an error within the pattern.
415      *                   Can be NULL.
416      * @param errorCode Standard ICU error code. Its input value must
417      *                  pass the U_SUCCESS() test, or else the function returns
418      *                  immediately. Check for U_FAILURE() on output or use with
419      *                  function chaining. (See User Guide for details.)
420      * @return *this
421      * @throws IllegalArgumentException for syntax errors in the pattern string
422      * @throws IndexOutOfBoundsException if certain limits are exceeded
423      *         (e.g., argument number too high, argument name too long, etc.)
424      * @throws NumberFormatException if a number could not be parsed
425      * @draft ICU 4.8
426      */
427     MessagePattern &parse(const UnicodeString &pattern,
428                           UParseError *parseError, UErrorCode &errorCode);
429 
430     /**
431      * Parses a ChoiceFormat pattern string.
432      * @param pattern a ChoiceFormat pattern string
433      * @param parseError Struct to receive information on the position
434      *                   of an error within the pattern.
435      *                   Can be NULL.
436      * @param errorCode Standard ICU error code. Its input value must
437      *                  pass the U_SUCCESS() test, or else the function returns
438      *                  immediately. Check for U_FAILURE() on output or use with
439      *                  function chaining. (See User Guide for details.)
440      * @return *this
441      * @throws IllegalArgumentException for syntax errors in the pattern string
442      * @throws IndexOutOfBoundsException if certain limits are exceeded
443      *         (e.g., argument number too high, argument name too long, etc.)
444      * @throws NumberFormatException if a number could not be parsed
445      * @draft ICU 4.8
446      */
447     MessagePattern &parseChoiceStyle(const UnicodeString &pattern,
448                                      UParseError *parseError, UErrorCode &errorCode);
449 
450     /**
451      * Parses a PluralFormat pattern string.
452      * @param pattern a PluralFormat pattern string
453      * @param parseError Struct to receive information on the position
454      *                   of an error within the pattern.
455      *                   Can be NULL.
456      * @param errorCode Standard ICU error code. Its input value must
457      *                  pass the U_SUCCESS() test, or else the function returns
458      *                  immediately. Check for U_FAILURE() on output or use with
459      *                  function chaining. (See User Guide for details.)
460      * @return *this
461      * @throws IllegalArgumentException for syntax errors in the pattern string
462      * @throws IndexOutOfBoundsException if certain limits are exceeded
463      *         (e.g., argument number too high, argument name too long, etc.)
464      * @throws NumberFormatException if a number could not be parsed
465      * @draft ICU 4.8
466      */
467     MessagePattern &parsePluralStyle(const UnicodeString &pattern,
468                                      UParseError *parseError, UErrorCode &errorCode);
469 
470     /**
471      * Parses a SelectFormat pattern string.
472      * @param pattern a SelectFormat pattern string
473      * @param parseError Struct to receive information on the position
474      *                   of an error within the pattern.
475      *                   Can be NULL.
476      * @param errorCode Standard ICU error code. Its input value must
477      *                  pass the U_SUCCESS() test, or else the function returns
478      *                  immediately. Check for U_FAILURE() on output or use with
479      *                  function chaining. (See User Guide for details.)
480      * @return *this
481      * @throws IllegalArgumentException for syntax errors in the pattern string
482      * @throws IndexOutOfBoundsException if certain limits are exceeded
483      *         (e.g., argument number too high, argument name too long, etc.)
484      * @throws NumberFormatException if a number could not be parsed
485      * @draft ICU 4.8
486      */
487     MessagePattern &parseSelectStyle(const UnicodeString &pattern,
488                                      UParseError *parseError, UErrorCode &errorCode);
489 
490     /**
491      * Clears this MessagePattern.
492      * countParts() will return 0.
493      * @draft ICU 4.8
494      */
495     void clear();
496 
497     /**
498      * Clears this MessagePattern and sets the UMessagePatternApostropheMode.
499      * countParts() will return 0.
500      * @param mode The new UMessagePatternApostropheMode.
501      * @draft ICU 4.8
502      */
clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode)503     void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) {
504         clear();
505         aposMode=mode;
506     }
507 
508     /**
509      * @param other another object to compare with.
510      * @return TRUE if this object is equivalent to the other one.
511      * @draft ICU 4.8
512      */
513     UBool operator==(const MessagePattern &other) const;
514 
515     /**
516      * @param other another object to compare with.
517      * @return FALSE if this object is equivalent to the other one.
518      * @draft ICU 4.8
519      */
520     inline UBool operator!=(const MessagePattern &other) const {
521         return !operator==(other);
522     }
523 
524     /**
525      * @return A hash code for this object.
526      * @draft ICU 4.8
527      */
528     int32_t hashCode() const;
529 
530     /**
531      * @return this instance's UMessagePatternApostropheMode.
532      * @draft ICU 4.8
533      */
getApostropheMode()534     UMessagePatternApostropheMode getApostropheMode() const {
535         return aposMode;
536     }
537 
538     // Java has package-private jdkAposMode() here.
539     // In C++, this is declared in the MessageImpl class.
540 
541     /**
542      * @return the parsed pattern string, by reference (never null in C++; presumably empty if none was parsed).
543      * @draft ICU 4.8
544      */
getPatternString()545     const UnicodeString &getPatternString() const {
546         return msg;
547     }
548 
549     /**
550      * Does the parsed pattern have named arguments like {first_name}?
551      * @return TRUE if the parsed pattern has at least one named argument.
552      * @draft ICU 4.8
553      */
hasNamedArguments()554     UBool hasNamedArguments() const {
555         return hasArgNames;
556     }
557 
558     /**
559      * Does the parsed pattern have numbered arguments like {2}?
560      * @return TRUE if the parsed pattern has at least one numbered argument.
561      * @draft ICU 4.8
562      */
hasNumberedArguments()563     UBool hasNumberedArguments() const {
564         return hasArgNumbers;
565     }
566 
567     /**
568      * Validates and parses an argument name or argument number string.
569      * An argument name must be a "pattern identifier", that is, it must contain
570      * no Unicode Pattern_Syntax or Pattern_White_Space characters.
571      * If it only contains ASCII digits, then it must be a small integer with no leading zero.
572      * @param name Input string.
573      * @return &gt;=0 if the name is a valid number,
574      *         UMSGPAT_ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
575      *         UMSGPAT_ARG_NAME_NOT_VALID (-2) if it is neither.
576      * @draft ICU 4.8
577      */
578     static int32_t validateArgumentName(const UnicodeString &name);
579 
580     /**
581      * Returns a version of the parsed pattern string where each ASCII apostrophe
582      * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax.
583      * <p>
584      * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}."
585      * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}."
586      * @return the deep-auto-quoted version of the parsed pattern string.
587      * @see MessageFormat.autoQuoteApostrophe()
588      * @draft ICU 4.8
589      */
590     UnicodeString autoQuoteApostropheDeep() const;
591 
592     class Part;
593 
594     /**
595      * Returns the number of "parts" created by parsing the pattern string.
596      * Returns 0 if no pattern has been parsed or clear() was called.
597      * @return the number of pattern parts.
598      * @draft ICU 4.8
599      */
countParts()600     int32_t countParts() const {
601         return partsLength;
602     }
603 
604     /**
605      * Gets the i-th pattern "part".
606      * @param i The index of the Part data. (0..countParts()-1)
607      * @return the i-th pattern "part".
608      * @draft ICU 4.8
609      */
getPart(int32_t i)610     const Part &getPart(int32_t i) const {
611         return parts[i];
612     }
613 
614     /**
615      * Returns the UMessagePatternPartType of the i-th pattern "part".
616      * Convenience method for getPart(i).getType().
617      * @param i The index of the Part data. (0..countParts()-1)
618      * @return The UMessagePatternPartType of the i-th Part.
619      * @draft ICU 4.8
620      */
getPartType(int32_t i)621     UMessagePatternPartType getPartType(int32_t i) const {
622         return getPart(i).type;
623     }
624 
625     /**
626      * Returns the pattern index of the specified pattern "part".
627      * Convenience method for getPart(partIndex).getIndex().
628      * @param partIndex The index of the Part data. (0..countParts()-1)
629      * @return The pattern index of this Part.
630      * @draft ICU 4.8
631      */
getPatternIndex(int32_t partIndex)632     int32_t getPatternIndex(int32_t partIndex) const {
633         return getPart(partIndex).index;
634     }
635 
636     /**
637      * Returns the substring of the pattern string indicated by the Part.
638      * Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()).
639      * @param part a part of this MessagePattern.
640      * @return the substring associated with part.
641      * @draft ICU 4.8
642      */
getSubstring(const Part & part)643     UnicodeString getSubstring(const Part &part) const {
644         return msg.tempSubString(part.index, part.length);
645     }
646 
647     /**
648      * Compares the part's substring with the input string s.
649      * @param part a part of this MessagePattern.
650      * @param s a string.
651      * @return TRUE if getSubstring(part).equals(s).
652      * @draft ICU 4.8
653      */
partSubstringMatches(const Part & part,const UnicodeString & s)654     UBool partSubstringMatches(const Part &part, const UnicodeString &s) const {
655         return 0==msg.compare(part.index, part.length, s);
656     }
657 
658     /**
659      * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE.
660      * @param part a part of this MessagePattern.
661      * @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part.
662      * @draft ICU 4.8
663      */
664     double getNumericValue(const Part &part) const;
665 
666     /**
667      * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
668      * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
669      * @return the "offset:" value.
670      * @draft ICU 4.8
671      */
672     double getPluralOffset(int32_t pluralStart) const;
673 
674     /**
675      * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start.
676      * @param start The index of some Part data (0..countParts()-1);
677      *        this Part should be of Type ARG_START or MSG_START.
678      * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level,
679      *         or start itself if getPartType(start)!=ARG|MSG_START.
680      * @draft ICU 4.8
681      */
getLimitPartIndex(int32_t start)682     int32_t getLimitPartIndex(int32_t start) const {
683         int32_t limit=getPart(start).limitPartIndex;
684         if(limit<start) {
685             return start;
686         }
687         return limit;
688     }
689 
690     /**
691      * A message pattern "part", representing a pattern parsing event.
692      * There is a part for the start and end of a message or argument,
693      * for quoting and escaping of and with ASCII apostrophes,
694      * and for syntax elements of "complex" arguments.
695      * @draft ICU 4.8
696      */
697     class Part : public UMemory {
698     public:
699         /**
700          * Default constructor, do not use.
701          * @internal
702          */
Part()703         Part() {}
704 
705         /**
706          * Returns the type of this part.
707          * @return the part type.
708          * @draft ICU 4.8
709          */
getType()710         UMessagePatternPartType getType() const {
711             return type;
712         }
713 
714         /**
715          * Returns the pattern string index associated with this Part.
716          * @return this part's pattern string index.
717          * @draft ICU 4.8
718          */
getIndex()719         int32_t getIndex() const {
720             return index;
721         }
722 
723         /**
724          * Returns the length of the pattern substring associated with this Part.
725          * This is 0 for some parts.
726          * @return this part's pattern string index.
727          * @draft ICU 4.8
728          */
getLength()729         int32_t getLength() const {
730             return length;
731         }
732 
733         /**
734          * Returns the pattern string limit (exclusive-end) index associated with this Part.
735          * Convenience method for getIndex()+getLength().
736          * @return this part's pattern string limit index, same as getIndex()+getLength().
737          * @draft ICU 4.8
738          */
getLimit()739         int32_t getLimit() const {
740             return index+length;
741         }
742 
743         /**
744          * Returns a value associated with this part.
745          * See the documentation of each part type for details.
746          * @return the part value.
747          * @draft ICU 4.8
748          */
getValue()749         int32_t getValue() const {
750             return value;
751         }
752 
753         /**
754          * Returns the argument type if this part is of type ARG_START or ARG_LIMIT,
755          * otherwise UMSGPAT_ARG_TYPE_NONE.
756          * @return the argument type for this part.
757          * @draft ICU 4.8
758          */
getArgType()759         UMessagePatternArgType getArgType() const {
760             UMessagePatternPartType type=getType();
761             if(type==UMSGPAT_PART_TYPE_ARG_START || type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
762                 return (UMessagePatternArgType)value;
763             } else {
764                 return UMSGPAT_ARG_TYPE_NONE;
765             }
766         }
767 
768         /**
769          * Indicates whether the Part type has a numeric value.
770          * If so, then that numeric value can be retrieved via MessagePattern.getNumericValue().
771          * @param type The Part type to be tested.
772          * @return TRUE if the Part type has a numeric value.
773          * @draft ICU 4.8
774          */
hasNumericValue(UMessagePatternPartType type)775         static UBool hasNumericValue(UMessagePatternPartType type) {
776             return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE;
777         }
778 
779         /**
780          * @param other another object to compare with.
781          * @return TRUE if this object is equivalent to the other one.
782          * @draft ICU 4.8
783          */
784         UBool operator==(const Part &other) const;
785 
786         /**
787          * @param other another object to compare with.
788          * @return FALSE if this object is equivalent to the other one.
789          * @draft ICU 4.8
790          */
791         inline UBool operator!=(const Part &other) const {
792             return !operator==(other);
793         }
794 
795         /**
796          * @return A hash code for this object.
797          * @draft ICU 4.8
798          */
hashCode()799         int32_t hashCode() const {
800             return ((type*37+index)*37+length)*37+value;
801         }
802 
803     private:
804         friend class MessagePattern;  // Grants MessagePattern access to the private members below.
805 
806         static const int32_t MAX_LENGTH=0xffff;  // Largest value representable by the uint16_t length field.
807         static const int32_t MAX_VALUE=0x7fff;  // Largest value representable by the int16_t value field.
808 
809         // Some fields are not const because they are modified during pattern parsing.
810         // After pattern parsing, the parts are effectively immutable.
811         UMessagePatternPartType type;  // Part type; included in hashCode().
812         int32_t index;  // Start index; getLimit() returns index+length.
813         uint16_t length;  // Length of this part; added to index by getLimit().
814         int16_t value;  // Returned by getValue(); getArgType() casts it to the argument type for ARG_START/ARG_LIMIT parts.
815         int32_t limitPartIndex;  // Not included in hashCode(). NOTE(review): presumably the index of the matching limit part -- confirm in the implementation.
816     };
817 
818 private:
819     void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
820 
821     void postParse();
822 
823     int32_t parseMessage(int32_t index, int32_t msgStartLength,
824                          int32_t nestingLevel, UMessagePatternArgType parentType,
825                          UParseError *parseError, UErrorCode &errorCode);
826 
827     int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
828                      UParseError *parseError, UErrorCode &errorCode);
829 
830     int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode);
831 
832     int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel,
833                              UParseError *parseError, UErrorCode &errorCode);
834 
835     int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel,
836                                      UParseError *parseError, UErrorCode &errorCode);
837 
838     /**
839      * Validates and parses an argument name or argument number string.
840      * This internal method assumes that the input substring is a "pattern identifier".
841      * @return &gt;=0 if the name is a valid number,
842      *         ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
843      *         ARG_NAME_NOT_VALID (-2) if it is neither.
844      * @see #validateArgumentName(String)
845      */
846     static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit);
847 
parseArgNumber(int32_t start,int32_t limit)848     int32_t parseArgNumber(int32_t start, int32_t limit) {
849         return parseArgNumber(msg, start, limit);
850     }
851 
852     /**
853      * Parses a number from the specified message substring.
854      * @param start start index into the message string
855      * @param limit limit index into the message string, must be start<limit
856      * @param allowInfinity TRUE if U+221E is allowed (for ChoiceFormat)
857      * @param parseError
858      * @param errorCode
859      */
860     void parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
861                      UParseError *parseError, UErrorCode &errorCode);
862 
863     // Java has package-private appendReducedApostrophes() here.
864     // In C++, this is declared in the MessageImpl class.
865 
866     int32_t skipWhiteSpace(int32_t index);
867 
868     int32_t skipIdentifier(int32_t index);
869 
870     /**
871      * Skips a sequence of characters that could occur in a double value.
872      * Does not fully parse or validate the value.
873      */
874     int32_t skipDouble(int32_t index);
875 
876     static UBool isArgTypeChar(UChar32 c);
877 
878     UBool isChoice(int32_t index);
879 
880     UBool isPlural(int32_t index);
881 
882     UBool isSelect(int32_t index);
883 
884     /**
885      * @return TRUE if we are inside a MessageFormat (sub-)pattern,
886      *         as opposed to inside a top-level choice/plural/select pattern.
887      */
888     UBool inMessageFormatPattern(int32_t nestingLevel);
889 
890     /**
891      * @return TRUE if we are in a MessageFormat sub-pattern
892      *         of a top-level ChoiceFormat pattern.
893      */
894     UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType);
895 
896     void addPart(UMessagePatternPartType type, int32_t index, int32_t length,
897                  int32_t value, UErrorCode &errorCode);
898 
899     void addLimitPart(int32_t start,
900                       UMessagePatternPartType type, int32_t index, int32_t length,
901                       int32_t value, UErrorCode &errorCode);
902 
903     void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode);
904 
905     void setParseError(UParseError *parseError, int32_t index);
906 
907     // No ICU "poor man's RTTI" for this class nor its subclasses.
908     virtual UClassID getDynamicClassID() const;
909 
910     UBool init(UErrorCode &errorCode);
911     UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode);
912 
913     UMessagePatternApostropheMode aposMode;
914     UnicodeString msg;  // The string that parseArgNumber(start, limit) passes to the static parser.
915     // Java equivalent: ArrayList<Part> parts=new ArrayList<Part>();
916     MessagePatternPartsList *partsList;
917     Part *parts;  // NOTE(review): presumably points at the storage held by partsList -- confirm in the implementation.
918     int32_t partsLength;
919     // Java equivalent: ArrayList<Double> numericValues;
920     MessagePatternDoubleList *numericValuesList;
921     double *numericValues;  // NOTE(review): presumably points at the storage held by numericValuesList -- confirm in the implementation.
922     int32_t numericValuesLength;
923     UBool hasArgNames;
924     UBool hasArgNumbers;
925     UBool needsAutoQuoting;
926 };
927 
928 U_NAMESPACE_END
929 
930 #endif  // !UCONFIG_NO_FORMATTING
931 
932 #endif  // __MESSAGEPATTERN_H__
933