• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 *   Copyright (C) 2010-2014, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 *******************************************************************************
6 *   created on: 2010aug21
7 *   created by: Markus W. Scherer
8 */
9 
10 package com.ibm.icu.text;
11 
12 import java.util.ArrayList;
13 import java.util.Locale;
14 
15 import com.ibm.icu.impl.ICUConfig;
16 import com.ibm.icu.impl.PatternProps;
17 import com.ibm.icu.util.Freezable;
18 import com.ibm.icu.util.ICUCloneNotSupportedException;
19 
20 //Note: Minimize ICU dependencies, only use a very small part of the ICU core.
21 //In particular, do not depend on *Format classes.
22 
23 /**
24  * Parses and represents ICU MessageFormat patterns.
25  * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat.
26  * Used in the implementations of those classes as well as in tools
27  * for message validation, translation and format conversion.
28  * <p>
29  * The parser handles all syntax relevant for identifying message arguments.
30  * This includes "complex" arguments whose style strings contain
31  * nested MessageFormat pattern substrings.
32  * For "simple" arguments (with no nested MessageFormat pattern substrings),
33  * the argument style is not parsed any further.
34  * <p>
35  * The parser handles named and numbered message arguments and allows both in one message.
36  * <p>
37  * Once a pattern has been parsed successfully, iterate through the parsed data
38  * with countParts(), getPart() and related methods.
39  * <p>
40  * The data logically represents a parse tree, but is stored and accessed
41  * as a list of "parts" for fast and simple parsing and to minimize object allocations.
42  * Arguments and nested messages are best handled via recursion.
43  * For every _START "part", {@link #getLimitPartIndex(int)} efficiently returns
44  * the index of the corresponding _LIMIT "part".
45  * <p>
46  * List of "parts":
47  * <pre>
48  * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT
49  * argument = noneArg | simpleArg | complexArg
50  * complexArg = choiceArg | pluralArg | selectArg
51  *
52  * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE
53  * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE
54  * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE
55  * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL
56  * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT
57  *
58  * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+
59  * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+
60  * selectStyle = (ARG_SELECTOR message)+
61  * </pre>
62  * <ul>
63  *   <li>Literal output text is not represented directly by "parts" but accessed
64  *       between parts of a message, from one part's getLimit() to the next part's getIndex().
65  *   <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE.
66  *   <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or
67  *       the less-than-or-equal-to sign (U+2264).
68  *   <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value.
69  *       The optional numeric Part between each (ARG_SELECTOR, message) pair
70  *       is the value of an explicit-number selector like "=2",
71  *       otherwise the selector is a non-numeric identifier.
72  *   <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle.
73  * </ul><p>
74  * This class is not intended for public subclassing.
75  *
76  * @stable ICU 4.8
77  * @author Markus Scherer
78  */
79 public final class MessagePattern implements Cloneable, Freezable<MessagePattern> {
80     /**
81      * Mode for when an apostrophe starts quoted literal text for MessageFormat output.
82      * The default is DOUBLE_OPTIONAL unless overridden via ICUConfig
83      * (/com/ibm/icu/ICUConfig.properties).
84      * <p>
85      * A pair of adjacent apostrophes always results in a single apostrophe in the output,
86      * even when the pair is between two single, text-quoting apostrophes.
87      * <p>
88      * The following table shows examples of desired MessageFormat.format() output
89      * with the pattern strings that yield that output.
90      * <p>
91      * <table>
92      *   <tr>
93      *     <th>Desired output</th>
94      *     <th>DOUBLE_OPTIONAL</th>
95      *     <th>DOUBLE_REQUIRED</th>
96      *   </tr>
97      *   <tr>
98      *     <td>I see {many}</td>
99      *     <td>I see '{many}'</td>
100      *     <td>(same)</td>
101      *   </tr>
102      *   <tr>
103      *     <td>I said {'Wow!'}</td>
104      *     <td>I said '{''Wow!''}'</td>
105      *     <td>(same)</td>
106      *   </tr>
107      *   <tr>
108      *     <td>I don't know</td>
109      *     <td>I don't know OR<br> I don''t know</td>
110      *     <td>I don''t know</td>
111      *   </tr>
112      * </table>
113      * @stable ICU 4.8
114      */
115     public enum ApostropheMode {
116         /**
117          * A literal apostrophe is represented by
118          * either a single or a double apostrophe pattern character.
119          * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text
120          * if it immediately precedes a curly brace {},
121          * or a pipe symbol | if inside a choice format,
122          * or a pound symbol # if inside a plural format.
123          * <p>
124          * This is the default behavior starting with ICU 4.8.
125          * @stable ICU 4.8
126          */
127         DOUBLE_OPTIONAL,
128         /**
129          * A literal apostrophe must be represented by
130          * a double apostrophe pattern character.
131          * A single apostrophe always starts quoted literal text.
132          * <p>
133          * This is the behavior of ICU 4.6 and earlier, and of the JDK.
134          * @stable ICU 4.8
135          */
136         DOUBLE_REQUIRED
137     }
138 
139     /**
140      * Constructs an empty MessagePattern with default ApostropheMode.
141      * @stable ICU 4.8
142      */
MessagePattern()143     public MessagePattern() {
144         aposMode=defaultAposMode;
145     }
146 
147     /**
148      * Constructs an empty MessagePattern.
149      * @param mode Explicit ApostropheMode.
150      * @stable ICU 4.8
151      */
MessagePattern(ApostropheMode mode)152     public MessagePattern(ApostropheMode mode) {
153         aposMode=mode;
154     }
155 
156     /**
157      * Constructs a MessagePattern with default ApostropheMode and
158      * parses the MessageFormat pattern string.
159      * @param pattern a MessageFormat pattern string
160      * @throws IllegalArgumentException for syntax errors in the pattern string
161      * @throws IndexOutOfBoundsException if certain limits are exceeded
162      *         (e.g., argument number too high, argument name too long, etc.)
163      * @throws NumberFormatException if a number could not be parsed
164      * @stable ICU 4.8
165      */
MessagePattern(String pattern)166     public MessagePattern(String pattern) {
167         aposMode=defaultAposMode;
168         parse(pattern);
169     }
170 
171     /**
172      * Parses a MessageFormat pattern string.
173      * @param pattern a MessageFormat pattern string
174      * @return this
175      * @throws IllegalArgumentException for syntax errors in the pattern string
176      * @throws IndexOutOfBoundsException if certain limits are exceeded
177      *         (e.g., argument number too high, argument name too long, etc.)
178      * @throws NumberFormatException if a number could not be parsed
179      * @stable ICU 4.8
180      */
parse(String pattern)181     public MessagePattern parse(String pattern) {
182         preParse(pattern);
183         parseMessage(0, 0, 0, ArgType.NONE);
184         postParse();
185         return this;
186     }
187 
188     /**
189      * Parses a ChoiceFormat pattern string.
190      * @param pattern a ChoiceFormat pattern string
191      * @return this
192      * @throws IllegalArgumentException for syntax errors in the pattern string
193      * @throws IndexOutOfBoundsException if certain limits are exceeded
194      *         (e.g., argument number too high, argument name too long, etc.)
195      * @throws NumberFormatException if a number could not be parsed
196      * @stable ICU 4.8
197      */
parseChoiceStyle(String pattern)198     public MessagePattern parseChoiceStyle(String pattern) {
199         preParse(pattern);
200         parseChoiceStyle(0, 0);
201         postParse();
202         return this;
203     }
204 
205     /**
206      * Parses a PluralFormat pattern string.
207      * @param pattern a PluralFormat pattern string
208      * @return this
209      * @throws IllegalArgumentException for syntax errors in the pattern string
210      * @throws IndexOutOfBoundsException if certain limits are exceeded
211      *         (e.g., argument number too high, argument name too long, etc.)
212      * @throws NumberFormatException if a number could not be parsed
213      * @stable ICU 4.8
214      */
parsePluralStyle(String pattern)215     public MessagePattern parsePluralStyle(String pattern) {
216         preParse(pattern);
217         parsePluralOrSelectStyle(ArgType.PLURAL, 0, 0);
218         postParse();
219         return this;
220     }
221 
222     /**
223      * Parses a SelectFormat pattern string.
224      * @param pattern a SelectFormat pattern string
225      * @return this
226      * @throws IllegalArgumentException for syntax errors in the pattern string
227      * @throws IndexOutOfBoundsException if certain limits are exceeded
228      *         (e.g., argument number too high, argument name too long, etc.)
229      * @throws NumberFormatException if a number could not be parsed
230      * @stable ICU 4.8
231      */
parseSelectStyle(String pattern)232     public MessagePattern parseSelectStyle(String pattern) {
233         preParse(pattern);
234         parsePluralOrSelectStyle(ArgType.SELECT, 0, 0);
235         postParse();
236         return this;
237     }
238 
239     /**
240      * Clears this MessagePattern.
241      * countParts() will return 0.
242      * @stable ICU 4.8
243      */
clear()244     public void clear() {
245         // Mostly the same as preParse().
246         if(isFrozen()) {
247             throw new UnsupportedOperationException(
248                 "Attempt to clear() a frozen MessagePattern instance.");
249         }
250         msg=null;
251         hasArgNames=hasArgNumbers=false;
252         needsAutoQuoting=false;
253         parts.clear();
254         if(numericValues!=null) {
255             numericValues.clear();
256         }
257     }
258 
259     /**
260      * Clears this MessagePattern and sets the ApostropheMode.
261      * countParts() will return 0.
262      * @param mode The new ApostropheMode.
263      * @stable ICU 4.8
264      */
clearPatternAndSetApostropheMode(ApostropheMode mode)265     public void clearPatternAndSetApostropheMode(ApostropheMode mode) {
266         clear();
267         aposMode=mode;
268     }
269 
270     /**
271      * @param other another object to compare with.
272      * @return true if this object is equivalent to the other one.
273      * @stable ICU 4.8
274      */
275     @Override
equals(Object other)276     public boolean equals(Object other) {
277         if(this==other) {
278             return true;
279         }
280         if(other==null || getClass()!=other.getClass()) {
281             return false;
282         }
283         MessagePattern o=(MessagePattern)other;
284         return
285             aposMode.equals(o.aposMode) &&
286             (msg==null ? o.msg==null : msg.equals(o.msg)) &&
287             parts.equals(o.parts);
288         // No need to compare numericValues if msg and parts are the same.
289     }
290 
291     /**
292      * {@inheritDoc}
293      * @stable ICU 4.8
294      */
295     @Override
hashCode()296     public int hashCode() {
297         return (aposMode.hashCode()*37+(msg!=null ? msg.hashCode() : 0))*37+parts.hashCode();
298     }
299 
300     /**
301      * @return this instance's ApostropheMode.
302      * @stable ICU 4.8
303      */
getApostropheMode()304     public ApostropheMode getApostropheMode() {
305         return aposMode;
306     }
307 
308     /**
309      * @return true if getApostropheMode() == ApostropheMode.DOUBLE_REQUIRED
310      * @internal
311      */
jdkAposMode()312     public boolean jdkAposMode() {
313         return aposMode == ApostropheMode.DOUBLE_REQUIRED;
314     }
315 
316     /**
317      * @return the parsed pattern string (null if none was parsed).
318      * @stable ICU 4.8
319      */
getPatternString()320     public String getPatternString() {
321         return msg;
322     }
323 
324     /**
325      * Does the parsed pattern have named arguments like {first_name}?
326      * @return true if the parsed pattern has at least one named argument.
327      * @stable ICU 4.8
328      */
hasNamedArguments()329     public boolean hasNamedArguments() {
330         return hasArgNames;
331     }
332 
333     /**
334      * Does the parsed pattern have numbered arguments like {2}?
335      * @return true if the parsed pattern has at least one numbered argument.
336      * @stable ICU 4.8
337      */
hasNumberedArguments()338     public boolean hasNumberedArguments() {
339         return hasArgNumbers;
340     }
341 
342     /**
343      * {@inheritDoc}
344      * @stable ICU 4.8
345      */
346     @Override
toString()347     public String toString() {
348         return msg;
349     }
350 
351     /**
352      * Validates and parses an argument name or argument number string.
353      * An argument name must be a "pattern identifier", that is, it must contain
354      * no Unicode Pattern_Syntax or Pattern_White_Space characters.
355      * If it only contains ASCII digits, then it must be a small integer with no leading zero.
356      * @param name Input string.
357      * @return &gt;=0 if the name is a valid number,
358      *         ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
359      *         ARG_NAME_NOT_VALID (-2) if it is neither.
360      * @stable ICU 4.8
361      */
validateArgumentName(String name)362     public static int validateArgumentName(String name) {
363         if(!PatternProps.isIdentifier(name)) {
364             return ARG_NAME_NOT_VALID;
365         }
366         return parseArgNumber(name, 0, name.length());
367     }
368 
369     /**
370      * Return value from {@link #validateArgumentName(String)} for when
371      * the string is a valid "pattern identifier" but not a number.
372      * @stable ICU 4.8
373      */
374     public static final int ARG_NAME_NOT_NUMBER=-1;
375 
376     /**
377      * Return value from {@link #validateArgumentName(String)} for when
378      * the string is invalid.
379      * It might not be a valid "pattern identifier",
380      * or it might have only ASCII digits but there is a leading zero or the number is too large.
381      * @stable ICU 4.8
382      */
383     public static final int ARG_NAME_NOT_VALID=-2;
384 
385     /**
386      * Returns a version of the parsed pattern string where each ASCII apostrophe
387      * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax.
388      * <p>
389      * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}."
390      * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}."
391      * @return the deep-auto-quoted version of the parsed pattern string.
392      * @see MessageFormat#autoQuoteApostrophe(String)
393      * @stable ICU 4.8
394      */
autoQuoteApostropheDeep()395     public String autoQuoteApostropheDeep() {
396         if(!needsAutoQuoting) {
397             return msg;
398         }
399         StringBuilder modified=null;
400         // Iterate backward so that the insertion indexes do not change.
401         int count=countParts();
402         for(int i=count; i>0;) {
403             Part part;
404             if((part=getPart(--i)).getType()==Part.Type.INSERT_CHAR) {
405                 if(modified==null) {
406                     modified=new StringBuilder(msg.length()+10).append(msg);
407                 }
408                 modified.insert(part.index, (char)part.value);
409             }
410         }
411         if(modified==null) {
412             return msg;
413         } else {
414             return modified.toString();
415         }
416     }
417 
418     /**
419      * Returns the number of "parts" created by parsing the pattern string.
420      * Returns 0 if no pattern has been parsed or clear() was called.
421      * @return the number of pattern parts.
422      * @stable ICU 4.8
423      */
countParts()424     public int countParts() {
425         return parts.size();
426     }
427 
428     /**
429      * Gets the i-th pattern "part".
430      * @param i The index of the Part data. (0..countParts()-1)
431      * @return the i-th pattern "part".
432      * @throws IndexOutOfBoundsException if i is outside the (0..countParts()-1) range
433      * @stable ICU 4.8
434      */
getPart(int i)435     public Part getPart(int i) {
436         return parts.get(i);
437     }
438 
439     /**
440      * Returns the Part.Type of the i-th pattern "part".
441      * Convenience method for getPart(i).getType().
442      * @param i The index of the Part data. (0..countParts()-1)
443      * @return The Part.Type of the i-th Part.
444      * @throws IndexOutOfBoundsException if i is outside the (0..countParts()-1) range
445      * @stable ICU 4.8
446      */
getPartType(int i)447     public Part.Type getPartType(int i) {
448         return parts.get(i).type;
449     }
450 
451     /**
452      * Returns the pattern index of the specified pattern "part".
453      * Convenience method for getPart(partIndex).getIndex().
454      * @param partIndex The index of the Part data. (0..countParts()-1)
455      * @return The pattern index of this Part.
456      * @throws IndexOutOfBoundsException if partIndex is outside the (0..countParts()-1) range
457      * @stable ICU 4.8
458      */
getPatternIndex(int partIndex)459     public int getPatternIndex(int partIndex) {
460         return parts.get(partIndex).index;
461     }
462 
463     /**
464      * Returns the substring of the pattern string indicated by the Part.
465      * Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()).
466      * @param part a part of this MessagePattern.
467      * @return the substring associated with part.
468      * @stable ICU 4.8
469      */
getSubstring(Part part)470     public String getSubstring(Part part) {
471         int index=part.index;
472         return msg.substring(index, index+part.length);
473     }
474 
475     /**
476      * Compares the part's substring with the input string s.
477      * @param part a part of this MessagePattern.
478      * @param s a string.
479      * @return true if getSubstring(part).equals(s).
480      * @stable ICU 4.8
481      */
partSubstringMatches(Part part, String s)482     public boolean partSubstringMatches(Part part, String s) {
483         return msg.regionMatches(part.index, s, 0, part.length);
484     }
485 
486     /**
487      * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE.
488      * @param part a part of this MessagePattern.
489      * @return the part's numeric value, or NO_NUMERIC_VALUE if this is not a numeric part.
490      * @stable ICU 4.8
491      */
getNumericValue(Part part)492     public double getNumericValue(Part part) {
493         Part.Type type=part.type;
494         if(type==Part.Type.ARG_INT) {
495             return part.value;
496         } else if(type==Part.Type.ARG_DOUBLE) {
497             return numericValues.get(part.value);
498         } else {
499             return NO_NUMERIC_VALUE;
500         }
501     }
502 
503     /**
504      * Special value that is returned by getNumericValue(Part) when no
505      * numeric value is defined for a part.
506      * @see #getNumericValue
507      * @stable ICU 4.8
508      */
509     public static final double NO_NUMERIC_VALUE=-123456789;
510 
511     /**
512      * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
513      * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
514      * @return the "offset:" value.
515      * @throws IndexOutOfBoundsException if pluralStart is outside the (0..countParts()-1) range
516      * @stable ICU 4.8
517      */
getPluralOffset(int pluralStart)518     public double getPluralOffset(int pluralStart) {
519         Part part=parts.get(pluralStart);
520         if(part.type.hasNumericValue()) {
521             return getNumericValue(part);
522         } else {
523             return 0;
524         }
525     }
526 
527     /**
528      * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start.
529      * @param start The index of some Part data (0..countParts()-1);
530      *        this Part should be of Type ARG_START or MSG_START.
531      * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level,
532      *         or start itself if getPartType(msgStart)!=ARG|MSG_START.
533      * @throws IndexOutOfBoundsException if start is outside the (0..countParts()-1) range
534      * @stable ICU 4.8
535      */
getLimitPartIndex(int start)536     public int getLimitPartIndex(int start) {
537         int limit=parts.get(start).limitPartIndex;
538         if(limit<start) {
539             return start;
540         }
541         return limit;
542     }
543 
544     /**
545      * A message pattern "part", representing a pattern parsing event.
546      * There is a part for the start and end of a message or argument,
547      * for quoting and escaping of and with ASCII apostrophes,
548      * and for syntax elements of "complex" arguments.
549      * @stable ICU 4.8
550      */
551     public static final class Part {
Part(Type t, int i, int l, int v)552         private Part(Type t, int i, int l, int v) {
553             type=t;
554             index=i;
555             length=(char)l;
556             value=(short)v;
557         }
558 
559         /**
560          * Returns the type of this part.
561          * @return the part type.
562          * @stable ICU 4.8
563          */
getType()564         public Type getType() {
565             return type;
566         }
567 
568         /**
569          * Returns the pattern string index associated with this Part.
570          * @return this part's pattern string index.
571          * @stable ICU 4.8
572          */
getIndex()573         public int getIndex() {
574             return index;
575         }
576 
577         /**
578          * Returns the length of the pattern substring associated with this Part.
579          * This is 0 for some parts.
580          * @return this part's pattern substring length.
581          * @stable ICU 4.8
582          */
getLength()583         public int getLength() {
584             return length;
585         }
586 
587         /**
588          * Returns the pattern string limit (exclusive-end) index associated with this Part.
589          * Convenience method for getIndex()+getLength().
590          * @return this part's pattern string limit index, same as getIndex()+getLength().
591          * @stable ICU 4.8
592          */
getLimit()593         public int getLimit() {
594             return index+length;
595         }
596 
597         /**
598          * Returns a value associated with this part.
599          * See the documentation of each part type for details.
600          * @return the part value.
601          * @stable ICU 4.8
602          */
getValue()603         public int getValue() {
604             return value;
605         }
606 
607         /**
608          * Returns the argument type if this part is of type ARG_START or ARG_LIMIT,
609          * otherwise ArgType.NONE.
610          * @return the argument type for this part.
611          * @stable ICU 4.8
612          */
getArgType()613         public ArgType getArgType() {
614             Type type=getType();
615             if(type==Type.ARG_START || type==Type.ARG_LIMIT) {
616                 return argTypes[value];
617             } else {
618                 return ArgType.NONE;
619             }
620         }
621 
622         /**
623          * Part type constants.
624          * @stable ICU 4.8
625          */
626         public enum Type {
627             /**
628              * Start of a message pattern (main or nested).
629              * The length is 0 for the top-level message
630              * and for a choice argument sub-message, otherwise 1 for the '{'.
631              * The value indicates the nesting level, starting with 0 for the main message.
632              * <p>
633              * There is always a later MSG_LIMIT part.
634              * @stable ICU 4.8
635              */
636             MSG_START,
637             /**
638              * End of a message pattern (main or nested).
639              * The length is 0 for the top-level message and
640              * the last sub-message of a choice argument,
641              * otherwise 1 for the '}' or (in a choice argument style) the '|'.
642              * The value indicates the nesting level, starting with 0 for the main message.
643              * @stable ICU 4.8
644              */
645             MSG_LIMIT,
646             /**
647              * Indicates a substring of the pattern string which is to be skipped when formatting.
648              * For example, an apostrophe that begins or ends quoted text
649              * would be indicated with such a part.
650              * The value is undefined and currently always 0.
651              * @stable ICU 4.8
652              */
653             SKIP_SYNTAX,
654             /**
655              * Indicates that a syntax character needs to be inserted for auto-quoting.
656              * The length is 0.
657              * The value is the character code of the insertion character. (U+0027=APOSTROPHE)
658              * @stable ICU 4.8
659              */
660             INSERT_CHAR,
661             /**
662              * Indicates a syntactic (non-escaped) # symbol in a plural variant.
663              * When formatting, replace this part's substring with the
664              * (value-offset) for the plural argument value.
665              * The value is undefined and currently always 0.
666              * @stable ICU 4.8
667              */
668             REPLACE_NUMBER,
669             /**
670              * Start of an argument.
671              * The length is 1 for the '{'.
672              * The value is the ordinal value of the ArgType. Use getArgType().
673              * <p>
674              * This part is followed by either an ARG_NUMBER or ARG_NAME,
675              * followed by optional argument sub-parts (see ArgType constants)
676              * and finally an ARG_LIMIT part.
677              * @stable ICU 4.8
678              */
679             ARG_START,
680             /**
681              * End of an argument.
682              * The length is 1 for the '}'.
683              * The value is the ordinal value of the ArgType. Use getArgType().
684              * @stable ICU 4.8
685              */
686             ARG_LIMIT,
687             /**
688              * The argument number, provided by the value.
689              * @stable ICU 4.8
690              */
691             ARG_NUMBER,
692             /**
693              * The argument name.
694              * The value is undefined and currently always 0.
695              * @stable ICU 4.8
696              */
697             ARG_NAME,
698             /**
699              * The argument type.
700              * The value is undefined and currently always 0.
701              * @stable ICU 4.8
702              */
703             ARG_TYPE,
704             /**
705              * The argument style text.
706              * The value is undefined and currently always 0.
707              * @stable ICU 4.8
708              */
709             ARG_STYLE,
710             /**
711              * A selector substring in a "complex" argument style.
712              * The value is undefined and currently always 0.
713              * @stable ICU 4.8
714              */
715             ARG_SELECTOR,
716             /**
717              * An integer value, for example the offset or an explicit selector value
718              * in a PluralFormat style.
719              * The part value is the integer value.
720              * @stable ICU 4.8
721              */
722             ARG_INT,
723             /**
724              * A numeric value, for example the offset or an explicit selector value
725              * in a PluralFormat style.
726              * The part value is an index into an internal array of numeric values;
727              * use getNumericValue().
728              * @stable ICU 4.8
729              */
730             ARG_DOUBLE;
731 
732             /**
733              * Indicates whether this part has a numeric value.
734              * If so, then that numeric value can be retrieved via {@link MessagePattern#getNumericValue(Part)}.
735              * @return true if this part has a numeric value.
736              * @stable ICU 4.8
737              */
hasNumericValue()738             public boolean hasNumericValue() {
739                 return this==ARG_INT || this==ARG_DOUBLE;
740             }
741         }
742 
743         /**
744          * @return a string representation of this part.
745          * @stable ICU 4.8
746          */
747         @Override
toString()748         public String toString() {
749             String valueString=(type==Type.ARG_START || type==Type.ARG_LIMIT) ?
750                 getArgType().name() : Integer.toString(value);
751             return type.name()+"("+valueString+")@"+index;
752         }
753 
754         /**
755          * @param other another object to compare with.
756          * @return true if this object is equivalent to the other one.
757          * @stable ICU 4.8
758          */
759         @Override
equals(Object other)760         public boolean equals(Object other) {
761             if(this==other) {
762                 return true;
763             }
764             if(other==null || getClass()!=other.getClass()) {
765                 return false;
766             }
767             Part o=(Part)other;
768             return
769                 type.equals(o.type) &&
770                 index==o.index &&
771                 length==o.length &&
772                 value==o.value &&
773                 limitPartIndex==o.limitPartIndex;
774         }
775 
776         /**
777          * {@inheritDoc}
778          * @stable ICU 4.8
779          */
780         @Override
hashCode()781         public int hashCode() {
782             return ((type.hashCode()*37+index)*37+length)*37+value;
783         }
784 
785         private static final int MAX_LENGTH=0xffff;
786         private static final int MAX_VALUE=Short.MAX_VALUE;
787 
788         // Some fields are not final because they are modified during pattern parsing.
789         // After pattern parsing, the parts are effectively immutable.
790         private final Type type;
791         private final int index;
792         private final char length;
793         private short value;
794         private int limitPartIndex;
795     }
796 
797     /**
798      * Argument type constants.
799      * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts.
800      *
801      * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT,
802      * with a nesting level one greater than the surrounding message.
803      * @stable ICU 4.8
804      */
805     public enum ArgType {
806         /**
807          * The argument has no specified type.
808          * @stable ICU 4.8
809          */
810         NONE,
811         /**
812          * The argument has a "simple" type which is provided by the ARG_TYPE part.
813          * An ARG_STYLE part might follow that.
814          * @stable ICU 4.8
815          */
816         SIMPLE,
817         /**
818          * The argument is a ChoiceFormat with one or more
819          * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
820          * @stable ICU 4.8
821          */
822         CHOICE,
823         /**
824          * The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset
825          * (e.g., offset:1)
826          * and one or more (ARG_SELECTOR [explicit-value] message) tuples.
827          * If the selector has an explicit value (e.g., =2), then
828          * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message.
829          * Otherwise the message immediately follows the ARG_SELECTOR.
830          * @stable ICU 4.8
831          */
832         PLURAL,
833         /**
834          * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs.
835          * @stable ICU 4.8
836          */
837         SELECT,
838         /**
839          * The argument is an ordinal-number PluralFormat
840          * with the same style parts sequence and semantics as {@link ArgType#PLURAL}.
841          * @stable ICU 50
842          */
843         SELECTORDINAL;
844 
845         /**
846          * @return true if the argument type has a plural style part sequence and semantics,
847          * for example {@link ArgType#PLURAL} and {@link ArgType#SELECTORDINAL}.
848          * @stable ICU 50
849          */
hasPluralStyle()850         public boolean hasPluralStyle() {
851             return this == PLURAL || this == SELECTORDINAL;
852         }
853     }
854 
855     /**
856      * Creates and returns a copy of this object.
857      * @return a copy of this object (or itself if frozen).
858      * @stable ICU 4.8
859      */
860     @Override
clone()861     public Object clone() {
862         if(isFrozen()) {
863             return this;
864         } else {
865             return cloneAsThawed();
866         }
867     }
868 
869     /**
870      * Creates and returns an unfrozen copy of this object.
871      * @return a copy of this object.
872      * @stable ICU 4.8
873      */
874     @SuppressWarnings("unchecked")
cloneAsThawed()875     public MessagePattern cloneAsThawed() {
876         MessagePattern newMsg;
877         try {
878             newMsg=(MessagePattern)super.clone();
879         } catch (CloneNotSupportedException e) {
880             throw new ICUCloneNotSupportedException(e);
881         }
882         newMsg.parts=(ArrayList<Part>)parts.clone();
883         if(numericValues!=null) {
884             newMsg.numericValues=(ArrayList<Double>)numericValues.clone();
885         }
886         newMsg.frozen=false;
887         return newMsg;
888     }
889 
890     /**
891      * Freezes this object, making it immutable and thread-safe.
892      * @return this
893      * @stable ICU 4.8
894      */
freeze()895     public MessagePattern freeze() {
896         frozen=true;
897         return this;
898     }
899 
900     /**
901      * Determines whether this object is frozen (immutable) or not.
902      * @return true if this object is frozen.
903      * @stable ICU 4.8
904      */
isFrozen()905     public boolean isFrozen() {
906         return frozen;
907     }
908 
preParse(String pattern)909     private void preParse(String pattern) {
910         if(isFrozen()) {
911             throw new UnsupportedOperationException(
912                 "Attempt to parse("+prefix(pattern)+") on frozen MessagePattern instance.");
913         }
914         msg=pattern;
915         hasArgNames=hasArgNumbers=false;
916         needsAutoQuoting=false;
917         parts.clear();
918         if(numericValues!=null) {
919             numericValues.clear();
920         }
921     }
922 
postParse()923     private void postParse() {
924         // Nothing to be done currently.
925     }
926 
parseMessage(int index, int msgStartLength, int nestingLevel, ArgType parentType)927     private int parseMessage(int index, int msgStartLength, int nestingLevel, ArgType parentType) {
928         if(nestingLevel>Part.MAX_VALUE) {
929             throw new IndexOutOfBoundsException();
930         }
931         int msgStart=parts.size();
932         addPart(Part.Type.MSG_START, index, msgStartLength, nestingLevel);
933         index+=msgStartLength;
934         while(index<msg.length()) {
935             char c=msg.charAt(index++);
936             if(c=='\'') {
937                 if(index==msg.length()) {
938                     // The apostrophe is the last character in the pattern.
939                     // Add a Part for auto-quoting.
940                     addPart(Part.Type.INSERT_CHAR, index, 0, '\'');  // value=char to be inserted
941                     needsAutoQuoting=true;
942                 } else {
943                     c=msg.charAt(index);
944                     if(c=='\'') {
945                         // double apostrophe, skip the second one
946                         addPart(Part.Type.SKIP_SYNTAX, index++, 1, 0);
947                     } else if(
948                         aposMode==ApostropheMode.DOUBLE_REQUIRED ||
949                         c=='{' || c=='}' ||
950                         (parentType==ArgType.CHOICE && c=='|') ||
951                         (parentType.hasPluralStyle() && c=='#')
952                     ) {
953                         // skip the quote-starting apostrophe
954                         addPart(Part.Type.SKIP_SYNTAX, index-1, 1, 0);
955                         // find the end of the quoted literal text
956                         for(;;) {
957                             index=msg.indexOf('\'', index+1);
958                             if(index>=0) {
959                                 if((index+1)<msg.length() && msg.charAt(index+1)=='\'') {
960                                     // double apostrophe inside quoted literal text
961                                     // still encodes a single apostrophe, skip the second one
962                                     addPart(Part.Type.SKIP_SYNTAX, ++index, 1, 0);
963                                 } else {
964                                     // skip the quote-ending apostrophe
965                                     addPart(Part.Type.SKIP_SYNTAX, index++, 1, 0);
966                                     break;
967                                 }
968                             } else {
969                                 // The quoted text reaches to the end of the of the message.
970                                 index=msg.length();
971                                 // Add a Part for auto-quoting.
972                                 addPart(Part.Type.INSERT_CHAR, index, 0, '\'');  // value=char to be inserted
973                                 needsAutoQuoting=true;
974                                 break;
975                             }
976                         }
977                     } else {
978                         // Interpret the apostrophe as literal text.
979                         // Add a Part for auto-quoting.
980                         addPart(Part.Type.INSERT_CHAR, index, 0, '\'');  // value=char to be inserted
981                         needsAutoQuoting=true;
982                     }
983                 }
984             } else if(parentType.hasPluralStyle() && c=='#') {
985                 // The unquoted # in a plural message fragment will be replaced
986                 // with the (number-offset).
987                 addPart(Part.Type.REPLACE_NUMBER, index-1, 1, 0);
988             } else if(c=='{') {
989                 index=parseArg(index-1, 1, nestingLevel);
990             } else if((nestingLevel>0 && c=='}') || (parentType==ArgType.CHOICE && c=='|')) {
991                 // Finish the message before the terminator.
992                 // In a choice style, report the "}" substring only for the following ARG_LIMIT,
993                 // not for this MSG_LIMIT.
994                 int limitLength=(parentType==ArgType.CHOICE && c=='}') ? 0 : 1;
995                 addLimitPart(msgStart, Part.Type.MSG_LIMIT, index-1, limitLength, nestingLevel);
996                 if(parentType==ArgType.CHOICE) {
997                     // Let the choice style parser see the '}' or '|'.
998                     return index-1;
999                 } else {
1000                     // continue parsing after the '}'
1001                     return index;
1002                 }
1003             }  // else: c is part of literal text
1004         }
1005         if(nestingLevel>0 && !inTopLevelChoiceMessage(nestingLevel, parentType)) {
1006             throw new IllegalArgumentException(
1007                 "Unmatched '{' braces in message "+prefix());
1008         }
1009         addLimitPart(msgStart, Part.Type.MSG_LIMIT, index, 0, nestingLevel);
1010         return index;
1011     }
1012 
parseArg(int index, int argStartLength, int nestingLevel)1013     private int parseArg(int index, int argStartLength, int nestingLevel) {
1014         int argStart=parts.size();
1015         ArgType argType=ArgType.NONE;
1016         addPart(Part.Type.ARG_START, index, argStartLength, argType.ordinal());
1017         int nameIndex=index=skipWhiteSpace(index+argStartLength);
1018         if(index==msg.length()) {
1019             throw new IllegalArgumentException(
1020                 "Unmatched '{' braces in message "+prefix());
1021         }
1022         // parse argument name or number
1023         index=skipIdentifier(index);
1024         int number=parseArgNumber(nameIndex, index);
1025         if(number>=0) {
1026             int length=index-nameIndex;
1027             if(length>Part.MAX_LENGTH || number>Part.MAX_VALUE) {
1028                 throw new IndexOutOfBoundsException(
1029                     "Argument number too large: "+prefix(nameIndex));
1030             }
1031             hasArgNumbers=true;
1032             addPart(Part.Type.ARG_NUMBER, nameIndex, length, number);
1033         } else if(number==ARG_NAME_NOT_NUMBER) {
1034             int length=index-nameIndex;
1035             if(length>Part.MAX_LENGTH) {
1036                 throw new IndexOutOfBoundsException(
1037                     "Argument name too long: "+prefix(nameIndex));
1038             }
1039             hasArgNames=true;
1040             addPart(Part.Type.ARG_NAME, nameIndex, length, 0);
1041         } else {  // number<-1 (ARG_NAME_NOT_VALID)
1042             throw new IllegalArgumentException("Bad argument syntax: "+prefix(nameIndex));
1043         }
1044         index=skipWhiteSpace(index);
1045         if(index==msg.length()) {
1046             throw new IllegalArgumentException(
1047                 "Unmatched '{' braces in message "+prefix());
1048         }
1049         char c=msg.charAt(index);
1050         if(c=='}') {
1051             // all done
1052         } else if(c!=',') {
1053             throw new IllegalArgumentException("Bad argument syntax: "+prefix(nameIndex));
1054         } else /* ',' */ {
1055             // parse argument type: case-sensitive a-zA-Z
1056             int typeIndex=index=skipWhiteSpace(index+1);
1057             while(index<msg.length() && isArgTypeChar(msg.charAt(index))) {
1058                 ++index;
1059             }
1060             int length=index-typeIndex;
1061             index=skipWhiteSpace(index);
1062             if(index==msg.length()) {
1063                 throw new IllegalArgumentException(
1064                     "Unmatched '{' braces in message "+prefix());
1065             }
1066             if(length==0 || ((c=msg.charAt(index))!=',' && c!='}')) {
1067                 throw new IllegalArgumentException("Bad argument syntax: "+prefix(nameIndex));
1068             }
1069             if(length>Part.MAX_LENGTH) {
1070                 throw new IndexOutOfBoundsException(
1071                     "Argument type name too long: "+prefix(nameIndex));
1072             }
1073             argType=ArgType.SIMPLE;
1074             if(length==6) {
1075                 // case-insensitive comparisons for complex-type names
1076                 if(isChoice(typeIndex)) {
1077                     argType=ArgType.CHOICE;
1078                 } else if(isPlural(typeIndex)) {
1079                     argType=ArgType.PLURAL;
1080                 } else if(isSelect(typeIndex)) {
1081                     argType=ArgType.SELECT;
1082                 }
1083             } else if(length==13) {
1084                 if(isSelect(typeIndex) && isOrdinal(typeIndex+6)) {
1085                     argType=ArgType.SELECTORDINAL;
1086                 }
1087             }
1088             // change the ARG_START type from NONE to argType
1089             parts.get(argStart).value=(short)argType.ordinal();
1090             if(argType==ArgType.SIMPLE) {
1091                 addPart(Part.Type.ARG_TYPE, typeIndex, length, 0);
1092             }
1093             // look for an argument style (pattern)
1094             if(c=='}') {
1095                 if(argType!=ArgType.SIMPLE) {
1096                     throw new IllegalArgumentException(
1097                         "No style field for complex argument: "+prefix(nameIndex));
1098                 }
1099             } else /* ',' */ {
1100                 ++index;
1101                 if(argType==ArgType.SIMPLE) {
1102                     index=parseSimpleStyle(index);
1103                 } else if(argType==ArgType.CHOICE) {
1104                     index=parseChoiceStyle(index, nestingLevel);
1105                 } else {
1106                     index=parsePluralOrSelectStyle(argType, index, nestingLevel);
1107                 }
1108             }
1109         }
1110         // Argument parsing stopped on the '}'.
1111         addLimitPart(argStart, Part.Type.ARG_LIMIT, index, 1, argType.ordinal());
1112         return index+1;
1113     }
1114 
parseSimpleStyle(int index)1115     private int parseSimpleStyle(int index) {
1116         int start=index;
1117         int nestedBraces=0;
1118         while(index<msg.length()) {
1119             char c=msg.charAt(index++);
1120             if(c=='\'') {
1121                 // Treat apostrophe as quoting but include it in the style part.
1122                 // Find the end of the quoted literal text.
1123                 index=msg.indexOf('\'', index);
1124                 if(index<0) {
1125                     throw new IllegalArgumentException(
1126                         "Quoted literal argument style text reaches to the end of the message: "+
1127                         prefix(start));
1128                 }
1129                 // skip the quote-ending apostrophe
1130                 ++index;
1131             } else if(c=='{') {
1132                 ++nestedBraces;
1133             } else if(c=='}') {
1134                 if(nestedBraces>0) {
1135                     --nestedBraces;
1136                 } else {
1137                     int length=--index-start;
1138                     if(length>Part.MAX_LENGTH) {
1139                         throw new IndexOutOfBoundsException(
1140                             "Argument style text too long: "+prefix(start));
1141                     }
1142                     addPart(Part.Type.ARG_STYLE, start, length, 0);
1143                     return index;
1144                 }
1145             }  // c is part of literal text
1146         }
1147         throw new IllegalArgumentException(
1148             "Unmatched '{' braces in message "+prefix());
1149     }
1150 
parseChoiceStyle(int index, int nestingLevel)1151     private int parseChoiceStyle(int index, int nestingLevel) {
1152         int start=index;
1153         index=skipWhiteSpace(index);
1154         if(index==msg.length() || msg.charAt(index)=='}') {
1155             throw new IllegalArgumentException(
1156                 "Missing choice argument pattern in "+prefix());
1157         }
1158         for(;;) {
1159             // The choice argument style contains |-separated (number, separator, message) triples.
1160             // Parse the number.
1161             int numberIndex=index;
1162             index=skipDouble(index);
1163             int length=index-numberIndex;
1164             if(length==0) {
1165                 throw new IllegalArgumentException("Bad choice pattern syntax: "+prefix(start));
1166             }
1167             if(length>Part.MAX_LENGTH) {
1168                 throw new IndexOutOfBoundsException(
1169                     "Choice number too long: "+prefix(numberIndex));
1170             }
1171             parseDouble(numberIndex, index, true);  // adds ARG_INT or ARG_DOUBLE
1172             // Parse the separator.
1173             index=skipWhiteSpace(index);
1174             if(index==msg.length()) {
1175                 throw new IllegalArgumentException("Bad choice pattern syntax: "+prefix(start));
1176             }
1177             char c=msg.charAt(index);
1178             if(!(c=='#' || c=='<' || c=='\u2264')) {  // U+2264 is <=
1179                 throw new IllegalArgumentException(
1180                     "Expected choice separator (#<\u2264) instead of '"+c+
1181                     "' in choice pattern "+prefix(start));
1182             }
1183             addPart(Part.Type.ARG_SELECTOR, index, 1, 0);
1184             // Parse the message fragment.
1185             index=parseMessage(++index, 0, nestingLevel+1, ArgType.CHOICE);
1186             // parseMessage(..., CHOICE) returns the index of the terminator, or msg.length().
1187             if(index==msg.length()) {
1188                 return index;
1189             }
1190             if(msg.charAt(index)=='}') {
1191                 if(!inMessageFormatPattern(nestingLevel)) {
1192                     throw new IllegalArgumentException(
1193                         "Bad choice pattern syntax: "+prefix(start));
1194                 }
1195                 return index;
1196             }  // else the terminator is '|'
1197             index=skipWhiteSpace(index+1);
1198         }
1199     }
1200 
parsePluralOrSelectStyle(ArgType argType, int index, int nestingLevel)1201     private int parsePluralOrSelectStyle(ArgType argType, int index, int nestingLevel) {
1202         int start=index;
1203         boolean isEmpty=true;
1204         boolean hasOther=false;
1205         for(;;) {
1206             // First, collect the selector looking for a small set of terminators.
1207             // It would be a little faster to consider the syntax of each possible
1208             // token right here, but that makes the code too complicated.
1209             index=skipWhiteSpace(index);
1210             boolean eos=index==msg.length();
1211             if(eos || msg.charAt(index)=='}') {
1212                 if(eos==inMessageFormatPattern(nestingLevel)) {
1213                     throw new IllegalArgumentException(
1214                         "Bad "+
1215                         argType.toString().toLowerCase(Locale.ENGLISH)+
1216                         " pattern syntax: "+prefix(start));
1217                 }
1218                 if(!hasOther) {
1219                     throw new IllegalArgumentException(
1220                         "Missing 'other' keyword in "+
1221                         argType.toString().toLowerCase(Locale.ENGLISH)+
1222                         " pattern in "+prefix());
1223                 }
1224                 return index;
1225             }
1226             int selectorIndex=index;
1227             if(argType.hasPluralStyle() && msg.charAt(selectorIndex)=='=') {
1228                 // explicit-value plural selector: =double
1229                 index=skipDouble(index+1);
1230                 int length=index-selectorIndex;
1231                 if(length==1) {
1232                     throw new IllegalArgumentException(
1233                         "Bad "+
1234                         argType.toString().toLowerCase(Locale.ENGLISH)+
1235                         " pattern syntax: "+prefix(start));
1236                 }
1237                 if(length>Part.MAX_LENGTH) {
1238                     throw new IndexOutOfBoundsException(
1239                         "Argument selector too long: "+prefix(selectorIndex));
1240                 }
1241                 addPart(Part.Type.ARG_SELECTOR, selectorIndex, length, 0);
1242                 parseDouble(selectorIndex+1, index, false);  // adds ARG_INT or ARG_DOUBLE
1243             } else {
1244                 index=skipIdentifier(index);
1245                 int length=index-selectorIndex;
1246                 if(length==0) {
1247                     throw new IllegalArgumentException(
1248                         "Bad "+
1249                         argType.toString().toLowerCase(Locale.ENGLISH)+
1250                         " pattern syntax: "+prefix(start));
1251                 }
1252                 // Note: The ':' in "offset:" is just beyond the skipIdentifier() range.
1253                 if( argType.hasPluralStyle() && length==6 && index<msg.length() &&
1254                     msg.regionMatches(selectorIndex, "offset:", 0, 7)
1255                 ) {
1256                     // plural offset, not a selector
1257                     if(!isEmpty) {
1258                         throw new IllegalArgumentException(
1259                             "Plural argument 'offset:' (if present) must precede key-message pairs: "+
1260                             prefix(start));
1261                     }
1262                     // allow whitespace between offset: and its value
1263                     int valueIndex=skipWhiteSpace(index+1);  // The ':' is at index.
1264                     index=skipDouble(valueIndex);
1265                     if(index==valueIndex) {
1266                         throw new IllegalArgumentException(
1267                             "Missing value for plural 'offset:' "+prefix(start));
1268                     }
1269                     if((index-valueIndex)>Part.MAX_LENGTH) {
1270                         throw new IndexOutOfBoundsException(
1271                             "Plural offset value too long: "+prefix(valueIndex));
1272                     }
1273                     parseDouble(valueIndex, index, false);  // adds ARG_INT or ARG_DOUBLE
1274                     isEmpty=false;
1275                     continue;  // no message fragment after the offset
1276                 } else {
1277                     // normal selector word
1278                     if(length>Part.MAX_LENGTH) {
1279                         throw new IndexOutOfBoundsException(
1280                             "Argument selector too long: "+prefix(selectorIndex));
1281                     }
1282                     addPart(Part.Type.ARG_SELECTOR, selectorIndex, length, 0);
1283                     if(msg.regionMatches(selectorIndex, "other", 0, length)) {
1284                         hasOther=true;
1285                     }
1286                 }
1287             }
1288 
1289             // parse the message fragment following the selector
1290             index=skipWhiteSpace(index);
1291             if(index==msg.length() || msg.charAt(index)!='{') {
1292                 throw new IllegalArgumentException(
1293                     "No message fragment after "+
1294                     argType.toString().toLowerCase(Locale.ENGLISH)+
1295                     " selector: "+prefix(selectorIndex));
1296             }
1297             index=parseMessage(index, 1, nestingLevel+1, argType);
1298             isEmpty=false;
1299         }
1300     }
1301 
1302     /**
1303      * Validates and parses an argument name or argument number string.
1304      * This internal method assumes that the input substring is a "pattern identifier".
1305      * @return &gt;=0 if the name is a valid number,
1306      *         ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
1307      *         ARG_NAME_NOT_VALID (-2) if it is neither.
1308      * @see #validateArgumentName(String)
1309      */
parseArgNumber(CharSequence s, int start, int limit)1310     private static int parseArgNumber(CharSequence s, int start, int limit) {
1311         // If the identifier contains only ASCII digits, then it is an argument _number_
1312         // and must not have leading zeros (except "0" itself).
1313         // Otherwise it is an argument _name_.
1314         if(start>=limit) {
1315             return ARG_NAME_NOT_VALID;
1316         }
1317         int number;
1318         // Defer numeric errors until we know there are only digits.
1319         boolean badNumber;
1320         char c=s.charAt(start++);
1321         if(c=='0') {
1322             if(start==limit) {
1323                 return 0;
1324             } else {
1325                 number=0;
1326                 badNumber=true;  // leading zero
1327             }
1328         } else if('1'<=c && c<='9') {
1329             number=c-'0';
1330             badNumber=false;
1331         } else {
1332             return ARG_NAME_NOT_NUMBER;
1333         }
1334         while(start<limit) {
1335             c=s.charAt(start++);
1336             if('0'<=c && c<='9') {
1337                 if(number>=Integer.MAX_VALUE/10) {
1338                     badNumber=true;  // overflow
1339                 }
1340                 number=number*10+(c-'0');
1341             } else {
1342                 return ARG_NAME_NOT_NUMBER;
1343             }
1344         }
1345         // There are only ASCII digits.
1346         if(badNumber) {
1347             return ARG_NAME_NOT_VALID;
1348         } else {
1349             return number;
1350         }
1351     }
1352 
parseArgNumber(int start, int limit)1353     private int parseArgNumber(int start, int limit) {
1354         return parseArgNumber(msg, start, limit);
1355     }
1356 
1357     /**
1358      * Parses a number from the specified message substring.
1359      * @param start start index into the message string
1360      * @param limit limit index into the message string, must be start<limit
1361      * @param allowInfinity true if U+221E is allowed (for ChoiceFormat)
1362      */
parseDouble(int start, int limit, boolean allowInfinity)1363     private void parseDouble(int start, int limit, boolean allowInfinity) {
1364         assert start<limit;
1365         // fake loop for easy exit and single throw statement
1366         for(;;) {
1367             // fast path for small integers and infinity
1368             int value=0;
1369             int isNegative=0;  // not boolean so that we can easily add it to value
1370             int index=start;
1371             char c=msg.charAt(index++);
1372             if(c=='-') {
1373                 isNegative=1;
1374                 if(index==limit) {
1375                     break;  // no number
1376                 }
1377                 c=msg.charAt(index++);
1378             } else if(c=='+') {
1379                 if(index==limit) {
1380                     break;  // no number
1381                 }
1382                 c=msg.charAt(index++);
1383             }
1384             if(c==0x221e) {  // infinity
1385                 if(allowInfinity && index==limit) {
1386                     addArgDoublePart(
1387                         isNegative!=0 ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY,
1388                         start, limit-start);
1389                     return;
1390                 } else {
1391                     break;
1392                 }
1393             }
1394             // try to parse the number as a small integer but fall back to a double
1395             while('0'<=c && c<='9') {
1396                 value=value*10+(c-'0');
1397                 if(value>(Part.MAX_VALUE+isNegative)) {
1398                     break;  // not a small-enough integer
1399                 }
1400                 if(index==limit) {
1401                     addPart(Part.Type.ARG_INT, start, limit-start, isNegative!=0 ? -value : value);
1402                     return;
1403                 }
1404                 c=msg.charAt(index++);
1405             }
1406             // Let Double.parseDouble() throw a NumberFormatException.
1407             double numericValue=Double.parseDouble(msg.substring(start, limit));
1408             addArgDoublePart(numericValue, start, limit-start);
1409             return;
1410         }
1411         throw new NumberFormatException(
1412             "Bad syntax for numeric value: "+msg.substring(start, limit));
1413     }
1414 
1415     /**
1416      * Appends the s[start, limit[ substring to sb, but with only half of the apostrophes
1417      * according to JDK pattern behavior.
1418      * @internal
1419      */
1420     /* package */ static void appendReducedApostrophes(String s, int start, int limit,
1421                                                        StringBuilder sb) {
1422         int doubleApos=-1;
1423         for(;;) {
1424             int i=s.indexOf('\'', start);
1425             if(i<0 || i>=limit) {
1426                 sb.append(s, start, limit);
1427                 break;
1428             }
1429             if(i==doubleApos) {
1430                 // Double apostrophe at start-1 and start==i, append one.
1431                 sb.append('\'');
1432                 ++start;
1433                 doubleApos=-1;
1434             } else {
1435                 // Append text between apostrophes and skip this one.
1436                 sb.append(s, start, i);
1437                 doubleApos=start=i+1;
1438             }
1439         }
1440     }
1441 
1442     private int skipWhiteSpace(int index) {
1443         return PatternProps.skipWhiteSpace(msg, index);
1444     }
1445 
1446     private int skipIdentifier(int index) {
1447         return PatternProps.skipIdentifier(msg, index);
1448     }
1449 
1450     /**
1451      * Skips a sequence of characters that could occur in a double value.
1452      * Does not fully parse or validate the value.
1453      */
1454     private int skipDouble(int index) {
1455         while(index<msg.length()) {
1456             char c=msg.charAt(index);
1457             // U+221E: Allow the infinity symbol, for ChoiceFormat patterns.
1458             if((c<'0' && "+-.".indexOf(c)<0) || (c>'9' && c!='e' && c!='E' && c!=0x221e)) {
1459                 break;
1460             }
1461             ++index;
1462         }
1463         return index;
1464     }
1465 
1466     private static boolean isArgTypeChar(int c) {
1467         return ('a'<=c && c<='z') || ('A'<=c && c<='Z');
1468     }
1469 
1470     private boolean isChoice(int index) {
1471         char c;
1472         return
1473             ((c=msg.charAt(index++))=='c' || c=='C') &&
1474             ((c=msg.charAt(index++))=='h' || c=='H') &&
1475             ((c=msg.charAt(index++))=='o' || c=='O') &&
1476             ((c=msg.charAt(index++))=='i' || c=='I') &&
1477             ((c=msg.charAt(index++))=='c' || c=='C') &&
1478             ((c=msg.charAt(index))=='e' || c=='E');
1479     }
1480 
1481     private boolean isPlural(int index) {
1482         char c;
1483         return
1484             ((c=msg.charAt(index++))=='p' || c=='P') &&
1485             ((c=msg.charAt(index++))=='l' || c=='L') &&
1486             ((c=msg.charAt(index++))=='u' || c=='U') &&
1487             ((c=msg.charAt(index++))=='r' || c=='R') &&
1488             ((c=msg.charAt(index++))=='a' || c=='A') &&
1489             ((c=msg.charAt(index))=='l' || c=='L');
1490     }
1491 
1492     private boolean isSelect(int index) {
1493         char c;
1494         return
1495             ((c=msg.charAt(index++))=='s' || c=='S') &&
1496             ((c=msg.charAt(index++))=='e' || c=='E') &&
1497             ((c=msg.charAt(index++))=='l' || c=='L') &&
1498             ((c=msg.charAt(index++))=='e' || c=='E') &&
1499             ((c=msg.charAt(index++))=='c' || c=='C') &&
1500             ((c=msg.charAt(index))=='t' || c=='T');
1501     }
1502 
1503     private boolean isOrdinal(int index) {
1504         char c;
1505         return
1506             ((c=msg.charAt(index++))=='o' || c=='O') &&
1507             ((c=msg.charAt(index++))=='r' || c=='R') &&
1508             ((c=msg.charAt(index++))=='d' || c=='D') &&
1509             ((c=msg.charAt(index++))=='i' || c=='I') &&
1510             ((c=msg.charAt(index++))=='n' || c=='N') &&
1511             ((c=msg.charAt(index++))=='a' || c=='A') &&
1512             ((c=msg.charAt(index))=='l' || c=='L');
1513     }
1514 
1515     /**
1516      * @return true if we are inside a MessageFormat (sub-)pattern,
1517      *         as opposed to inside a top-level choice/plural/select pattern.
1518      */
1519     private boolean inMessageFormatPattern(int nestingLevel) {
1520         return nestingLevel>0 || parts.get(0).type==Part.Type.MSG_START;
1521     }
1522 
1523     /**
1524      * @return true if we are in a MessageFormat sub-pattern
1525      *         of a top-level ChoiceFormat pattern.
1526      */
1527     private boolean inTopLevelChoiceMessage(int nestingLevel, ArgType parentType) {
1528         return
1529             nestingLevel==1 &&
1530             parentType==ArgType.CHOICE &&
1531             parts.get(0).type!=Part.Type.MSG_START;
1532     }
1533 
1534     private void addPart(Part.Type type, int index, int length, int value) {
1535         parts.add(new Part(type, index, length, value));
1536     }
1537 
1538     private void addLimitPart(int start, Part.Type type, int index, int length, int value) {
1539         parts.get(start).limitPartIndex=parts.size();
1540         addPart(type, index, length, value);
1541     }
1542 
1543     private void addArgDoublePart(double numericValue, int start, int length) {
1544         int numericIndex;
1545         if(numericValues==null) {
1546             numericValues=new ArrayList<Double>();
1547             numericIndex=0;
1548         } else {
1549             numericIndex=numericValues.size();
1550             if(numericIndex>Part.MAX_VALUE) {
1551                 throw new IndexOutOfBoundsException("Too many numeric values");
1552             }
1553         }
1554         numericValues.add(numericValue);
1555         addPart(Part.Type.ARG_DOUBLE, start, length, numericIndex);
1556     }
1557 
1558     private static final int MAX_PREFIX_LENGTH=24;
1559 
1560     /**
1561      * Returns a prefix of s.substring(start). Used for Exception messages.
1562      * @param s
1563      * @param start start index in s
1564      * @return s.substring(start) or a prefix of that
1565      */
1566     private static String prefix(String s, int start) {
1567         StringBuilder prefix=new StringBuilder(MAX_PREFIX_LENGTH+20);
1568         if(start==0) {
1569             prefix.append("\"");
1570         } else {
1571             prefix.append("[at pattern index ").append(start).append("] \"");
1572         }
1573         int substringLength=s.length()-start;
1574         if(substringLength<=MAX_PREFIX_LENGTH) {
1575             prefix.append(start==0 ? s : s.substring(start));
1576         } else {
1577             int limit=start+MAX_PREFIX_LENGTH-4;
1578             if(Character.isHighSurrogate(s.charAt(limit-1))) {
1579                 // remove lead surrogate from the end of the prefix
1580                 --limit;
1581             }
1582             prefix.append(s, start, limit).append(" ...");
1583         }
1584         return prefix.append("\"").toString();
1585     }
1586 
1587     private static String prefix(String s) {
1588         return prefix(s, 0);
1589     }
1590 
1591     private String prefix(int start) {
1592         return prefix(msg, start);
1593     }
1594 
1595     private String prefix() {
1596         return prefix(msg, 0);
1597     }
1598 
1599     private ApostropheMode aposMode;  // apostrophe mode of this instance; where it is assigned is not visible in this part of the file
1600     private String msg;  // the pattern string that the skip*(), is*() and prefix() helpers index into
1601     private ArrayList<Part> parts=new ArrayList<Part>();  // list of parsed parts; addPart() appends to it
1602     private ArrayList<Double> numericValues;  // allocated lazily by addArgDoublePart(); an ARG_DOUBLE part's value is an index into this list
1603     private boolean hasArgNames;  // presumably true once a named argument has been parsed -- TODO confirm against the parser code
1604     private boolean hasArgNumbers;  // presumably true once a numbered argument has been parsed -- TODO confirm against the parser code
1605     private boolean needsAutoQuoting;  // NOTE(review): looks related to apostrophe auto-quoting; not used in this part of the file -- confirm
1606     private boolean frozen;  // presumably the Freezable state flag (Freezable is imported by the file) -- confirm
1607 
1608     private static final ApostropheMode defaultAposMode=  // default apostrophe mode, computed once when the class is initialized
1609         ApostropheMode.valueOf(  // NOTE(review): if ApostropheMode is an enum, valueOf() throws IllegalArgumentException for an unknown name -- confirm
1610             ICUConfig.get("com.ibm.icu.text.MessagePattern.ApostropheMode", "DOUBLE_OPTIONAL"));  // second argument is presumably the fallback when the property is not configured -- verify against ICUConfig
1611 
1612     private static final ArgType[] argTypes=ArgType.values();  // result of ArgType.values(), fetched once and kept in a static; its use is not visible in this part of the file
1613 }
1614