1 /*
2  *******************************************************************************
3  *   Copyright (C) 2010-2014, International Business Machines
4  *   Corporation and others.  All Rights Reserved.
5  *******************************************************************************
6  *   created on: 2010aug21
7  *   created by: Markus W. Scherer
8  */
9 
10 package androidx.core.i18n.messageformat_icu.text;
11 
12 import androidx.annotation.RestrictTo;
13 import androidx.core.i18n.messageformat_icu.impl.PatternProps;
14 import androidx.core.i18n.messageformat_icu.util.Freezable;
15 import androidx.core.i18n.messageformat_icu.util.ICUCloneNotSupportedException;
16 
17 import java.util.ArrayList;
18 import java.util.Locale;
19 
20 //Note: Minimize ICU dependencies, only use a very small part of the ICU core.
21 //In particular, do not depend on *Format classes.
22 
23 /**
24  * Parses and represents ICU MessageFormat patterns.
25  * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat.
26  * Used in the implementations of those classes as well as in tools
27  * for message validation, translation and format conversion.
28  * <p>
29  * The parser handles all syntax relevant for identifying message arguments.
30  * This includes "complex" arguments whose style strings contain
31  * nested MessageFormat pattern substrings.
32  * For "simple" arguments (with no nested MessageFormat pattern substrings),
33  * the argument style is not parsed any further.
34  * <p>
35  * The parser handles named and numbered message arguments and allows both in one message.
36  * <p>
37  * Once a pattern has been parsed successfully, iterate through the parsed data
38  * with countParts(), getPart() and related methods.
39  * <p>
40  * The data logically represents a parse tree, but is stored and accessed
41  * as a list of "parts" for fast and simple parsing and to minimize object allocations.
42  * Arguments and nested messages are best handled via recursion.
43  * For every _START "part", {@link #getLimitPartIndex(int)} efficiently returns
44  * the index of the corresponding _LIMIT "part".
45  * <p>
46  * List of "parts":
47  * <pre>
48  * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT
49  * argument = noneArg | simpleArg | complexArg
50  * complexArg = choiceArg | pluralArg | selectArg
51  *
52  * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE
53  * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE
54  * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE
55  * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL
56  * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT
57  *
58  * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+
59  * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+
60  * selectStyle = (ARG_SELECTOR message)+
61  * </pre>
62  * <ul>
63  *   <li>Literal output text is not represented directly by "parts" but accessed
64  *       between parts of a message, from one part's getLimit() to the next part's getIndex().
65  *   <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE.
66  *   <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or
67  *       the less-than-or-equal-to sign (U+2264).
68  *   <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value.
69  *       The optional numeric Part between each (ARG_SELECTOR, message) pair
70  *       is the value of an explicit-number selector like "=2",
71  *       otherwise the selector is a non-numeric identifier.
 *   <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle.
 * </ul>
 * <p>
74  * This class is not intended for public subclassing.
75  *
76  * icu_annot::stable ICU 4.8
77  * @author Markus Scherer
78  */
79 @RestrictTo(RestrictTo.Scope.LIBRARY)
80 public final class MessagePattern implements Cloneable, Freezable<MessagePattern> {
81     /**
82      * Mode for when an apostrophe starts quoted literal text for MessageFormat output.
83      * The default is {@link ApostropheMode#DOUBLE_OPTIONAL}.
84      * <p>
85      * A pair of adjacent apostrophes always results in a single apostrophe in the output,
86      * even when the pair is between two single, text-quoting apostrophes.
87      * <p>
88      * The following table shows examples of desired MessageFormat.format() output
89      * with the pattern strings that yield that output.
90      * <p>
91      * <table>
92      *   <tr>
93      *     <th>Desired output</th>
94      *     <th>DOUBLE_OPTIONAL</th>
95      *     <th>DOUBLE_REQUIRED</th>
96      *   </tr>
97      *   <tr>
98      *     <td>I see {many}</td>
99      *     <td>I see '{many}'</td>
100      *     <td>(same)</td>
101      *   </tr>
102      *   <tr>
103      *     <td>I said {'Wow!'}</td>
104      *     <td>I said '{''Wow!''}'</td>
105      *     <td>(same)</td>
106      *   </tr>
107      *   <tr>
108      *     <td>I don't know</td>
109      *     <td>I don't know OR<br> I don''t know</td>
110      *     <td>I don''t know</td>
111      *   </tr>
112      * </table>
113      * icu_annot::stable ICU 4.8
114      */
115     public enum ApostropheMode {
116         /**
117          * A literal apostrophe is represented by
118          * either a single or a double apostrophe pattern character.
119          * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text
120          * if it immediately precedes a curly brace {},
121          * or a pipe symbol | if inside a choice format,
122          * or a pound symbol # if inside a plural format.
123          * <p>
124          * This is the default behavior starting with ICU 4.8.
125          * icu_annot::stable ICU 4.8
126          */
127         DOUBLE_OPTIONAL,
128         /**
129          * A literal apostrophe must be represented by
130          * a double apostrophe pattern character.
131          * A single apostrophe always starts quoted literal text.
132          * <p>
133          * This is the behavior of ICU 4.6 and earlier, and of the JDK.
134          * icu_annot::stable ICU 4.8
135          */
136         DOUBLE_REQUIRED
137     }
138 
139     /**
140      * Constructs an empty MessagePattern with default ApostropheMode.
141      * icu_annot::stable ICU 4.8
142      */
MessagePattern()143     public MessagePattern() {
144         aposMode=defaultAposMode;
145     }
146 
147     /**
148      * Constructs an empty MessagePattern.
149      * @param mode Explicit ApostropheMode.
150      * icu_annot::stable ICU 4.8
151      */
MessagePattern(ApostropheMode mode)152     public MessagePattern(ApostropheMode mode) {
153         aposMode=mode;
154     }
155 
156     /**
157      * Constructs a MessagePattern with default ApostropheMode and
158      * parses the MessageFormat pattern string.
159      * @param pattern a MessageFormat pattern string
160      * @throws IllegalArgumentException for syntax errors in the pattern string
161      * @throws IndexOutOfBoundsException if certain limits are exceeded
162      *         (e.g., argument number too high, argument name too long, etc.)
163      * @throws NumberFormatException if a number could not be parsed
164      * icu_annot::stable ICU 4.8
165      */
MessagePattern(String pattern)166     public MessagePattern(String pattern) {
167         aposMode=defaultAposMode;
168         parse(pattern);
169     }
170 
171     /**
172      * Parses a MessageFormat pattern string.
173      * @param pattern a MessageFormat pattern string
174      * @return this
175      * @throws IllegalArgumentException for syntax errors in the pattern string
176      * @throws IndexOutOfBoundsException if certain limits are exceeded
177      *         (e.g., argument number too high, argument name too long, etc.)
178      * @throws NumberFormatException if a number could not be parsed
179      * icu_annot::stable ICU 4.8
180      */
parse(String pattern)181     public MessagePattern parse(String pattern) {
182         preParse(pattern);
183         parseMessage(0, 0, 0, ArgType.NONE);
184         postParse();
185         return this;
186     }
187 
188     /**
189      * Parses a ChoiceFormat pattern string.
190      * @param pattern a ChoiceFormat pattern string
191      * @return this
192      * @throws IllegalArgumentException for syntax errors in the pattern string
193      * @throws IndexOutOfBoundsException if certain limits are exceeded
194      *         (e.g., argument number too high, argument name too long, etc.)
195      * @throws NumberFormatException if a number could not be parsed
196      * icu_annot::stable ICU 4.8
197      */
parseChoiceStyle(String pattern)198     public MessagePattern parseChoiceStyle(String pattern) {
199         preParse(pattern);
200         parseChoiceStyle(0, 0);
201         postParse();
202         return this;
203     }
204 
205     /**
206      * Parses a PluralFormat pattern string.
207      * @param pattern a PluralFormat pattern string
208      * @return this
209      * @throws IllegalArgumentException for syntax errors in the pattern string
210      * @throws IndexOutOfBoundsException if certain limits are exceeded
211      *         (e.g., argument number too high, argument name too long, etc.)
212      * @throws NumberFormatException if a number could not be parsed
213      * icu_annot::stable ICU 4.8
214      */
parsePluralStyle(String pattern)215     public MessagePattern parsePluralStyle(String pattern) {
216         preParse(pattern);
217         parsePluralOrSelectStyle(ArgType.PLURAL, 0, 0);
218         postParse();
219         return this;
220     }
221 
222     /**
223      * Parses a SelectFormat pattern string.
224      * @param pattern a SelectFormat pattern string
225      * @return this
226      * @throws IllegalArgumentException for syntax errors in the pattern string
227      * @throws IndexOutOfBoundsException if certain limits are exceeded
228      *         (e.g., argument number too high, argument name too long, etc.)
229      * @throws NumberFormatException if a number could not be parsed
230      * icu_annot::stable ICU 4.8
231      */
parseSelectStyle(String pattern)232     public MessagePattern parseSelectStyle(String pattern) {
233         preParse(pattern);
234         parsePluralOrSelectStyle(ArgType.SELECT, 0, 0);
235         postParse();
236         return this;
237     }
238 
239     /**
240      * Clears this MessagePattern.
241      * countParts() will return 0.
242      * icu_annot::stable ICU 4.8
243      */
clear()244     public void clear() {
245         // Mostly the same as preParse().
246         if(isFrozen()) {
247             throw new UnsupportedOperationException(
248                 "Attempt to clear() a frozen MessagePattern instance.");
249         }
250         msg=null;
251         hasArgNames=hasArgNumbers=false;
252         needsAutoQuoting=false;
253         parts.clear();
254         if(numericValues!=null) {
255             numericValues.clear();
256         }
257     }
258 
259     /**
260      * Clears this MessagePattern and sets the ApostropheMode.
261      * countParts() will return 0.
262      * @param mode The new ApostropheMode.
263      * icu_annot::stable ICU 4.8
264      */
clearPatternAndSetApostropheMode(ApostropheMode mode)265     public void clearPatternAndSetApostropheMode(ApostropheMode mode) {
266         clear();
267         aposMode=mode;
268     }
269 
270     /**
271      * @param other another object to compare with.
272      * @return true if this object is equivalent to the other one.
273      * icu_annot::stable ICU 4.8
274      */
275     @Override
equals(Object other)276     public boolean equals(Object other) {
277         if(this==other) {
278             return true;
279         }
280         if(other==null || getClass()!=other.getClass()) {
281             return false;
282         }
283         MessagePattern o=(MessagePattern)other;
284         return
285             aposMode.equals(o.aposMode) &&
286             (msg==null ? o.msg==null : msg.equals(o.msg)) &&
287             parts.equals(o.parts);
288         // No need to compare numericValues if msg and parts are the same.
289     }
290 
291     /**
292      * {@inheritDoc}
293      * icu_annot::stable ICU 4.8
294      */
295     @Override
hashCode()296     public int hashCode() {
297         return (aposMode.hashCode()*37+(msg!=null ? msg.hashCode() : 0))*37+parts.hashCode();
298     }
299 
300     /**
301      * @return this instance's ApostropheMode.
302      * icu_annot::stable ICU 4.8
303      */
getApostropheMode()304     public ApostropheMode getApostropheMode() {
305         return aposMode;
306     }
307 
308     /**
309      * @return true if getApostropheMode() == ApostropheMode.DOUBLE_REQUIRED
310      * icu_annot::internal
311      */
jdkAposMode()312     public boolean jdkAposMode() {
313         return aposMode == ApostropheMode.DOUBLE_REQUIRED;
314     }
315 
316     /**
317      * @return the parsed pattern string (null if none was parsed).
318      * icu_annot::stable ICU 4.8
319      */
getPatternString()320     public String getPatternString() {
321         return msg;
322     }
323 
324     /**
325      * Does the parsed pattern have named arguments like {first_name}?
326      * @return true if the parsed pattern has at least one named argument.
327      * icu_annot::stable ICU 4.8
328      */
hasNamedArguments()329     public boolean hasNamedArguments() {
330         return hasArgNames;
331     }
332 
333     /**
334      * Does the parsed pattern have numbered arguments like {2}?
335      * @return true if the parsed pattern has at least one numbered argument.
336      * icu_annot::stable ICU 4.8
337      */
hasNumberedArguments()338     public boolean hasNumberedArguments() {
339         return hasArgNumbers;
340     }
341 
342     /**
343      * {@inheritDoc}
344      * icu_annot::stable ICU 4.8
345      */
346     @Override
toString()347     public String toString() {
348         return msg;
349     }
350 
351     /**
352      * Validates and parses an argument name or argument number string.
353      * An argument name must be a "pattern identifier", that is, it must contain
354      * no Unicode Pattern_Syntax or Pattern_White_Space characters.
355      * If it only contains ASCII digits, then it must be a small integer with no leading zero.
356      * @param name Input string.
357      * @return &gt;=0 if the name is a valid number,
358      *         ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
359      *         ARG_NAME_NOT_VALID (-2) if it is neither.
360      * icu_annot::stable ICU 4.8
361      */
validateArgumentName(String name)362     public static int validateArgumentName(String name) {
363         if(!PatternProps.isIdentifier(name)) {
364             return ARG_NAME_NOT_VALID;
365         }
366         return parseArgNumber(name, 0, name.length());
367     }
368 
369     /**
370      * Return value from {@link #validateArgumentName(String)} for when
371      * the string is a valid "pattern identifier" but not a number.
372      * icu_annot::stable ICU 4.8
373      */
374     public static final int ARG_NAME_NOT_NUMBER=-1;
375 
376     /**
377      * Return value from {@link #validateArgumentName(String)} for when
378      * the string is invalid.
379      * It might not be a valid "pattern identifier",
380      * or it have only ASCII digits but there is a leading zero or the number is too large.
381      * icu_annot::stable ICU 4.8
382      */
383     public static final int ARG_NAME_NOT_VALID=-2;
384 
385     /**
386      * Returns a version of the parsed pattern string where each ASCII apostrophe
387      * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax.
388      * <p>
389      * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}."
390      * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}."
391      * @return the deep-auto-quoted version of the parsed pattern string.
392      * @see MessageFormat#autoQuoteApostrophe(String)
393      * icu_annot::stable ICU 4.8
394      */
autoQuoteApostropheDeep()395     public String autoQuoteApostropheDeep() {
396         if(!needsAutoQuoting) {
397             return msg;
398         }
399         StringBuilder modified=null;
400         // Iterate backward so that the insertion indexes do not change.
401         int count=countParts();
402         for(int i=count; i>0;) {
403             Part part;
404             if((part=getPart(--i)).getType()==Part.Type.INSERT_CHAR) {
405                 if(modified==null) {
406                     modified=new StringBuilder(msg.length()+10).append(msg);
407                 }
408                 modified.insert(part.index, (char)part.value);
409             }
410         }
411         if(modified==null) {
412             return msg;
413         } else {
414             return modified.toString();
415         }
416     }
417 
418     /**
419      * Returns the number of "parts" created by parsing the pattern string.
420      * Returns 0 if no pattern has been parsed or clear() was called.
421      * @return the number of pattern parts.
422      * icu_annot::stable ICU 4.8
423      */
countParts()424     public int countParts() {
425         return parts.size();
426     }
427 
428     /**
429      * Gets the i-th pattern "part".
430      * @param i The index of the Part data. (0..countParts()-1)
431      * @return the i-th pattern "part".
432      * @throws IndexOutOfBoundsException if i is outside the (0..countParts()-1) range
433      * icu_annot::stable ICU 4.8
434      */
getPart(int i)435     public Part getPart(int i) {
436         return parts.get(i);
437     }
438 
439     /**
440      * Returns the Part.Type of the i-th pattern "part".
441      * Convenience method for getPart(i).getType().
442      * @param i The index of the Part data. (0..countParts()-1)
443      * @return The Part.Type of the i-th Part.
444      * @throws IndexOutOfBoundsException if i is outside the (0..countParts()-1) range
445      * icu_annot::stable ICU 4.8
446      */
getPartType(int i)447     public Part.Type getPartType(int i) {
448         return parts.get(i).type;
449     }
450 
451     /**
452      * Returns the pattern index of the specified pattern "part".
453      * Convenience method for getPart(partIndex).getIndex().
454      * @param partIndex The index of the Part data. (0..countParts()-1)
455      * @return The pattern index of this Part.
456      * @throws IndexOutOfBoundsException if partIndex is outside the (0..countParts()-1) range
457      * icu_annot::stable ICU 4.8
458      */
getPatternIndex(int partIndex)459     public int getPatternIndex(int partIndex) {
460         return parts.get(partIndex).index;
461     }
462 
463     /**
464      * Returns the substring of the pattern string indicated by the Part.
465      * Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()).
466      * @param part a part of this MessagePattern.
467      * @return the substring associated with part.
468      * icu_annot::stable ICU 4.8
469      */
getSubstring(Part part)470     public String getSubstring(Part part) {
471         int index=part.index;
472         return msg.substring(index, index+part.length);
473     }
474 
475     /**
476      * Compares the part's substring with the input string s.
477      * @param part a part of this MessagePattern.
478      * @param s a string.
479      * @return true if getSubstring(part).equals(s).
480      * icu_annot::stable ICU 4.8
481      */
partSubstringMatches(Part part, String s)482     public boolean partSubstringMatches(Part part, String s) {
483         return msg.regionMatches(part.index, s, 0, part.length);
484     }
485 
486     /**
487      * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE.
488      * @param part a part of this MessagePattern.
489      * @return the part's numeric value, or NO_NUMERIC_VALUE if this is not a numeric part.
490      * icu_annot::stable ICU 4.8
491      */
getNumericValue(Part part)492     public double getNumericValue(Part part) {
493         Part.Type type=part.type;
494         if(type==Part.Type.ARG_INT) {
495             return part.value;
496         } else if(type==Part.Type.ARG_DOUBLE) {
497             return numericValues.get(part.value);
498         } else {
499             return NO_NUMERIC_VALUE;
500         }
501     }
502 
503     /**
504      * Special value that is returned by getNumericValue(Part) when no
505      * numeric value is defined for a part.
506      * @see #getNumericValue
507      * icu_annot::stable ICU 4.8
508      */
509     public static final double NO_NUMERIC_VALUE=-123456789;
510 
511     /**
512      * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
513      * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
514      * @return the "offset:" value.
515      * @throws IndexOutOfBoundsException if pluralStart is outside the (0..countParts()-1) range
516      * icu_annot::stable ICU 4.8
517      */
getPluralOffset(int pluralStart)518     public double getPluralOffset(int pluralStart) {
519         Part part=parts.get(pluralStart);
520         if(part.type.hasNumericValue()) {
521             return getNumericValue(part);
522         } else {
523             return 0;
524         }
525     }
526 
527     /**
528      * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start.
529      * @param start The index of some Part data (0..countParts()-1);
530      *        this Part should be of Type ARG_START or MSG_START.
531      * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level,
532      *         or start itself if getPartType(msgStart)!=ARG|MSG_START.
533      * @throws IndexOutOfBoundsException if start is outside the (0..countParts()-1) range
534      * icu_annot::stable ICU 4.8
535      */
getLimitPartIndex(int start)536     public int getLimitPartIndex(int start) {
537         int limit=parts.get(start).limitPartIndex;
538         if(limit<start) {
539             return start;
540         }
541         return limit;
542     }
543 
544     /**
545      * A message pattern "part", representing a pattern parsing event.
546      * There is a part for the start and end of a message or argument,
547      * for quoting and escaping of and with ASCII apostrophes,
548      * and for syntax elements of "complex" arguments.
549      * icu_annot::stable ICU 4.8
550      */
551     public static final class Part {
Part(Type t, int i, int l, int v)552         private Part(Type t, int i, int l, int v) {
553             type=t;
554             index=i;
555             length=(char)l;
556             value=(short)v;
557         }
558 
559         /**
560          * Returns the type of this part.
561          * @return the part type.
562          * icu_annot::stable ICU 4.8
563          */
getType()564         public Type getType() {
565             return type;
566         }
567 
568         /**
569          * Returns the pattern string index associated with this Part.
570          * @return this part's pattern string index.
571          * icu_annot::stable ICU 4.8
572          */
getIndex()573         public int getIndex() {
574             return index;
575         }
576 
577         /**
578          * Returns the length of the pattern substring associated with this Part.
579          * This is 0 for some parts.
580          * @return this part's pattern substring length.
581          * icu_annot::stable ICU 4.8
582          */
getLength()583         public int getLength() {
584             return length;
585         }
586 
587         /**
588          * Returns the pattern string limit (exclusive-end) index associated with this Part.
589          * Convenience method for getIndex()+getLength().
590          * @return this part's pattern string limit index, same as getIndex()+getLength().
591          * icu_annot::stable ICU 4.8
592          */
getLimit()593         public int getLimit() {
594             return index+length;
595         }
596 
597         /**
598          * Returns a value associated with this part.
599          * See the documentation of each part type for details.
600          * @return the part value.
601          * icu_annot::stable ICU 4.8
602          */
getValue()603         public int getValue() {
604             return value;
605         }
606 
607         /**
608          * Returns the argument type if this part is of type ARG_START or ARG_LIMIT,
609          * otherwise ArgType.NONE.
610          * @return the argument type for this part.
611          * icu_annot::stable ICU 4.8
612          */
getArgType()613         public ArgType getArgType() {
614             Type type=getType();
615             if(type==Type.ARG_START || type==Type.ARG_LIMIT) {
616                 return argTypes[value];
617             } else {
618                 return ArgType.NONE;
619             }
620         }
621 
622         /**
623          * Part type constants.
624          * icu_annot::stable ICU 4.8
625          */
626         public enum Type {
627             /**
628              * Start of a message pattern (main or nested).
629              * The length is 0 for the top-level message
630              * and for a choice argument sub-message, otherwise 1 for the '{'.
631              * The value indicates the nesting level, starting with 0 for the main message.
632              * <p>
633              * There is always a later MSG_LIMIT part.
634              * icu_annot::stable ICU 4.8
635              */
636             MSG_START,
637             /**
638              * End of a message pattern (main or nested).
639              * The length is 0 for the top-level message and
640              * the last sub-message of a choice argument,
641              * otherwise 1 for the '}' or (in a choice argument style) the '|'.
642              * The value indicates the nesting level, starting with 0 for the main message.
643              * icu_annot::stable ICU 4.8
644              */
645             MSG_LIMIT,
646             /**
647              * Indicates a substring of the pattern string which is to be skipped when formatting.
648              * For example, an apostrophe that begins or ends quoted text
649              * would be indicated with such a part.
650              * The value is undefined and currently always 0.
651              * icu_annot::stable ICU 4.8
652              */
653             SKIP_SYNTAX,
654             /**
655              * Indicates that a syntax character needs to be inserted for auto-quoting.
656              * The length is 0.
657              * The value is the character code of the insertion character. (U+0027=APOSTROPHE)
658              * icu_annot::stable ICU 4.8
659              */
660             INSERT_CHAR,
661             /**
662              * Indicates a syntactic (non-escaped) # symbol in a plural variant.
663              * When formatting, replace this part's substring with the
664              * (value-offset) for the plural argument value.
665              * The value is undefined and currently always 0.
666              * icu_annot::stable ICU 4.8
667              */
668             REPLACE_NUMBER,
669             /**
670              * Start of an argument.
671              * The length is 1 for the '{'.
672              * The value is the ordinal value of the ArgType. Use getArgType().
673              * <p>
674              * This part is followed by either an ARG_NUMBER or ARG_NAME,
675              * followed by optional argument sub-parts (see ArgType constants)
676              * and finally an ARG_LIMIT part.
677              * icu_annot::stable ICU 4.8
678              */
679             ARG_START,
680             /**
681              * End of an argument.
682              * The length is 1 for the '}'.
683              * The value is the ordinal value of the ArgType. Use getArgType().
684              * icu_annot::stable ICU 4.8
685              */
686             ARG_LIMIT,
687             /**
688              * The argument number, provided by the value.
689              * icu_annot::stable ICU 4.8
690              */
691             ARG_NUMBER,
692             /**
693              * The argument name.
694              * The value is undefined and currently always 0.
695              * icu_annot::stable ICU 4.8
696              */
697             ARG_NAME,
698             /**
699              * The argument type.
700              * The value is undefined and currently always 0.
701              * icu_annot::stable ICU 4.8
702              */
703             ARG_TYPE,
704             /**
705              * The argument style text.
706              * The value is undefined and currently always 0.
707              * icu_annot::stable ICU 4.8
708              */
709             ARG_STYLE,
710             /**
711              * A selector substring in a "complex" argument style.
712              * The value is undefined and currently always 0.
713              * icu_annot::stable ICU 4.8
714              */
715             ARG_SELECTOR,
716             /**
717              * An integer value, for example the offset or an explicit selector value
718              * in a PluralFormat style.
719              * The part value is the integer value.
720              * icu_annot::stable ICU 4.8
721              */
722             ARG_INT,
723             /**
724              * A numeric value, for example the offset or an explicit selector value
725              * in a PluralFormat style.
726              * The part value is an index into an internal array of numeric values;
727              * use getNumericValue().
728              * icu_annot::stable ICU 4.8
729              */
730             ARG_DOUBLE;
731 
732             /**
733              * Indicates whether this part has a numeric value.
734              * If so, then that numeric value can be retrieved via {@link MessagePattern#getNumericValue(Part)}.
735              * @return true if this part has a numeric value.
736              * icu_annot::stable ICU 4.8
737              */
hasNumericValue()738             public boolean hasNumericValue() {
739                 return this==ARG_INT || this==ARG_DOUBLE;
740             }
741         }
742 
743         /**
744          * @return a string representation of this part.
745          * icu_annot::stable ICU 4.8
746          */
747         @Override
toString()748         public String toString() {
749             String valueString=(type==Type.ARG_START || type==Type.ARG_LIMIT) ?
750                 getArgType().name() : Integer.toString(value);
751             return type.name()+"("+valueString+")@"+index;
752         }
753 
754         /**
755          * @param other another object to compare with.
756          * @return true if this object is equivalent to the other one.
757          * icu_annot::stable ICU 4.8
758          */
759         @Override
equals(Object other)760         public boolean equals(Object other) {
761             if(this==other) {
762                 return true;
763             }
764             if(other==null || getClass()!=other.getClass()) {
765                 return false;
766             }
767             Part o=(Part)other;
768             return
769                 type.equals(o.type) &&
770                 index==o.index &&
771                 length==o.length &&
772                 value==o.value &&
773                 limitPartIndex==o.limitPartIndex;
774         }
775 
776         /**
777          * {@inheritDoc}
778          * icu_annot::stable ICU 4.8
779          */
780         @Override
hashCode()781         public int hashCode() {
782             return ((type.hashCode()*37+index)*37+length)*37+value;
783         }
784 
        // Upper bound for a part's length; 0xffff is the largest value the char-typed
        // length field below can hold. The parser checks lengths against it before adding parts.
        private static final int MAX_LENGTH=0xffff;
        // Upper bound for a part's value; the value field below is a short.
        private static final int MAX_VALUE=Short.MAX_VALUE;

        // Some fields are not final because they are modified during pattern parsing.
        // After pattern parsing, the parts are effectively immutable.
        private final Type type;
        // Reported as "@index" by toString(); the parser passes pattern string offsets here.
        private final int index;
        // Stored as char, i.e. an unsigned 16-bit quantity (see MAX_LENGTH).
        private final char length;
        // Not final: parseArg() overwrites the ARG_START value once the argument type is known.
        private short value;
        // Compared by equals() but not hashed.
        // NOTE(review): presumably set by addLimitPart() on the matching _START part — confirm.
        private int limitPartIndex;
795     }
796 
797     /**
798      * Argument type constants.
799      * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts.
800      *
801      * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT,
802      * with a nesting level one greater than the surrounding message.
803      * icu_annot::stable ICU 4.8
804      */
805     public enum ArgType {
806         /**
807          * The argument has no specified type.
808          * icu_annot::stable ICU 4.8
809          */
810         NONE,
811         /**
812          * The argument has a "simple" type which is provided by the ARG_TYPE part.
813          * An ARG_STYLE part might follow that.
814          * icu_annot::stable ICU 4.8
815          */
816         SIMPLE,
817         /**
818          * The argument is a ChoiceFormat with one or more
819          * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
820          * icu_annot::stable ICU 4.8
821          */
822         CHOICE,
823         /**
824          * The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset
825          * (e.g., offset:1)
826          * and one or more (ARG_SELECTOR [explicit-value] message) tuples.
827          * If the selector has an explicit value (e.g., =2), then
828          * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message.
829          * Otherwise the message immediately follows the ARG_SELECTOR.
830          * icu_annot::stable ICU 4.8
831          */
832         PLURAL,
833         /**
834          * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs.
835          * icu_annot::stable ICU 4.8
836          */
837         SELECT,
838         /**
839          * The argument is an ordinal-number PluralFormat
840          * with the same style parts sequence and semantics as {@link ArgType#PLURAL}.
841          * icu_annot::stable ICU 50
842          */
843         SELECTORDINAL;
844 
845         /**
846          * @return true if the argument type has a plural style part sequence and semantics,
847          * for example {@link ArgType#PLURAL} and {@link ArgType#SELECTORDINAL}.
848          * icu_annot::stable ICU 50
849          */
hasPluralStyle()850         public boolean hasPluralStyle() {
851             return this == PLURAL || this == SELECTORDINAL;
852         }
853     }
854 
855     /**
856      * Creates and returns a copy of this object.
857      * @return a copy of this object (or itself if frozen).
858      * icu_annot::stable ICU 4.8
859      */
860     @Override
clone()861     public Object clone() {
862         if(isFrozen()) {
863             return this;
864         } else {
865             return cloneAsThawed();
866         }
867     }
868 
869     /**
870      * Creates and returns an unfrozen copy of this object.
871      * @return a copy of this object.
872      * icu_annot::stable ICU 4.8
873      */
874     @SuppressWarnings("unchecked")
875     @Override
cloneAsThawed()876     public MessagePattern cloneAsThawed() {
877         MessagePattern newMsg;
878         try {
879             newMsg=(MessagePattern)super.clone();
880         } catch (CloneNotSupportedException e) {
881             throw new ICUCloneNotSupportedException(e);
882         }
883         newMsg.parts=(ArrayList<Part>)parts.clone();
884         if(numericValues!=null) {
885             newMsg.numericValues=(ArrayList<Double>)numericValues.clone();
886         }
887         newMsg.frozen=false;
888         return newMsg;
889     }
890 
891     /**
892      * Freezes this object, making it immutable and thread-safe.
893      * @return this
894      * icu_annot::stable ICU 4.8
895      */
896     @Override
freeze()897     public MessagePattern freeze() {
898         frozen=true;
899         return this;
900     }
901 
902     /**
903      * Determines whether this object is frozen (immutable) or not.
904      * @return true if this object is frozen.
905      * icu_annot::stable ICU 4.8
906      */
907     @Override
isFrozen()908     public boolean isFrozen() {
909         return frozen;
910     }
911 
preParse(String pattern)912     private void preParse(String pattern) {
913         if(isFrozen()) {
914             throw new UnsupportedOperationException(
915                 "Attempt to parse("+prefix(pattern)+") on frozen MessagePattern instance.");
916         }
917         msg=pattern;
918         hasArgNames=hasArgNumbers=false;
919         needsAutoQuoting=false;
920         parts.clear();
921         if(numericValues!=null) {
922             numericValues.clear();
923         }
924     }
925 
    /**
     * Counterpart to preParse(); intentionally a no-op at present.
     * NOTE(review): presumably invoked once a pattern has been parsed — its callers are
     * outside this part of the file, confirm there.
     */
    private void postParse() {
        // Nothing to be done currently.
    }
929 
    /**
     * Parses one message (the top-level one or a nested sub-message) and records its parts:
     * MSG_START, then SKIP_SYNTAX / INSERT_CHAR parts for apostrophe quoting,
     * REPLACE_NUMBER for an unquoted '#' in plural-style messages, the parts of each
     * argument (via parseArg()), and finally MSG_LIMIT.
     *
     * @param index pattern string index at which the message starts
     * @param msgStartLength length of the substring covered by MSG_START;
     *        parsing of the message text resumes after it
     *        (callers in this file pass 1 when index is at a '{' and 0 for choice messages)
     * @param nestingLevel nesting depth, stored as the value of the MSG_START and MSG_LIMIT parts
     * @param parentType type of the enclosing argument; selects the extra syntax characters
     *        ('|' for CHOICE, '#' for plural styles)
     * @return the index at which the caller continues: for a CHOICE parent the index of the
     *         terminating '}' or '|', otherwise the index after the closing '}';
     *         msg.length() if the message ran to the end of the pattern
     * @throws IndexOutOfBoundsException if nestingLevel exceeds Part.MAX_VALUE
     * @throws IllegalArgumentException if a nested message reaches the end of the pattern
     *         and inTopLevelChoiceMessage() does not hold
     */
    private int parseMessage(int index, int msgStartLength, int nestingLevel, ArgType parentType) {
        if(nestingLevel>Part.MAX_VALUE) {
            throw new IndexOutOfBoundsException();
        }
        // Remember where MSG_START goes so that addLimitPart() can refer back to it.
        int msgStart=parts.size();
        addPart(Part.Type.MSG_START, index, msgStartLength, nestingLevel);
        index+=msgStartLength;
        while(index<msg.length()) {
            // Note: index already points past c from here on.
            char c=msg.charAt(index++);
            if(c=='\'') {
                if(index==msg.length()) {
                    // The apostrophe is the last character in the pattern.
                    // Add a Part for auto-quoting.
                    addPart(Part.Type.INSERT_CHAR, index, 0, '\'');  // value=char to be inserted
                    needsAutoQuoting=true;
                } else {
                    c=msg.charAt(index);
                    if(c=='\'') {
                        // double apostrophe, skip the second one
                        addPart(Part.Type.SKIP_SYNTAX, index++, 1, 0);
                    } else if(
                        // The apostrophe starts quoted text either always (DOUBLE_REQUIRED mode)
                        // or only when it precedes a character that is syntax in this context.
                        aposMode==ApostropheMode.DOUBLE_REQUIRED ||
                        c=='{' || c=='}' ||
                        (parentType==ArgType.CHOICE && c=='|') ||
                        (parentType.hasPluralStyle() && c=='#')
                    ) {
                        // skip the quote-starting apostrophe
                        addPart(Part.Type.SKIP_SYNTAX, index-1, 1, 0);
                        // find the end of the quoted literal text
                        for(;;) {
                            index=msg.indexOf('\'', index+1);
                            if(index>=0) {
                                if((index+1)<msg.length() && msg.charAt(index+1)=='\'') {
                                    // double apostrophe inside quoted literal text
                                    // still encodes a single apostrophe, skip the second one
                                    addPart(Part.Type.SKIP_SYNTAX, ++index, 1, 0);
                                } else {
                                    // skip the quote-ending apostrophe
                                    addPart(Part.Type.SKIP_SYNTAX, index++, 1, 0);
                                    break;
                                }
                            } else {
                                // The quoted text reaches to the end of the message.
                                index=msg.length();
                                // Add a Part for auto-quoting.
                                addPart(Part.Type.INSERT_CHAR, index, 0, '\'');  // value=char to be inserted
                                needsAutoQuoting=true;
                                break;
                            }
                        }
                    } else {
                        // Interpret the apostrophe as literal text.
                        // Add a Part for auto-quoting.
                        addPart(Part.Type.INSERT_CHAR, index, 0, '\'');  // value=char to be inserted
                        needsAutoQuoting=true;
                    }
                }
            } else if(parentType.hasPluralStyle() && c=='#') {
                // The unquoted # in a plural message fragment will be replaced
                // with the (number-offset).
                addPart(Part.Type.REPLACE_NUMBER, index-1, 1, 0);
            } else if(c=='{') {
                // index-1 is the position of the '{'; parseArg() returns the index after the '}'.
                index=parseArg(index-1, 1, nestingLevel);
            } else if((nestingLevel>0 && c=='}') || (parentType==ArgType.CHOICE && c=='|')) {
                // Finish the message before the terminator.
                // In a choice style, report the "}" substring only for the following ARG_LIMIT,
                // not for this MSG_LIMIT.
                int limitLength=(parentType==ArgType.CHOICE && c=='}') ? 0 : 1;
                addLimitPart(msgStart, Part.Type.MSG_LIMIT, index-1, limitLength, nestingLevel);
                if(parentType==ArgType.CHOICE) {
                    // Let the choice style parser see the '}' or '|'.
                    return index-1;
                } else {
                    // continue parsing after the '}'
                    return index;
                }
            }  // else: c is part of literal text
        }
        // The end of the pattern string was reached without a message terminator.
        if(nestingLevel>0 && !inTopLevelChoiceMessage(nestingLevel, parentType)) {
            throw new IllegalArgumentException(
                "Unmatched '{' braces in message "+prefix());
        }
        addLimitPart(msgStart, Part.Type.MSG_LIMIT, index, 0, nestingLevel);
        return index;
    }
1015 
    /**
     * Parses one argument, from its opening '{' through its closing '}', and records
     * ARG_START, ARG_NUMBER or ARG_NAME, the type/style parts (if any) and ARG_LIMIT.
     *
     * @param index pattern string index of the argument start (parseMessage() passes the
     *        position of the '{')
     * @param argStartLength length of the substring covered by ARG_START
     * @param nestingLevel nesting depth of the message containing this argument;
     *        handed on to the choice and plural/select style parsers
     * @return the index after the closing '}'
     * @throws IllegalArgumentException on unmatched braces or bad argument syntax
     * @throws IndexOutOfBoundsException if the argument number is too large or the
     *         argument name or type name is too long for a Part
     */
    private int parseArg(int index, int argStartLength, int nestingLevel) {
        int argStart=parts.size();
        // The type is not known yet; the ARG_START value is patched further below.
        ArgType argType=ArgType.NONE;
        addPart(Part.Type.ARG_START, index, argStartLength, argType.ordinal());
        int nameIndex=index=skipWhiteSpace(index+argStartLength);
        if(index==msg.length()) {
            throw new IllegalArgumentException(
                "Unmatched '{' braces in message "+prefix());
        }
        // parse argument name or number
        index=skipIdentifier(index);
        int number=parseArgNumber(nameIndex, index);
        if(number>=0) {
            int length=index-nameIndex;
            if(length>Part.MAX_LENGTH || number>Part.MAX_VALUE) {
                throw new IndexOutOfBoundsException(
                    "Argument number too large: "+prefix(nameIndex));
            }
            hasArgNumbers=true;
            addPart(Part.Type.ARG_NUMBER, nameIndex, length, number);
        } else if(number==ARG_NAME_NOT_NUMBER) {
            int length=index-nameIndex;
            if(length>Part.MAX_LENGTH) {
                throw new IndexOutOfBoundsException(
                    "Argument name too long: "+prefix(nameIndex));
            }
            hasArgNames=true;
            addPart(Part.Type.ARG_NAME, nameIndex, length, 0);
        } else {  // number<-1 (ARG_NAME_NOT_VALID)
            throw new IllegalArgumentException("Bad argument syntax: "+prefix(nameIndex));
        }
        index=skipWhiteSpace(index);
        if(index==msg.length()) {
            throw new IllegalArgumentException(
                "Unmatched '{' braces in message "+prefix());
        }
        char c=msg.charAt(index);
        if(c=='}') {
            // all done
        } else if(c!=',') {
            throw new IllegalArgumentException("Bad argument syntax: "+prefix(nameIndex));
        } else /* ',' */ {
            // parse argument type: case-sensitive a-zA-Z
            int typeIndex=index=skipWhiteSpace(index+1);
            while(index<msg.length() && isArgTypeChar(msg.charAt(index))) {
                ++index;
            }
            int length=index-typeIndex;
            index=skipWhiteSpace(index);
            if(index==msg.length()) {
                throw new IllegalArgumentException(
                    "Unmatched '{' braces in message "+prefix());
            }
            // The type name must not be empty and must be followed by ',' or '}'.
            // c is reassigned here and tested again below when looking for a style.
            if(length==0 || ((c=msg.charAt(index))!=',' && c!='}')) {
                throw new IllegalArgumentException("Bad argument syntax: "+prefix(nameIndex));
            }
            if(length>Part.MAX_LENGTH) {
                throw new IndexOutOfBoundsException(
                    "Argument type name too long: "+prefix(nameIndex));
            }
            // Anything that is not one of the complex-type keywords is a "simple" type.
            argType=ArgType.SIMPLE;
            if(length==6) {
                // case-insensitive comparisons for complex-type names
                if(isChoice(typeIndex)) {
                    argType=ArgType.CHOICE;
                } else if(isPlural(typeIndex)) {
                    argType=ArgType.PLURAL;
                } else if(isSelect(typeIndex)) {
                    argType=ArgType.SELECT;
                }
            } else if(length==13) {
                // 13 characters: "select" (6) followed by "ordinal" (7)
                if(isSelect(typeIndex) && isOrdinal(typeIndex+6)) {
                    argType=ArgType.SELECTORDINAL;
                }
            }
            // change the ARG_START type from NONE to argType
            parts.get(argStart).value=(short)argType.ordinal();
            // Only simple types get an ARG_TYPE part; complex types are
            // identified by the ARG_START/ARG_LIMIT value alone.
            if(argType==ArgType.SIMPLE) {
                addPart(Part.Type.ARG_TYPE, typeIndex, length, 0);
            }
            // look for an argument style (pattern)
            if(c=='}') {
                if(argType!=ArgType.SIMPLE) {
                    throw new IllegalArgumentException(
                        "No style field for complex argument: "+prefix(nameIndex));
                }
            } else /* ',' */ {
                ++index;
                if(argType==ArgType.SIMPLE) {
                    index=parseSimpleStyle(index);
                } else if(argType==ArgType.CHOICE) {
                    index=parseChoiceStyle(index, nestingLevel);
                } else {
                    index=parsePluralOrSelectStyle(argType, index, nestingLevel);
                }
            }
        }
        // Argument parsing stopped on the '}'.
        addLimitPart(argStart, Part.Type.ARG_LIMIT, index, 1, argType.ordinal());
        return index+1;
    }
1117 
parseSimpleStyle(int index)1118     private int parseSimpleStyle(int index) {
1119         int start=index;
1120         int nestedBraces=0;
1121         while(index<msg.length()) {
1122             char c=msg.charAt(index++);
1123             if(c=='\'') {
1124                 // Treat apostrophe as quoting but include it in the style part.
1125                 // Find the end of the quoted literal text.
1126                 index=msg.indexOf('\'', index);
1127                 if(index<0) {
1128                     throw new IllegalArgumentException(
1129                         "Quoted literal argument style text reaches to the end of the message: "+
1130                         prefix(start));
1131                 }
1132                 // skip the quote-ending apostrophe
1133                 ++index;
1134             } else if(c=='{') {
1135                 ++nestedBraces;
1136             } else if(c=='}') {
1137                 if(nestedBraces>0) {
1138                     --nestedBraces;
1139                 } else {
1140                     int length=--index-start;
1141                     if(length>Part.MAX_LENGTH) {
1142                         throw new IndexOutOfBoundsException(
1143                             "Argument style text too long: "+prefix(start));
1144                     }
1145                     addPart(Part.Type.ARG_STYLE, start, length, 0);
1146                     return index;
1147                 }
1148             }  // c is part of literal text
1149         }
1150         throw new IllegalArgumentException(
1151             "Unmatched '{' braces in message "+prefix());
1152     }
1153 
parseChoiceStyle(int index, int nestingLevel)1154     private int parseChoiceStyle(int index, int nestingLevel) {
1155         int start=index;
1156         index=skipWhiteSpace(index);
1157         if(index==msg.length() || msg.charAt(index)=='}') {
1158             throw new IllegalArgumentException(
1159                 "Missing choice argument pattern in "+prefix());
1160         }
1161         for(;;) {
1162             // The choice argument style contains |-separated (number, separator, message) triples.
1163             // Parse the number.
1164             int numberIndex=index;
1165             index=skipDouble(index);
1166             int length=index-numberIndex;
1167             if(length==0) {
1168                 throw new IllegalArgumentException("Bad choice pattern syntax: "+prefix(start));
1169             }
1170             if(length>Part.MAX_LENGTH) {
1171                 throw new IndexOutOfBoundsException(
1172                     "Choice number too long: "+prefix(numberIndex));
1173             }
1174             parseDouble(numberIndex, index, true);  // adds ARG_INT or ARG_DOUBLE
1175             // Parse the separator.
1176             index=skipWhiteSpace(index);
1177             if(index==msg.length()) {
1178                 throw new IllegalArgumentException("Bad choice pattern syntax: "+prefix(start));
1179             }
1180             char c=msg.charAt(index);
1181             if(!(c=='#' || c=='<' || c=='\u2264')) {  // U+2264 is <=
1182                 throw new IllegalArgumentException(
1183                     "Expected choice separator (#<\u2264) instead of '"+c+
1184                     "' in choice pattern "+prefix(start));
1185             }
1186             addPart(Part.Type.ARG_SELECTOR, index, 1, 0);
1187             // Parse the message fragment.
1188             index=parseMessage(++index, 0, nestingLevel+1, ArgType.CHOICE);
1189             // parseMessage(..., CHOICE) returns the index of the terminator, or msg.length().
1190             if(index==msg.length()) {
1191                 return index;
1192             }
1193             if(msg.charAt(index)=='}') {
1194                 if(!inMessageFormatPattern(nestingLevel)) {
1195                     throw new IllegalArgumentException(
1196                         "Bad choice pattern syntax: "+prefix(start));
1197                 }
1198                 return index;
1199             }  // else the terminator is '|'
1200             index=skipWhiteSpace(index+1);
1201         }
1202     }
1203 
parsePluralOrSelectStyle(ArgType argType, int index, int nestingLevel)1204     private int parsePluralOrSelectStyle(ArgType argType, int index, int nestingLevel) {
1205         int start=index;
1206         boolean isEmpty=true;
1207         boolean hasOther=false;
1208         for(;;) {
1209             // First, collect the selector looking for a small set of terminators.
1210             // It would be a little faster to consider the syntax of each possible
1211             // token right here, but that makes the code too complicated.
1212             index=skipWhiteSpace(index);
1213             boolean eos=index==msg.length();
1214             if(eos || msg.charAt(index)=='}') {
1215                 if(eos==inMessageFormatPattern(nestingLevel)) {
1216                     throw new IllegalArgumentException(
1217                         "Bad "+
1218                         argType.toString().toLowerCase(Locale.ENGLISH)+
1219                         " pattern syntax: "+prefix(start));
1220                 }
1221                 if(!hasOther) {
1222                     throw new IllegalArgumentException(
1223                         "Missing 'other' keyword in "+
1224                         argType.toString().toLowerCase(Locale.ENGLISH)+
1225                         " pattern in "+prefix());
1226                 }
1227                 return index;
1228             }
1229             int selectorIndex=index;
1230             if(argType.hasPluralStyle() && msg.charAt(selectorIndex)=='=') {
1231                 // explicit-value plural selector: =double
1232                 index=skipDouble(index+1);
1233                 int length=index-selectorIndex;
1234                 if(length==1) {
1235                     throw new IllegalArgumentException(
1236                         "Bad "+
1237                         argType.toString().toLowerCase(Locale.ENGLISH)+
1238                         " pattern syntax: "+prefix(start));
1239                 }
1240                 if(length>Part.MAX_LENGTH) {
1241                     throw new IndexOutOfBoundsException(
1242                         "Argument selector too long: "+prefix(selectorIndex));
1243                 }
1244                 addPart(Part.Type.ARG_SELECTOR, selectorIndex, length, 0);
1245                 parseDouble(selectorIndex+1, index, false);  // adds ARG_INT or ARG_DOUBLE
1246             } else {
1247                 index=skipIdentifier(index);
1248                 int length=index-selectorIndex;
1249                 if(length==0) {
1250                     throw new IllegalArgumentException(
1251                         "Bad "+
1252                         argType.toString().toLowerCase(Locale.ENGLISH)+
1253                         " pattern syntax: "+prefix(start));
1254                 }
1255                 // Note: The ':' in "offset:" is just beyond the skipIdentifier() range.
1256                 if( argType.hasPluralStyle() && length==6 && index<msg.length() &&
1257                     msg.regionMatches(selectorIndex, "offset:", 0, 7)
1258                 ) {
1259                     // plural offset, not a selector
1260                     if(!isEmpty) {
1261                         throw new IllegalArgumentException(
1262                             "Plural argument 'offset:' (if present) must precede key-message pairs: "+
1263                             prefix(start));
1264                     }
1265                     // allow whitespace between offset: and its value
1266                     int valueIndex=skipWhiteSpace(index+1);  // The ':' is at index.
1267                     index=skipDouble(valueIndex);
1268                     if(index==valueIndex) {
1269                         throw new IllegalArgumentException(
1270                             "Missing value for plural 'offset:' "+prefix(start));
1271                     }
1272                     if((index-valueIndex)>Part.MAX_LENGTH) {
1273                         throw new IndexOutOfBoundsException(
1274                             "Plural offset value too long: "+prefix(valueIndex));
1275                     }
1276                     parseDouble(valueIndex, index, false);  // adds ARG_INT or ARG_DOUBLE
1277                     isEmpty=false;
1278                     continue;  // no message fragment after the offset
1279                 } else {
1280                     // normal selector word
1281                     if(length>Part.MAX_LENGTH) {
1282                         throw new IndexOutOfBoundsException(
1283                             "Argument selector too long: "+prefix(selectorIndex));
1284                     }
1285                     addPart(Part.Type.ARG_SELECTOR, selectorIndex, length, 0);
1286                     if(msg.regionMatches(selectorIndex, "other", 0, length)) {
1287                         hasOther=true;
1288                     }
1289                 }
1290             }
1291 
1292             // parse the message fragment following the selector
1293             index=skipWhiteSpace(index);
1294             if(index==msg.length() || msg.charAt(index)!='{') {
1295                 throw new IllegalArgumentException(
1296                     "No message fragment after "+
1297                     argType.toString().toLowerCase(Locale.ENGLISH)+
1298                     " selector: "+prefix(selectorIndex));
1299             }
1300             index=parseMessage(index, 1, nestingLevel+1, argType);
1301             isEmpty=false;
1302         }
1303     }
1304 
1305     /**
1306      * Validates and parses an argument name or argument number string.
1307      * This internal method assumes that the input substring is a "pattern identifier".
1308      * @return &gt;=0 if the name is a valid number,
1309      *         ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
1310      *         ARG_NAME_NOT_VALID (-2) if it is neither.
1311      * @see #validateArgumentName(String)
1312      */
parseArgNumber(CharSequence s, int start, int limit)1313     private static int parseArgNumber(CharSequence s, int start, int limit) {
1314         // If the identifier contains only ASCII digits, then it is an argument _number_
1315         // and must not have leading zeros (except "0" itself).
1316         // Otherwise it is an argument _name_.
1317         if(start>=limit) {
1318             return ARG_NAME_NOT_VALID;
1319         }
1320         int number;
1321         // Defer numeric errors until we know there are only digits.
1322         boolean badNumber;
1323         char c=s.charAt(start++);
1324         if(c=='0') {
1325             if(start==limit) {
1326                 return 0;
1327             } else {
1328                 number=0;
1329                 badNumber=true;  // leading zero
1330             }
1331         } else if('1'<=c && c<='9') {
1332             number=c-'0';
1333             badNumber=false;
1334         } else {
1335             return ARG_NAME_NOT_NUMBER;
1336         }
1337         while(start<limit) {
1338             c=s.charAt(start++);
1339             if('0'<=c && c<='9') {
1340                 if(number>=Integer.MAX_VALUE/10) {
1341                     badNumber=true;  // overflow
1342                 }
1343                 number=number*10+(c-'0');
1344             } else {
1345                 return ARG_NAME_NOT_NUMBER;
1346             }
1347         }
1348         // There are only ASCII digits.
1349         if(badNumber) {
1350             return ARG_NAME_NOT_VALID;
1351         } else {
1352             return number;
1353         }
1354     }
1355 
parseArgNumber(int start, int limit)1356     private int parseArgNumber(int start, int limit) {
1357         return parseArgNumber(msg, start, limit);
1358     }
1359 
1360     /**
1361      * Parses a number from the specified message substring.
1362      * @param start start index into the message string
1363      * @param limit limit index into the message string, must be start<limit
1364      * @param allowInfinity true if U+221E is allowed (for ChoiceFormat)
1365      */
parseDouble(int start, int limit, boolean allowInfinity)1366     private void parseDouble(int start, int limit, boolean allowInfinity) {
1367         assert start<limit;
1368         // fake loop for easy exit and single throw statement
1369         for(;;) {
1370             // fast path for small integers and infinity
1371             int value=0;
1372             int isNegative=0;  // not boolean so that we can easily add it to value
1373             int index=start;
1374             char c=msg.charAt(index++);
1375             if(c=='-') {
1376                 isNegative=1;
1377                 if(index==limit) {
1378                     break;  // no number
1379                 }
1380                 c=msg.charAt(index++);
1381             } else if(c=='+') {
1382                 if(index==limit) {
1383                     break;  // no number
1384                 }
1385                 c=msg.charAt(index++);
1386             }
1387             if(c==0x221e) {  // infinity
1388                 if(allowInfinity && index==limit) {
1389                     addArgDoublePart(
1390                         isNegative!=0 ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY,
1391                         start, limit-start);
1392                     return;
1393                 } else {
1394                     break;
1395                 }
1396             }
1397             // try to parse the number as a small integer but fall back to a double
1398             while('0'<=c && c<='9') {
1399                 value=value*10+(c-'0');
1400                 if(value>(Part.MAX_VALUE+isNegative)) {
1401                     break;  // not a small-enough integer
1402                 }
1403                 if(index==limit) {
1404                     addPart(Part.Type.ARG_INT, start, limit-start, isNegative!=0 ? -value : value);
1405                     return;
1406                 }
1407                 c=msg.charAt(index++);
1408             }
1409             // Let Double.parseDouble() throw a NumberFormatException.
1410             double numericValue=Double.parseDouble(msg.substring(start, limit));
1411             addArgDoublePart(numericValue, start, limit-start);
1412             return;
1413         }
1414         throw new NumberFormatException(
1415             "Bad syntax for numeric value: "+msg.substring(start, limit));
1416     }
1417 
1418     /**
1419      * Appends the s[start, limit[ substring to sb, but with only half of the apostrophes
1420      * according to JDK pattern behavior.
1421      * icu_annot::internal
1422      */
1423     /* package */ static void appendReducedApostrophes(String s, int start, int limit,
1424                                                        StringBuilder sb) {
1425         int doubleApos=-1;
1426         for(;;) {
1427             int i=s.indexOf('\'', start);
1428             if(i<0 || i>=limit) {
1429                 sb.append(s, start, limit);
1430                 break;
1431             }
1432             if(i==doubleApos) {
1433                 // Double apostrophe at start-1 and start==i, append one.
1434                 sb.append('\'');
1435                 ++start;
1436                 doubleApos=-1;
1437             } else {
1438                 // Append text between apostrophes and skip this one.
1439                 sb.append(s, start, i);
1440                 doubleApos=start=i+1;
1441             }
1442         }
1443     }
1444 
1445     private int skipWhiteSpace(int index) {
1446         return PatternProps.skipWhiteSpace(msg, index);
1447     }
1448 
1449     private int skipIdentifier(int index) {
1450         return PatternProps.skipIdentifier(msg, index);
1451     }
1452 
1453     /**
1454      * Skips a sequence of characters that could occur in a double value.
1455      * Does not fully parse or validate the value.
1456      */
1457     private int skipDouble(int index) {
1458         while(index<msg.length()) {
1459             char c=msg.charAt(index);
1460             // U+221E: Allow the infinity symbol, for ChoiceFormat patterns.
1461             if((c<'0' && "+-.".indexOf(c)<0) || (c>'9' && c!='e' && c!='E' && c!=0x221e)) {
1462                 break;
1463             }
1464             ++index;
1465         }
1466         return index;
1467     }
1468 
1469     private static boolean isArgTypeChar(int c) {
1470         return ('a'<=c && c<='z') || ('A'<=c && c<='Z');
1471     }
1472 
1473     private boolean isChoice(int index) {
1474         char c;
1475         return
1476             ((c=msg.charAt(index++))=='c' || c=='C') &&
1477             ((c=msg.charAt(index++))=='h' || c=='H') &&
1478             ((c=msg.charAt(index++))=='o' || c=='O') &&
1479             ((c=msg.charAt(index++))=='i' || c=='I') &&
1480             ((c=msg.charAt(index++))=='c' || c=='C') &&
1481             ((c=msg.charAt(index))=='e' || c=='E');
1482     }
1483 
1484     private boolean isPlural(int index) {
1485         char c;
1486         return
1487             ((c=msg.charAt(index++))=='p' || c=='P') &&
1488             ((c=msg.charAt(index++))=='l' || c=='L') &&
1489             ((c=msg.charAt(index++))=='u' || c=='U') &&
1490             ((c=msg.charAt(index++))=='r' || c=='R') &&
1491             ((c=msg.charAt(index++))=='a' || c=='A') &&
1492             ((c=msg.charAt(index))=='l' || c=='L');
1493     }
1494 
1495     private boolean isSelect(int index) {
1496         char c;
1497         return
1498             ((c=msg.charAt(index++))=='s' || c=='S') &&
1499             ((c=msg.charAt(index++))=='e' || c=='E') &&
1500             ((c=msg.charAt(index++))=='l' || c=='L') &&
1501             ((c=msg.charAt(index++))=='e' || c=='E') &&
1502             ((c=msg.charAt(index++))=='c' || c=='C') &&
1503             ((c=msg.charAt(index))=='t' || c=='T');
1504     }
1505 
1506     private boolean isOrdinal(int index) {
1507         char c;
1508         return
1509             ((c=msg.charAt(index++))=='o' || c=='O') &&
1510             ((c=msg.charAt(index++))=='r' || c=='R') &&
1511             ((c=msg.charAt(index++))=='d' || c=='D') &&
1512             ((c=msg.charAt(index++))=='i' || c=='I') &&
1513             ((c=msg.charAt(index++))=='n' || c=='N') &&
1514             ((c=msg.charAt(index++))=='a' || c=='A') &&
1515             ((c=msg.charAt(index))=='l' || c=='L');
1516     }
1517 
1518     /**
1519      * @return true if we are inside a MessageFormat (sub-)pattern,
1520      *         as opposed to inside a top-level choice/plural/select pattern.
1521      */
1522     private boolean inMessageFormatPattern(int nestingLevel) {
1523         return nestingLevel>0 || parts.get(0).type==Part.Type.MSG_START;
1524     }
1525 
1526     /**
1527      * @return true if we are in a MessageFormat sub-pattern
1528      *         of a top-level ChoiceFormat pattern.
1529      */
1530     private boolean inTopLevelChoiceMessage(int nestingLevel, ArgType parentType) {
1531         return
1532             nestingLevel==1 &&
1533             parentType==ArgType.CHOICE &&
1534             parts.get(0).type!=Part.Type.MSG_START;
1535     }
1536 
1537     private void addPart(Part.Type type, int index, int length, int value) {
1538         parts.add(new Part(type, index, length, value));
1539     }
1540 
1541     private void addLimitPart(int start, Part.Type type, int index, int length, int value) {
1542         parts.get(start).limitPartIndex=parts.size();
1543         addPart(type, index, length, value);
1544     }
1545 
1546     private void addArgDoublePart(double numericValue, int start, int length) {
1547         int numericIndex;
1548         if(numericValues==null) {
1549             numericValues=new ArrayList<Double>();
1550             numericIndex=0;
1551         } else {
1552             numericIndex=numericValues.size();
1553             if(numericIndex>Part.MAX_VALUE) {
1554                 throw new IndexOutOfBoundsException("Too many numeric values");
1555             }
1556         }
1557         numericValues.add(numericValue);
1558         addPart(Part.Type.ARG_DOUBLE, start, length, numericIndex);
1559     }
1560 
1561     private static final int MAX_PREFIX_LENGTH=24;
1562 
1563     /**
1564      * Returns a prefix of s.substring(start). Used for Exception messages.
1565      * @param s
1566      * @param start start index in s
1567      * @return s.substring(start) or a prefix of that
1568      */
1569     private static String prefix(String s, int start) {
1570         StringBuilder prefix=new StringBuilder(MAX_PREFIX_LENGTH+20);
1571         if(start==0) {
1572             prefix.append("\"");
1573         } else {
1574             prefix.append("[at pattern index ").append(start).append("] \"");
1575         }
1576         int substringLength=s.length()-start;
1577         if(substringLength<=MAX_PREFIX_LENGTH) {
1578             prefix.append(start==0 ? s : s.substring(start));
1579         } else {
1580             int limit=start+MAX_PREFIX_LENGTH-4;
1581             if(Character.isHighSurrogate(s.charAt(limit-1))) {
1582                 // remove lead surrogate from the end of the prefix
1583                 --limit;
1584             }
1585             prefix.append(s, start, limit).append(" ...");
1586         }
1587         return prefix.append("\"").toString();
1588     }
1589 
1590     private static String prefix(String s) {
1591         return prefix(s, 0);
1592     }
1593 
1594     private String prefix(int start) {
1595         return prefix(msg, start);
1596     }
1597 
1598     private String prefix() {
1599         return prefix(msg, 0);
1600     }
1601 
    // Apostrophe handling mode of this instance.
    // NOTE(review): where it is assigned and consulted is outside this part of the file.
    private ApostropheMode aposMode;
    // The pattern string; the skip*() and is*() helpers index into it.
    private String msg;
    // The parsed parts in list order; new parts are appended via addPart().
    // The type of element 0 tells a MessageFormat pattern (MSG_START) from other top-level patterns.
    private ArrayList<Part> parts=new ArrayList<Part>();
    // Numeric values referenced by ARG_DOUBLE parts (the part value is the list position).
    // Stays null until addArgDoublePart() stores the first value.
    private ArrayList<Double> numericValues;
    // NOTE(review): presumably set when the pattern uses named arguments; confirm in the parser.
    private boolean hasArgNames;
    // NOTE(review): presumably set when the pattern uses numbered arguments; confirm in the parser.
    private boolean hasArgNumbers;
    // NOTE(review): presumably related to apostrophe auto-quoting; confirm in the parser.
    private boolean needsAutoQuoting;
    // NOTE(review): presumably the Freezable state flag; confirm against freeze()/isFrozen().
    private boolean frozen;

    // The ApostropheMode constant kept as the default: DOUBLE_OPTIONAL.
    // NOTE(review): the code that applies this default is outside this part of the file.
    private static final ApostropheMode defaultAposMode=
        ApostropheMode.DOUBLE_OPTIONAL;

    // The result of ArgType.values(), obtained once and kept.
    // NOTE(review): presumably cached for lookup by ordinal; confirm at the use sites.
    private static final ArgType[] argTypes=ArgType.values();
1615 }
1616