1 /* 2 ******************************************************************************* 3 * Copyright (C) 2010-2014, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * created on: 2010aug21 7 * created by: Markus W. Scherer 8 */ 9 10 package com.ibm.icu.text; 11 12 import java.util.ArrayList; 13 import java.util.Locale; 14 15 import com.ibm.icu.impl.ICUConfig; 16 import com.ibm.icu.impl.PatternProps; 17 import com.ibm.icu.util.Freezable; 18 import com.ibm.icu.util.ICUCloneNotSupportedException; 19 20 //Note: Minimize ICU dependencies, only use a very small part of the ICU core. 21 //In particular, do not depend on *Format classes. 22 23 /** 24 * Parses and represents ICU MessageFormat patterns. 25 * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat. 26 * Used in the implementations of those classes as well as in tools 27 * for message validation, translation and format conversion. 28 * <p> 29 * The parser handles all syntax relevant for identifying message arguments. 30 * This includes "complex" arguments whose style strings contain 31 * nested MessageFormat pattern substrings. 32 * For "simple" arguments (with no nested MessageFormat pattern substrings), 33 * the argument style is not parsed any further. 34 * <p> 35 * The parser handles named and numbered message arguments and allows both in one message. 36 * <p> 37 * Once a pattern has been parsed successfully, iterate through the parsed data 38 * with countParts(), getPart() and related methods. 39 * <p> 40 * The data logically represents a parse tree, but is stored and accessed 41 * as a list of "parts" for fast and simple parsing and to minimize object allocations. 42 * Arguments and nested messages are best handled via recursion. 43 * For every _START "part", {@link #getLimitPartIndex(int)} efficiently returns 44 * the index of the corresponding _LIMIT "part". 45 * <p> 46 * List of "parts": 47 * <pre> 48 * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT 49 * argument = noneArg | simpleArg | complexArg 50 * complexArg = choiceArg | pluralArg | selectArg 51 * 52 * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE 53 * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE 54 * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE 55 * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL 56 * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT 57 * 58 * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+ 59 * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+ 60 * selectStyle = (ARG_SELECTOR message)+ 61 * </pre> 62 * <ul> 63 * <li>Literal output text is not represented directly by "parts" but accessed 64 * between parts of a message, from one part's getLimit() to the next part's getIndex(). 65 * <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE. 66 * <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or 67 * the less-than-or-equal-to sign (U+2264). 68 * <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value. 69 * The optional numeric Part between each (ARG_SELECTOR, message) pair 70 * is the value of an explicit-number selector like "=2", 71 * otherwise the selector is a non-numeric identifier. 72 * <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle. 73 * <p> 74 * This class is not intended for public subclassing. 75 * 76 * @stable ICU 4.8 77 * @author Markus Scherer 78 */ 79 public final class MessagePattern implements Cloneable, Freezable<MessagePattern> { 80 /** 81 * Mode for when an apostrophe starts quoted literal text for MessageFormat output. 82 * The default is DOUBLE_OPTIONAL unless overridden via ICUConfig 83 * (/com/ibm/icu/ICUConfig.properties). 84 * <p> 85 * A pair of adjacent apostrophes always results in a single apostrophe in the output, 86 * even when the pair is between two single, text-quoting apostrophes. 87 * <p> 88 * The following table shows examples of desired MessageFormat.format() output 89 * with the pattern strings that yield that output. 90 * <p> 91 * <table> 92 * <tr> 93 * <th>Desired output</th> 94 * <th>DOUBLE_OPTIONAL</th> 95 * <th>DOUBLE_REQUIRED</th> 96 * </tr> 97 * <tr> 98 * <td>I see {many}</td> 99 * <td>I see '{many}'</td> 100 * <td>(same)</td> 101 * </tr> 102 * <tr> 103 * <td>I said {'Wow!'}</td> 104 * <td>I said '{''Wow!''}'</td> 105 * <td>(same)</td> 106 * </tr> 107 * <tr> 108 * <td>I don't know</td> 109 * <td>I don't know OR<br> I don''t know</td> 110 * <td>I don''t know</td> 111 * </tr> 112 * </table> 113 * @stable ICU 4.8 114 */ 115 public enum ApostropheMode { 116 /** 117 * A literal apostrophe is represented by 118 * either a single or a double apostrophe pattern character. 119 * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text 120 * if it immediately precedes a curly brace {}, 121 * or a pipe symbol | if inside a choice format, 122 * or a pound symbol # if inside a plural format. 123 * <p> 124 * This is the default behavior starting with ICU 4.8. 125 * @stable ICU 4.8 126 */ 127 DOUBLE_OPTIONAL, 128 /** 129 * A literal apostrophe must be represented by 130 * a double apostrophe pattern character. 131 * A single apostrophe always starts quoted literal text. 132 * <p> 133 * This is the behavior of ICU 4.6 and earlier, and of the JDK. 134 * @stable ICU 4.8 135 */ 136 DOUBLE_REQUIRED 137 } 138 139 /** 140 * Constructs an empty MessagePattern with default ApostropheMode. 141 * @stable ICU 4.8 142 */ MessagePattern()143 public MessagePattern() { 144 aposMode=defaultAposMode; 145 } 146 147 /** 148 * Constructs an empty MessagePattern. 149 * @param mode Explicit ApostropheMode. 150 * @stable ICU 4.8 151 */ MessagePattern(ApostropheMode mode)152 public MessagePattern(ApostropheMode mode) { 153 aposMode=mode; 154 } 155 156 /** 157 * Constructs a MessagePattern with default ApostropheMode and 158 * parses the MessageFormat pattern string. 159 * @param pattern a MessageFormat pattern string 160 * @throws IllegalArgumentException for syntax errors in the pattern string 161 * @throws IndexOutOfBoundsException if certain limits are exceeded 162 * (e.g., argument number too high, argument name too long, etc.) 163 * @throws NumberFormatException if a number could not be parsed 164 * @stable ICU 4.8 165 */ MessagePattern(String pattern)166 public MessagePattern(String pattern) { 167 aposMode=defaultAposMode; 168 parse(pattern); 169 } 170 171 /** 172 * Parses a MessageFormat pattern string. 173 * @param pattern a MessageFormat pattern string 174 * @return this 175 * @throws IllegalArgumentException for syntax errors in the pattern string 176 * @throws IndexOutOfBoundsException if certain limits are exceeded 177 * (e.g., argument number too high, argument name too long, etc.) 178 * @throws NumberFormatException if a number could not be parsed 179 * @stable ICU 4.8 180 */ parse(String pattern)181 public MessagePattern parse(String pattern) { 182 preParse(pattern); 183 parseMessage(0, 0, 0, ArgType.NONE); 184 postParse(); 185 return this; 186 } 187 188 /** 189 * Parses a ChoiceFormat pattern string. 190 * @param pattern a ChoiceFormat pattern string 191 * @return this 192 * @throws IllegalArgumentException for syntax errors in the pattern string 193 * @throws IndexOutOfBoundsException if certain limits are exceeded 194 * (e.g., argument number too high, argument name too long, etc.) 195 * @throws NumberFormatException if a number could not be parsed 196 * @stable ICU 4.8 197 */ parseChoiceStyle(String pattern)198 public MessagePattern parseChoiceStyle(String pattern) { 199 preParse(pattern); 200 parseChoiceStyle(0, 0); 201 postParse(); 202 return this; 203 } 204 205 /** 206 * Parses a PluralFormat pattern string. 207 * @param pattern a PluralFormat pattern string 208 * @return this 209 * @throws IllegalArgumentException for syntax errors in the pattern string 210 * @throws IndexOutOfBoundsException if certain limits are exceeded 211 * (e.g., argument number too high, argument name too long, etc.) 212 * @throws NumberFormatException if a number could not be parsed 213 * @stable ICU 4.8 214 */ parsePluralStyle(String pattern)215 public MessagePattern parsePluralStyle(String pattern) { 216 preParse(pattern); 217 parsePluralOrSelectStyle(ArgType.PLURAL, 0, 0); 218 postParse(); 219 return this; 220 } 221 222 /** 223 * Parses a SelectFormat pattern string. 224 * @param pattern a SelectFormat pattern string 225 * @return this 226 * @throws IllegalArgumentException for syntax errors in the pattern string 227 * @throws IndexOutOfBoundsException if certain limits are exceeded 228 * (e.g., argument number too high, argument name too long, etc.) 229 * @throws NumberFormatException if a number could not be parsed 230 * @stable ICU 4.8 231 */ parseSelectStyle(String pattern)232 public MessagePattern parseSelectStyle(String pattern) { 233 preParse(pattern); 234 parsePluralOrSelectStyle(ArgType.SELECT, 0, 0); 235 postParse(); 236 return this; 237 } 238 239 /** 240 * Clears this MessagePattern. 241 * countParts() will return 0. 242 * @stable ICU 4.8 243 */ clear()244 public void clear() { 245 // Mostly the same as preParse(). 246 if(isFrozen()) { 247 throw new UnsupportedOperationException( 248 "Attempt to clear() a frozen MessagePattern instance."); 249 } 250 msg=null; 251 hasArgNames=hasArgNumbers=false; 252 needsAutoQuoting=false; 253 parts.clear(); 254 if(numericValues!=null) { 255 numericValues.clear(); 256 } 257 } 258 259 /** 260 * Clears this MessagePattern and sets the ApostropheMode. 261 * countParts() will return 0. 262 * @param mode The new ApostropheMode. 263 * @stable ICU 4.8 264 */ clearPatternAndSetApostropheMode(ApostropheMode mode)265 public void clearPatternAndSetApostropheMode(ApostropheMode mode) { 266 clear(); 267 aposMode=mode; 268 } 269 270 /** 271 * @param other another object to compare with. 272 * @return true if this object is equivalent to the other one. 273 * @stable ICU 4.8 274 */ 275 @Override equals(Object other)276 public boolean equals(Object other) { 277 if(this==other) { 278 return true; 279 } 280 if(other==null || getClass()!=other.getClass()) { 281 return false; 282 } 283 MessagePattern o=(MessagePattern)other; 284 return 285 aposMode.equals(o.aposMode) && 286 (msg==null ? o.msg==null : msg.equals(o.msg)) && 287 parts.equals(o.parts); 288 // No need to compare numericValues if msg and parts are the same. 289 } 290 291 /** 292 * {@inheritDoc} 293 * @stable ICU 4.8 294 */ 295 @Override hashCode()296 public int hashCode() { 297 return (aposMode.hashCode()*37+(msg!=null ? msg.hashCode() : 0))*37+parts.hashCode(); 298 } 299 300 /** 301 * @return this instance's ApostropheMode. 302 * @stable ICU 4.8 303 */ getApostropheMode()304 public ApostropheMode getApostropheMode() { 305 return aposMode; 306 } 307 308 /** 309 * @return true if getApostropheMode() == ApostropheMode.DOUBLE_REQUIRED 310 * @internal 311 */ jdkAposMode()312 public boolean jdkAposMode() { 313 return aposMode == ApostropheMode.DOUBLE_REQUIRED; 314 } 315 316 /** 317 * @return the parsed pattern string (null if none was parsed). 318 * @stable ICU 4.8 319 */ getPatternString()320 public String getPatternString() { 321 return msg; 322 } 323 324 /** 325 * Does the parsed pattern have named arguments like {first_name}? 326 * @return true if the parsed pattern has at least one named argument. 327 * @stable ICU 4.8 328 */ hasNamedArguments()329 public boolean hasNamedArguments() { 330 return hasArgNames; 331 } 332 333 /** 334 * Does the parsed pattern have numbered arguments like {2}? 335 * @return true if the parsed pattern has at least one numbered argument. 336 * @stable ICU 4.8 337 */ hasNumberedArguments()338 public boolean hasNumberedArguments() { 339 return hasArgNumbers; 340 } 341 342 /** 343 * {@inheritDoc} 344 * @stable ICU 4.8 345 */ 346 @Override toString()347 public String toString() { 348 return msg; 349 } 350 351 /** 352 * Validates and parses an argument name or argument number string. 353 * An argument name must be a "pattern identifier", that is, it must contain 354 * no Unicode Pattern_Syntax or Pattern_White_Space characters. 355 * If it only contains ASCII digits, then it must be a small integer with no leading zero. 356 * @param name Input string. 357 * @return >=0 if the name is a valid number, 358 * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits, 359 * ARG_NAME_NOT_VALID (-2) if it is neither. 360 * @stable ICU 4.8 361 */ validateArgumentName(String name)362 public static int validateArgumentName(String name) { 363 if(!PatternProps.isIdentifier(name)) { 364 return ARG_NAME_NOT_VALID; 365 } 366 return parseArgNumber(name, 0, name.length()); 367 } 368 369 /** 370 * Return value from {@link #validateArgumentName(String)} for when 371 * the string is a valid "pattern identifier" but not a number. 372 * @stable ICU 4.8 373 */ 374 public static final int ARG_NAME_NOT_NUMBER=-1; 375 376 /** 377 * Return value from {@link #validateArgumentName(String)} for when 378 * the string is invalid. 379 * It might not be a valid "pattern identifier", 380 * or it have only ASCII digits but there is a leading zero or the number is too large. 381 * @stable ICU 4.8 382 */ 383 public static final int ARG_NAME_NOT_VALID=-2; 384 385 /** 386 * Returns a version of the parsed pattern string where each ASCII apostrophe 387 * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax. 388 * <p> 389 * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}." 390 * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}." 391 * @return the deep-auto-quoted version of the parsed pattern string. 392 * @see MessageFormat#autoQuoteApostrophe(String) 393 * @stable ICU 4.8 394 */ autoQuoteApostropheDeep()395 public String autoQuoteApostropheDeep() { 396 if(!needsAutoQuoting) { 397 return msg; 398 } 399 StringBuilder modified=null; 400 // Iterate backward so that the insertion indexes do not change. 401 int count=countParts(); 402 for(int i=count; i>0;) { 403 Part part; 404 if((part=getPart(--i)).getType()==Part.Type.INSERT_CHAR) { 405 if(modified==null) { 406 modified=new StringBuilder(msg.length()+10).append(msg); 407 } 408 modified.insert(part.index, (char)part.value); 409 } 410 } 411 if(modified==null) { 412 return msg; 413 } else { 414 return modified.toString(); 415 } 416 } 417 418 /** 419 * Returns the number of "parts" created by parsing the pattern string. 420 * Returns 0 if no pattern has been parsed or clear() was called. 421 * @return the number of pattern parts. 422 * @stable ICU 4.8 423 */ countParts()424 public int countParts() { 425 return parts.size(); 426 } 427 428 /** 429 * Gets the i-th pattern "part". 430 * @param i The index of the Part data. (0..countParts()-1) 431 * @return the i-th pattern "part". 432 * @throws IndexOutOfBoundsException if i is outside the (0..countParts()-1) range 433 * @stable ICU 4.8 434 */ getPart(int i)435 public Part getPart(int i) { 436 return parts.get(i); 437 } 438 439 /** 440 * Returns the Part.Type of the i-th pattern "part". 441 * Convenience method for getPart(i).getType(). 442 * @param i The index of the Part data. (0..countParts()-1) 443 * @return The Part.Type of the i-th Part. 444 * @throws IndexOutOfBoundsException if i is outside the (0..countParts()-1) range 445 * @stable ICU 4.8 446 */ getPartType(int i)447 public Part.Type getPartType(int i) { 448 return parts.get(i).type; 449 } 450 451 /** 452 * Returns the pattern index of the specified pattern "part". 453 * Convenience method for getPart(partIndex).getIndex(). 454 * @param partIndex The index of the Part data. (0..countParts()-1) 455 * @return The pattern index of this Part. 456 * @throws IndexOutOfBoundsException if partIndex is outside the (0..countParts()-1) range 457 * @stable ICU 4.8 458 */ getPatternIndex(int partIndex)459 public int getPatternIndex(int partIndex) { 460 return parts.get(partIndex).index; 461 } 462 463 /** 464 * Returns the substring of the pattern string indicated by the Part. 465 * Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()). 466 * @param part a part of this MessagePattern. 467 * @return the substring associated with part. 468 * @stable ICU 4.8 469 */ getSubstring(Part part)470 public String getSubstring(Part part) { 471 int index=part.index; 472 return msg.substring(index, index+part.length); 473 } 474 475 /** 476 * Compares the part's substring with the input string s. 477 * @param part a part of this MessagePattern. 478 * @param s a string. 479 * @return true if getSubstring(part).equals(s). 480 * @stable ICU 4.8 481 */ partSubstringMatches(Part part, String s)482 public boolean partSubstringMatches(Part part, String s) { 483 return msg.regionMatches(part.index, s, 0, part.length); 484 } 485 486 /** 487 * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE. 488 * @param part a part of this MessagePattern. 489 * @return the part's numeric value, or NO_NUMERIC_VALUE if this is not a numeric part. 490 * @stable ICU 4.8 491 */ getNumericValue(Part part)492 public double getNumericValue(Part part) { 493 Part.Type type=part.type; 494 if(type==Part.Type.ARG_INT) { 495 return part.value; 496 } else if(type==Part.Type.ARG_DOUBLE) { 497 return numericValues.get(part.value); 498 } else { 499 return NO_NUMERIC_VALUE; 500 } 501 } 502 503 /** 504 * Special value that is returned by getNumericValue(Part) when no 505 * numeric value is defined for a part. 506 * @see #getNumericValue 507 * @stable ICU 4.8 508 */ 509 public static final double NO_NUMERIC_VALUE=-123456789; 510 511 /** 512 * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified. 513 * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1) 514 * @return the "offset:" value. 515 * @throws IndexOutOfBoundsException if pluralStart is outside the (0..countParts()-1) range 516 * @stable ICU 4.8 517 */ getPluralOffset(int pluralStart)518 public double getPluralOffset(int pluralStart) { 519 Part part=parts.get(pluralStart); 520 if(part.type.hasNumericValue()) { 521 return getNumericValue(part); 522 } else { 523 return 0; 524 } 525 } 526 527 /** 528 * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start. 529 * @param start The index of some Part data (0..countParts()-1); 530 * this Part should be of Type ARG_START or MSG_START. 531 * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level, 532 * or start itself if getPartType(msgStart)!=ARG|MSG_START. 533 * @throws IndexOutOfBoundsException if start is outside the (0..countParts()-1) range 534 * @stable ICU 4.8 535 */ getLimitPartIndex(int start)536 public int getLimitPartIndex(int start) { 537 int limit=parts.get(start).limitPartIndex; 538 if(limit<start) { 539 return start; 540 } 541 return limit; 542 } 543 544 /** 545 * A message pattern "part", representing a pattern parsing event. 546 * There is a part for the start and end of a message or argument, 547 * for quoting and escaping of and with ASCII apostrophes, 548 * and for syntax elements of "complex" arguments. 549 * @stable ICU 4.8 550 */ 551 public static final class Part { Part(Type t, int i, int l, int v)552 private Part(Type t, int i, int l, int v) { 553 type=t; 554 index=i; 555 length=(char)l; 556 value=(short)v; 557 } 558 559 /** 560 * Returns the type of this part. 561 * @return the part type. 562 * @stable ICU 4.8 563 */ getType()564 public Type getType() { 565 return type; 566 } 567 568 /** 569 * Returns the pattern string index associated with this Part. 570 * @return this part's pattern string index. 571 * @stable ICU 4.8 572 */ getIndex()573 public int getIndex() { 574 return index; 575 } 576 577 /** 578 * Returns the length of the pattern substring associated with this Part. 579 * This is 0 for some parts. 580 * @return this part's pattern substring length. 581 * @stable ICU 4.8 582 */ getLength()583 public int getLength() { 584 return length; 585 } 586 587 /** 588 * Returns the pattern string limit (exclusive-end) index associated with this Part. 589 * Convenience method for getIndex()+getLength(). 590 * @return this part's pattern string limit index, same as getIndex()+getLength(). 591 * @stable ICU 4.8 592 */ getLimit()593 public int getLimit() { 594 return index+length; 595 } 596 597 /** 598 * Returns a value associated with this part. 599 * See the documentation of each part type for details. 600 * @return the part value. 601 * @stable ICU 4.8 602 */ getValue()603 public int getValue() { 604 return value; 605 } 606 607 /** 608 * Returns the argument type if this part is of type ARG_START or ARG_LIMIT, 609 * otherwise ArgType.NONE. 610 * @return the argument type for this part. 611 * @stable ICU 4.8 612 */ getArgType()613 public ArgType getArgType() { 614 Type type=getType(); 615 if(type==Type.ARG_START || type==Type.ARG_LIMIT) { 616 return argTypes[value]; 617 } else { 618 return ArgType.NONE; 619 } 620 } 621 622 /** 623 * Part type constants. 624 * @stable ICU 4.8 625 */ 626 public enum Type { 627 /** 628 * Start of a message pattern (main or nested). 629 * The length is 0 for the top-level message 630 * and for a choice argument sub-message, otherwise 1 for the '{'. 631 * The value indicates the nesting level, starting with 0 for the main message. 632 * <p> 633 * There is always a later MSG_LIMIT part. 634 * @stable ICU 4.8 635 */ 636 MSG_START, 637 /** 638 * End of a message pattern (main or nested). 639 * The length is 0 for the top-level message and 640 * the last sub-message of a choice argument, 641 * otherwise 1 for the '}' or (in a choice argument style) the '|'. 642 * The value indicates the nesting level, starting with 0 for the main message. 643 * @stable ICU 4.8 644 */ 645 MSG_LIMIT, 646 /** 647 * Indicates a substring of the pattern string which is to be skipped when formatting. 648 * For example, an apostrophe that begins or ends quoted text 649 * would be indicated with such a part. 650 * The value is undefined and currently always 0. 651 * @stable ICU 4.8 652 */ 653 SKIP_SYNTAX, 654 /** 655 * Indicates that a syntax character needs to be inserted for auto-quoting. 656 * The length is 0. 657 * The value is the character code of the insertion character. (U+0027=APOSTROPHE) 658 * @stable ICU 4.8 659 */ 660 INSERT_CHAR, 661 /** 662 * Indicates a syntactic (non-escaped) # symbol in a plural variant. 663 * When formatting, replace this part's substring with the 664 * (value-offset) for the plural argument value. 665 * The value is undefined and currently always 0. 666 * @stable ICU 4.8 667 */ 668 REPLACE_NUMBER, 669 /** 670 * Start of an argument. 671 * The length is 1 for the '{'. 672 * The value is the ordinal value of the ArgType. Use getArgType(). 673 * <p> 674 * This part is followed by either an ARG_NUMBER or ARG_NAME, 675 * followed by optional argument sub-parts (see ArgType constants) 676 * and finally an ARG_LIMIT part. 677 * @stable ICU 4.8 678 */ 679 ARG_START, 680 /** 681 * End of an argument. 682 * The length is 1 for the '}'. 683 * The value is the ordinal value of the ArgType. Use getArgType(). 684 * @stable ICU 4.8 685 */ 686 ARG_LIMIT, 687 /** 688 * The argument number, provided by the value. 689 * @stable ICU 4.8 690 */ 691 ARG_NUMBER, 692 /** 693 * The argument name. 694 * The value is undefined and currently always 0. 695 * @stable ICU 4.8 696 */ 697 ARG_NAME, 698 /** 699 * The argument type. 700 * The value is undefined and currently always 0. 701 * @stable ICU 4.8 702 */ 703 ARG_TYPE, 704 /** 705 * The argument style text. 706 * The value is undefined and currently always 0. 707 * @stable ICU 4.8 708 */ 709 ARG_STYLE, 710 /** 711 * A selector substring in a "complex" argument style. 712 * The value is undefined and currently always 0. 713 * @stable ICU 4.8 714 */ 715 ARG_SELECTOR, 716 /** 717 * An integer value, for example the offset or an explicit selector value 718 * in a PluralFormat style. 719 * The part value is the integer value. 720 * @stable ICU 4.8 721 */ 722 ARG_INT, 723 /** 724 * A numeric value, for example the offset or an explicit selector value 725 * in a PluralFormat style. 726 * The part value is an index into an internal array of numeric values; 727 * use getNumericValue(). 728 * @stable ICU 4.8 729 */ 730 ARG_DOUBLE; 731 732 /** 733 * Indicates whether this part has a numeric value. 734 * If so, then that numeric value can be retrieved via {@link MessagePattern#getNumericValue(Part)}. 735 * @return true if this part has a numeric value. 736 * @stable ICU 4.8 737 */ hasNumericValue()738 public boolean hasNumericValue() { 739 return this==ARG_INT || this==ARG_DOUBLE; 740 } 741 } 742 743 /** 744 * @return a string representation of this part. 745 * @stable ICU 4.8 746 */ 747 @Override toString()748 public String toString() { 749 String valueString=(type==Type.ARG_START || type==Type.ARG_LIMIT) ? 750 getArgType().name() : Integer.toString(value); 751 return type.name()+"("+valueString+")@"+index; 752 } 753 754 /** 755 * @param other another object to compare with. 756 * @return true if this object is equivalent to the other one. 757 * @stable ICU 4.8 758 */ 759 @Override equals(Object other)760 public boolean equals(Object other) { 761 if(this==other) { 762 return true; 763 } 764 if(other==null || getClass()!=other.getClass()) { 765 return false; 766 } 767 Part o=(Part)other; 768 return 769 type.equals(o.type) && 770 index==o.index && 771 length==o.length && 772 value==o.value && 773 limitPartIndex==o.limitPartIndex; 774 } 775 776 /** 777 * {@inheritDoc} 778 * @stable ICU 4.8 779 */ 780 @Override hashCode()781 public int hashCode() { 782 return ((type.hashCode()*37+index)*37+length)*37+value; 783 } 784 785 private static final int MAX_LENGTH=0xffff; 786 private static final int MAX_VALUE=Short.MAX_VALUE; 787 788 // Some fields are not final because they are modified during pattern parsing. 789 // After pattern parsing, the parts are effectively immutable. 790 private final Type type; 791 private final int index; 792 private final char length; 793 private short value; 794 private int limitPartIndex; 795 } 796 797 /** 798 * Argument type constants. 799 * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts. 800 * 801 * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT, 802 * with a nesting level one greater than the surrounding message. 803 * @stable ICU 4.8 804 */ 805 public enum ArgType { 806 /** 807 * The argument has no specified type. 808 * @stable ICU 4.8 809 */ 810 NONE, 811 /** 812 * The argument has a "simple" type which is provided by the ARG_TYPE part. 813 * An ARG_STYLE part might follow that. 814 * @stable ICU 4.8 815 */ 816 SIMPLE, 817 /** 818 * The argument is a ChoiceFormat with one or more 819 * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples. 820 * @stable ICU 4.8 821 */ 822 CHOICE, 823 /** 824 * The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset 825 * (e.g., offset:1) 826 * and one or more (ARG_SELECTOR [explicit-value] message) tuples. 827 * If the selector has an explicit value (e.g., =2), then 828 * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message. 829 * Otherwise the message immediately follows the ARG_SELECTOR. 830 * @stable ICU 4.8 831 */ 832 PLURAL, 833 /** 834 * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs. 835 * @stable ICU 4.8 836 */ 837 SELECT, 838 /** 839 * The argument is an ordinal-number PluralFormat 840 * with the same style parts sequence and semantics as {@link ArgType#PLURAL}. 841 * @stable ICU 50 842 */ 843 SELECTORDINAL; 844 845 /** 846 * @return true if the argument type has a plural style part sequence and semantics, 847 * for example {@link ArgType#PLURAL} and {@link ArgType#SELECTORDINAL}. 848 * @stable ICU 50 849 */ hasPluralStyle()850 public boolean hasPluralStyle() { 851 return this == PLURAL || this == SELECTORDINAL; 852 } 853 } 854 855 /** 856 * Creates and returns a copy of this object. 857 * @return a copy of this object (or itself if frozen). 858 * @stable ICU 4.8 859 */ 860 @Override clone()861 public Object clone() { 862 if(isFrozen()) { 863 return this; 864 } else { 865 return cloneAsThawed(); 866 } 867 } 868 869 /** 870 * Creates and returns an unfrozen copy of this object. 871 * @return a copy of this object. 872 * @stable ICU 4.8 873 */ 874 @SuppressWarnings("unchecked") cloneAsThawed()875 public MessagePattern cloneAsThawed() { 876 MessagePattern newMsg; 877 try { 878 newMsg=(MessagePattern)super.clone(); 879 } catch (CloneNotSupportedException e) { 880 throw new ICUCloneNotSupportedException(e); 881 } 882 newMsg.parts=(ArrayList<Part>)parts.clone(); 883 if(numericValues!=null) { 884 newMsg.numericValues=(ArrayList<Double>)numericValues.clone(); 885 } 886 newMsg.frozen=false; 887 return newMsg; 888 } 889 890 /** 891 * Freezes this object, making it immutable and thread-safe. 892 * @return this 893 * @stable ICU 4.8 894 */ freeze()895 public MessagePattern freeze() { 896 frozen=true; 897 return this; 898 } 899 900 /** 901 * Determines whether this object is frozen (immutable) or not. 902 * @return true if this object is frozen. 903 * @stable ICU 4.8 904 */ isFrozen()905 public boolean isFrozen() { 906 return frozen; 907 } 908 preParse(String pattern)909 private void preParse(String pattern) { 910 if(isFrozen()) { 911 throw new UnsupportedOperationException( 912 "Attempt to parse("+prefix(pattern)+") on frozen MessagePattern instance."); 913 } 914 msg=pattern; 915 hasArgNames=hasArgNumbers=false; 916 needsAutoQuoting=false; 917 parts.clear(); 918 if(numericValues!=null) { 919 numericValues.clear(); 920 } 921 } 922 postParse()923 private void postParse() { 924 // Nothing to be done currently. 925 } 926 parseMessage(int index, int msgStartLength, int nestingLevel, ArgType parentType)927 private int parseMessage(int index, int msgStartLength, int nestingLevel, ArgType parentType) { 928 if(nestingLevel>Part.MAX_VALUE) { 929 throw new IndexOutOfBoundsException(); 930 } 931 int msgStart=parts.size(); 932 addPart(Part.Type.MSG_START, index, msgStartLength, nestingLevel); 933 index+=msgStartLength; 934 while(index<msg.length()) { 935 char c=msg.charAt(index++); 936 if(c=='\'') { 937 if(index==msg.length()) { 938 // The apostrophe is the last character in the pattern. 939 // Add a Part for auto-quoting. 940 addPart(Part.Type.INSERT_CHAR, index, 0, '\''); // value=char to be inserted 941 needsAutoQuoting=true; 942 } else { 943 c=msg.charAt(index); 944 if(c=='\'') { 945 // double apostrophe, skip the second one 946 addPart(Part.Type.SKIP_SYNTAX, index++, 1, 0); 947 } else if( 948 aposMode==ApostropheMode.DOUBLE_REQUIRED || 949 c=='{' || c=='}' || 950 (parentType==ArgType.CHOICE && c=='|') || 951 (parentType.hasPluralStyle() && c=='#') 952 ) { 953 // skip the quote-starting apostrophe 954 addPart(Part.Type.SKIP_SYNTAX, index-1, 1, 0); 955 // find the end of the quoted literal text 956 for(;;) { 957 index=msg.indexOf('\'', index+1); 958 if(index>=0) { 959 if((index+1)<msg.length() && msg.charAt(index+1)=='\'') { 960 // double apostrophe inside quoted literal text 961 // still encodes a single apostrophe, skip the second one 962 addPart(Part.Type.SKIP_SYNTAX, ++index, 1, 0); 963 } else { 964 // skip the quote-ending apostrophe 965 addPart(Part.Type.SKIP_SYNTAX, index++, 1, 0); 966 break; 967 } 968 } else { 969 // The quoted text reaches to the end of the of the message. 970 index=msg.length(); 971 // Add a Part for auto-quoting. 972 addPart(Part.Type.INSERT_CHAR, index, 0, '\''); // value=char to be inserted 973 needsAutoQuoting=true; 974 break; 975 } 976 } 977 } else { 978 // Interpret the apostrophe as literal text. 979 // Add a Part for auto-quoting. 980 addPart(Part.Type.INSERT_CHAR, index, 0, '\''); // value=char to be inserted 981 needsAutoQuoting=true; 982 } 983 } 984 } else if(parentType.hasPluralStyle() && c=='#') { 985 // The unquoted # in a plural message fragment will be replaced 986 // with the (number-offset). 987 addPart(Part.Type.REPLACE_NUMBER, index-1, 1, 0); 988 } else if(c=='{') { 989 index=parseArg(index-1, 1, nestingLevel); 990 } else if((nestingLevel>0 && c=='}') || (parentType==ArgType.CHOICE && c=='|')) { 991 // Finish the message before the terminator. 992 // In a choice style, report the "}" substring only for the following ARG_LIMIT, 993 // not for this MSG_LIMIT. 994 int limitLength=(parentType==ArgType.CHOICE && c=='}') ? 0 : 1; 995 addLimitPart(msgStart, Part.Type.MSG_LIMIT, index-1, limitLength, nestingLevel); 996 if(parentType==ArgType.CHOICE) { 997 // Let the choice style parser see the '}' or '|'. 998 return index-1; 999 } else { 1000 // continue parsing after the '}' 1001 return index; 1002 } 1003 } // else: c is part of literal text 1004 } 1005 if(nestingLevel>0 && !inTopLevelChoiceMessage(nestingLevel, parentType)) { 1006 throw new IllegalArgumentException( 1007 "Unmatched '{' braces in message "+prefix()); 1008 } 1009 addLimitPart(msgStart, Part.Type.MSG_LIMIT, index, 0, nestingLevel); 1010 return index; 1011 } 1012 parseArg(int index, int argStartLength, int nestingLevel)1013 private int parseArg(int index, int argStartLength, int nestingLevel) { 1014 int argStart=parts.size(); 1015 ArgType argType=ArgType.NONE; 1016 addPart(Part.Type.ARG_START, index, argStartLength, argType.ordinal()); 1017 int nameIndex=index=skipWhiteSpace(index+argStartLength); 1018 if(index==msg.length()) { 1019 throw new IllegalArgumentException( 1020 "Unmatched '{' braces in message "+prefix()); 1021 } 1022 // parse argument name or number 1023 index=skipIdentifier(index); 1024 int number=parseArgNumber(nameIndex, index); 1025 if(number>=0) { 1026 int length=index-nameIndex; 1027 if(length>Part.MAX_LENGTH || number>Part.MAX_VALUE) { 1028 throw new IndexOutOfBoundsException( 1029 "Argument number too large: "+prefix(nameIndex)); 1030 } 1031 hasArgNumbers=true; 1032 addPart(Part.Type.ARG_NUMBER, nameIndex, length, number); 1033 } else if(number==ARG_NAME_NOT_NUMBER) { 1034 int length=index-nameIndex; 1035 if(length>Part.MAX_LENGTH) { 1036 throw new IndexOutOfBoundsException( 1037 "Argument name too long: "+prefix(nameIndex)); 1038 } 1039 hasArgNames=true; 1040 addPart(Part.Type.ARG_NAME, nameIndex, length, 0); 1041 } else { // number<-1 (ARG_NAME_NOT_VALID) 1042 throw new IllegalArgumentException("Bad argument syntax: "+prefix(nameIndex)); 1043 } 1044 index=skipWhiteSpace(index); 1045 if(index==msg.length()) { 1046 throw new IllegalArgumentException( 1047 "Unmatched '{' braces in message "+prefix()); 1048 } 1049 char c=msg.charAt(index); 1050 if(c=='}') { 1051 // all done 1052 } else if(c!=',') { 1053 throw new IllegalArgumentException("Bad argument syntax: "+prefix(nameIndex)); 1054 } else /* ',' */ { 1055 // parse argument type: case-sensitive a-zA-Z 1056 int typeIndex=index=skipWhiteSpace(index+1); 1057 while(index<msg.length() && isArgTypeChar(msg.charAt(index))) { 1058 ++index; 1059 } 1060 int length=index-typeIndex; 1061 index=skipWhiteSpace(index); 1062 if(index==msg.length()) { 1063 throw new IllegalArgumentException( 1064 "Unmatched '{' braces in message "+prefix()); 1065 } 1066 if(length==0 || ((c=msg.charAt(index))!=',' && c!='}')) { 1067 throw new IllegalArgumentException("Bad argument syntax: "+prefix(nameIndex)); 1068 } 1069 if(length>Part.MAX_LENGTH) { 1070 throw new IndexOutOfBoundsException( 1071 "Argument type name too long: "+prefix(nameIndex)); 1072 } 1073 argType=ArgType.SIMPLE; 1074 if(length==6) { 1075 // case-insensitive comparisons for complex-type names 1076 if(isChoice(typeIndex)) { 1077 argType=ArgType.CHOICE; 1078 } else if(isPlural(typeIndex)) { 1079 argType=ArgType.PLURAL; 1080 } else if(isSelect(typeIndex)) { 1081 argType=ArgType.SELECT; 1082 } 1083 } else if(length==13) { 1084 if(isSelect(typeIndex) && isOrdinal(typeIndex+6)) { 1085 argType=ArgType.SELECTORDINAL; 1086 } 1087 } 1088 // change the ARG_START type from NONE to argType 1089 parts.get(argStart).value=(short)argType.ordinal(); 1090 if(argType==ArgType.SIMPLE) { 1091 addPart(Part.Type.ARG_TYPE, typeIndex, length, 0); 1092 } 1093 // look for an argument style (pattern) 1094 if(c=='}') { 1095 if(argType!=ArgType.SIMPLE) { 1096 throw new IllegalArgumentException( 1097 "No style field for complex argument: "+prefix(nameIndex)); 1098 } 1099 } else /* ',' */ { 1100 ++index; 1101 if(argType==ArgType.SIMPLE) { 1102 index=parseSimpleStyle(index); 1103 } else if(argType==ArgType.CHOICE) { 1104 index=parseChoiceStyle(index, nestingLevel); 1105 } else { 1106 index=parsePluralOrSelectStyle(argType, index, nestingLevel); 1107 } 1108 } 1109 } 1110 // Argument parsing stopped on the '}'. 1111 addLimitPart(argStart, Part.Type.ARG_LIMIT, index, 1, argType.ordinal()); 1112 return index+1; 1113 } 1114 parseSimpleStyle(int index)1115 private int parseSimpleStyle(int index) { 1116 int start=index; 1117 int nestedBraces=0; 1118 while(index<msg.length()) { 1119 char c=msg.charAt(index++); 1120 if(c=='\'') { 1121 // Treat apostrophe as quoting but include it in the style part. 1122 // Find the end of the quoted literal text. 1123 index=msg.indexOf('\'', index); 1124 if(index<0) { 1125 throw new IllegalArgumentException( 1126 "Quoted literal argument style text reaches to the end of the message: "+ 1127 prefix(start)); 1128 } 1129 // skip the quote-ending apostrophe 1130 ++index; 1131 } else if(c=='{') { 1132 ++nestedBraces; 1133 } else if(c=='}') { 1134 if(nestedBraces>0) { 1135 --nestedBraces; 1136 } else { 1137 int length=--index-start; 1138 if(length>Part.MAX_LENGTH) { 1139 throw new IndexOutOfBoundsException( 1140 "Argument style text too long: "+prefix(start)); 1141 } 1142 addPart(Part.Type.ARG_STYLE, start, length, 0); 1143 return index; 1144 } 1145 } // c is part of literal text 1146 } 1147 throw new IllegalArgumentException( 1148 "Unmatched '{' braces in message "+prefix()); 1149 } 1150 parseChoiceStyle(int index, int nestingLevel)1151 private int parseChoiceStyle(int index, int nestingLevel) { 1152 int start=index; 1153 index=skipWhiteSpace(index); 1154 if(index==msg.length() || msg.charAt(index)=='}') { 1155 throw new IllegalArgumentException( 1156 "Missing choice argument pattern in "+prefix()); 1157 } 1158 for(;;) { 1159 // The choice argument style contains |-separated (number, separator, message) triples. 1160 // Parse the number. 1161 int numberIndex=index; 1162 index=skipDouble(index); 1163 int length=index-numberIndex; 1164 if(length==0) { 1165 throw new IllegalArgumentException("Bad choice pattern syntax: "+prefix(start)); 1166 } 1167 if(length>Part.MAX_LENGTH) { 1168 throw new IndexOutOfBoundsException( 1169 "Choice number too long: "+prefix(numberIndex)); 1170 } 1171 parseDouble(numberIndex, index, true); // adds ARG_INT or ARG_DOUBLE 1172 // Parse the separator. 1173 index=skipWhiteSpace(index); 1174 if(index==msg.length()) { 1175 throw new IllegalArgumentException("Bad choice pattern syntax: "+prefix(start)); 1176 } 1177 char c=msg.charAt(index); 1178 if(!(c=='#' || c=='<' || c=='\u2264')) { // U+2264 is <= 1179 throw new IllegalArgumentException( 1180 "Expected choice separator (#<\u2264) instead of '"+c+ 1181 "' in choice pattern "+prefix(start)); 1182 } 1183 addPart(Part.Type.ARG_SELECTOR, index, 1, 0); 1184 // Parse the message fragment. 1185 index=parseMessage(++index, 0, nestingLevel+1, ArgType.CHOICE); 1186 // parseMessage(..., CHOICE) returns the index of the terminator, or msg.length(). 1187 if(index==msg.length()) { 1188 return index; 1189 } 1190 if(msg.charAt(index)=='}') { 1191 if(!inMessageFormatPattern(nestingLevel)) { 1192 throw new IllegalArgumentException( 1193 "Bad choice pattern syntax: "+prefix(start)); 1194 } 1195 return index; 1196 } // else the terminator is '|' 1197 index=skipWhiteSpace(index+1); 1198 } 1199 } 1200 parsePluralOrSelectStyle(ArgType argType, int index, int nestingLevel)1201 private int parsePluralOrSelectStyle(ArgType argType, int index, int nestingLevel) { 1202 int start=index; 1203 boolean isEmpty=true; 1204 boolean hasOther=false; 1205 for(;;) { 1206 // First, collect the selector looking for a small set of terminators. 1207 // It would be a little faster to consider the syntax of each possible 1208 // token right here, but that makes the code too complicated. 1209 index=skipWhiteSpace(index); 1210 boolean eos=index==msg.length(); 1211 if(eos || msg.charAt(index)=='}') { 1212 if(eos==inMessageFormatPattern(nestingLevel)) { 1213 throw new IllegalArgumentException( 1214 "Bad "+ 1215 argType.toString().toLowerCase(Locale.ENGLISH)+ 1216 " pattern syntax: "+prefix(start)); 1217 } 1218 if(!hasOther) { 1219 throw new IllegalArgumentException( 1220 "Missing 'other' keyword in "+ 1221 argType.toString().toLowerCase(Locale.ENGLISH)+ 1222 " pattern in "+prefix()); 1223 } 1224 return index; 1225 } 1226 int selectorIndex=index; 1227 if(argType.hasPluralStyle() && msg.charAt(selectorIndex)=='=') { 1228 // explicit-value plural selector: =double 1229 index=skipDouble(index+1); 1230 int length=index-selectorIndex; 1231 if(length==1) { 1232 throw new IllegalArgumentException( 1233 "Bad "+ 1234 argType.toString().toLowerCase(Locale.ENGLISH)+ 1235 " pattern syntax: "+prefix(start)); 1236 } 1237 if(length>Part.MAX_LENGTH) { 1238 throw new IndexOutOfBoundsException( 1239 "Argument selector too long: "+prefix(selectorIndex)); 1240 } 1241 addPart(Part.Type.ARG_SELECTOR, selectorIndex, length, 0); 1242 parseDouble(selectorIndex+1, index, false); // adds ARG_INT or ARG_DOUBLE 1243 } else { 1244 index=skipIdentifier(index); 1245 int length=index-selectorIndex; 1246 if(length==0) { 1247 throw new IllegalArgumentException( 1248 "Bad "+ 1249 argType.toString().toLowerCase(Locale.ENGLISH)+ 1250 " pattern syntax: "+prefix(start)); 1251 } 1252 // Note: The ':' in "offset:" is just beyond the skipIdentifier() range. 1253 if( argType.hasPluralStyle() && length==6 && index<msg.length() && 1254 msg.regionMatches(selectorIndex, "offset:", 0, 7) 1255 ) { 1256 // plural offset, not a selector 1257 if(!isEmpty) { 1258 throw new IllegalArgumentException( 1259 "Plural argument 'offset:' (if present) must precede key-message pairs: "+ 1260 prefix(start)); 1261 } 1262 // allow whitespace between offset: and its value 1263 int valueIndex=skipWhiteSpace(index+1); // The ':' is at index. 1264 index=skipDouble(valueIndex); 1265 if(index==valueIndex) { 1266 throw new IllegalArgumentException( 1267 "Missing value for plural 'offset:' "+prefix(start)); 1268 } 1269 if((index-valueIndex)>Part.MAX_LENGTH) { 1270 throw new IndexOutOfBoundsException( 1271 "Plural offset value too long: "+prefix(valueIndex)); 1272 } 1273 parseDouble(valueIndex, index, false); // adds ARG_INT or ARG_DOUBLE 1274 isEmpty=false; 1275 continue; // no message fragment after the offset 1276 } else { 1277 // normal selector word 1278 if(length>Part.MAX_LENGTH) { 1279 throw new IndexOutOfBoundsException( 1280 "Argument selector too long: "+prefix(selectorIndex)); 1281 } 1282 addPart(Part.Type.ARG_SELECTOR, selectorIndex, length, 0); 1283 if(msg.regionMatches(selectorIndex, "other", 0, length)) { 1284 hasOther=true; 1285 } 1286 } 1287 } 1288 1289 // parse the message fragment following the selector 1290 index=skipWhiteSpace(index); 1291 if(index==msg.length() || msg.charAt(index)!='{') { 1292 throw new IllegalArgumentException( 1293 "No message fragment after "+ 1294 argType.toString().toLowerCase(Locale.ENGLISH)+ 1295 " selector: "+prefix(selectorIndex)); 1296 } 1297 index=parseMessage(index, 1, nestingLevel+1, argType); 1298 isEmpty=false; 1299 } 1300 } 1301 1302 /** 1303 * Validates and parses an argument name or argument number string. 1304 * This internal method assumes that the input substring is a "pattern identifier". 1305 * @return >=0 if the name is a valid number, 1306 * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits, 1307 * ARG_NAME_NOT_VALID (-2) if it is neither. 1308 * @see #validateArgumentName(String) 1309 */ parseArgNumber(CharSequence s, int start, int limit)1310 private static int parseArgNumber(CharSequence s, int start, int limit) { 1311 // If the identifier contains only ASCII digits, then it is an argument _number_ 1312 // and must not have leading zeros (except "0" itself). 1313 // Otherwise it is an argument _name_. 1314 if(start>=limit) { 1315 return ARG_NAME_NOT_VALID; 1316 } 1317 int number; 1318 // Defer numeric errors until we know there are only digits. 1319 boolean badNumber; 1320 char c=s.charAt(start++); 1321 if(c=='0') { 1322 if(start==limit) { 1323 return 0; 1324 } else { 1325 number=0; 1326 badNumber=true; // leading zero 1327 } 1328 } else if('1'<=c && c<='9') { 1329 number=c-'0'; 1330 badNumber=false; 1331 } else { 1332 return ARG_NAME_NOT_NUMBER; 1333 } 1334 while(start<limit) { 1335 c=s.charAt(start++); 1336 if('0'<=c && c<='9') { 1337 if(number>=Integer.MAX_VALUE/10) { 1338 badNumber=true; // overflow 1339 } 1340 number=number*10+(c-'0'); 1341 } else { 1342 return ARG_NAME_NOT_NUMBER; 1343 } 1344 } 1345 // There are only ASCII digits. 1346 if(badNumber) { 1347 return ARG_NAME_NOT_VALID; 1348 } else { 1349 return number; 1350 } 1351 } 1352 parseArgNumber(int start, int limit)1353 private int parseArgNumber(int start, int limit) { 1354 return parseArgNumber(msg, start, limit); 1355 } 1356 1357 /** 1358 * Parses a number from the specified message substring. 1359 * @param start start index into the message string 1360 * @param limit limit index into the message string, must be start<limit 1361 * @param allowInfinity true if U+221E is allowed (for ChoiceFormat) 1362 */ parseDouble(int start, int limit, boolean allowInfinity)1363 private void parseDouble(int start, int limit, boolean allowInfinity) { 1364 assert start<limit; 1365 // fake loop for easy exit and single throw statement 1366 for(;;) { 1367 // fast path for small integers and infinity 1368 int value=0; 1369 int isNegative=0; // not boolean so that we can easily add it to value 1370 int index=start; 1371 char c=msg.charAt(index++); 1372 if(c=='-') { 1373 isNegative=1; 1374 if(index==limit) { 1375 break; // no number 1376 } 1377 c=msg.charAt(index++); 1378 } else if(c=='+') { 1379 if(index==limit) { 1380 break; // no number 1381 } 1382 c=msg.charAt(index++); 1383 } 1384 if(c==0x221e) { // infinity 1385 if(allowInfinity && index==limit) { 1386 addArgDoublePart( 1387 isNegative!=0 ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY, 1388 start, limit-start); 1389 return; 1390 } else { 1391 break; 1392 } 1393 } 1394 // try to parse the number as a small integer but fall back to a double 1395 while('0'<=c && c<='9') { 1396 value=value*10+(c-'0'); 1397 if(value>(Part.MAX_VALUE+isNegative)) { 1398 break; // not a small-enough integer 1399 } 1400 if(index==limit) { 1401 addPart(Part.Type.ARG_INT, start, limit-start, isNegative!=0 ? -value : value); 1402 return; 1403 } 1404 c=msg.charAt(index++); 1405 } 1406 // Let Double.parseDouble() throw a NumberFormatException. 1407 double numericValue=Double.parseDouble(msg.substring(start, limit)); 1408 addArgDoublePart(numericValue, start, limit-start); 1409 return; 1410 } 1411 throw new NumberFormatException( 1412 "Bad syntax for numeric value: "+msg.substring(start, limit)); 1413 } 1414 1415 /** 1416 * Appends the s[start, limit[ substring to sb, but with only half of the apostrophes 1417 * according to JDK pattern behavior. 1418 * @internal 1419 */ 1420 /* package */ static void appendReducedApostrophes(String s, int start, int limit, 1421 StringBuilder sb) { 1422 int doubleApos=-1; 1423 for(;;) { 1424 int i=s.indexOf('\'', start); 1425 if(i<0 || i>=limit) { 1426 sb.append(s, start, limit); 1427 break; 1428 } 1429 if(i==doubleApos) { 1430 // Double apostrophe at start-1 and start==i, append one. 1431 sb.append('\''); 1432 ++start; 1433 doubleApos=-1; 1434 } else { 1435 // Append text between apostrophes and skip this one. 1436 sb.append(s, start, i); 1437 doubleApos=start=i+1; 1438 } 1439 } 1440 } 1441 1442 private int skipWhiteSpace(int index) { 1443 return PatternProps.skipWhiteSpace(msg, index); 1444 } 1445 1446 private int skipIdentifier(int index) { 1447 return PatternProps.skipIdentifier(msg, index); 1448 } 1449 1450 /** 1451 * Skips a sequence of characters that could occur in a double value. 1452 * Does not fully parse or validate the value. 1453 */ 1454 private int skipDouble(int index) { 1455 while(index<msg.length()) { 1456 char c=msg.charAt(index); 1457 // U+221E: Allow the infinity symbol, for ChoiceFormat patterns. 1458 if((c<'0' && "+-.".indexOf(c)<0) || (c>'9' && c!='e' && c!='E' && c!=0x221e)) { 1459 break; 1460 } 1461 ++index; 1462 } 1463 return index; 1464 } 1465 1466 private static boolean isArgTypeChar(int c) { 1467 return ('a'<=c && c<='z') || ('A'<=c && c<='Z'); 1468 } 1469 1470 private boolean isChoice(int index) { 1471 char c; 1472 return 1473 ((c=msg.charAt(index++))=='c' || c=='C') && 1474 ((c=msg.charAt(index++))=='h' || c=='H') && 1475 ((c=msg.charAt(index++))=='o' || c=='O') && 1476 ((c=msg.charAt(index++))=='i' || c=='I') && 1477 ((c=msg.charAt(index++))=='c' || c=='C') && 1478 ((c=msg.charAt(index))=='e' || c=='E'); 1479 } 1480 1481 private boolean isPlural(int index) { 1482 char c; 1483 return 1484 ((c=msg.charAt(index++))=='p' || c=='P') && 1485 ((c=msg.charAt(index++))=='l' || c=='L') && 1486 ((c=msg.charAt(index++))=='u' || c=='U') && 1487 ((c=msg.charAt(index++))=='r' || c=='R') && 1488 ((c=msg.charAt(index++))=='a' || c=='A') && 1489 ((c=msg.charAt(index))=='l' || c=='L'); 1490 } 1491 1492 private boolean isSelect(int index) { 1493 char c; 1494 return 1495 ((c=msg.charAt(index++))=='s' || c=='S') && 1496 ((c=msg.charAt(index++))=='e' || c=='E') && 1497 ((c=msg.charAt(index++))=='l' || c=='L') && 1498 ((c=msg.charAt(index++))=='e' || c=='E') && 1499 ((c=msg.charAt(index++))=='c' || c=='C') && 1500 ((c=msg.charAt(index))=='t' || c=='T'); 1501 } 1502 1503 private boolean isOrdinal(int index) { 1504 char c; 1505 return 1506 ((c=msg.charAt(index++))=='o' || c=='O') && 1507 ((c=msg.charAt(index++))=='r' || c=='R') && 1508 ((c=msg.charAt(index++))=='d' || c=='D') && 1509 ((c=msg.charAt(index++))=='i' || c=='I') && 1510 ((c=msg.charAt(index++))=='n' || c=='N') && 1511 ((c=msg.charAt(index++))=='a' || c=='A') && 1512 ((c=msg.charAt(index))=='l' || c=='L'); 1513 } 1514 1515 /** 1516 * @return true if we are inside a MessageFormat (sub-)pattern, 1517 * as opposed to inside a top-level choice/plural/select pattern. 1518 */ 1519 private boolean inMessageFormatPattern(int nestingLevel) { 1520 return nestingLevel>0 || parts.get(0).type==Part.Type.MSG_START; 1521 } 1522 1523 /** 1524 * @return true if we are in a MessageFormat sub-pattern 1525 * of a top-level ChoiceFormat pattern. 1526 */ 1527 private boolean inTopLevelChoiceMessage(int nestingLevel, ArgType parentType) { 1528 return 1529 nestingLevel==1 && 1530 parentType==ArgType.CHOICE && 1531 parts.get(0).type!=Part.Type.MSG_START; 1532 } 1533 1534 private void addPart(Part.Type type, int index, int length, int value) { 1535 parts.add(new Part(type, index, length, value)); 1536 } 1537 1538 private void addLimitPart(int start, Part.Type type, int index, int length, int value) { 1539 parts.get(start).limitPartIndex=parts.size(); 1540 addPart(type, index, length, value); 1541 } 1542 1543 private void addArgDoublePart(double numericValue, int start, int length) { 1544 int numericIndex; 1545 if(numericValues==null) { 1546 numericValues=new ArrayList<Double>(); 1547 numericIndex=0; 1548 } else { 1549 numericIndex=numericValues.size(); 1550 if(numericIndex>Part.MAX_VALUE) { 1551 throw new IndexOutOfBoundsException("Too many numeric values"); 1552 } 1553 } 1554 numericValues.add(numericValue); 1555 addPart(Part.Type.ARG_DOUBLE, start, length, numericIndex); 1556 } 1557 1558 private static final int MAX_PREFIX_LENGTH=24; 1559 1560 /** 1561 * Returns a prefix of s.substring(start). Used for Exception messages. 1562 * @param s 1563 * @param start start index in s 1564 * @return s.substring(start) or a prefix of that 1565 */ 1566 private static String prefix(String s, int start) { 1567 StringBuilder prefix=new StringBuilder(MAX_PREFIX_LENGTH+20); 1568 if(start==0) { 1569 prefix.append("\""); 1570 } else { 1571 prefix.append("[at pattern index ").append(start).append("] \""); 1572 } 1573 int substringLength=s.length()-start; 1574 if(substringLength<=MAX_PREFIX_LENGTH) { 1575 prefix.append(start==0 ? s : s.substring(start)); 1576 } else { 1577 int limit=start+MAX_PREFIX_LENGTH-4; 1578 if(Character.isHighSurrogate(s.charAt(limit-1))) { 1579 // remove lead surrogate from the end of the prefix 1580 --limit; 1581 } 1582 prefix.append(s, start, limit).append(" ..."); 1583 } 1584 return prefix.append("\"").toString(); 1585 } 1586 1587 private static String prefix(String s) { 1588 return prefix(s, 0); 1589 } 1590 1591 private String prefix(int start) { 1592 return prefix(msg, start); 1593 } 1594 1595 private String prefix() { 1596 return prefix(msg, 0); 1597 } 1598 1599 private ApostropheMode aposMode; 1600 private String msg; 1601 private ArrayList<Part> parts=new ArrayList<Part>(); 1602 private ArrayList<Double> numericValues; 1603 private boolean hasArgNames; 1604 private boolean hasArgNumbers; 1605 private boolean needsAutoQuoting; 1606 private boolean frozen; 1607 1608 private static final ApostropheMode defaultAposMode= 1609 ApostropheMode.valueOf( 1610 ICUConfig.get("com.ibm.icu.text.MessagePattern.ApostropheMode", "DOUBLE_OPTIONAL")); 1611 1612 private static final ArgType[] argTypes=ArgType.values(); 1613 } 1614