1 /* 2 ******************************************************************************* 3 * Copyright (C) 2011, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * file name: messagepattern.h 7 * encoding: US-ASCII 8 * tab size: 8 (not used) 9 * indentation:4 10 * 11 * created on: 2011mar14 12 * created by: Markus W. Scherer 13 */ 14 15 #ifndef __MESSAGEPATTERN_H__ 16 #define __MESSAGEPATTERN_H__ 17 18 /** 19 * \file 20 * \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns. 21 */ 22 23 #include "unicode/utypes.h" 24 25 #if !UCONFIG_NO_FORMATTING 26 27 #include "unicode/parseerr.h" 28 #include "unicode/unistr.h" 29 30 /** 31 * Mode for when an apostrophe starts quoted literal text for MessageFormat output. 32 * The default is DOUBLE_OPTIONAL unless overridden via uconfig.h 33 * (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE). 34 * <p> 35 * A pair of adjacent apostrophes always results in a single apostrophe in the output, 36 * even when the pair is between two single, text-quoting apostrophes. 37 * <p> 38 * The following table shows examples of desired MessageFormat.format() output 39 * with the pattern strings that yield that output. 40 * <p> 41 * <table> 42 * <tr> 43 * <th>Desired output</th> 44 * <th>DOUBLE_OPTIONAL</th> 45 * <th>DOUBLE_REQUIRED</th> 46 * </tr> 47 * <tr> 48 * <td>I see {many}</td> 49 * <td>I see '{many}'</td> 50 * <td>(same)</td> 51 * </tr> 52 * <tr> 53 * <td>I said {'Wow!'}</td> 54 * <td>I said '{''Wow!''}'</td> 55 * <td>(same)</td> 56 * </tr> 57 * <tr> 58 * <td>I don't know</td> 59 * <td>I don't know OR<br> I don''t know</td> 60 * <td>I don''t know</td> 61 * </tr> 62 * </table> 63 * @draft ICU 4.8 64 * @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE 65 */ 66 enum UMessagePatternApostropheMode { 67 /** 68 * A literal apostrophe is represented by 69 * either a single or a double apostrophe pattern character. 70 * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text 71 * if it immediately precedes a curly brace {}, 72 * or a pipe symbol | if inside a choice format, 73 * or a pound symbol # if inside a plural format. 74 * <p> 75 * This is the default behavior starting with ICU 4.8. 76 * @draft ICU 4.8 77 */ 78 UMSGPAT_APOS_DOUBLE_OPTIONAL, 79 /** 80 * A literal apostrophe must be represented by 81 * a double apostrophe pattern character. 82 * A single apostrophe always starts quoted literal text. 83 * <p> 84 * This is the behavior of ICU 4.6 and earlier, and of the JDK. 85 * @draft ICU 4.8 86 */ 87 UMSGPAT_APOS_DOUBLE_REQUIRED 88 }; 89 /** 90 * @draft ICU 4.8 91 */ 92 typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode; 93 94 /** 95 * MessagePattern::Part type constants. 96 * @draft ICU 4.8 97 */ 98 enum UMessagePatternPartType { 99 /** 100 * Start of a message pattern (main or nested). 101 * The length is 0 for the top-level message 102 * and for a choice argument sub-message, otherwise 1 for the '{'. 103 * The value indicates the nesting level, starting with 0 for the main message. 104 * <p> 105 * There is always a later MSG_LIMIT part. 106 * @draft ICU 4.8 107 */ 108 UMSGPAT_PART_TYPE_MSG_START, 109 /** 110 * End of a message pattern (main or nested). 111 * The length is 0 for the top-level message and 112 * the last sub-message of a choice argument, 113 * otherwise 1 for the '}' or (in a choice argument style) the '|'. 114 * The value indicates the nesting level, starting with 0 for the main message. 115 * @draft ICU 4.8 116 */ 117 UMSGPAT_PART_TYPE_MSG_LIMIT, 118 /** 119 * Indicates a substring of the pattern string which is to be skipped when formatting. 120 * For example, an apostrophe that begins or ends quoted text 121 * would be indicated with such a part. 122 * The value is undefined and currently always 0. 123 * @draft ICU 4.8 124 */ 125 UMSGPAT_PART_TYPE_SKIP_SYNTAX, 126 /** 127 * Indicates that a syntax character needs to be inserted for auto-quoting. 128 * The length is 0. 129 * The value is the character code of the insertion character. (U+0027=APOSTROPHE) 130 * @draft ICU 4.8 131 */ 132 UMSGPAT_PART_TYPE_INSERT_CHAR, 133 /** 134 * Indicates a syntactic (non-escaped) # symbol in a plural variant. 135 * When formatting, replace this part's substring with the 136 * (value-offset) for the plural argument value. 137 * The value is undefined and currently always 0. 138 * @draft ICU 4.8 139 */ 140 UMSGPAT_PART_TYPE_REPLACE_NUMBER, 141 /** 142 * Start of an argument. 143 * The length is 1 for the '{'. 144 * The value is the ordinal value of the ArgType. Use getArgType(). 145 * @draft ICU 4.8 146 */ 147 UMSGPAT_PART_TYPE_ARG_START, 148 /** 149 * End of an argument. 150 * The length is 1 for the '}'. 151 * The value is the ordinal value of the ArgType. Use getArgType(). 152 * <p> 153 * This part is followed by either an ARG_NUMBER or ARG_NAME, 154 * followed by optional argument sub-parts (see UMessagePatternArgType constants) 155 * and finally an ARG_LIMIT part. 156 * @draft ICU 4.8 157 */ 158 UMSGPAT_PART_TYPE_ARG_LIMIT, 159 /** 160 * The argument number, provided by the value. 161 * @draft ICU 4.8 162 */ 163 UMSGPAT_PART_TYPE_ARG_NUMBER, 164 /** 165 * The argument name. 166 * The value is undefined and currently always 0. 167 * @draft ICU 4.8 168 */ 169 UMSGPAT_PART_TYPE_ARG_NAME, 170 /** 171 * The argument type. 172 * The value is undefined and currently always 0. 173 * @draft ICU 4.8 174 */ 175 UMSGPAT_PART_TYPE_ARG_TYPE, 176 /** 177 * The argument style text. 178 * The value is undefined and currently always 0. 179 * @draft ICU 4.8 180 */ 181 UMSGPAT_PART_TYPE_ARG_STYLE, 182 /** 183 * A selector substring in a "complex" argument style. 184 * The value is undefined and currently always 0. 185 * @draft ICU 4.8 186 */ 187 UMSGPAT_PART_TYPE_ARG_SELECTOR, 188 /** 189 * An integer value, for example the offset or an explicit selector value 190 * in a PluralFormat style. 191 * The part value is the integer value. 192 * @draft ICU 4.8 193 */ 194 UMSGPAT_PART_TYPE_ARG_INT, 195 /** 196 * A numeric value, for example the offset or an explicit selector value 197 * in a PluralFormat style. 198 * The part value is an index into an internal array of numeric values; 199 * use getNumericValue(). 200 * @draft ICU 4.8 201 */ 202 UMSGPAT_PART_TYPE_ARG_DOUBLE 203 }; 204 /** 205 * @draft ICU 4.8 206 */ 207 typedef enum UMessagePatternPartType UMessagePatternPartType; 208 209 /** 210 * Argument type constants. 211 * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts. 212 * 213 * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT, 214 * with a nesting level one greater than the surrounding message. 215 * @draft ICU 4.8 216 */ 217 enum UMessagePatternArgType { 218 /** 219 * The argument has no specified type. 220 * @draft ICU 4.8 221 */ 222 UMSGPAT_ARG_TYPE_NONE, 223 /** 224 * The argument has a "simple" type which is provided by the ARG_TYPE part. 225 * An ARG_STYLE part might follow that. 226 * @draft ICU 4.8 227 */ 228 UMSGPAT_ARG_TYPE_SIMPLE, 229 /** 230 * The argument is a ChoiceFormat with one or more 231 * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples. 232 * @draft ICU 4.8 233 */ 234 UMSGPAT_ARG_TYPE_CHOICE, 235 /** 236 * The argument is a PluralFormat with an optional ARG_INT or ARG_DOUBLE offset 237 * (e.g., offset:1) 238 * and one or more (ARG_SELECTOR [explicit-value] message) tuples. 239 * If the selector has an explicit value (e.g., =2), then 240 * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message. 241 * Otherwise the message immediately follows the ARG_SELECTOR. 242 * @draft ICU 4.8 243 */ 244 UMSGPAT_ARG_TYPE_PLURAL, 245 /** 246 * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs. 247 * @draft ICU 4.8 248 */ 249 UMSGPAT_ARG_TYPE_SELECT 250 }; 251 /** 252 * @draft ICU 4.8 253 */ 254 typedef enum UMessagePatternArgType UMessagePatternArgType; 255 256 /** 257 * @draft ICU 4.8 258 */ 259 enum { 260 /** 261 * Return value from MessagePattern.validateArgumentName() for when 262 * the string is a valid "pattern identifier" but not a number. 263 * @draft ICU 4.8 264 */ 265 UMSGPAT_ARG_NAME_NOT_NUMBER=-1, 266 267 /** 268 * Return value from MessagePattern.validateArgumentName() for when 269 * the string is invalid. 270 * It might not be a valid "pattern identifier", 271 * or it have only ASCII digits but there is a leading zero or the number is too large. 272 * @draft ICU 4.8 273 */ 274 UMSGPAT_ARG_NAME_NOT_VALID=-2 275 }; 276 277 /** 278 * Special value that is returned by getNumericValue(Part) when no 279 * numeric value is defined for a part. 280 * @see MessagePattern.getNumericValue() 281 * @draft ICU 4.8 282 */ 283 #define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789)) 284 285 U_NAMESPACE_BEGIN 286 287 class MessagePatternDoubleList; 288 class MessagePatternPartsList; 289 290 /** 291 * Parses and represents ICU MessageFormat patterns. 292 * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat. 293 * Used in the implementations of those classes as well as in tools 294 * for message validation, translation and format conversion. 295 * <p> 296 * The parser handles all syntax relevant for identifying message arguments. 297 * This includes "complex" arguments whose style strings contain 298 * nested MessageFormat pattern substrings. 299 * For "simple" arguments (with no nested MessageFormat pattern substrings), 300 * the argument style is not parsed any further. 301 * <p> 302 * The parser handles named and numbered message arguments and allows both in one message. 303 * <p> 304 * Once a pattern has been parsed successfully, iterate through the parsed data 305 * with countParts(), getPart() and related methods. 306 * <p> 307 * The data logically represents a parse tree, but is stored and accessed 308 * as a list of "parts" for fast and simple parsing and to minimize object allocations. 309 * Arguments and nested messages are best handled via recursion. 310 * For every _START "part", MessagePattern.getLimitPartIndex() efficiently returns 311 * the index of the corresponding _LIMIT "part". 312 * <p> 313 * List of "parts": 314 * <pre> 315 * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT 316 * argument = noneArg | simpleArg | complexArg 317 * complexArg = choiceArg | pluralArg | selectArg 318 * 319 * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE 320 * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE 321 * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE 322 * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL 323 * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT 324 * 325 * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+ 326 * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+ 327 * selectStyle = (ARG_SELECTOR message)+ 328 * </pre> 329 * <ul> 330 * <li>Literal output text is not represented directly by "parts" but accessed 331 * between parts of a message, from one part's getLimit() to the next part's getIndex(). 332 * <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE. 333 * <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or 334 * the less-than-or-equal-to sign (U+2264). 335 * <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value. 336 * The optional numeric Part between each (ARG_SELECTOR, message) pair 337 * is the value of an explicit-number selector like "=2", 338 * otherwise the selector is a non-numeric identifier. 339 * <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle. 340 * </ul> 341 * <p> 342 * This class is not intended for public subclassing. 343 * 344 * @draft ICU 4.8 345 */ 346 class U_COMMON_API MessagePattern : public UObject { 347 public: 348 /** 349 * Constructs an empty MessagePattern with default UMessagePatternApostropheMode. 350 * @param errorCode Standard ICU error code. Its input value must 351 * pass the U_SUCCESS() test, or else the function returns 352 * immediately. Check for U_FAILURE() on output or use with 353 * function chaining. (See User Guide for details.) 354 * @draft ICU 4.8 355 */ 356 MessagePattern(UErrorCode &errorCode); 357 358 /** 359 * Constructs an empty MessagePattern. 360 * @param mode Explicit UMessagePatternApostropheMode. 361 * @param errorCode Standard ICU error code. Its input value must 362 * pass the U_SUCCESS() test, or else the function returns 363 * immediately. Check for U_FAILURE() on output or use with 364 * function chaining. (See User Guide for details.) 365 * @draft ICU 4.8 366 */ 367 MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode); 368 369 /** 370 * Constructs a MessagePattern with default UMessagePatternApostropheMode and 371 * parses the MessageFormat pattern string. 372 * @param pattern a MessageFormat pattern string 373 * @param parseError Struct to receive information on the position 374 * of an error within the pattern. 375 * Can be NULL. 376 * @param errorCode Standard ICU error code. Its input value must 377 * pass the U_SUCCESS() test, or else the function returns 378 * immediately. Check for U_FAILURE() on output or use with 379 * function chaining. (See User Guide for details.) 380 * TODO: turn @throws into UErrorCode specifics? 381 * @throws IllegalArgumentException for syntax errors in the pattern string 382 * @throws IndexOutOfBoundsException if certain limits are exceeded 383 * (e.g., argument number too high, argument name too long, etc.) 384 * @throws NumberFormatException if a number could not be parsed 385 * @draft ICU 4.8 386 */ 387 MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode); 388 389 /** 390 * Copy constructor. 391 * @param other Object to copy. 392 * @draft ICU 4.8 393 */ 394 MessagePattern(const MessagePattern &other); 395 396 /** 397 * Assignment operator. 398 * @param other Object to copy. 399 * @return *this=other 400 * @draft ICU 4.8 401 */ 402 MessagePattern &operator=(const MessagePattern &other); 403 404 /** 405 * Destructor. 406 * @draft ICU 4.8 407 */ 408 virtual ~MessagePattern(); 409 410 /** 411 * Parses a MessageFormat pattern string. 412 * @param pattern a MessageFormat pattern string 413 * @param parseError Struct to receive information on the position 414 * of an error within the pattern. 415 * Can be NULL. 416 * @param errorCode Standard ICU error code. Its input value must 417 * pass the U_SUCCESS() test, or else the function returns 418 * immediately. Check for U_FAILURE() on output or use with 419 * function chaining. (See User Guide for details.) 420 * @return *this 421 * @throws IllegalArgumentException for syntax errors in the pattern string 422 * @throws IndexOutOfBoundsException if certain limits are exceeded 423 * (e.g., argument number too high, argument name too long, etc.) 424 * @throws NumberFormatException if a number could not be parsed 425 * @draft ICU 4.8 426 */ 427 MessagePattern &parse(const UnicodeString &pattern, 428 UParseError *parseError, UErrorCode &errorCode); 429 430 /** 431 * Parses a ChoiceFormat pattern string. 432 * @param pattern a ChoiceFormat pattern string 433 * @param parseError Struct to receive information on the position 434 * of an error within the pattern. 435 * Can be NULL. 436 * @param errorCode Standard ICU error code. Its input value must 437 * pass the U_SUCCESS() test, or else the function returns 438 * immediately. Check for U_FAILURE() on output or use with 439 * function chaining. (See User Guide for details.) 440 * @return *this 441 * @throws IllegalArgumentException for syntax errors in the pattern string 442 * @throws IndexOutOfBoundsException if certain limits are exceeded 443 * (e.g., argument number too high, argument name too long, etc.) 444 * @throws NumberFormatException if a number could not be parsed 445 * @draft ICU 4.8 446 */ 447 MessagePattern &parseChoiceStyle(const UnicodeString &pattern, 448 UParseError *parseError, UErrorCode &errorCode); 449 450 /** 451 * Parses a PluralFormat pattern string. 452 * @param pattern a PluralFormat pattern string 453 * @param parseError Struct to receive information on the position 454 * of an error within the pattern. 455 * Can be NULL. 456 * @param errorCode Standard ICU error code. Its input value must 457 * pass the U_SUCCESS() test, or else the function returns 458 * immediately. Check for U_FAILURE() on output or use with 459 * function chaining. (See User Guide for details.) 460 * @return *this 461 * @throws IllegalArgumentException for syntax errors in the pattern string 462 * @throws IndexOutOfBoundsException if certain limits are exceeded 463 * (e.g., argument number too high, argument name too long, etc.) 464 * @throws NumberFormatException if a number could not be parsed 465 * @draft ICU 4.8 466 */ 467 MessagePattern &parsePluralStyle(const UnicodeString &pattern, 468 UParseError *parseError, UErrorCode &errorCode); 469 470 /** 471 * Parses a SelectFormat pattern string. 472 * @param pattern a SelectFormat pattern string 473 * @param parseError Struct to receive information on the position 474 * of an error within the pattern. 475 * Can be NULL. 476 * @param errorCode Standard ICU error code. Its input value must 477 * pass the U_SUCCESS() test, or else the function returns 478 * immediately. Check for U_FAILURE() on output or use with 479 * function chaining. (See User Guide for details.) 480 * @return *this 481 * @throws IllegalArgumentException for syntax errors in the pattern string 482 * @throws IndexOutOfBoundsException if certain limits are exceeded 483 * (e.g., argument number too high, argument name too long, etc.) 484 * @throws NumberFormatException if a number could not be parsed 485 * @draft ICU 4.8 486 */ 487 MessagePattern &parseSelectStyle(const UnicodeString &pattern, 488 UParseError *parseError, UErrorCode &errorCode); 489 490 /** 491 * Clears this MessagePattern. 492 * countParts() will return 0. 493 * @draft ICU 4.8 494 */ 495 void clear(); 496 497 /** 498 * Clears this MessagePattern and sets the UMessagePatternApostropheMode. 499 * countParts() will return 0. 500 * @param mode The new UMessagePatternApostropheMode. 501 * @draft ICU 4.8 502 */ clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode)503 void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) { 504 clear(); 505 aposMode=mode; 506 } 507 508 /** 509 * @param other another object to compare with. 510 * @return TRUE if this object is equivalent to the other one. 511 * @draft ICU 4.8 512 */ 513 UBool operator==(const MessagePattern &other) const; 514 515 /** 516 * @param other another object to compare with. 517 * @return FALSE if this object is equivalent to the other one. 518 * @draft ICU 4.8 519 */ 520 inline UBool operator!=(const MessagePattern &other) const { 521 return !operator==(other); 522 } 523 524 /** 525 * @return A hash code for this object. 526 * @draft ICU 4.8 527 */ 528 int32_t hashCode() const; 529 530 /** 531 * @return this instance's UMessagePatternApostropheMode. 532 * @draft ICU 4.8 533 */ getApostropheMode()534 UMessagePatternApostropheMode getApostropheMode() const { 535 return aposMode; 536 } 537 538 // Java has package-private jdkAposMode() here. 539 // In C++, this is declared in the MessageImpl class. 540 541 /** 542 * @return the parsed pattern string (null if none was parsed). 543 * @draft ICU 4.8 544 */ getPatternString()545 const UnicodeString &getPatternString() const { 546 return msg; 547 } 548 549 /** 550 * Does the parsed pattern have named arguments like {first_name}? 551 * @return TRUE if the parsed pattern has at least one named argument. 552 * @draft ICU 4.8 553 */ hasNamedArguments()554 UBool hasNamedArguments() const { 555 return hasArgNames; 556 } 557 558 /** 559 * Does the parsed pattern have numbered arguments like {2}? 560 * @return TRUE if the parsed pattern has at least one numbered argument. 561 * @draft ICU 4.8 562 */ hasNumberedArguments()563 UBool hasNumberedArguments() const { 564 return hasArgNumbers; 565 } 566 567 /** 568 * Validates and parses an argument name or argument number string. 569 * An argument name must be a "pattern identifier", that is, it must contain 570 * no Unicode Pattern_Syntax or Pattern_White_Space characters. 571 * If it only contains ASCII digits, then it must be a small integer with no leading zero. 572 * @param name Input string. 573 * @return >=0 if the name is a valid number, 574 * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits, 575 * ARG_NAME_NOT_VALID (-2) if it is neither. 576 * @draft ICU 4.8 577 */ 578 static int32_t validateArgumentName(const UnicodeString &name); 579 580 /** 581 * Returns a version of the parsed pattern string where each ASCII apostrophe 582 * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax. 583 * <p> 584 * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}." 585 * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}." 586 * @return the deep-auto-quoted version of the parsed pattern string. 587 * @see MessageFormat.autoQuoteApostrophe() 588 * @draft ICU 4.8 589 */ 590 UnicodeString autoQuoteApostropheDeep() const; 591 592 class Part; 593 594 /** 595 * Returns the number of "parts" created by parsing the pattern string. 596 * Returns 0 if no pattern has been parsed or clear() was called. 597 * @return the number of pattern parts. 598 * @draft ICU 4.8 599 */ countParts()600 int32_t countParts() const { 601 return partsLength; 602 } 603 604 /** 605 * Gets the i-th pattern "part". 606 * @param i The index of the Part data. (0..countParts()-1) 607 * @return the i-th pattern "part". 608 * @draft ICU 4.8 609 */ getPart(int32_t i)610 const Part &getPart(int32_t i) const { 611 return parts[i]; 612 } 613 614 /** 615 * Returns the UMessagePatternPartType of the i-th pattern "part". 616 * Convenience method for getPart(i).getType(). 617 * @param i The index of the Part data. (0..countParts()-1) 618 * @return The UMessagePatternPartType of the i-th Part. 619 * @draft ICU 4.8 620 */ getPartType(int32_t i)621 UMessagePatternPartType getPartType(int32_t i) const { 622 return getPart(i).type; 623 } 624 625 /** 626 * Returns the pattern index of the specified pattern "part". 627 * Convenience method for getPart(partIndex).getIndex(). 628 * @param partIndex The index of the Part data. (0..countParts()-1) 629 * @return The pattern index of this Part. 630 * @draft ICU 4.8 631 */ getPatternIndex(int32_t partIndex)632 int32_t getPatternIndex(int32_t partIndex) const { 633 return getPart(partIndex).index; 634 } 635 636 /** 637 * Returns the substring of the pattern string indicated by the Part. 638 * Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()). 639 * @param part a part of this MessagePattern. 640 * @return the substring associated with part. 641 * @draft ICU 4.8 642 */ getSubstring(const Part & part)643 UnicodeString getSubstring(const Part &part) const { 644 return msg.tempSubString(part.index, part.length); 645 } 646 647 /** 648 * Compares the part's substring with the input string s. 649 * @param part a part of this MessagePattern. 650 * @param s a string. 651 * @return TRUE if getSubstring(part).equals(s). 652 * @draft ICU 4.8 653 */ partSubstringMatches(const Part & part,const UnicodeString & s)654 UBool partSubstringMatches(const Part &part, const UnicodeString &s) const { 655 return 0==msg.compare(part.index, part.length, s); 656 } 657 658 /** 659 * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE. 660 * @param part a part of this MessagePattern. 661 * @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part. 662 * @draft ICU 4.8 663 */ 664 double getNumericValue(const Part &part) const; 665 666 /** 667 * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified. 668 * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1) 669 * @return the "offset:" value. 670 * @draft ICU 4.8 671 */ 672 double getPluralOffset(int32_t pluralStart) const; 673 674 /** 675 * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start. 676 * @param start The index of some Part data (0..countParts()-1); 677 * this Part should be of Type ARG_START or MSG_START. 678 * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level, 679 * or start itself if getPartType(msgStart)!=ARG|MSG_START. 680 * @draft ICU 4.8 681 */ getLimitPartIndex(int32_t start)682 int32_t getLimitPartIndex(int32_t start) const { 683 int32_t limit=getPart(start).limitPartIndex; 684 if(limit<start) { 685 return start; 686 } 687 return limit; 688 } 689 690 /** 691 * A message pattern "part", representing a pattern parsing event. 692 * There is a part for the start and end of a message or argument, 693 * for quoting and escaping of and with ASCII apostrophes, 694 * and for syntax elements of "complex" arguments. 695 * @draft ICU 4.8 696 */ 697 class Part : public UMemory { 698 public: 699 /** 700 * Default constructor, do not use. 701 * @internal 702 */ Part()703 Part() {} 704 705 /** 706 * Returns the type of this part. 707 * @return the part type. 708 * @draft ICU 4.8 709 */ getType()710 UMessagePatternPartType getType() const { 711 return type; 712 } 713 714 /** 715 * Returns the pattern string index associated with this Part. 716 * @return this part's pattern string index. 717 * @draft ICU 4.8 718 */ getIndex()719 int32_t getIndex() const { 720 return index; 721 } 722 723 /** 724 * Returns the length of the pattern substring associated with this Part. 725 * This is 0 for some parts. 726 * @return this part's pattern string index. 727 * @draft ICU 4.8 728 */ getLength()729 int32_t getLength() const { 730 return length; 731 } 732 733 /** 734 * Returns the pattern string limit (exclusive-end) index associated with this Part. 735 * Convenience method for getIndex()+getLength(). 736 * @return this part's pattern string limit index, same as getIndex()+getLength(). 737 * @draft ICU 4.8 738 */ getLimit()739 int32_t getLimit() const { 740 return index+length; 741 } 742 743 /** 744 * Returns a value associated with this part. 745 * See the documentation of each part type for details. 746 * @return the part value. 747 * @draft ICU 4.8 748 */ getValue()749 int32_t getValue() const { 750 return value; 751 } 752 753 /** 754 * Returns the argument type if this part is of type ARG_START or ARG_LIMIT, 755 * otherwise UMSGPAT_ARG_TYPE_NONE. 756 * @return the argument type for this part. 757 * @draft ICU 4.8 758 */ getArgType()759 UMessagePatternArgType getArgType() const { 760 UMessagePatternPartType type=getType(); 761 if(type==UMSGPAT_PART_TYPE_ARG_START || type==UMSGPAT_PART_TYPE_ARG_LIMIT) { 762 return (UMessagePatternArgType)value; 763 } else { 764 return UMSGPAT_ARG_TYPE_NONE; 765 } 766 } 767 768 /** 769 * Indicates whether the Part type has a numeric value. 770 * If so, then that numeric value can be retrieved via MessagePattern.getNumericValue(). 771 * @param type The Part type to be tested. 772 * @return TRUE if the Part type has a numeric value. 773 * @draft ICU 4.8 774 */ hasNumericValue(UMessagePatternPartType type)775 static UBool hasNumericValue(UMessagePatternPartType type) { 776 return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE; 777 } 778 779 /** 780 * @param other another object to compare with. 781 * @return TRUE if this object is equivalent to the other one. 782 * @draft ICU 4.8 783 */ 784 UBool operator==(const Part &other) const; 785 786 /** 787 * @param other another object to compare with. 788 * @return FALSE if this object is equivalent to the other one. 789 * @draft ICU 4.8 790 */ 791 inline UBool operator!=(const Part &other) const { 792 return !operator==(other); 793 } 794 795 /** 796 * @return A hash code for this object. 797 * @draft ICU 4.8 798 */ hashCode()799 int32_t hashCode() const { 800 return ((type*37+index)*37+length)*37+value; 801 } 802 803 private: 804 friend class MessagePattern; 805 806 static const int32_t MAX_LENGTH=0xffff; 807 static const int32_t MAX_VALUE=0x7fff; 808 809 // Some fields are not final because they are modified during pattern parsing. 810 // After pattern parsing, the parts are effectively immutable. 811 UMessagePatternPartType type; 812 int32_t index; 813 uint16_t length; 814 int16_t value; 815 int32_t limitPartIndex; 816 }; 817 818 private: 819 void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode); 820 821 void postParse(); 822 823 int32_t parseMessage(int32_t index, int32_t msgStartLength, 824 int32_t nestingLevel, UMessagePatternArgType parentType, 825 UParseError *parseError, UErrorCode &errorCode); 826 827 int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel, 828 UParseError *parseError, UErrorCode &errorCode); 829 830 int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode); 831 832 int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel, 833 UParseError *parseError, UErrorCode &errorCode); 834 835 int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel, 836 UParseError *parseError, UErrorCode &errorCode); 837 838 /** 839 * Validates and parses an argument name or argument number string. 840 * This internal method assumes that the input substring is a "pattern identifier". 841 * @return >=0 if the name is a valid number, 842 * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits, 843 * ARG_NAME_NOT_VALID (-2) if it is neither. 844 * @see #validateArgumentName(String) 845 */ 846 static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit); 847 parseArgNumber(int32_t start,int32_t limit)848 int32_t parseArgNumber(int32_t start, int32_t limit) { 849 return parseArgNumber(msg, start, limit); 850 } 851 852 /** 853 * Parses a number from the specified message substring. 854 * @param start start index into the message string 855 * @param limit limit index into the message string, must be start<limit 856 * @param allowInfinity TRUE if U+221E is allowed (for ChoiceFormat) 857 * @param parseError 858 * @param errorCode 859 */ 860 void parseDouble(int32_t start, int32_t limit, UBool allowInfinity, 861 UParseError *parseError, UErrorCode &errorCode); 862 863 // Java has package-private appendReducedApostrophes() here. 864 // In C++, this is declared in the MessageImpl class. 865 866 int32_t skipWhiteSpace(int32_t index); 867 868 int32_t skipIdentifier(int32_t index); 869 870 /** 871 * Skips a sequence of characters that could occur in a double value. 872 * Does not fully parse or validate the value. 873 */ 874 int32_t skipDouble(int32_t index); 875 876 static UBool isArgTypeChar(UChar32 c); 877 878 UBool isChoice(int32_t index); 879 880 UBool isPlural(int32_t index); 881 882 UBool isSelect(int32_t index); 883 884 /** 885 * @return TRUE if we are inside a MessageFormat (sub-)pattern, 886 * as opposed to inside a top-level choice/plural/select pattern. 887 */ 888 UBool inMessageFormatPattern(int32_t nestingLevel); 889 890 /** 891 * @return TRUE if we are in a MessageFormat sub-pattern 892 * of a top-level ChoiceFormat pattern. 893 */ 894 UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType); 895 896 void addPart(UMessagePatternPartType type, int32_t index, int32_t length, 897 int32_t value, UErrorCode &errorCode); 898 899 void addLimitPart(int32_t start, 900 UMessagePatternPartType type, int32_t index, int32_t length, 901 int32_t value, UErrorCode &errorCode); 902 903 void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode); 904 905 void setParseError(UParseError *parseError, int32_t index); 906 907 // No ICU "poor man's RTTI" for this class nor its subclasses. 908 virtual UClassID getDynamicClassID() const; 909 910 UBool init(UErrorCode &errorCode); 911 UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode); 912 913 UMessagePatternApostropheMode aposMode; 914 UnicodeString msg; 915 // ArrayList<Part> parts=new ArrayList<Part>(); 916 MessagePatternPartsList *partsList; 917 Part *parts; 918 int32_t partsLength; 919 // ArrayList<Double> numericValues; 920 MessagePatternDoubleList *numericValuesList; 921 double *numericValues; 922 int32_t numericValuesLength; 923 UBool hasArgNames; 924 UBool hasArgNumbers; 925 UBool needsAutoQuoting; 926 }; 927 928 U_NAMESPACE_END 929 930 #endif // !UCONFIG_NO_FORMATTING 931 932 #endif // __MESSAGEPATTERN_H__ 933