1 /* 2 ******************************************************************************* 3 * Copyright (C) 2011-2013, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * file name: messagepattern.h 7 * encoding: US-ASCII 8 * tab size: 8 (not used) 9 * indentation:4 10 * 11 * created on: 2011mar14 12 * created by: Markus W. Scherer 13 */ 14 15 #ifndef __MESSAGEPATTERN_H__ 16 #define __MESSAGEPATTERN_H__ 17 18 /** 19 * \file 20 * \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns. 21 */ 22 23 #include "unicode/utypes.h" 24 25 #if !UCONFIG_NO_FORMATTING 26 27 #include "unicode/parseerr.h" 28 #include "unicode/unistr.h" 29 30 /** 31 * Mode for when an apostrophe starts quoted literal text for MessageFormat output. 32 * The default is DOUBLE_OPTIONAL unless overridden via uconfig.h 33 * (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE). 34 * <p> 35 * A pair of adjacent apostrophes always results in a single apostrophe in the output, 36 * even when the pair is between two single, text-quoting apostrophes. 37 * <p> 38 * The following table shows examples of desired MessageFormat.format() output 39 * with the pattern strings that yield that output. 40 * <p> 41 * <table> 42 * <tr> 43 * <th>Desired output</th> 44 * <th>DOUBLE_OPTIONAL</th> 45 * <th>DOUBLE_REQUIRED</th> 46 * </tr> 47 * <tr> 48 * <td>I see {many}</td> 49 * <td>I see '{many}'</td> 50 * <td>(same)</td> 51 * </tr> 52 * <tr> 53 * <td>I said {'Wow!'}</td> 54 * <td>I said '{''Wow!''}'</td> 55 * <td>(same)</td> 56 * </tr> 57 * <tr> 58 * <td>I don't know</td> 59 * <td>I don't know OR<br> I don''t know</td> 60 * <td>I don''t know</td> 61 * </tr> 62 * </table> 63 * @stable ICU 4.8 64 * @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE 65 */ 66 enum UMessagePatternApostropheMode { 67 /** 68 * A literal apostrophe is represented by 69 * either a single or a double apostrophe pattern character. 70 * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text 71 * if it immediately precedes a curly brace {}, 72 * or a pipe symbol | if inside a choice format, 73 * or a pound symbol # if inside a plural format. 74 * <p> 75 * This is the default behavior starting with ICU 4.8. 76 * @stable ICU 4.8 77 */ 78 UMSGPAT_APOS_DOUBLE_OPTIONAL, 79 /** 80 * A literal apostrophe must be represented by 81 * a double apostrophe pattern character. 82 * A single apostrophe always starts quoted literal text. 83 * <p> 84 * This is the behavior of ICU 4.6 and earlier, and of the JDK. 85 * @stable ICU 4.8 86 */ 87 UMSGPAT_APOS_DOUBLE_REQUIRED 88 }; 89 /** 90 * @stable ICU 4.8 91 */ 92 typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode; 93 94 /** 95 * MessagePattern::Part type constants. 96 * @stable ICU 4.8 97 */ 98 enum UMessagePatternPartType { 99 /** 100 * Start of a message pattern (main or nested). 101 * The length is 0 for the top-level message 102 * and for a choice argument sub-message, otherwise 1 for the '{'. 103 * The value indicates the nesting level, starting with 0 for the main message. 104 * <p> 105 * There is always a later MSG_LIMIT part. 106 * @stable ICU 4.8 107 */ 108 UMSGPAT_PART_TYPE_MSG_START, 109 /** 110 * End of a message pattern (main or nested). 111 * The length is 0 for the top-level message and 112 * the last sub-message of a choice argument, 113 * otherwise 1 for the '}' or (in a choice argument style) the '|'. 114 * The value indicates the nesting level, starting with 0 for the main message. 115 * @stable ICU 4.8 116 */ 117 UMSGPAT_PART_TYPE_MSG_LIMIT, 118 /** 119 * Indicates a substring of the pattern string which is to be skipped when formatting. 120 * For example, an apostrophe that begins or ends quoted text 121 * would be indicated with such a part. 122 * The value is undefined and currently always 0. 123 * @stable ICU 4.8 124 */ 125 UMSGPAT_PART_TYPE_SKIP_SYNTAX, 126 /** 127 * Indicates that a syntax character needs to be inserted for auto-quoting. 128 * The length is 0. 129 * The value is the character code of the insertion character. (U+0027=APOSTROPHE) 130 * @stable ICU 4.8 131 */ 132 UMSGPAT_PART_TYPE_INSERT_CHAR, 133 /** 134 * Indicates a syntactic (non-escaped) # symbol in a plural variant. 135 * When formatting, replace this part's substring with the 136 * (value-offset) for the plural argument value. 137 * The value is undefined and currently always 0. 138 * @stable ICU 4.8 139 */ 140 UMSGPAT_PART_TYPE_REPLACE_NUMBER, 141 /** 142 * Start of an argument. 143 * The length is 1 for the '{'. 144 * The value is the ordinal value of the ArgType. Use getArgType(). 145 * <p> 146 * This part is followed by either an ARG_NUMBER or ARG_NAME, 147 * followed by optional argument sub-parts (see UMessagePatternArgType constants) 148 * and finally an ARG_LIMIT part. 149 * @stable ICU 4.8 150 */ 151 UMSGPAT_PART_TYPE_ARG_START, 152 /** 153 * End of an argument. 154 * The length is 1 for the '}'. 155 * The value is the ordinal value of the ArgType. Use getArgType(). 156 * @stable ICU 4.8 157 */ 158 UMSGPAT_PART_TYPE_ARG_LIMIT, 159 /** 160 * The argument number, provided by the value. 161 * @stable ICU 4.8 162 */ 163 UMSGPAT_PART_TYPE_ARG_NUMBER, 164 /** 165 * The argument name. 166 * The value is undefined and currently always 0. 167 * @stable ICU 4.8 168 */ 169 UMSGPAT_PART_TYPE_ARG_NAME, 170 /** 171 * The argument type. 172 * The value is undefined and currently always 0. 173 * @stable ICU 4.8 174 */ 175 UMSGPAT_PART_TYPE_ARG_TYPE, 176 /** 177 * The argument style text. 178 * The value is undefined and currently always 0. 179 * @stable ICU 4.8 180 */ 181 UMSGPAT_PART_TYPE_ARG_STYLE, 182 /** 183 * A selector substring in a "complex" argument style. 184 * The value is undefined and currently always 0. 185 * @stable ICU 4.8 186 */ 187 UMSGPAT_PART_TYPE_ARG_SELECTOR, 188 /** 189 * An integer value, for example the offset or an explicit selector value 190 * in a PluralFormat style. 191 * The part value is the integer value. 192 * @stable ICU 4.8 193 */ 194 UMSGPAT_PART_TYPE_ARG_INT, 195 /** 196 * A numeric value, for example the offset or an explicit selector value 197 * in a PluralFormat style. 198 * The part value is an index into an internal array of numeric values; 199 * use getNumericValue(). 200 * @stable ICU 4.8 201 */ 202 UMSGPAT_PART_TYPE_ARG_DOUBLE 203 }; 204 /** 205 * @stable ICU 4.8 206 */ 207 typedef enum UMessagePatternPartType UMessagePatternPartType; 208 209 /** 210 * Argument type constants. 211 * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts. 212 * 213 * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT, 214 * with a nesting level one greater than the surrounding message. 215 * @stable ICU 4.8 216 */ 217 enum UMessagePatternArgType { 218 /** 219 * The argument has no specified type. 220 * @stable ICU 4.8 221 */ 222 UMSGPAT_ARG_TYPE_NONE, 223 /** 224 * The argument has a "simple" type which is provided by the ARG_TYPE part. 225 * An ARG_STYLE part might follow that. 226 * @stable ICU 4.8 227 */ 228 UMSGPAT_ARG_TYPE_SIMPLE, 229 /** 230 * The argument is a ChoiceFormat with one or more 231 * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples. 232 * @stable ICU 4.8 233 */ 234 UMSGPAT_ARG_TYPE_CHOICE, 235 /** 236 * The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset 237 * (e.g., offset:1) 238 * and one or more (ARG_SELECTOR [explicit-value] message) tuples. 239 * If the selector has an explicit value (e.g., =2), then 240 * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message. 241 * Otherwise the message immediately follows the ARG_SELECTOR. 242 * @stable ICU 4.8 243 */ 244 UMSGPAT_ARG_TYPE_PLURAL, 245 /** 246 * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs. 247 * @stable ICU 4.8 248 */ 249 UMSGPAT_ARG_TYPE_SELECT, 250 /** 251 * The argument is an ordinal-number PluralFormat 252 * with the same style parts sequence and semantics as UMSGPAT_ARG_TYPE_PLURAL. 253 * @stable ICU 50 254 */ 255 UMSGPAT_ARG_TYPE_SELECTORDINAL 256 }; 257 /** 258 * @stable ICU 4.8 259 */ 260 typedef enum UMessagePatternArgType UMessagePatternArgType; 261 262 /** 263 * \def UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE 264 * Returns TRUE if the argument type has a plural style part sequence and semantics, 265 * for example UMSGPAT_ARG_TYPE_PLURAL and UMSGPAT_ARG_TYPE_SELECTORDINAL. 266 * @stable ICU 50 267 */ 268 #define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \ 269 ((argType)==UMSGPAT_ARG_TYPE_PLURAL || (argType)==UMSGPAT_ARG_TYPE_SELECTORDINAL) 270 271 enum { 272 /** 273 * Return value from MessagePattern.validateArgumentName() for when 274 * the string is a valid "pattern identifier" but not a number. 275 * @stable ICU 4.8 276 */ 277 UMSGPAT_ARG_NAME_NOT_NUMBER=-1, 278 279 /** 280 * Return value from MessagePattern.validateArgumentName() for when 281 * the string is invalid. 282 * It might not be a valid "pattern identifier", 283 * or it have only ASCII digits but there is a leading zero or the number is too large. 284 * @stable ICU 4.8 285 */ 286 UMSGPAT_ARG_NAME_NOT_VALID=-2 287 }; 288 289 /** 290 * Special value that is returned by getNumericValue(Part) when no 291 * numeric value is defined for a part. 292 * @see MessagePattern.getNumericValue() 293 * @stable ICU 4.8 294 */ 295 #define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789)) 296 297 U_NAMESPACE_BEGIN 298 299 class MessagePatternDoubleList; 300 class MessagePatternPartsList; 301 302 /** 303 * Parses and represents ICU MessageFormat patterns. 304 * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat. 305 * Used in the implementations of those classes as well as in tools 306 * for message validation, translation and format conversion. 307 * <p> 308 * The parser handles all syntax relevant for identifying message arguments. 309 * This includes "complex" arguments whose style strings contain 310 * nested MessageFormat pattern substrings. 311 * For "simple" arguments (with no nested MessageFormat pattern substrings), 312 * the argument style is not parsed any further. 313 * <p> 314 * The parser handles named and numbered message arguments and allows both in one message. 315 * <p> 316 * Once a pattern has been parsed successfully, iterate through the parsed data 317 * with countParts(), getPart() and related methods. 318 * <p> 319 * The data logically represents a parse tree, but is stored and accessed 320 * as a list of "parts" for fast and simple parsing and to minimize object allocations. 321 * Arguments and nested messages are best handled via recursion. 322 * For every _START "part", MessagePattern.getLimitPartIndex() efficiently returns 323 * the index of the corresponding _LIMIT "part". 324 * <p> 325 * List of "parts": 326 * <pre> 327 * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT 328 * argument = noneArg | simpleArg | complexArg 329 * complexArg = choiceArg | pluralArg | selectArg 330 * 331 * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE 332 * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE 333 * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE 334 * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL 335 * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT 336 * 337 * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+ 338 * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+ 339 * selectStyle = (ARG_SELECTOR message)+ 340 * </pre> 341 * <ul> 342 * <li>Literal output text is not represented directly by "parts" but accessed 343 * between parts of a message, from one part's getLimit() to the next part's getIndex(). 344 * <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE. 345 * <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or 346 * the less-than-or-equal-to sign (U+2264). 347 * <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value. 348 * The optional numeric Part between each (ARG_SELECTOR, message) pair 349 * is the value of an explicit-number selector like "=2", 350 * otherwise the selector is a non-numeric identifier. 351 * <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle. 352 * </ul> 353 * <p> 354 * This class is not intended for public subclassing. 355 * 356 * @stable ICU 4.8 357 */ 358 class U_COMMON_API MessagePattern : public UObject { 359 public: 360 /** 361 * Constructs an empty MessagePattern with default UMessagePatternApostropheMode. 362 * @param errorCode Standard ICU error code. Its input value must 363 * pass the U_SUCCESS() test, or else the function returns 364 * immediately. Check for U_FAILURE() on output or use with 365 * function chaining. (See User Guide for details.) 366 * @stable ICU 4.8 367 */ 368 MessagePattern(UErrorCode &errorCode); 369 370 /** 371 * Constructs an empty MessagePattern. 372 * @param mode Explicit UMessagePatternApostropheMode. 373 * @param errorCode Standard ICU error code. Its input value must 374 * pass the U_SUCCESS() test, or else the function returns 375 * immediately. Check for U_FAILURE() on output or use with 376 * function chaining. (See User Guide for details.) 377 * @stable ICU 4.8 378 */ 379 MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode); 380 381 /** 382 * Constructs a MessagePattern with default UMessagePatternApostropheMode and 383 * parses the MessageFormat pattern string. 384 * @param pattern a MessageFormat pattern string 385 * @param parseError Struct to receive information on the position 386 * of an error within the pattern. 387 * Can be NULL. 388 * @param errorCode Standard ICU error code. Its input value must 389 * pass the U_SUCCESS() test, or else the function returns 390 * immediately. Check for U_FAILURE() on output or use with 391 * function chaining. (See User Guide for details.) 392 * TODO: turn @throws into UErrorCode specifics? 393 * @throws IllegalArgumentException for syntax errors in the pattern string 394 * @throws IndexOutOfBoundsException if certain limits are exceeded 395 * (e.g., argument number too high, argument name too long, etc.) 396 * @throws NumberFormatException if a number could not be parsed 397 * @stable ICU 4.8 398 */ 399 MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode); 400 401 /** 402 * Copy constructor. 403 * @param other Object to copy. 404 * @stable ICU 4.8 405 */ 406 MessagePattern(const MessagePattern &other); 407 408 /** 409 * Assignment operator. 410 * @param other Object to copy. 411 * @return *this=other 412 * @stable ICU 4.8 413 */ 414 MessagePattern &operator=(const MessagePattern &other); 415 416 /** 417 * Destructor. 418 * @stable ICU 4.8 419 */ 420 virtual ~MessagePattern(); 421 422 /** 423 * Parses a MessageFormat pattern string. 424 * @param pattern a MessageFormat pattern string 425 * @param parseError Struct to receive information on the position 426 * of an error within the pattern. 427 * Can be NULL. 428 * @param errorCode Standard ICU error code. Its input value must 429 * pass the U_SUCCESS() test, or else the function returns 430 * immediately. Check for U_FAILURE() on output or use with 431 * function chaining. (See User Guide for details.) 432 * @return *this 433 * @throws IllegalArgumentException for syntax errors in the pattern string 434 * @throws IndexOutOfBoundsException if certain limits are exceeded 435 * (e.g., argument number too high, argument name too long, etc.) 436 * @throws NumberFormatException if a number could not be parsed 437 * @stable ICU 4.8 438 */ 439 MessagePattern &parse(const UnicodeString &pattern, 440 UParseError *parseError, UErrorCode &errorCode); 441 442 /** 443 * Parses a ChoiceFormat pattern string. 444 * @param pattern a ChoiceFormat pattern string 445 * @param parseError Struct to receive information on the position 446 * of an error within the pattern. 447 * Can be NULL. 448 * @param errorCode Standard ICU error code. Its input value must 449 * pass the U_SUCCESS() test, or else the function returns 450 * immediately. Check for U_FAILURE() on output or use with 451 * function chaining. (See User Guide for details.) 452 * @return *this 453 * @throws IllegalArgumentException for syntax errors in the pattern string 454 * @throws IndexOutOfBoundsException if certain limits are exceeded 455 * (e.g., argument number too high, argument name too long, etc.) 456 * @throws NumberFormatException if a number could not be parsed 457 * @stable ICU 4.8 458 */ 459 MessagePattern &parseChoiceStyle(const UnicodeString &pattern, 460 UParseError *parseError, UErrorCode &errorCode); 461 462 /** 463 * Parses a PluralFormat pattern string. 464 * @param pattern a PluralFormat pattern string 465 * @param parseError Struct to receive information on the position 466 * of an error within the pattern. 467 * Can be NULL. 468 * @param errorCode Standard ICU error code. Its input value must 469 * pass the U_SUCCESS() test, or else the function returns 470 * immediately. Check for U_FAILURE() on output or use with 471 * function chaining. (See User Guide for details.) 472 * @return *this 473 * @throws IllegalArgumentException for syntax errors in the pattern string 474 * @throws IndexOutOfBoundsException if certain limits are exceeded 475 * (e.g., argument number too high, argument name too long, etc.) 476 * @throws NumberFormatException if a number could not be parsed 477 * @stable ICU 4.8 478 */ 479 MessagePattern &parsePluralStyle(const UnicodeString &pattern, 480 UParseError *parseError, UErrorCode &errorCode); 481 482 /** 483 * Parses a SelectFormat pattern string. 484 * @param pattern a SelectFormat pattern string 485 * @param parseError Struct to receive information on the position 486 * of an error within the pattern. 487 * Can be NULL. 488 * @param errorCode Standard ICU error code. Its input value must 489 * pass the U_SUCCESS() test, or else the function returns 490 * immediately. Check for U_FAILURE() on output or use with 491 * function chaining. (See User Guide for details.) 492 * @return *this 493 * @throws IllegalArgumentException for syntax errors in the pattern string 494 * @throws IndexOutOfBoundsException if certain limits are exceeded 495 * (e.g., argument number too high, argument name too long, etc.) 496 * @throws NumberFormatException if a number could not be parsed 497 * @stable ICU 4.8 498 */ 499 MessagePattern &parseSelectStyle(const UnicodeString &pattern, 500 UParseError *parseError, UErrorCode &errorCode); 501 502 /** 503 * Clears this MessagePattern. 504 * countParts() will return 0. 505 * @stable ICU 4.8 506 */ 507 void clear(); 508 509 /** 510 * Clears this MessagePattern and sets the UMessagePatternApostropheMode. 511 * countParts() will return 0. 512 * @param mode The new UMessagePatternApostropheMode. 513 * @stable ICU 4.8 514 */ clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode)515 void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) { 516 clear(); 517 aposMode=mode; 518 } 519 520 /** 521 * @param other another object to compare with. 522 * @return TRUE if this object is equivalent to the other one. 523 * @stable ICU 4.8 524 */ 525 UBool operator==(const MessagePattern &other) const; 526 527 /** 528 * @param other another object to compare with. 529 * @return FALSE if this object is equivalent to the other one. 530 * @stable ICU 4.8 531 */ 532 inline UBool operator!=(const MessagePattern &other) const { 533 return !operator==(other); 534 } 535 536 /** 537 * @return A hash code for this object. 538 * @stable ICU 4.8 539 */ 540 int32_t hashCode() const; 541 542 /** 543 * @return this instance's UMessagePatternApostropheMode. 544 * @stable ICU 4.8 545 */ getApostropheMode()546 UMessagePatternApostropheMode getApostropheMode() const { 547 return aposMode; 548 } 549 550 // Java has package-private jdkAposMode() here. 551 // In C++, this is declared in the MessageImpl class. 552 553 /** 554 * @return the parsed pattern string (null if none was parsed). 555 * @stable ICU 4.8 556 */ getPatternString()557 const UnicodeString &getPatternString() const { 558 return msg; 559 } 560 561 /** 562 * Does the parsed pattern have named arguments like {first_name}? 563 * @return TRUE if the parsed pattern has at least one named argument. 564 * @stable ICU 4.8 565 */ hasNamedArguments()566 UBool hasNamedArguments() const { 567 return hasArgNames; 568 } 569 570 /** 571 * Does the parsed pattern have numbered arguments like {2}? 572 * @return TRUE if the parsed pattern has at least one numbered argument. 573 * @stable ICU 4.8 574 */ hasNumberedArguments()575 UBool hasNumberedArguments() const { 576 return hasArgNumbers; 577 } 578 579 /** 580 * Validates and parses an argument name or argument number string. 581 * An argument name must be a "pattern identifier", that is, it must contain 582 * no Unicode Pattern_Syntax or Pattern_White_Space characters. 583 * If it only contains ASCII digits, then it must be a small integer with no leading zero. 584 * @param name Input string. 585 * @return >=0 if the name is a valid number, 586 * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits, 587 * ARG_NAME_NOT_VALID (-2) if it is neither. 588 * @stable ICU 4.8 589 */ 590 static int32_t validateArgumentName(const UnicodeString &name); 591 592 /** 593 * Returns a version of the parsed pattern string where each ASCII apostrophe 594 * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax. 595 * <p> 596 * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}." 597 * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}." 598 * @return the deep-auto-quoted version of the parsed pattern string. 599 * @see MessageFormat.autoQuoteApostrophe() 600 * @stable ICU 4.8 601 */ 602 UnicodeString autoQuoteApostropheDeep() const; 603 604 class Part; 605 606 /** 607 * Returns the number of "parts" created by parsing the pattern string. 608 * Returns 0 if no pattern has been parsed or clear() was called. 609 * @return the number of pattern parts. 610 * @stable ICU 4.8 611 */ countParts()612 int32_t countParts() const { 613 return partsLength; 614 } 615 616 /** 617 * Gets the i-th pattern "part". 618 * @param i The index of the Part data. (0..countParts()-1) 619 * @return the i-th pattern "part". 620 * @stable ICU 4.8 621 */ getPart(int32_t i)622 const Part &getPart(int32_t i) const { 623 return parts[i]; 624 } 625 626 /** 627 * Returns the UMessagePatternPartType of the i-th pattern "part". 628 * Convenience method for getPart(i).getType(). 629 * @param i The index of the Part data. (0..countParts()-1) 630 * @return The UMessagePatternPartType of the i-th Part. 631 * @stable ICU 4.8 632 */ getPartType(int32_t i)633 UMessagePatternPartType getPartType(int32_t i) const { 634 return getPart(i).type; 635 } 636 637 /** 638 * Returns the pattern index of the specified pattern "part". 639 * Convenience method for getPart(partIndex).getIndex(). 640 * @param partIndex The index of the Part data. (0..countParts()-1) 641 * @return The pattern index of this Part. 642 * @stable ICU 4.8 643 */ getPatternIndex(int32_t partIndex)644 int32_t getPatternIndex(int32_t partIndex) const { 645 return getPart(partIndex).index; 646 } 647 648 /** 649 * Returns the substring of the pattern string indicated by the Part. 650 * Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()). 651 * @param part a part of this MessagePattern. 652 * @return the substring associated with part. 653 * @stable ICU 4.8 654 */ getSubstring(const Part & part)655 UnicodeString getSubstring(const Part &part) const { 656 return msg.tempSubString(part.index, part.length); 657 } 658 659 /** 660 * Compares the part's substring with the input string s. 661 * @param part a part of this MessagePattern. 662 * @param s a string. 663 * @return TRUE if getSubstring(part).equals(s). 664 * @stable ICU 4.8 665 */ partSubstringMatches(const Part & part,const UnicodeString & s)666 UBool partSubstringMatches(const Part &part, const UnicodeString &s) const { 667 return 0==msg.compare(part.index, part.length, s); 668 } 669 670 /** 671 * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE. 672 * @param part a part of this MessagePattern. 673 * @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part. 674 * @stable ICU 4.8 675 */ 676 double getNumericValue(const Part &part) const; 677 678 /** 679 * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified. 680 * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1) 681 * @return the "offset:" value. 682 * @stable ICU 4.8 683 */ 684 double getPluralOffset(int32_t pluralStart) const; 685 686 /** 687 * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start. 688 * @param start The index of some Part data (0..countParts()-1); 689 * this Part should be of Type ARG_START or MSG_START. 690 * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level, 691 * or start itself if getPartType(msgStart)!=ARG|MSG_START. 692 * @stable ICU 4.8 693 */ getLimitPartIndex(int32_t start)694 int32_t getLimitPartIndex(int32_t start) const { 695 int32_t limit=getPart(start).limitPartIndex; 696 if(limit<start) { 697 return start; 698 } 699 return limit; 700 } 701 702 /** 703 * A message pattern "part", representing a pattern parsing event. 704 * There is a part for the start and end of a message or argument, 705 * for quoting and escaping of and with ASCII apostrophes, 706 * and for syntax elements of "complex" arguments. 707 * @stable ICU 4.8 708 */ 709 class Part : public UMemory { 710 public: 711 /** 712 * Default constructor, do not use. 713 * @internal 714 */ Part()715 Part() {} 716 717 /** 718 * Returns the type of this part. 719 * @return the part type. 720 * @stable ICU 4.8 721 */ getType()722 UMessagePatternPartType getType() const { 723 return type; 724 } 725 726 /** 727 * Returns the pattern string index associated with this Part. 728 * @return this part's pattern string index. 729 * @stable ICU 4.8 730 */ getIndex()731 int32_t getIndex() const { 732 return index; 733 } 734 735 /** 736 * Returns the length of the pattern substring associated with this Part. 737 * This is 0 for some parts. 738 * @return this part's pattern substring length. 739 * @stable ICU 4.8 740 */ getLength()741 int32_t getLength() const { 742 return length; 743 } 744 745 /** 746 * Returns the pattern string limit (exclusive-end) index associated with this Part. 747 * Convenience method for getIndex()+getLength(). 748 * @return this part's pattern string limit index, same as getIndex()+getLength(). 749 * @stable ICU 4.8 750 */ getLimit()751 int32_t getLimit() const { 752 return index+length; 753 } 754 755 /** 756 * Returns a value associated with this part. 757 * See the documentation of each part type for details. 758 * @return the part value. 759 * @stable ICU 4.8 760 */ getValue()761 int32_t getValue() const { 762 return value; 763 } 764 765 /** 766 * Returns the argument type if this part is of type ARG_START or ARG_LIMIT, 767 * otherwise UMSGPAT_ARG_TYPE_NONE. 768 * @return the argument type for this part. 769 * @stable ICU 4.8 770 */ getArgType()771 UMessagePatternArgType getArgType() const { 772 UMessagePatternPartType type=getType(); 773 if(type==UMSGPAT_PART_TYPE_ARG_START || type==UMSGPAT_PART_TYPE_ARG_LIMIT) { 774 return (UMessagePatternArgType)value; 775 } else { 776 return UMSGPAT_ARG_TYPE_NONE; 777 } 778 } 779 780 /** 781 * Indicates whether the Part type has a numeric value. 782 * If so, then that numeric value can be retrieved via MessagePattern.getNumericValue(). 783 * @param type The Part type to be tested. 784 * @return TRUE if the Part type has a numeric value. 785 * @stable ICU 4.8 786 */ hasNumericValue(UMessagePatternPartType type)787 static UBool hasNumericValue(UMessagePatternPartType type) { 788 return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE; 789 } 790 791 /** 792 * @param other another object to compare with. 793 * @return TRUE if this object is equivalent to the other one. 794 * @stable ICU 4.8 795 */ 796 UBool operator==(const Part &other) const; 797 798 /** 799 * @param other another object to compare with. 800 * @return FALSE if this object is equivalent to the other one. 801 * @stable ICU 4.8 802 */ 803 inline UBool operator!=(const Part &other) const { 804 return !operator==(other); 805 } 806 807 /** 808 * @return A hash code for this object. 809 * @stable ICU 4.8 810 */ hashCode()811 int32_t hashCode() const { 812 return ((type*37+index)*37+length)*37+value; 813 } 814 815 private: 816 friend class MessagePattern; 817 818 static const int32_t MAX_LENGTH=0xffff; 819 static const int32_t MAX_VALUE=0x7fff; 820 821 // Some fields are not final because they are modified during pattern parsing. 822 // After pattern parsing, the parts are effectively immutable. 823 UMessagePatternPartType type; 824 int32_t index; 825 uint16_t length; 826 int16_t value; 827 int32_t limitPartIndex; 828 }; 829 830 private: 831 void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode); 832 833 void postParse(); 834 835 int32_t parseMessage(int32_t index, int32_t msgStartLength, 836 int32_t nestingLevel, UMessagePatternArgType parentType, 837 UParseError *parseError, UErrorCode &errorCode); 838 839 int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel, 840 UParseError *parseError, UErrorCode &errorCode); 841 842 int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode); 843 844 int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel, 845 UParseError *parseError, UErrorCode &errorCode); 846 847 int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel, 848 UParseError *parseError, UErrorCode &errorCode); 849 850 /** 851 * Validates and parses an argument name or argument number string. 852 * This internal method assumes that the input substring is a "pattern identifier". 853 * @return >=0 if the name is a valid number, 854 * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits, 855 * ARG_NAME_NOT_VALID (-2) if it is neither. 856 * @see #validateArgumentName(String) 857 */ 858 static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit); 859 parseArgNumber(int32_t start,int32_t limit)860 int32_t parseArgNumber(int32_t start, int32_t limit) { 861 return parseArgNumber(msg, start, limit); 862 } 863 864 /** 865 * Parses a number from the specified message substring. 866 * @param start start index into the message string 867 * @param limit limit index into the message string, must be start<limit 868 * @param allowInfinity TRUE if U+221E is allowed (for ChoiceFormat) 869 * @param parseError 870 * @param errorCode 871 */ 872 void parseDouble(int32_t start, int32_t limit, UBool allowInfinity, 873 UParseError *parseError, UErrorCode &errorCode); 874 875 // Java has package-private appendReducedApostrophes() here. 876 // In C++, this is declared in the MessageImpl class. 877 878 int32_t skipWhiteSpace(int32_t index); 879 880 int32_t skipIdentifier(int32_t index); 881 882 /** 883 * Skips a sequence of characters that could occur in a double value. 884 * Does not fully parse or validate the value. 885 */ 886 int32_t skipDouble(int32_t index); 887 888 static UBool isArgTypeChar(UChar32 c); 889 890 UBool isChoice(int32_t index); 891 892 UBool isPlural(int32_t index); 893 894 UBool isSelect(int32_t index); 895 896 UBool isOrdinal(int32_t index); 897 898 /** 899 * @return TRUE if we are inside a MessageFormat (sub-)pattern, 900 * as opposed to inside a top-level choice/plural/select pattern. 901 */ 902 UBool inMessageFormatPattern(int32_t nestingLevel); 903 904 /** 905 * @return TRUE if we are in a MessageFormat sub-pattern 906 * of a top-level ChoiceFormat pattern. 907 */ 908 UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType); 909 910 void addPart(UMessagePatternPartType type, int32_t index, int32_t length, 911 int32_t value, UErrorCode &errorCode); 912 913 void addLimitPart(int32_t start, 914 UMessagePatternPartType type, int32_t index, int32_t length, 915 int32_t value, UErrorCode &errorCode); 916 917 void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode); 918 919 void setParseError(UParseError *parseError, int32_t index); 920 921 UBool init(UErrorCode &errorCode); 922 UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode); 923 924 UMessagePatternApostropheMode aposMode; 925 UnicodeString msg; 926 // ArrayList<Part> parts=new ArrayList<Part>(); 927 MessagePatternPartsList *partsList; 928 Part *parts; 929 int32_t partsLength; 930 // ArrayList<Double> numericValues; 931 MessagePatternDoubleList *numericValuesList; 932 double *numericValues; 933 int32_t numericValuesLength; 934 UBool hasArgNames; 935 UBool hasArgNumbers; 936 UBool needsAutoQuoting; 937 }; 938 939 U_NAMESPACE_END 940 941 #endif // !UCONFIG_NO_FORMATTING 942 943 #endif // __MESSAGEPATTERN_H__ 944