1 /* 2 ******************************************************************************* 3 * Copyright (C) 2011-2012, International Business Machines 4 * Corporation and others. All Rights Reserved. 5 ******************************************************************************* 6 * file name: messagepattern.h 7 * encoding: US-ASCII 8 * tab size: 8 (not used) 9 * indentation:4 10 * 11 * created on: 2011mar14 12 * created by: Markus W. Scherer 13 */ 14 15 #ifndef __MESSAGEPATTERN_H__ 16 #define __MESSAGEPATTERN_H__ 17 18 /** 19 * \file 20 * \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns. 21 */ 22 23 #include "unicode/utypes.h" 24 25 #if !UCONFIG_NO_FORMATTING 26 27 #include "unicode/parseerr.h" 28 #include "unicode/unistr.h" 29 30 /** 31 * Mode for when an apostrophe starts quoted literal text for MessageFormat output. 32 * The default is DOUBLE_OPTIONAL unless overridden via uconfig.h 33 * (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE). 34 * <p> 35 * A pair of adjacent apostrophes always results in a single apostrophe in the output, 36 * even when the pair is between two single, text-quoting apostrophes. 37 * <p> 38 * The following table shows examples of desired MessageFormat.format() output 39 * with the pattern strings that yield that output. 40 * <p> 41 * <table> 42 * <tr> 43 * <th>Desired output</th> 44 * <th>DOUBLE_OPTIONAL</th> 45 * <th>DOUBLE_REQUIRED</th> 46 * </tr> 47 * <tr> 48 * <td>I see {many}</td> 49 * <td>I see '{many}'</td> 50 * <td>(same)</td> 51 * </tr> 52 * <tr> 53 * <td>I said {'Wow!'}</td> 54 * <td>I said '{''Wow!''}'</td> 55 * <td>(same)</td> 56 * </tr> 57 * <tr> 58 * <td>I don't know</td> 59 * <td>I don't know OR<br> I don''t know</td> 60 * <td>I don''t know</td> 61 * </tr> 62 * </table> 63 * @stable ICU 4.8 64 * @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE 65 */ 66 enum UMessagePatternApostropheMode { 67 /** 68 * A literal apostrophe is represented by 69 * either a single or a double apostrophe pattern character. 70 * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text 71 * if it immediately precedes a curly brace {}, 72 * or a pipe symbol | if inside a choice format, 73 * or a pound symbol # if inside a plural format. 74 * <p> 75 * This is the default behavior starting with ICU 4.8. 76 * @stable ICU 4.8 77 */ 78 UMSGPAT_APOS_DOUBLE_OPTIONAL, 79 /** 80 * A literal apostrophe must be represented by 81 * a double apostrophe pattern character. 82 * A single apostrophe always starts quoted literal text. 83 * <p> 84 * This is the behavior of ICU 4.6 and earlier, and of the JDK. 85 * @stable ICU 4.8 86 */ 87 UMSGPAT_APOS_DOUBLE_REQUIRED 88 }; 89 /** 90 * @stable ICU 4.8 91 */ 92 typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode; 93 94 /** 95 * MessagePattern::Part type constants. 96 * @stable ICU 4.8 97 */ 98 enum UMessagePatternPartType { 99 /** 100 * Start of a message pattern (main or nested). 101 * The length is 0 for the top-level message 102 * and for a choice argument sub-message, otherwise 1 for the '{'. 103 * The value indicates the nesting level, starting with 0 for the main message. 104 * <p> 105 * There is always a later MSG_LIMIT part. 106 * @stable ICU 4.8 107 */ 108 UMSGPAT_PART_TYPE_MSG_START, 109 /** 110 * End of a message pattern (main or nested). 111 * The length is 0 for the top-level message and 112 * the last sub-message of a choice argument, 113 * otherwise 1 for the '}' or (in a choice argument style) the '|'. 114 * The value indicates the nesting level, starting with 0 for the main message. 115 * @stable ICU 4.8 116 */ 117 UMSGPAT_PART_TYPE_MSG_LIMIT, 118 /** 119 * Indicates a substring of the pattern string which is to be skipped when formatting. 120 * For example, an apostrophe that begins or ends quoted text 121 * would be indicated with such a part. 122 * The value is undefined and currently always 0. 123 * @stable ICU 4.8 124 */ 125 UMSGPAT_PART_TYPE_SKIP_SYNTAX, 126 /** 127 * Indicates that a syntax character needs to be inserted for auto-quoting. 128 * The length is 0. 129 * The value is the character code of the insertion character. (U+0027=APOSTROPHE) 130 * @stable ICU 4.8 131 */ 132 UMSGPAT_PART_TYPE_INSERT_CHAR, 133 /** 134 * Indicates a syntactic (non-escaped) # symbol in a plural variant. 135 * When formatting, replace this part's substring with the 136 * (value-offset) for the plural argument value. 137 * The value is undefined and currently always 0. 138 * @stable ICU 4.8 139 */ 140 UMSGPAT_PART_TYPE_REPLACE_NUMBER, 141 /** 142 * Start of an argument. 143 * The length is 1 for the '{'. 144 * The value is the ordinal value of the ArgType. Use getArgType(). 145 * <p> 146 * This part is followed by either an ARG_NUMBER or ARG_NAME, 147 * followed by optional argument sub-parts (see UMessagePatternArgType constants) 148 * and finally an ARG_LIMIT part. 149 * @stable ICU 4.8 150 */ 151 UMSGPAT_PART_TYPE_ARG_START, 152 /** 153 * End of an argument. 154 * The length is 1 for the '}'. 155 * The value is the ordinal value of the ArgType. Use getArgType(). 156 * @stable ICU 4.8 157 */ 158 UMSGPAT_PART_TYPE_ARG_LIMIT, 159 /** 160 * The argument number, provided by the value. 161 * @stable ICU 4.8 162 */ 163 UMSGPAT_PART_TYPE_ARG_NUMBER, 164 /** 165 * The argument name. 166 * The value is undefined and currently always 0. 167 * @stable ICU 4.8 168 */ 169 UMSGPAT_PART_TYPE_ARG_NAME, 170 /** 171 * The argument type. 172 * The value is undefined and currently always 0. 173 * @stable ICU 4.8 174 */ 175 UMSGPAT_PART_TYPE_ARG_TYPE, 176 /** 177 * The argument style text. 178 * The value is undefined and currently always 0. 179 * @stable ICU 4.8 180 */ 181 UMSGPAT_PART_TYPE_ARG_STYLE, 182 /** 183 * A selector substring in a "complex" argument style. 184 * The value is undefined and currently always 0. 185 * @stable ICU 4.8 186 */ 187 UMSGPAT_PART_TYPE_ARG_SELECTOR, 188 /** 189 * An integer value, for example the offset or an explicit selector value 190 * in a PluralFormat style. 191 * The part value is the integer value. 192 * @stable ICU 4.8 193 */ 194 UMSGPAT_PART_TYPE_ARG_INT, 195 /** 196 * A numeric value, for example the offset or an explicit selector value 197 * in a PluralFormat style. 198 * The part value is an index into an internal array of numeric values; 199 * use getNumericValue(). 200 * @stable ICU 4.8 201 */ 202 UMSGPAT_PART_TYPE_ARG_DOUBLE 203 }; 204 /** 205 * @stable ICU 4.8 206 */ 207 typedef enum UMessagePatternPartType UMessagePatternPartType; 208 209 /** 210 * Argument type constants. 211 * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts. 212 * 213 * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT, 214 * with a nesting level one greater than the surrounding message. 215 * @stable ICU 4.8 216 */ 217 enum UMessagePatternArgType { 218 /** 219 * The argument has no specified type. 220 * @stable ICU 4.8 221 */ 222 UMSGPAT_ARG_TYPE_NONE, 223 /** 224 * The argument has a "simple" type which is provided by the ARG_TYPE part. 225 * An ARG_STYLE part might follow that. 226 * @stable ICU 4.8 227 */ 228 UMSGPAT_ARG_TYPE_SIMPLE, 229 /** 230 * The argument is a ChoiceFormat with one or more 231 * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples. 232 * @stable ICU 4.8 233 */ 234 UMSGPAT_ARG_TYPE_CHOICE, 235 /** 236 * The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset 237 * (e.g., offset:1) 238 * and one or more (ARG_SELECTOR [explicit-value] message) tuples. 239 * If the selector has an explicit value (e.g., =2), then 240 * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message. 241 * Otherwise the message immediately follows the ARG_SELECTOR. 242 * @stable ICU 4.8 243 */ 244 UMSGPAT_ARG_TYPE_PLURAL, 245 /** 246 * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs. 247 * @stable ICU 4.8 248 */ 249 UMSGPAT_ARG_TYPE_SELECT, 250 /** 251 * The argument is an ordinal-number PluralFormat 252 * with the same style parts sequence and semantics as UMSGPAT_ARG_TYPE_PLURAL. 253 * @draft ICU 50 254 */ 255 UMSGPAT_ARG_TYPE_SELECTORDINAL 256 }; 257 /** 258 * @stable ICU 4.8 259 */ 260 typedef enum UMessagePatternArgType UMessagePatternArgType; 261 262 /** 263 * Returns TRUE if the argument type has a plural style part sequence and semantics, 264 * for example UMSGPAT_ARG_TYPE_PLURAL and UMSGPAT_ARG_TYPE_SELECTORDINAL. 265 * @draft ICU 50 266 */ 267 #define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \ 268 ((argType)==UMSGPAT_ARG_TYPE_PLURAL || (argType)==UMSGPAT_ARG_TYPE_SELECTORDINAL) 269 270 enum { 271 /** 272 * Return value from MessagePattern.validateArgumentName() for when 273 * the string is a valid "pattern identifier" but not a number. 274 * @stable ICU 4.8 275 */ 276 UMSGPAT_ARG_NAME_NOT_NUMBER=-1, 277 278 /** 279 * Return value from MessagePattern.validateArgumentName() for when 280 * the string is invalid. 281 * It might not be a valid "pattern identifier", 282 * or it have only ASCII digits but there is a leading zero or the number is too large. 283 * @stable ICU 4.8 284 */ 285 UMSGPAT_ARG_NAME_NOT_VALID=-2 286 }; 287 288 /** 289 * Special value that is returned by getNumericValue(Part) when no 290 * numeric value is defined for a part. 291 * @see MessagePattern.getNumericValue() 292 * @stable ICU 4.8 293 */ 294 #define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789)) 295 296 U_NAMESPACE_BEGIN 297 298 class MessagePatternDoubleList; 299 class MessagePatternPartsList; 300 301 /** 302 * Parses and represents ICU MessageFormat patterns. 303 * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat. 304 * Used in the implementations of those classes as well as in tools 305 * for message validation, translation and format conversion. 306 * <p> 307 * The parser handles all syntax relevant for identifying message arguments. 308 * This includes "complex" arguments whose style strings contain 309 * nested MessageFormat pattern substrings. 310 * For "simple" arguments (with no nested MessageFormat pattern substrings), 311 * the argument style is not parsed any further. 312 * <p> 313 * The parser handles named and numbered message arguments and allows both in one message. 314 * <p> 315 * Once a pattern has been parsed successfully, iterate through the parsed data 316 * with countParts(), getPart() and related methods. 317 * <p> 318 * The data logically represents a parse tree, but is stored and accessed 319 * as a list of "parts" for fast and simple parsing and to minimize object allocations. 320 * Arguments and nested messages are best handled via recursion. 321 * For every _START "part", MessagePattern.getLimitPartIndex() efficiently returns 322 * the index of the corresponding _LIMIT "part". 323 * <p> 324 * List of "parts": 325 * <pre> 326 * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT 327 * argument = noneArg | simpleArg | complexArg 328 * complexArg = choiceArg | pluralArg | selectArg 329 * 330 * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE 331 * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE 332 * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE 333 * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL 334 * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT 335 * 336 * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+ 337 * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+ 338 * selectStyle = (ARG_SELECTOR message)+ 339 * </pre> 340 * <ul> 341 * <li>Literal output text is not represented directly by "parts" but accessed 342 * between parts of a message, from one part's getLimit() to the next part's getIndex(). 343 * <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE. 344 * <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or 345 * the less-than-or-equal-to sign (U+2264). 346 * <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value. 347 * The optional numeric Part between each (ARG_SELECTOR, message) pair 348 * is the value of an explicit-number selector like "=2", 349 * otherwise the selector is a non-numeric identifier. 350 * <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle. 351 * </ul> 352 * <p> 353 * This class is not intended for public subclassing. 354 * 355 * @stable ICU 4.8 356 */ 357 class U_COMMON_API MessagePattern : public UObject { 358 public: 359 /** 360 * Constructs an empty MessagePattern with default UMessagePatternApostropheMode. 361 * @param errorCode Standard ICU error code. Its input value must 362 * pass the U_SUCCESS() test, or else the function returns 363 * immediately. Check for U_FAILURE() on output or use with 364 * function chaining. (See User Guide for details.) 365 * @stable ICU 4.8 366 */ 367 MessagePattern(UErrorCode &errorCode); 368 369 /** 370 * Constructs an empty MessagePattern. 371 * @param mode Explicit UMessagePatternApostropheMode. 372 * @param errorCode Standard ICU error code. Its input value must 373 * pass the U_SUCCESS() test, or else the function returns 374 * immediately. Check for U_FAILURE() on output or use with 375 * function chaining. (See User Guide for details.) 376 * @stable ICU 4.8 377 */ 378 MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode); 379 380 /** 381 * Constructs a MessagePattern with default UMessagePatternApostropheMode and 382 * parses the MessageFormat pattern string. 383 * @param pattern a MessageFormat pattern string 384 * @param parseError Struct to receive information on the position 385 * of an error within the pattern. 386 * Can be NULL. 387 * @param errorCode Standard ICU error code. Its input value must 388 * pass the U_SUCCESS() test, or else the function returns 389 * immediately. Check for U_FAILURE() on output or use with 390 * function chaining. (See User Guide for details.) 391 * TODO: turn @throws into UErrorCode specifics? 392 * @throws IllegalArgumentException for syntax errors in the pattern string 393 * @throws IndexOutOfBoundsException if certain limits are exceeded 394 * (e.g., argument number too high, argument name too long, etc.) 395 * @throws NumberFormatException if a number could not be parsed 396 * @stable ICU 4.8 397 */ 398 MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode); 399 400 /** 401 * Copy constructor. 402 * @param other Object to copy. 403 * @stable ICU 4.8 404 */ 405 MessagePattern(const MessagePattern &other); 406 407 /** 408 * Assignment operator. 409 * @param other Object to copy. 410 * @return *this=other 411 * @stable ICU 4.8 412 */ 413 MessagePattern &operator=(const MessagePattern &other); 414 415 /** 416 * Destructor. 417 * @stable ICU 4.8 418 */ 419 virtual ~MessagePattern(); 420 421 /** 422 * Parses a MessageFormat pattern string. 423 * @param pattern a MessageFormat pattern string 424 * @param parseError Struct to receive information on the position 425 * of an error within the pattern. 426 * Can be NULL. 427 * @param errorCode Standard ICU error code. Its input value must 428 * pass the U_SUCCESS() test, or else the function returns 429 * immediately. Check for U_FAILURE() on output or use with 430 * function chaining. (See User Guide for details.) 431 * @return *this 432 * @throws IllegalArgumentException for syntax errors in the pattern string 433 * @throws IndexOutOfBoundsException if certain limits are exceeded 434 * (e.g., argument number too high, argument name too long, etc.) 435 * @throws NumberFormatException if a number could not be parsed 436 * @stable ICU 4.8 437 */ 438 MessagePattern &parse(const UnicodeString &pattern, 439 UParseError *parseError, UErrorCode &errorCode); 440 441 /** 442 * Parses a ChoiceFormat pattern string. 443 * @param pattern a ChoiceFormat pattern string 444 * @param parseError Struct to receive information on the position 445 * of an error within the pattern. 446 * Can be NULL. 447 * @param errorCode Standard ICU error code. Its input value must 448 * pass the U_SUCCESS() test, or else the function returns 449 * immediately. Check for U_FAILURE() on output or use with 450 * function chaining. (See User Guide for details.) 451 * @return *this 452 * @throws IllegalArgumentException for syntax errors in the pattern string 453 * @throws IndexOutOfBoundsException if certain limits are exceeded 454 * (e.g., argument number too high, argument name too long, etc.) 455 * @throws NumberFormatException if a number could not be parsed 456 * @stable ICU 4.8 457 */ 458 MessagePattern &parseChoiceStyle(const UnicodeString &pattern, 459 UParseError *parseError, UErrorCode &errorCode); 460 461 /** 462 * Parses a PluralFormat pattern string. 463 * @param pattern a PluralFormat pattern string 464 * @param parseError Struct to receive information on the position 465 * of an error within the pattern. 466 * Can be NULL. 467 * @param errorCode Standard ICU error code. Its input value must 468 * pass the U_SUCCESS() test, or else the function returns 469 * immediately. Check for U_FAILURE() on output or use with 470 * function chaining. (See User Guide for details.) 471 * @return *this 472 * @throws IllegalArgumentException for syntax errors in the pattern string 473 * @throws IndexOutOfBoundsException if certain limits are exceeded 474 * (e.g., argument number too high, argument name too long, etc.) 475 * @throws NumberFormatException if a number could not be parsed 476 * @stable ICU 4.8 477 */ 478 MessagePattern &parsePluralStyle(const UnicodeString &pattern, 479 UParseError *parseError, UErrorCode &errorCode); 480 481 /** 482 * Parses a SelectFormat pattern string. 483 * @param pattern a SelectFormat pattern string 484 * @param parseError Struct to receive information on the position 485 * of an error within the pattern. 486 * Can be NULL. 487 * @param errorCode Standard ICU error code. Its input value must 488 * pass the U_SUCCESS() test, or else the function returns 489 * immediately. Check for U_FAILURE() on output or use with 490 * function chaining. (See User Guide for details.) 491 * @return *this 492 * @throws IllegalArgumentException for syntax errors in the pattern string 493 * @throws IndexOutOfBoundsException if certain limits are exceeded 494 * (e.g., argument number too high, argument name too long, etc.) 495 * @throws NumberFormatException if a number could not be parsed 496 * @stable ICU 4.8 497 */ 498 MessagePattern &parseSelectStyle(const UnicodeString &pattern, 499 UParseError *parseError, UErrorCode &errorCode); 500 501 /** 502 * Clears this MessagePattern. 503 * countParts() will return 0. 504 * @stable ICU 4.8 505 */ 506 void clear(); 507 508 /** 509 * Clears this MessagePattern and sets the UMessagePatternApostropheMode. 510 * countParts() will return 0. 511 * @param mode The new UMessagePatternApostropheMode. 512 * @stable ICU 4.8 513 */ clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode)514 void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) { 515 clear(); 516 aposMode=mode; 517 } 518 519 /** 520 * @param other another object to compare with. 521 * @return TRUE if this object is equivalent to the other one. 522 * @stable ICU 4.8 523 */ 524 UBool operator==(const MessagePattern &other) const; 525 526 /** 527 * @param other another object to compare with. 528 * @return FALSE if this object is equivalent to the other one. 529 * @stable ICU 4.8 530 */ 531 inline UBool operator!=(const MessagePattern &other) const { 532 return !operator==(other); 533 } 534 535 /** 536 * @return A hash code for this object. 537 * @stable ICU 4.8 538 */ 539 int32_t hashCode() const; 540 541 /** 542 * @return this instance's UMessagePatternApostropheMode. 543 * @stable ICU 4.8 544 */ getApostropheMode()545 UMessagePatternApostropheMode getApostropheMode() const { 546 return aposMode; 547 } 548 549 // Java has package-private jdkAposMode() here. 550 // In C++, this is declared in the MessageImpl class. 551 552 /** 553 * @return the parsed pattern string (null if none was parsed). 554 * @stable ICU 4.8 555 */ getPatternString()556 const UnicodeString &getPatternString() const { 557 return msg; 558 } 559 560 /** 561 * Does the parsed pattern have named arguments like {first_name}? 562 * @return TRUE if the parsed pattern has at least one named argument. 563 * @stable ICU 4.8 564 */ hasNamedArguments()565 UBool hasNamedArguments() const { 566 return hasArgNames; 567 } 568 569 /** 570 * Does the parsed pattern have numbered arguments like {2}? 571 * @return TRUE if the parsed pattern has at least one numbered argument. 572 * @stable ICU 4.8 573 */ hasNumberedArguments()574 UBool hasNumberedArguments() const { 575 return hasArgNumbers; 576 } 577 578 /** 579 * Validates and parses an argument name or argument number string. 580 * An argument name must be a "pattern identifier", that is, it must contain 581 * no Unicode Pattern_Syntax or Pattern_White_Space characters. 582 * If it only contains ASCII digits, then it must be a small integer with no leading zero. 583 * @param name Input string. 584 * @return >=0 if the name is a valid number, 585 * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits, 586 * ARG_NAME_NOT_VALID (-2) if it is neither. 587 * @stable ICU 4.8 588 */ 589 static int32_t validateArgumentName(const UnicodeString &name); 590 591 /** 592 * Returns a version of the parsed pattern string where each ASCII apostrophe 593 * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax. 594 * <p> 595 * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}." 596 * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}." 597 * @return the deep-auto-quoted version of the parsed pattern string. 598 * @see MessageFormat.autoQuoteApostrophe() 599 * @stable ICU 4.8 600 */ 601 UnicodeString autoQuoteApostropheDeep() const; 602 603 class Part; 604 605 /** 606 * Returns the number of "parts" created by parsing the pattern string. 607 * Returns 0 if no pattern has been parsed or clear() was called. 608 * @return the number of pattern parts. 609 * @stable ICU 4.8 610 */ countParts()611 int32_t countParts() const { 612 return partsLength; 613 } 614 615 /** 616 * Gets the i-th pattern "part". 617 * @param i The index of the Part data. (0..countParts()-1) 618 * @return the i-th pattern "part". 619 * @stable ICU 4.8 620 */ getPart(int32_t i)621 const Part &getPart(int32_t i) const { 622 return parts[i]; 623 } 624 625 /** 626 * Returns the UMessagePatternPartType of the i-th pattern "part". 627 * Convenience method for getPart(i).getType(). 628 * @param i The index of the Part data. (0..countParts()-1) 629 * @return The UMessagePatternPartType of the i-th Part. 630 * @stable ICU 4.8 631 */ getPartType(int32_t i)632 UMessagePatternPartType getPartType(int32_t i) const { 633 return getPart(i).type; 634 } 635 636 /** 637 * Returns the pattern index of the specified pattern "part". 638 * Convenience method for getPart(partIndex).getIndex(). 639 * @param partIndex The index of the Part data. (0..countParts()-1) 640 * @return The pattern index of this Part. 641 * @stable ICU 4.8 642 */ getPatternIndex(int32_t partIndex)643 int32_t getPatternIndex(int32_t partIndex) const { 644 return getPart(partIndex).index; 645 } 646 647 /** 648 * Returns the substring of the pattern string indicated by the Part. 649 * Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()). 650 * @param part a part of this MessagePattern. 651 * @return the substring associated with part. 652 * @stable ICU 4.8 653 */ getSubstring(const Part & part)654 UnicodeString getSubstring(const Part &part) const { 655 return msg.tempSubString(part.index, part.length); 656 } 657 658 /** 659 * Compares the part's substring with the input string s. 660 * @param part a part of this MessagePattern. 661 * @param s a string. 662 * @return TRUE if getSubstring(part).equals(s). 663 * @stable ICU 4.8 664 */ partSubstringMatches(const Part & part,const UnicodeString & s)665 UBool partSubstringMatches(const Part &part, const UnicodeString &s) const { 666 return 0==msg.compare(part.index, part.length, s); 667 } 668 669 /** 670 * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE. 671 * @param part a part of this MessagePattern. 672 * @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part. 673 * @stable ICU 4.8 674 */ 675 double getNumericValue(const Part &part) const; 676 677 /** 678 * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified. 679 * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1) 680 * @return the "offset:" value. 681 * @stable ICU 4.8 682 */ 683 double getPluralOffset(int32_t pluralStart) const; 684 685 /** 686 * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start. 687 * @param start The index of some Part data (0..countParts()-1); 688 * this Part should be of Type ARG_START or MSG_START. 689 * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level, 690 * or start itself if getPartType(msgStart)!=ARG|MSG_START. 691 * @stable ICU 4.8 692 */ getLimitPartIndex(int32_t start)693 int32_t getLimitPartIndex(int32_t start) const { 694 int32_t limit=getPart(start).limitPartIndex; 695 if(limit<start) { 696 return start; 697 } 698 return limit; 699 } 700 701 /** 702 * A message pattern "part", representing a pattern parsing event. 703 * There is a part for the start and end of a message or argument, 704 * for quoting and escaping of and with ASCII apostrophes, 705 * and for syntax elements of "complex" arguments. 706 * @stable ICU 4.8 707 */ 708 class Part : public UMemory { 709 public: 710 /** 711 * Default constructor, do not use. 712 * @internal 713 */ Part()714 Part() {} 715 716 /** 717 * Returns the type of this part. 718 * @return the part type. 719 * @stable ICU 4.8 720 */ getType()721 UMessagePatternPartType getType() const { 722 return type; 723 } 724 725 /** 726 * Returns the pattern string index associated with this Part. 727 * @return this part's pattern string index. 728 * @stable ICU 4.8 729 */ getIndex()730 int32_t getIndex() const { 731 return index; 732 } 733 734 /** 735 * Returns the length of the pattern substring associated with this Part. 736 * This is 0 for some parts. 737 * @return this part's pattern substring length. 738 * @stable ICU 4.8 739 */ getLength()740 int32_t getLength() const { 741 return length; 742 } 743 744 /** 745 * Returns the pattern string limit (exclusive-end) index associated with this Part. 746 * Convenience method for getIndex()+getLength(). 747 * @return this part's pattern string limit index, same as getIndex()+getLength(). 748 * @stable ICU 4.8 749 */ getLimit()750 int32_t getLimit() const { 751 return index+length; 752 } 753 754 /** 755 * Returns a value associated with this part. 756 * See the documentation of each part type for details. 757 * @return the part value. 758 * @stable ICU 4.8 759 */ getValue()760 int32_t getValue() const { 761 return value; 762 } 763 764 /** 765 * Returns the argument type if this part is of type ARG_START or ARG_LIMIT, 766 * otherwise UMSGPAT_ARG_TYPE_NONE. 767 * @return the argument type for this part. 768 * @stable ICU 4.8 769 */ getArgType()770 UMessagePatternArgType getArgType() const { 771 UMessagePatternPartType type=getType(); 772 if(type==UMSGPAT_PART_TYPE_ARG_START || type==UMSGPAT_PART_TYPE_ARG_LIMIT) { 773 return (UMessagePatternArgType)value; 774 } else { 775 return UMSGPAT_ARG_TYPE_NONE; 776 } 777 } 778 779 /** 780 * Indicates whether the Part type has a numeric value. 781 * If so, then that numeric value can be retrieved via MessagePattern.getNumericValue(). 782 * @param type The Part type to be tested. 783 * @return TRUE if the Part type has a numeric value. 784 * @stable ICU 4.8 785 */ hasNumericValue(UMessagePatternPartType type)786 static UBool hasNumericValue(UMessagePatternPartType type) { 787 return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE; 788 } 789 790 /** 791 * @param other another object to compare with. 792 * @return TRUE if this object is equivalent to the other one. 793 * @stable ICU 4.8 794 */ 795 UBool operator==(const Part &other) const; 796 797 /** 798 * @param other another object to compare with. 799 * @return FALSE if this object is equivalent to the other one. 800 * @stable ICU 4.8 801 */ 802 inline UBool operator!=(const Part &other) const { 803 return !operator==(other); 804 } 805 806 /** 807 * @return A hash code for this object. 808 * @stable ICU 4.8 809 */ hashCode()810 int32_t hashCode() const { 811 return ((type*37+index)*37+length)*37+value; 812 } 813 814 private: 815 friend class MessagePattern; 816 817 static const int32_t MAX_LENGTH=0xffff; 818 static const int32_t MAX_VALUE=0x7fff; 819 820 // Some fields are not final because they are modified during pattern parsing. 821 // After pattern parsing, the parts are effectively immutable. 822 UMessagePatternPartType type; 823 int32_t index; 824 uint16_t length; 825 int16_t value; 826 int32_t limitPartIndex; 827 }; 828 829 private: 830 void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode); 831 832 void postParse(); 833 834 int32_t parseMessage(int32_t index, int32_t msgStartLength, 835 int32_t nestingLevel, UMessagePatternArgType parentType, 836 UParseError *parseError, UErrorCode &errorCode); 837 838 int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel, 839 UParseError *parseError, UErrorCode &errorCode); 840 841 int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode); 842 843 int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel, 844 UParseError *parseError, UErrorCode &errorCode); 845 846 int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel, 847 UParseError *parseError, UErrorCode &errorCode); 848 849 /** 850 * Validates and parses an argument name or argument number string. 851 * This internal method assumes that the input substring is a "pattern identifier". 852 * @return >=0 if the name is a valid number, 853 * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits, 854 * ARG_NAME_NOT_VALID (-2) if it is neither. 855 * @see #validateArgumentName(String) 856 */ 857 static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit); 858 parseArgNumber(int32_t start,int32_t limit)859 int32_t parseArgNumber(int32_t start, int32_t limit) { 860 return parseArgNumber(msg, start, limit); 861 } 862 863 /** 864 * Parses a number from the specified message substring. 865 * @param start start index into the message string 866 * @param limit limit index into the message string, must be start<limit 867 * @param allowInfinity TRUE if U+221E is allowed (for ChoiceFormat) 868 * @param parseError 869 * @param errorCode 870 */ 871 void parseDouble(int32_t start, int32_t limit, UBool allowInfinity, 872 UParseError *parseError, UErrorCode &errorCode); 873 874 // Java has package-private appendReducedApostrophes() here. 875 // In C++, this is declared in the MessageImpl class. 876 877 int32_t skipWhiteSpace(int32_t index); 878 879 int32_t skipIdentifier(int32_t index); 880 881 /** 882 * Skips a sequence of characters that could occur in a double value. 883 * Does not fully parse or validate the value. 884 */ 885 int32_t skipDouble(int32_t index); 886 887 static UBool isArgTypeChar(UChar32 c); 888 889 UBool isChoice(int32_t index); 890 891 UBool isPlural(int32_t index); 892 893 UBool isSelect(int32_t index); 894 895 UBool isOrdinal(int32_t index); 896 897 /** 898 * @return TRUE if we are inside a MessageFormat (sub-)pattern, 899 * as opposed to inside a top-level choice/plural/select pattern. 900 */ 901 UBool inMessageFormatPattern(int32_t nestingLevel); 902 903 /** 904 * @return TRUE if we are in a MessageFormat sub-pattern 905 * of a top-level ChoiceFormat pattern. 906 */ 907 UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType); 908 909 void addPart(UMessagePatternPartType type, int32_t index, int32_t length, 910 int32_t value, UErrorCode &errorCode); 911 912 void addLimitPart(int32_t start, 913 UMessagePatternPartType type, int32_t index, int32_t length, 914 int32_t value, UErrorCode &errorCode); 915 916 void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode); 917 918 void setParseError(UParseError *parseError, int32_t index); 919 920 // No ICU "poor man's RTTI" for this class nor its subclasses. 921 virtual UClassID getDynamicClassID() const; 922 923 UBool init(UErrorCode &errorCode); 924 UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode); 925 926 UMessagePatternApostropheMode aposMode; 927 UnicodeString msg; 928 // ArrayList<Part> parts=new ArrayList<Part>(); 929 MessagePatternPartsList *partsList; 930 Part *parts; 931 int32_t partsLength; 932 // ArrayList<Double> numericValues; 933 MessagePatternDoubleList *numericValuesList; 934 double *numericValues; 935 int32_t numericValuesLength; 936 UBool hasArgNames; 937 UBool hasArgNumbers; 938 UBool needsAutoQuoting; 939 }; 940 941 U_NAMESPACE_END 942 943 #endif // !UCONFIG_NO_FORMATTING 944 945 #endif // __MESSAGEPATTERN_H__ 946