1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2011-2013, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * file name: messagepattern.h 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2011mar14 14 * created by: Markus W. Scherer 15 */ 16 17 #ifndef __MESSAGEPATTERN_H__ 18 #define __MESSAGEPATTERN_H__ 19 20 /** 21 * \file 22 * \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns. 23 */ 24 25 #include "unicode/utypes.h" 26 27 #if !UCONFIG_NO_FORMATTING 28 29 #include "unicode/parseerr.h" 30 #include "unicode/unistr.h" 31 32 /** 33 * Mode for when an apostrophe starts quoted literal text for MessageFormat output. 34 * The default is DOUBLE_OPTIONAL unless overridden via uconfig.h 35 * (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE). 36 * <p> 37 * A pair of adjacent apostrophes always results in a single apostrophe in the output, 38 * even when the pair is between two single, text-quoting apostrophes. 39 * <p> 40 * The following table shows examples of desired MessageFormat.format() output 41 * with the pattern strings that yield that output. 42 * <p> 43 * <table> 44 * <tr> 45 * <th>Desired output</th> 46 * <th>DOUBLE_OPTIONAL</th> 47 * <th>DOUBLE_REQUIRED</th> 48 * </tr> 49 * <tr> 50 * <td>I see {many}</td> 51 * <td>I see '{many}'</td> 52 * <td>(same)</td> 53 * </tr> 54 * <tr> 55 * <td>I said {'Wow!'}</td> 56 * <td>I said '{''Wow!''}'</td> 57 * <td>(same)</td> 58 * </tr> 59 * <tr> 60 * <td>I don't know</td> 61 * <td>I don't know OR<br> I don''t know</td> 62 * <td>I don''t know</td> 63 * </tr> 64 * </table> 65 * @stable ICU 4.8 66 * @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE 67 */ 68 enum UMessagePatternApostropheMode { 69 /** 70 * A literal apostrophe is represented by 71 * either a single or a double apostrophe pattern character. 72 * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text 73 * if it immediately precedes a curly brace {}, 74 * or a pipe symbol | if inside a choice format, 75 * or a pound symbol # if inside a plural format. 76 * <p> 77 * This is the default behavior starting with ICU 4.8. 78 * @stable ICU 4.8 79 */ 80 UMSGPAT_APOS_DOUBLE_OPTIONAL, 81 /** 82 * A literal apostrophe must be represented by 83 * a double apostrophe pattern character. 84 * A single apostrophe always starts quoted literal text. 85 * <p> 86 * This is the behavior of ICU 4.6 and earlier, and of the JDK. 87 * @stable ICU 4.8 88 */ 89 UMSGPAT_APOS_DOUBLE_REQUIRED 90 }; 91 /** 92 * @stable ICU 4.8 93 */ 94 typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode; 95 96 /** 97 * MessagePattern::Part type constants. 98 * @stable ICU 4.8 99 */ 100 enum UMessagePatternPartType { 101 /** 102 * Start of a message pattern (main or nested). 103 * The length is 0 for the top-level message 104 * and for a choice argument sub-message, otherwise 1 for the '{'. 105 * The value indicates the nesting level, starting with 0 for the main message. 106 * <p> 107 * There is always a later MSG_LIMIT part. 108 * @stable ICU 4.8 109 */ 110 UMSGPAT_PART_TYPE_MSG_START, 111 /** 112 * End of a message pattern (main or nested). 113 * The length is 0 for the top-level message and 114 * the last sub-message of a choice argument, 115 * otherwise 1 for the '}' or (in a choice argument style) the '|'. 116 * The value indicates the nesting level, starting with 0 for the main message. 117 * @stable ICU 4.8 118 */ 119 UMSGPAT_PART_TYPE_MSG_LIMIT, 120 /** 121 * Indicates a substring of the pattern string which is to be skipped when formatting. 122 * For example, an apostrophe that begins or ends quoted text 123 * would be indicated with such a part. 124 * The value is undefined and currently always 0. 125 * @stable ICU 4.8 126 */ 127 UMSGPAT_PART_TYPE_SKIP_SYNTAX, 128 /** 129 * Indicates that a syntax character needs to be inserted for auto-quoting. 130 * The length is 0. 131 * The value is the character code of the insertion character. (U+0027=APOSTROPHE) 132 * @stable ICU 4.8 133 */ 134 UMSGPAT_PART_TYPE_INSERT_CHAR, 135 /** 136 * Indicates a syntactic (non-escaped) # symbol in a plural variant. 137 * When formatting, replace this part's substring with the 138 * (value-offset) for the plural argument value. 139 * The value is undefined and currently always 0. 140 * @stable ICU 4.8 141 */ 142 UMSGPAT_PART_TYPE_REPLACE_NUMBER, 143 /** 144 * Start of an argument. 145 * The length is 1 for the '{'. 146 * The value is the ordinal value of the ArgType. Use getArgType(). 147 * <p> 148 * This part is followed by either an ARG_NUMBER or ARG_NAME, 149 * followed by optional argument sub-parts (see UMessagePatternArgType constants) 150 * and finally an ARG_LIMIT part. 151 * @stable ICU 4.8 152 */ 153 UMSGPAT_PART_TYPE_ARG_START, 154 /** 155 * End of an argument. 156 * The length is 1 for the '}'. 157 * The value is the ordinal value of the ArgType. Use getArgType(). 158 * @stable ICU 4.8 159 */ 160 UMSGPAT_PART_TYPE_ARG_LIMIT, 161 /** 162 * The argument number, provided by the value. 163 * @stable ICU 4.8 164 */ 165 UMSGPAT_PART_TYPE_ARG_NUMBER, 166 /** 167 * The argument name. 168 * The value is undefined and currently always 0. 169 * @stable ICU 4.8 170 */ 171 UMSGPAT_PART_TYPE_ARG_NAME, 172 /** 173 * The argument type. 174 * The value is undefined and currently always 0. 175 * @stable ICU 4.8 176 */ 177 UMSGPAT_PART_TYPE_ARG_TYPE, 178 /** 179 * The argument style text. 180 * The value is undefined and currently always 0. 181 * @stable ICU 4.8 182 */ 183 UMSGPAT_PART_TYPE_ARG_STYLE, 184 /** 185 * A selector substring in a "complex" argument style. 186 * The value is undefined and currently always 0. 187 * @stable ICU 4.8 188 */ 189 UMSGPAT_PART_TYPE_ARG_SELECTOR, 190 /** 191 * An integer value, for example the offset or an explicit selector value 192 * in a PluralFormat style. 193 * The part value is the integer value. 194 * @stable ICU 4.8 195 */ 196 UMSGPAT_PART_TYPE_ARG_INT, 197 /** 198 * A numeric value, for example the offset or an explicit selector value 199 * in a PluralFormat style. 200 * The part value is an index into an internal array of numeric values; 201 * use getNumericValue(). 202 * @stable ICU 4.8 203 */ 204 UMSGPAT_PART_TYPE_ARG_DOUBLE 205 }; 206 /** 207 * @stable ICU 4.8 208 */ 209 typedef enum UMessagePatternPartType UMessagePatternPartType; 210 211 /** 212 * Argument type constants. 213 * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts. 214 * 215 * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT, 216 * with a nesting level one greater than the surrounding message. 217 * @stable ICU 4.8 218 */ 219 enum UMessagePatternArgType { 220 /** 221 * The argument has no specified type. 222 * @stable ICU 4.8 223 */ 224 UMSGPAT_ARG_TYPE_NONE, 225 /** 226 * The argument has a "simple" type which is provided by the ARG_TYPE part. 227 * An ARG_STYLE part might follow that. 228 * @stable ICU 4.8 229 */ 230 UMSGPAT_ARG_TYPE_SIMPLE, 231 /** 232 * The argument is a ChoiceFormat with one or more 233 * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples. 234 * @stable ICU 4.8 235 */ 236 UMSGPAT_ARG_TYPE_CHOICE, 237 /** 238 * The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset 239 * (e.g., offset:1) 240 * and one or more (ARG_SELECTOR [explicit-value] message) tuples. 241 * If the selector has an explicit value (e.g., =2), then 242 * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message. 243 * Otherwise the message immediately follows the ARG_SELECTOR. 244 * @stable ICU 4.8 245 */ 246 UMSGPAT_ARG_TYPE_PLURAL, 247 /** 248 * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs. 249 * @stable ICU 4.8 250 */ 251 UMSGPAT_ARG_TYPE_SELECT, 252 /** 253 * The argument is an ordinal-number PluralFormat 254 * with the same style parts sequence and semantics as UMSGPAT_ARG_TYPE_PLURAL. 255 * @stable ICU 50 256 */ 257 UMSGPAT_ARG_TYPE_SELECTORDINAL 258 }; 259 /** 260 * @stable ICU 4.8 261 */ 262 typedef enum UMessagePatternArgType UMessagePatternArgType; 263 264 /** 265 * \def UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE 266 * Returns TRUE if the argument type has a plural style part sequence and semantics, 267 * for example UMSGPAT_ARG_TYPE_PLURAL and UMSGPAT_ARG_TYPE_SELECTORDINAL. 268 * @stable ICU 50 269 */ 270 #define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \ 271 ((argType)==UMSGPAT_ARG_TYPE_PLURAL || (argType)==UMSGPAT_ARG_TYPE_SELECTORDINAL) 272 273 enum { 274 /** 275 * Return value from MessagePattern.validateArgumentName() for when 276 * the string is a valid "pattern identifier" but not a number. 277 * @stable ICU 4.8 278 */ 279 UMSGPAT_ARG_NAME_NOT_NUMBER=-1, 280 281 /** 282 * Return value from MessagePattern.validateArgumentName() for when 283 * the string is invalid. 284 * It might not be a valid "pattern identifier", 285 * or it have only ASCII digits but there is a leading zero or the number is too large. 286 * @stable ICU 4.8 287 */ 288 UMSGPAT_ARG_NAME_NOT_VALID=-2 289 }; 290 291 /** 292 * Special value that is returned by getNumericValue(Part) when no 293 * numeric value is defined for a part. 294 * @see MessagePattern.getNumericValue() 295 * @stable ICU 4.8 296 */ 297 #define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789)) 298 299 U_NAMESPACE_BEGIN 300 301 class MessagePatternDoubleList; 302 class MessagePatternPartsList; 303 304 /** 305 * Parses and represents ICU MessageFormat patterns. 306 * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat. 307 * Used in the implementations of those classes as well as in tools 308 * for message validation, translation and format conversion. 309 * <p> 310 * The parser handles all syntax relevant for identifying message arguments. 311 * This includes "complex" arguments whose style strings contain 312 * nested MessageFormat pattern substrings. 313 * For "simple" arguments (with no nested MessageFormat pattern substrings), 314 * the argument style is not parsed any further. 315 * <p> 316 * The parser handles named and numbered message arguments and allows both in one message. 317 * <p> 318 * Once a pattern has been parsed successfully, iterate through the parsed data 319 * with countParts(), getPart() and related methods. 320 * <p> 321 * The data logically represents a parse tree, but is stored and accessed 322 * as a list of "parts" for fast and simple parsing and to minimize object allocations. 323 * Arguments and nested messages are best handled via recursion. 324 * For every _START "part", MessagePattern.getLimitPartIndex() efficiently returns 325 * the index of the corresponding _LIMIT "part". 326 * <p> 327 * List of "parts": 328 * <pre> 329 * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT 330 * argument = noneArg | simpleArg | complexArg 331 * complexArg = choiceArg | pluralArg | selectArg 332 * 333 * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE 334 * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE 335 * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE 336 * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL 337 * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT 338 * 339 * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+ 340 * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+ 341 * selectStyle = (ARG_SELECTOR message)+ 342 * </pre> 343 * <ul> 344 * <li>Literal output text is not represented directly by "parts" but accessed 345 * between parts of a message, from one part's getLimit() to the next part's getIndex(). 346 * <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE. 347 * <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or 348 * the less-than-or-equal-to sign (U+2264). 349 * <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value. 350 * The optional numeric Part between each (ARG_SELECTOR, message) pair 351 * is the value of an explicit-number selector like "=2", 352 * otherwise the selector is a non-numeric identifier. 353 * <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle. 354 * </ul> 355 * <p> 356 * This class is not intended for public subclassing. 357 * 358 * @stable ICU 4.8 359 */ 360 class U_COMMON_API MessagePattern : public UObject { 361 public: 362 /** 363 * Constructs an empty MessagePattern with default UMessagePatternApostropheMode. 364 * @param errorCode Standard ICU error code. Its input value must 365 * pass the U_SUCCESS() test, or else the function returns 366 * immediately. Check for U_FAILURE() on output or use with 367 * function chaining. (See User Guide for details.) 368 * @stable ICU 4.8 369 */ 370 MessagePattern(UErrorCode &errorCode); 371 372 /** 373 * Constructs an empty MessagePattern. 374 * @param mode Explicit UMessagePatternApostropheMode. 375 * @param errorCode Standard ICU error code. Its input value must 376 * pass the U_SUCCESS() test, or else the function returns 377 * immediately. Check for U_FAILURE() on output or use with 378 * function chaining. (See User Guide for details.) 379 * @stable ICU 4.8 380 */ 381 MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode); 382 383 /** 384 * Constructs a MessagePattern with default UMessagePatternApostropheMode and 385 * parses the MessageFormat pattern string. 386 * @param pattern a MessageFormat pattern string 387 * @param parseError Struct to receive information on the position 388 * of an error within the pattern. 389 * Can be NULL. 390 * @param errorCode Standard ICU error code. Its input value must 391 * pass the U_SUCCESS() test, or else the function returns 392 * immediately. Check for U_FAILURE() on output or use with 393 * function chaining. (See User Guide for details.) 394 * TODO: turn @throws into UErrorCode specifics? 395 * @throws IllegalArgumentException for syntax errors in the pattern string 396 * @throws IndexOutOfBoundsException if certain limits are exceeded 397 * (e.g., argument number too high, argument name too long, etc.) 398 * @throws NumberFormatException if a number could not be parsed 399 * @stable ICU 4.8 400 */ 401 MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode); 402 403 /** 404 * Copy constructor. 405 * @param other Object to copy. 406 * @stable ICU 4.8 407 */ 408 MessagePattern(const MessagePattern &other); 409 410 /** 411 * Assignment operator. 412 * @param other Object to copy. 413 * @return *this=other 414 * @stable ICU 4.8 415 */ 416 MessagePattern &operator=(const MessagePattern &other); 417 418 /** 419 * Destructor. 420 * @stable ICU 4.8 421 */ 422 virtual ~MessagePattern(); 423 424 /** 425 * Parses a MessageFormat pattern string. 426 * @param pattern a MessageFormat pattern string 427 * @param parseError Struct to receive information on the position 428 * of an error within the pattern. 429 * Can be NULL. 430 * @param errorCode Standard ICU error code. Its input value must 431 * pass the U_SUCCESS() test, or else the function returns 432 * immediately. Check for U_FAILURE() on output or use with 433 * function chaining. (See User Guide for details.) 434 * @return *this 435 * @throws IllegalArgumentException for syntax errors in the pattern string 436 * @throws IndexOutOfBoundsException if certain limits are exceeded 437 * (e.g., argument number too high, argument name too long, etc.) 438 * @throws NumberFormatException if a number could not be parsed 439 * @stable ICU 4.8 440 */ 441 MessagePattern &parse(const UnicodeString &pattern, 442 UParseError *parseError, UErrorCode &errorCode); 443 444 /** 445 * Parses a ChoiceFormat pattern string. 446 * @param pattern a ChoiceFormat pattern string 447 * @param parseError Struct to receive information on the position 448 * of an error within the pattern. 449 * Can be NULL. 450 * @param errorCode Standard ICU error code. Its input value must 451 * pass the U_SUCCESS() test, or else the function returns 452 * immediately. Check for U_FAILURE() on output or use with 453 * function chaining. (See User Guide for details.) 454 * @return *this 455 * @throws IllegalArgumentException for syntax errors in the pattern string 456 * @throws IndexOutOfBoundsException if certain limits are exceeded 457 * (e.g., argument number too high, argument name too long, etc.) 458 * @throws NumberFormatException if a number could not be parsed 459 * @stable ICU 4.8 460 */ 461 MessagePattern &parseChoiceStyle(const UnicodeString &pattern, 462 UParseError *parseError, UErrorCode &errorCode); 463 464 /** 465 * Parses a PluralFormat pattern string. 466 * @param pattern a PluralFormat pattern string 467 * @param parseError Struct to receive information on the position 468 * of an error within the pattern. 469 * Can be NULL. 470 * @param errorCode Standard ICU error code. Its input value must 471 * pass the U_SUCCESS() test, or else the function returns 472 * immediately. Check for U_FAILURE() on output or use with 473 * function chaining. (See User Guide for details.) 474 * @return *this 475 * @throws IllegalArgumentException for syntax errors in the pattern string 476 * @throws IndexOutOfBoundsException if certain limits are exceeded 477 * (e.g., argument number too high, argument name too long, etc.) 478 * @throws NumberFormatException if a number could not be parsed 479 * @stable ICU 4.8 480 */ 481 MessagePattern &parsePluralStyle(const UnicodeString &pattern, 482 UParseError *parseError, UErrorCode &errorCode); 483 484 /** 485 * Parses a SelectFormat pattern string. 486 * @param pattern a SelectFormat pattern string 487 * @param parseError Struct to receive information on the position 488 * of an error within the pattern. 489 * Can be NULL. 490 * @param errorCode Standard ICU error code. Its input value must 491 * pass the U_SUCCESS() test, or else the function returns 492 * immediately. Check for U_FAILURE() on output or use with 493 * function chaining. (See User Guide for details.) 494 * @return *this 495 * @throws IllegalArgumentException for syntax errors in the pattern string 496 * @throws IndexOutOfBoundsException if certain limits are exceeded 497 * (e.g., argument number too high, argument name too long, etc.) 498 * @throws NumberFormatException if a number could not be parsed 499 * @stable ICU 4.8 500 */ 501 MessagePattern &parseSelectStyle(const UnicodeString &pattern, 502 UParseError *parseError, UErrorCode &errorCode); 503 504 /** 505 * Clears this MessagePattern. 506 * countParts() will return 0. 507 * @stable ICU 4.8 508 */ 509 void clear(); 510 511 /** 512 * Clears this MessagePattern and sets the UMessagePatternApostropheMode. 513 * countParts() will return 0. 514 * @param mode The new UMessagePatternApostropheMode. 515 * @stable ICU 4.8 516 */ clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode)517 void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) { 518 clear(); 519 aposMode=mode; 520 } 521 522 /** 523 * @param other another object to compare with. 524 * @return TRUE if this object is equivalent to the other one. 525 * @stable ICU 4.8 526 */ 527 UBool operator==(const MessagePattern &other) const; 528 529 /** 530 * @param other another object to compare with. 531 * @return FALSE if this object is equivalent to the other one. 532 * @stable ICU 4.8 533 */ 534 inline UBool operator!=(const MessagePattern &other) const { 535 return !operator==(other); 536 } 537 538 /** 539 * @return A hash code for this object. 540 * @stable ICU 4.8 541 */ 542 int32_t hashCode() const; 543 544 /** 545 * @return this instance's UMessagePatternApostropheMode. 546 * @stable ICU 4.8 547 */ getApostropheMode()548 UMessagePatternApostropheMode getApostropheMode() const { 549 return aposMode; 550 } 551 552 // Java has package-private jdkAposMode() here. 553 // In C++, this is declared in the MessageImpl class. 554 555 /** 556 * @return the parsed pattern string (null if none was parsed). 557 * @stable ICU 4.8 558 */ getPatternString()559 const UnicodeString &getPatternString() const { 560 return msg; 561 } 562 563 /** 564 * Does the parsed pattern have named arguments like {first_name}? 565 * @return TRUE if the parsed pattern has at least one named argument. 566 * @stable ICU 4.8 567 */ hasNamedArguments()568 UBool hasNamedArguments() const { 569 return hasArgNames; 570 } 571 572 /** 573 * Does the parsed pattern have numbered arguments like {2}? 574 * @return TRUE if the parsed pattern has at least one numbered argument. 575 * @stable ICU 4.8 576 */ hasNumberedArguments()577 UBool hasNumberedArguments() const { 578 return hasArgNumbers; 579 } 580 581 /** 582 * Validates and parses an argument name or argument number string. 583 * An argument name must be a "pattern identifier", that is, it must contain 584 * no Unicode Pattern_Syntax or Pattern_White_Space characters. 585 * If it only contains ASCII digits, then it must be a small integer with no leading zero. 586 * @param name Input string. 587 * @return >=0 if the name is a valid number, 588 * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits, 589 * ARG_NAME_NOT_VALID (-2) if it is neither. 590 * @stable ICU 4.8 591 */ 592 static int32_t validateArgumentName(const UnicodeString &name); 593 594 /** 595 * Returns a version of the parsed pattern string where each ASCII apostrophe 596 * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax. 597 * <p> 598 * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}." 599 * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}." 600 * @return the deep-auto-quoted version of the parsed pattern string. 601 * @see MessageFormat.autoQuoteApostrophe() 602 * @stable ICU 4.8 603 */ 604 UnicodeString autoQuoteApostropheDeep() const; 605 606 class Part; 607 608 /** 609 * Returns the number of "parts" created by parsing the pattern string. 610 * Returns 0 if no pattern has been parsed or clear() was called. 611 * @return the number of pattern parts. 612 * @stable ICU 4.8 613 */ countParts()614 int32_t countParts() const { 615 return partsLength; 616 } 617 618 /** 619 * Gets the i-th pattern "part". 620 * @param i The index of the Part data. (0..countParts()-1) 621 * @return the i-th pattern "part". 622 * @stable ICU 4.8 623 */ getPart(int32_t i)624 const Part &getPart(int32_t i) const { 625 return parts[i]; 626 } 627 628 /** 629 * Returns the UMessagePatternPartType of the i-th pattern "part". 630 * Convenience method for getPart(i).getType(). 631 * @param i The index of the Part data. (0..countParts()-1) 632 * @return The UMessagePatternPartType of the i-th Part. 633 * @stable ICU 4.8 634 */ getPartType(int32_t i)635 UMessagePatternPartType getPartType(int32_t i) const { 636 return getPart(i).type; 637 } 638 639 /** 640 * Returns the pattern index of the specified pattern "part". 641 * Convenience method for getPart(partIndex).getIndex(). 642 * @param partIndex The index of the Part data. (0..countParts()-1) 643 * @return The pattern index of this Part. 644 * @stable ICU 4.8 645 */ getPatternIndex(int32_t partIndex)646 int32_t getPatternIndex(int32_t partIndex) const { 647 return getPart(partIndex).index; 648 } 649 650 /** 651 * Returns the substring of the pattern string indicated by the Part. 652 * Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()). 653 * @param part a part of this MessagePattern. 654 * @return the substring associated with part. 655 * @stable ICU 4.8 656 */ getSubstring(const Part & part)657 UnicodeString getSubstring(const Part &part) const { 658 return msg.tempSubString(part.index, part.length); 659 } 660 661 /** 662 * Compares the part's substring with the input string s. 663 * @param part a part of this MessagePattern. 664 * @param s a string. 665 * @return TRUE if getSubstring(part).equals(s). 666 * @stable ICU 4.8 667 */ partSubstringMatches(const Part & part,const UnicodeString & s)668 UBool partSubstringMatches(const Part &part, const UnicodeString &s) const { 669 return 0==msg.compare(part.index, part.length, s); 670 } 671 672 /** 673 * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE. 674 * @param part a part of this MessagePattern. 675 * @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part. 676 * @stable ICU 4.8 677 */ 678 double getNumericValue(const Part &part) const; 679 680 /** 681 * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified. 682 * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1) 683 * @return the "offset:" value. 684 * @stable ICU 4.8 685 */ 686 double getPluralOffset(int32_t pluralStart) const; 687 688 /** 689 * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start. 690 * @param start The index of some Part data (0..countParts()-1); 691 * this Part should be of Type ARG_START or MSG_START. 692 * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level, 693 * or start itself if getPartType(msgStart)!=ARG|MSG_START. 694 * @stable ICU 4.8 695 */ getLimitPartIndex(int32_t start)696 int32_t getLimitPartIndex(int32_t start) const { 697 int32_t limit=getPart(start).limitPartIndex; 698 if(limit<start) { 699 return start; 700 } 701 return limit; 702 } 703 704 /** 705 * A message pattern "part", representing a pattern parsing event. 706 * There is a part for the start and end of a message or argument, 707 * for quoting and escaping of and with ASCII apostrophes, 708 * and for syntax elements of "complex" arguments. 709 * @stable ICU 4.8 710 */ 711 class Part : public UMemory { 712 public: 713 /** 714 * Default constructor, do not use. 715 * @internal 716 */ Part()717 Part() {} 718 719 /** 720 * Returns the type of this part. 721 * @return the part type. 722 * @stable ICU 4.8 723 */ getType()724 UMessagePatternPartType getType() const { 725 return type; 726 } 727 728 /** 729 * Returns the pattern string index associated with this Part. 730 * @return this part's pattern string index. 731 * @stable ICU 4.8 732 */ getIndex()733 int32_t getIndex() const { 734 return index; 735 } 736 737 /** 738 * Returns the length of the pattern substring associated with this Part. 739 * This is 0 for some parts. 740 * @return this part's pattern substring length. 741 * @stable ICU 4.8 742 */ getLength()743 int32_t getLength() const { 744 return length; 745 } 746 747 /** 748 * Returns the pattern string limit (exclusive-end) index associated with this Part. 749 * Convenience method for getIndex()+getLength(). 750 * @return this part's pattern string limit index, same as getIndex()+getLength(). 751 * @stable ICU 4.8 752 */ getLimit()753 int32_t getLimit() const { 754 return index+length; 755 } 756 757 /** 758 * Returns a value associated with this part. 759 * See the documentation of each part type for details. 760 * @return the part value. 761 * @stable ICU 4.8 762 */ getValue()763 int32_t getValue() const { 764 return value; 765 } 766 767 /** 768 * Returns the argument type if this part is of type ARG_START or ARG_LIMIT, 769 * otherwise UMSGPAT_ARG_TYPE_NONE. 770 * @return the argument type for this part. 771 * @stable ICU 4.8 772 */ getArgType()773 UMessagePatternArgType getArgType() const { 774 UMessagePatternPartType type=getType(); 775 if(type==UMSGPAT_PART_TYPE_ARG_START || type==UMSGPAT_PART_TYPE_ARG_LIMIT) { 776 return (UMessagePatternArgType)value; 777 } else { 778 return UMSGPAT_ARG_TYPE_NONE; 779 } 780 } 781 782 /** 783 * Indicates whether the Part type has a numeric value. 784 * If so, then that numeric value can be retrieved via MessagePattern.getNumericValue(). 785 * @param type The Part type to be tested. 786 * @return TRUE if the Part type has a numeric value. 787 * @stable ICU 4.8 788 */ hasNumericValue(UMessagePatternPartType type)789 static UBool hasNumericValue(UMessagePatternPartType type) { 790 return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE; 791 } 792 793 /** 794 * @param other another object to compare with. 795 * @return TRUE if this object is equivalent to the other one. 796 * @stable ICU 4.8 797 */ 798 UBool operator==(const Part &other) const; 799 800 /** 801 * @param other another object to compare with. 802 * @return FALSE if this object is equivalent to the other one. 803 * @stable ICU 4.8 804 */ 805 inline UBool operator!=(const Part &other) const { 806 return !operator==(other); 807 } 808 809 /** 810 * @return A hash code for this object. 811 * @stable ICU 4.8 812 */ hashCode()813 int32_t hashCode() const { 814 return ((type*37+index)*37+length)*37+value; 815 } 816 817 private: 818 friend class MessagePattern; 819 820 static const int32_t MAX_LENGTH=0xffff; 821 static const int32_t MAX_VALUE=0x7fff; 822 823 // Some fields are not final because they are modified during pattern parsing. 824 // After pattern parsing, the parts are effectively immutable. 825 UMessagePatternPartType type; 826 int32_t index; 827 uint16_t length; 828 int16_t value; 829 int32_t limitPartIndex; 830 }; 831 832 private: 833 void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode); 834 835 void postParse(); 836 837 int32_t parseMessage(int32_t index, int32_t msgStartLength, 838 int32_t nestingLevel, UMessagePatternArgType parentType, 839 UParseError *parseError, UErrorCode &errorCode); 840 841 int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel, 842 UParseError *parseError, UErrorCode &errorCode); 843 844 int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode); 845 846 int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel, 847 UParseError *parseError, UErrorCode &errorCode); 848 849 int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel, 850 UParseError *parseError, UErrorCode &errorCode); 851 852 /** 853 * Validates and parses an argument name or argument number string. 854 * This internal method assumes that the input substring is a "pattern identifier". 855 * @return >=0 if the name is a valid number, 856 * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits, 857 * ARG_NAME_NOT_VALID (-2) if it is neither. 858 * @see #validateArgumentName(String) 859 */ 860 static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit); 861 parseArgNumber(int32_t start,int32_t limit)862 int32_t parseArgNumber(int32_t start, int32_t limit) { 863 return parseArgNumber(msg, start, limit); 864 } 865 866 /** 867 * Parses a number from the specified message substring. 868 * @param start start index into the message string 869 * @param limit limit index into the message string, must be start<limit 870 * @param allowInfinity TRUE if U+221E is allowed (for ChoiceFormat) 871 * @param parseError 872 * @param errorCode 873 */ 874 void parseDouble(int32_t start, int32_t limit, UBool allowInfinity, 875 UParseError *parseError, UErrorCode &errorCode); 876 877 // Java has package-private appendReducedApostrophes() here. 878 // In C++, this is declared in the MessageImpl class. 879 880 int32_t skipWhiteSpace(int32_t index); 881 882 int32_t skipIdentifier(int32_t index); 883 884 /** 885 * Skips a sequence of characters that could occur in a double value. 886 * Does not fully parse or validate the value. 887 */ 888 int32_t skipDouble(int32_t index); 889 890 static UBool isArgTypeChar(UChar32 c); 891 892 UBool isChoice(int32_t index); 893 894 UBool isPlural(int32_t index); 895 896 UBool isSelect(int32_t index); 897 898 UBool isOrdinal(int32_t index); 899 900 /** 901 * @return TRUE if we are inside a MessageFormat (sub-)pattern, 902 * as opposed to inside a top-level choice/plural/select pattern. 903 */ 904 UBool inMessageFormatPattern(int32_t nestingLevel); 905 906 /** 907 * @return TRUE if we are in a MessageFormat sub-pattern 908 * of a top-level ChoiceFormat pattern. 909 */ 910 UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType); 911 912 void addPart(UMessagePatternPartType type, int32_t index, int32_t length, 913 int32_t value, UErrorCode &errorCode); 914 915 void addLimitPart(int32_t start, 916 UMessagePatternPartType type, int32_t index, int32_t length, 917 int32_t value, UErrorCode &errorCode); 918 919 void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode); 920 921 void setParseError(UParseError *parseError, int32_t index); 922 923 UBool init(UErrorCode &errorCode); 924 UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode); 925 926 UMessagePatternApostropheMode aposMode; 927 UnicodeString msg; 928 // ArrayList<Part> parts=new ArrayList<Part>(); 929 MessagePatternPartsList *partsList; 930 Part *parts; 931 int32_t partsLength; 932 // ArrayList<Double> numericValues; 933 MessagePatternDoubleList *numericValuesList; 934 double *numericValues; 935 int32_t numericValuesLength; 936 UBool hasArgNames; 937 UBool hasArgNumbers; 938 UBool needsAutoQuoting; 939 }; 940 941 U_NAMESPACE_END 942 943 #endif // !UCONFIG_NO_FORMATTING 944 945 #endif // __MESSAGEPATTERN_H__ 946