1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2011-2013, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * file name: messagepattern.h 9 * encoding: UTF-8 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2011mar14 14 * created by: Markus W. Scherer 15 */ 16 17 #ifndef __MESSAGEPATTERN_H__ 18 #define __MESSAGEPATTERN_H__ 19 20 /** 21 * \file 22 * \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns. 23 */ 24 25 #include "unicode/utypes.h" 26 27 #if U_SHOW_CPLUSPLUS_API 28 29 #if !UCONFIG_NO_FORMATTING 30 31 #include "unicode/parseerr.h" 32 #include "unicode/unistr.h" 33 34 /** 35 * Mode for when an apostrophe starts quoted literal text for MessageFormat output. 36 * The default is DOUBLE_OPTIONAL unless overridden via uconfig.h 37 * (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE). 38 * <p> 39 * A pair of adjacent apostrophes always results in a single apostrophe in the output, 40 * even when the pair is between two single, text-quoting apostrophes. 41 * <p> 42 * The following table shows examples of desired MessageFormat.format() output 43 * with the pattern strings that yield that output. 44 * <p> 45 * <table> 46 * <tr> 47 * <th>Desired output</th> 48 * <th>DOUBLE_OPTIONAL</th> 49 * <th>DOUBLE_REQUIRED</th> 50 * </tr> 51 * <tr> 52 * <td>I see {many}</td> 53 * <td>I see '{many}'</td> 54 * <td>(same)</td> 55 * </tr> 56 * <tr> 57 * <td>I said {'Wow!'}</td> 58 * <td>I said '{''Wow!''}'</td> 59 * <td>(same)</td> 60 * </tr> 61 * <tr> 62 * <td>I don't know</td> 63 * <td>I don't know OR<br> I don''t know</td> 64 * <td>I don''t know</td> 65 * </tr> 66 * </table> 67 * @stable ICU 4.8 68 * @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE 69 */ 70 enum UMessagePatternApostropheMode { 71 /** 72 * A literal apostrophe is represented by 73 * either a single or a double apostrophe pattern character. 74 * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text 75 * if it immediately precedes a curly brace {}, 76 * or a pipe symbol | if inside a choice format, 77 * or a pound symbol # if inside a plural format. 78 * <p> 79 * This is the default behavior starting with ICU 4.8. 80 * @stable ICU 4.8 81 */ 82 UMSGPAT_APOS_DOUBLE_OPTIONAL, 83 /** 84 * A literal apostrophe must be represented by 85 * a double apostrophe pattern character. 86 * A single apostrophe always starts quoted literal text. 87 * <p> 88 * This is the behavior of ICU 4.6 and earlier, and of the JDK. 89 * @stable ICU 4.8 90 */ 91 UMSGPAT_APOS_DOUBLE_REQUIRED 92 }; 93 /** 94 * @stable ICU 4.8 95 */ 96 typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode; 97 98 /** 99 * MessagePattern::Part type constants. 100 * @stable ICU 4.8 101 */ 102 enum UMessagePatternPartType { 103 /** 104 * Start of a message pattern (main or nested). 105 * The length is 0 for the top-level message 106 * and for a choice argument sub-message, otherwise 1 for the '{'. 107 * The value indicates the nesting level, starting with 0 for the main message. 108 * <p> 109 * There is always a later MSG_LIMIT part. 110 * @stable ICU 4.8 111 */ 112 UMSGPAT_PART_TYPE_MSG_START, 113 /** 114 * End of a message pattern (main or nested). 115 * The length is 0 for the top-level message and 116 * the last sub-message of a choice argument, 117 * otherwise 1 for the '}' or (in a choice argument style) the '|'. 118 * The value indicates the nesting level, starting with 0 for the main message. 119 * @stable ICU 4.8 120 */ 121 UMSGPAT_PART_TYPE_MSG_LIMIT, 122 /** 123 * Indicates a substring of the pattern string which is to be skipped when formatting. 124 * For example, an apostrophe that begins or ends quoted text 125 * would be indicated with such a part. 126 * The value is undefined and currently always 0. 127 * @stable ICU 4.8 128 */ 129 UMSGPAT_PART_TYPE_SKIP_SYNTAX, 130 /** 131 * Indicates that a syntax character needs to be inserted for auto-quoting. 132 * The length is 0. 133 * The value is the character code of the insertion character. (U+0027=APOSTROPHE) 134 * @stable ICU 4.8 135 */ 136 UMSGPAT_PART_TYPE_INSERT_CHAR, 137 /** 138 * Indicates a syntactic (non-escaped) # symbol in a plural variant. 139 * When formatting, replace this part's substring with the 140 * (value-offset) for the plural argument value. 141 * The value is undefined and currently always 0. 142 * @stable ICU 4.8 143 */ 144 UMSGPAT_PART_TYPE_REPLACE_NUMBER, 145 /** 146 * Start of an argument. 147 * The length is 1 for the '{'. 148 * The value is the ordinal value of the ArgType. Use getArgType(). 149 * <p> 150 * This part is followed by either an ARG_NUMBER or ARG_NAME, 151 * followed by optional argument sub-parts (see UMessagePatternArgType constants) 152 * and finally an ARG_LIMIT part. 153 * @stable ICU 4.8 154 */ 155 UMSGPAT_PART_TYPE_ARG_START, 156 /** 157 * End of an argument. 158 * The length is 1 for the '}'. 159 * The value is the ordinal value of the ArgType. Use getArgType(). 160 * @stable ICU 4.8 161 */ 162 UMSGPAT_PART_TYPE_ARG_LIMIT, 163 /** 164 * The argument number, provided by the value. 165 * @stable ICU 4.8 166 */ 167 UMSGPAT_PART_TYPE_ARG_NUMBER, 168 /** 169 * The argument name. 170 * The value is undefined and currently always 0. 171 * @stable ICU 4.8 172 */ 173 UMSGPAT_PART_TYPE_ARG_NAME, 174 /** 175 * The argument type. 176 * The value is undefined and currently always 0. 177 * @stable ICU 4.8 178 */ 179 UMSGPAT_PART_TYPE_ARG_TYPE, 180 /** 181 * The argument style text. 182 * The value is undefined and currently always 0. 183 * @stable ICU 4.8 184 */ 185 UMSGPAT_PART_TYPE_ARG_STYLE, 186 /** 187 * A selector substring in a "complex" argument style. 188 * The value is undefined and currently always 0. 189 * @stable ICU 4.8 190 */ 191 UMSGPAT_PART_TYPE_ARG_SELECTOR, 192 /** 193 * An integer value, for example the offset or an explicit selector value 194 * in a PluralFormat style. 195 * The part value is the integer value. 196 * @stable ICU 4.8 197 */ 198 UMSGPAT_PART_TYPE_ARG_INT, 199 /** 200 * A numeric value, for example the offset or an explicit selector value 201 * in a PluralFormat style. 202 * The part value is an index into an internal array of numeric values; 203 * use getNumericValue(). 204 * @stable ICU 4.8 205 */ 206 UMSGPAT_PART_TYPE_ARG_DOUBLE 207 }; 208 /** 209 * @stable ICU 4.8 210 */ 211 typedef enum UMessagePatternPartType UMessagePatternPartType; 212 213 /** 214 * Argument type constants. 215 * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts. 216 * 217 * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT, 218 * with a nesting level one greater than the surrounding message. 219 * @stable ICU 4.8 220 */ 221 enum UMessagePatternArgType { 222 /** 223 * The argument has no specified type. 224 * @stable ICU 4.8 225 */ 226 UMSGPAT_ARG_TYPE_NONE, 227 /** 228 * The argument has a "simple" type which is provided by the ARG_TYPE part. 229 * An ARG_STYLE part might follow that. 230 * @stable ICU 4.8 231 */ 232 UMSGPAT_ARG_TYPE_SIMPLE, 233 /** 234 * The argument is a ChoiceFormat with one or more 235 * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples. 236 * @stable ICU 4.8 237 */ 238 UMSGPAT_ARG_TYPE_CHOICE, 239 /** 240 * The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset 241 * (e.g., offset:1) 242 * and one or more (ARG_SELECTOR [explicit-value] message) tuples. 243 * If the selector has an explicit value (e.g., =2), then 244 * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message. 245 * Otherwise the message immediately follows the ARG_SELECTOR. 246 * @stable ICU 4.8 247 */ 248 UMSGPAT_ARG_TYPE_PLURAL, 249 /** 250 * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs. 251 * @stable ICU 4.8 252 */ 253 UMSGPAT_ARG_TYPE_SELECT, 254 /** 255 * The argument is an ordinal-number PluralFormat 256 * with the same style parts sequence and semantics as UMSGPAT_ARG_TYPE_PLURAL. 257 * @stable ICU 50 258 */ 259 UMSGPAT_ARG_TYPE_SELECTORDINAL 260 }; 261 /** 262 * @stable ICU 4.8 263 */ 264 typedef enum UMessagePatternArgType UMessagePatternArgType; 265 266 /** 267 * \def UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE 268 * Returns TRUE if the argument type has a plural style part sequence and semantics, 269 * for example UMSGPAT_ARG_TYPE_PLURAL and UMSGPAT_ARG_TYPE_SELECTORDINAL. 270 * @stable ICU 50 271 */ 272 #define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \ 273 ((argType)==UMSGPAT_ARG_TYPE_PLURAL || (argType)==UMSGPAT_ARG_TYPE_SELECTORDINAL) 274 275 enum { 276 /** 277 * Return value from MessagePattern.validateArgumentName() for when 278 * the string is a valid "pattern identifier" but not a number. 279 * @stable ICU 4.8 280 */ 281 UMSGPAT_ARG_NAME_NOT_NUMBER=-1, 282 283 /** 284 * Return value from MessagePattern.validateArgumentName() for when 285 * the string is invalid. 286 * It might not be a valid "pattern identifier", 287 * or it have only ASCII digits but there is a leading zero or the number is too large. 288 * @stable ICU 4.8 289 */ 290 UMSGPAT_ARG_NAME_NOT_VALID=-2 291 }; 292 293 /** 294 * Special value that is returned by getNumericValue(Part) when no 295 * numeric value is defined for a part. 296 * @see MessagePattern.getNumericValue() 297 * @stable ICU 4.8 298 */ 299 #define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789)) 300 301 U_NAMESPACE_BEGIN 302 303 class MessagePatternDoubleList; 304 class MessagePatternPartsList; 305 306 /** 307 * Parses and represents ICU MessageFormat patterns. 308 * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat. 309 * Used in the implementations of those classes as well as in tools 310 * for message validation, translation and format conversion. 311 * <p> 312 * The parser handles all syntax relevant for identifying message arguments. 313 * This includes "complex" arguments whose style strings contain 314 * nested MessageFormat pattern substrings. 315 * For "simple" arguments (with no nested MessageFormat pattern substrings), 316 * the argument style is not parsed any further. 317 * <p> 318 * The parser handles named and numbered message arguments and allows both in one message. 319 * <p> 320 * Once a pattern has been parsed successfully, iterate through the parsed data 321 * with countParts(), getPart() and related methods. 322 * <p> 323 * The data logically represents a parse tree, but is stored and accessed 324 * as a list of "parts" for fast and simple parsing and to minimize object allocations. 325 * Arguments and nested messages are best handled via recursion. 326 * For every _START "part", MessagePattern.getLimitPartIndex() efficiently returns 327 * the index of the corresponding _LIMIT "part". 328 * <p> 329 * List of "parts": 330 * <pre> 331 * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT 332 * argument = noneArg | simpleArg | complexArg 333 * complexArg = choiceArg | pluralArg | selectArg 334 * 335 * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE 336 * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE 337 * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE 338 * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL 339 * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT 340 * 341 * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+ 342 * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+ 343 * selectStyle = (ARG_SELECTOR message)+ 344 * </pre> 345 * <ul> 346 * <li>Literal output text is not represented directly by "parts" but accessed 347 * between parts of a message, from one part's getLimit() to the next part's getIndex(). 348 * <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE. 349 * <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or 350 * the less-than-or-equal-to sign (U+2264). 351 * <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value. 352 * The optional numeric Part between each (ARG_SELECTOR, message) pair 353 * is the value of an explicit-number selector like "=2", 354 * otherwise the selector is a non-numeric identifier. 355 * <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle. 356 * </ul> 357 * <p> 358 * This class is not intended for public subclassing. 359 * 360 * @stable ICU 4.8 361 */ 362 class U_COMMON_API MessagePattern : public UObject { 363 public: 364 /** 365 * Constructs an empty MessagePattern with default UMessagePatternApostropheMode. 366 * @param errorCode Standard ICU error code. Its input value must 367 * pass the U_SUCCESS() test, or else the function returns 368 * immediately. Check for U_FAILURE() on output or use with 369 * function chaining. (See User Guide for details.) 370 * @stable ICU 4.8 371 */ 372 MessagePattern(UErrorCode &errorCode); 373 374 /** 375 * Constructs an empty MessagePattern. 376 * @param mode Explicit UMessagePatternApostropheMode. 377 * @param errorCode Standard ICU error code. Its input value must 378 * pass the U_SUCCESS() test, or else the function returns 379 * immediately. Check for U_FAILURE() on output or use with 380 * function chaining. (See User Guide for details.) 381 * @stable ICU 4.8 382 */ 383 MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode); 384 385 /** 386 * Constructs a MessagePattern with default UMessagePatternApostropheMode and 387 * parses the MessageFormat pattern string. 388 * @param pattern a MessageFormat pattern string 389 * @param parseError Struct to receive information on the position 390 * of an error within the pattern. 391 * Can be NULL. 392 * @param errorCode Standard ICU error code. Its input value must 393 * pass the U_SUCCESS() test, or else the function returns 394 * immediately. Check for U_FAILURE() on output or use with 395 * function chaining. (See User Guide for details.) 396 * TODO: turn @throws into UErrorCode specifics? 397 * @throws IllegalArgumentException for syntax errors in the pattern string 398 * @throws IndexOutOfBoundsException if certain limits are exceeded 399 * (e.g., argument number too high, argument name too long, etc.) 400 * @throws NumberFormatException if a number could not be parsed 401 * @stable ICU 4.8 402 */ 403 MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode); 404 405 /** 406 * Copy constructor. 407 * @param other Object to copy. 408 * @stable ICU 4.8 409 */ 410 MessagePattern(const MessagePattern &other); 411 412 /** 413 * Assignment operator. 414 * @param other Object to copy. 415 * @return *this=other 416 * @stable ICU 4.8 417 */ 418 MessagePattern &operator=(const MessagePattern &other); 419 420 /** 421 * Destructor. 422 * @stable ICU 4.8 423 */ 424 virtual ~MessagePattern(); 425 426 /** 427 * Parses a MessageFormat pattern string. 428 * @param pattern a MessageFormat pattern string 429 * @param parseError Struct to receive information on the position 430 * of an error within the pattern. 431 * Can be NULL. 432 * @param errorCode Standard ICU error code. Its input value must 433 * pass the U_SUCCESS() test, or else the function returns 434 * immediately. Check for U_FAILURE() on output or use with 435 * function chaining. (See User Guide for details.) 436 * @return *this 437 * @throws IllegalArgumentException for syntax errors in the pattern string 438 * @throws IndexOutOfBoundsException if certain limits are exceeded 439 * (e.g., argument number too high, argument name too long, etc.) 440 * @throws NumberFormatException if a number could not be parsed 441 * @stable ICU 4.8 442 */ 443 MessagePattern &parse(const UnicodeString &pattern, 444 UParseError *parseError, UErrorCode &errorCode); 445 446 /** 447 * Parses a ChoiceFormat pattern string. 448 * @param pattern a ChoiceFormat pattern string 449 * @param parseError Struct to receive information on the position 450 * of an error within the pattern. 451 * Can be NULL. 452 * @param errorCode Standard ICU error code. Its input value must 453 * pass the U_SUCCESS() test, or else the function returns 454 * immediately. Check for U_FAILURE() on output or use with 455 * function chaining. (See User Guide for details.) 456 * @return *this 457 * @throws IllegalArgumentException for syntax errors in the pattern string 458 * @throws IndexOutOfBoundsException if certain limits are exceeded 459 * (e.g., argument number too high, argument name too long, etc.) 460 * @throws NumberFormatException if a number could not be parsed 461 * @stable ICU 4.8 462 */ 463 MessagePattern &parseChoiceStyle(const UnicodeString &pattern, 464 UParseError *parseError, UErrorCode &errorCode); 465 466 /** 467 * Parses a PluralFormat pattern string. 468 * @param pattern a PluralFormat pattern string 469 * @param parseError Struct to receive information on the position 470 * of an error within the pattern. 471 * Can be NULL. 472 * @param errorCode Standard ICU error code. Its input value must 473 * pass the U_SUCCESS() test, or else the function returns 474 * immediately. Check for U_FAILURE() on output or use with 475 * function chaining. (See User Guide for details.) 476 * @return *this 477 * @throws IllegalArgumentException for syntax errors in the pattern string 478 * @throws IndexOutOfBoundsException if certain limits are exceeded 479 * (e.g., argument number too high, argument name too long, etc.) 480 * @throws NumberFormatException if a number could not be parsed 481 * @stable ICU 4.8 482 */ 483 MessagePattern &parsePluralStyle(const UnicodeString &pattern, 484 UParseError *parseError, UErrorCode &errorCode); 485 486 /** 487 * Parses a SelectFormat pattern string. 488 * @param pattern a SelectFormat pattern string 489 * @param parseError Struct to receive information on the position 490 * of an error within the pattern. 491 * Can be NULL. 492 * @param errorCode Standard ICU error code. Its input value must 493 * pass the U_SUCCESS() test, or else the function returns 494 * immediately. Check for U_FAILURE() on output or use with 495 * function chaining. (See User Guide for details.) 496 * @return *this 497 * @throws IllegalArgumentException for syntax errors in the pattern string 498 * @throws IndexOutOfBoundsException if certain limits are exceeded 499 * (e.g., argument number too high, argument name too long, etc.) 500 * @throws NumberFormatException if a number could not be parsed 501 * @stable ICU 4.8 502 */ 503 MessagePattern &parseSelectStyle(const UnicodeString &pattern, 504 UParseError *parseError, UErrorCode &errorCode); 505 506 /** 507 * Clears this MessagePattern. 508 * countParts() will return 0. 509 * @stable ICU 4.8 510 */ 511 void clear(); 512 513 /** 514 * Clears this MessagePattern and sets the UMessagePatternApostropheMode. 515 * countParts() will return 0. 516 * @param mode The new UMessagePatternApostropheMode. 517 * @stable ICU 4.8 518 */ clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode)519 void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) { 520 clear(); 521 aposMode=mode; 522 } 523 524 /** 525 * @param other another object to compare with. 526 * @return TRUE if this object is equivalent to the other one. 527 * @stable ICU 4.8 528 */ 529 UBool operator==(const MessagePattern &other) const; 530 531 /** 532 * @param other another object to compare with. 533 * @return FALSE if this object is equivalent to the other one. 534 * @stable ICU 4.8 535 */ 536 inline UBool operator!=(const MessagePattern &other) const { 537 return !operator==(other); 538 } 539 540 /** 541 * @return A hash code for this object. 542 * @stable ICU 4.8 543 */ 544 int32_t hashCode() const; 545 546 /** 547 * @return this instance's UMessagePatternApostropheMode. 548 * @stable ICU 4.8 549 */ getApostropheMode()550 UMessagePatternApostropheMode getApostropheMode() const { 551 return aposMode; 552 } 553 554 // Java has package-private jdkAposMode() here. 555 // In C++, this is declared in the MessageImpl class. 556 557 /** 558 * @return the parsed pattern string (null if none was parsed). 559 * @stable ICU 4.8 560 */ getPatternString()561 const UnicodeString &getPatternString() const { 562 return msg; 563 } 564 565 /** 566 * Does the parsed pattern have named arguments like {first_name}? 567 * @return TRUE if the parsed pattern has at least one named argument. 568 * @stable ICU 4.8 569 */ hasNamedArguments()570 UBool hasNamedArguments() const { 571 return hasArgNames; 572 } 573 574 /** 575 * Does the parsed pattern have numbered arguments like {2}? 576 * @return TRUE if the parsed pattern has at least one numbered argument. 577 * @stable ICU 4.8 578 */ hasNumberedArguments()579 UBool hasNumberedArguments() const { 580 return hasArgNumbers; 581 } 582 583 /** 584 * Validates and parses an argument name or argument number string. 585 * An argument name must be a "pattern identifier", that is, it must contain 586 * no Unicode Pattern_Syntax or Pattern_White_Space characters. 587 * If it only contains ASCII digits, then it must be a small integer with no leading zero. 588 * @param name Input string. 589 * @return >=0 if the name is a valid number, 590 * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits, 591 * ARG_NAME_NOT_VALID (-2) if it is neither. 592 * @stable ICU 4.8 593 */ 594 static int32_t validateArgumentName(const UnicodeString &name); 595 596 /** 597 * Returns a version of the parsed pattern string where each ASCII apostrophe 598 * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax. 599 * <p> 600 * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}." 601 * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}." 602 * @return the deep-auto-quoted version of the parsed pattern string. 603 * @see MessageFormat.autoQuoteApostrophe() 604 * @stable ICU 4.8 605 */ 606 UnicodeString autoQuoteApostropheDeep() const; 607 608 class Part; 609 610 /** 611 * Returns the number of "parts" created by parsing the pattern string. 612 * Returns 0 if no pattern has been parsed or clear() was called. 613 * @return the number of pattern parts. 614 * @stable ICU 4.8 615 */ countParts()616 int32_t countParts() const { 617 return partsLength; 618 } 619 620 /** 621 * Gets the i-th pattern "part". 622 * @param i The index of the Part data. (0..countParts()-1) 623 * @return the i-th pattern "part". 624 * @stable ICU 4.8 625 */ getPart(int32_t i)626 const Part &getPart(int32_t i) const { 627 return parts[i]; 628 } 629 630 /** 631 * Returns the UMessagePatternPartType of the i-th pattern "part". 632 * Convenience method for getPart(i).getType(). 633 * @param i The index of the Part data. (0..countParts()-1) 634 * @return The UMessagePatternPartType of the i-th Part. 635 * @stable ICU 4.8 636 */ getPartType(int32_t i)637 UMessagePatternPartType getPartType(int32_t i) const { 638 return getPart(i).type; 639 } 640 641 /** 642 * Returns the pattern index of the specified pattern "part". 643 * Convenience method for getPart(partIndex).getIndex(). 644 * @param partIndex The index of the Part data. (0..countParts()-1) 645 * @return The pattern index of this Part. 646 * @stable ICU 4.8 647 */ getPatternIndex(int32_t partIndex)648 int32_t getPatternIndex(int32_t partIndex) const { 649 return getPart(partIndex).index; 650 } 651 652 /** 653 * Returns the substring of the pattern string indicated by the Part. 654 * Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()). 655 * @param part a part of this MessagePattern. 656 * @return the substring associated with part. 657 * @stable ICU 4.8 658 */ getSubstring(const Part & part)659 UnicodeString getSubstring(const Part &part) const { 660 return msg.tempSubString(part.index, part.length); 661 } 662 663 /** 664 * Compares the part's substring with the input string s. 665 * @param part a part of this MessagePattern. 666 * @param s a string. 667 * @return TRUE if getSubstring(part).equals(s). 668 * @stable ICU 4.8 669 */ partSubstringMatches(const Part & part,const UnicodeString & s)670 UBool partSubstringMatches(const Part &part, const UnicodeString &s) const { 671 return 0==msg.compare(part.index, part.length, s); 672 } 673 674 /** 675 * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE. 676 * @param part a part of this MessagePattern. 677 * @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part. 678 * @stable ICU 4.8 679 */ 680 double getNumericValue(const Part &part) const; 681 682 /** 683 * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified. 684 * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1) 685 * @return the "offset:" value. 686 * @stable ICU 4.8 687 */ 688 double getPluralOffset(int32_t pluralStart) const; 689 690 /** 691 * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start. 692 * @param start The index of some Part data (0..countParts()-1); 693 * this Part should be of Type ARG_START or MSG_START. 694 * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level, 695 * or start itself if getPartType(msgStart)!=ARG|MSG_START. 696 * @stable ICU 4.8 697 */ getLimitPartIndex(int32_t start)698 int32_t getLimitPartIndex(int32_t start) const { 699 int32_t limit=getPart(start).limitPartIndex; 700 if(limit<start) { 701 return start; 702 } 703 return limit; 704 } 705 706 /** 707 * A message pattern "part", representing a pattern parsing event. 708 * There is a part for the start and end of a message or argument, 709 * for quoting and escaping of and with ASCII apostrophes, 710 * and for syntax elements of "complex" arguments. 711 * @stable ICU 4.8 712 */ 713 class Part : public UMemory { 714 public: 715 /** 716 * Default constructor, do not use. 717 * @internal 718 */ Part()719 Part() {} 720 721 /** 722 * Returns the type of this part. 723 * @return the part type. 724 * @stable ICU 4.8 725 */ getType()726 UMessagePatternPartType getType() const { 727 return type; 728 } 729 730 /** 731 * Returns the pattern string index associated with this Part. 732 * @return this part's pattern string index. 733 * @stable ICU 4.8 734 */ getIndex()735 int32_t getIndex() const { 736 return index; 737 } 738 739 /** 740 * Returns the length of the pattern substring associated with this Part. 741 * This is 0 for some parts. 742 * @return this part's pattern substring length. 743 * @stable ICU 4.8 744 */ getLength()745 int32_t getLength() const { 746 return length; 747 } 748 749 /** 750 * Returns the pattern string limit (exclusive-end) index associated with this Part. 751 * Convenience method for getIndex()+getLength(). 752 * @return this part's pattern string limit index, same as getIndex()+getLength(). 753 * @stable ICU 4.8 754 */ getLimit()755 int32_t getLimit() const { 756 return index+length; 757 } 758 759 /** 760 * Returns a value associated with this part. 761 * See the documentation of each part type for details. 762 * @return the part value. 763 * @stable ICU 4.8 764 */ getValue()765 int32_t getValue() const { 766 return value; 767 } 768 769 /** 770 * Returns the argument type if this part is of type ARG_START or ARG_LIMIT, 771 * otherwise UMSGPAT_ARG_TYPE_NONE. 772 * @return the argument type for this part. 773 * @stable ICU 4.8 774 */ getArgType()775 UMessagePatternArgType getArgType() const { 776 UMessagePatternPartType msgType=getType(); 777 if(msgType ==UMSGPAT_PART_TYPE_ARG_START || msgType ==UMSGPAT_PART_TYPE_ARG_LIMIT) { 778 return (UMessagePatternArgType)value; 779 } else { 780 return UMSGPAT_ARG_TYPE_NONE; 781 } 782 } 783 784 /** 785 * Indicates whether the Part type has a numeric value. 786 * If so, then that numeric value can be retrieved via MessagePattern.getNumericValue(). 787 * @param type The Part type to be tested. 788 * @return TRUE if the Part type has a numeric value. 789 * @stable ICU 4.8 790 */ hasNumericValue(UMessagePatternPartType type)791 static UBool hasNumericValue(UMessagePatternPartType type) { 792 return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE; 793 } 794 795 /** 796 * @param other another object to compare with. 797 * @return TRUE if this object is equivalent to the other one. 798 * @stable ICU 4.8 799 */ 800 UBool operator==(const Part &other) const; 801 802 /** 803 * @param other another object to compare with. 804 * @return FALSE if this object is equivalent to the other one. 805 * @stable ICU 4.8 806 */ 807 inline UBool operator!=(const Part &other) const { 808 return !operator==(other); 809 } 810 811 /** 812 * @return A hash code for this object. 813 * @stable ICU 4.8 814 */ hashCode()815 int32_t hashCode() const { 816 return ((type*37+index)*37+length)*37+value; 817 } 818 819 private: 820 friend class MessagePattern; 821 822 static const int32_t MAX_LENGTH=0xffff; 823 static const int32_t MAX_VALUE=0x7fff; 824 825 // Some fields are not final because they are modified during pattern parsing. 826 // After pattern parsing, the parts are effectively immutable. 827 UMessagePatternPartType type; 828 int32_t index; 829 uint16_t length; 830 int16_t value; 831 int32_t limitPartIndex; 832 }; 833 834 private: 835 void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode); 836 837 void postParse(); 838 839 int32_t parseMessage(int32_t index, int32_t msgStartLength, 840 int32_t nestingLevel, UMessagePatternArgType parentType, 841 UParseError *parseError, UErrorCode &errorCode); 842 843 int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel, 844 UParseError *parseError, UErrorCode &errorCode); 845 846 int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode); 847 848 int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel, 849 UParseError *parseError, UErrorCode &errorCode); 850 851 int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel, 852 UParseError *parseError, UErrorCode &errorCode); 853 854 /** 855 * Validates and parses an argument name or argument number string. 856 * This internal method assumes that the input substring is a "pattern identifier". 857 * @return >=0 if the name is a valid number, 858 * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits, 859 * ARG_NAME_NOT_VALID (-2) if it is neither. 860 * @see #validateArgumentName(String) 861 */ 862 static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit); 863 parseArgNumber(int32_t start,int32_t limit)864 int32_t parseArgNumber(int32_t start, int32_t limit) { 865 return parseArgNumber(msg, start, limit); 866 } 867 868 /** 869 * Parses a number from the specified message substring. 870 * @param start start index into the message string 871 * @param limit limit index into the message string, must be start<limit 872 * @param allowInfinity TRUE if U+221E is allowed (for ChoiceFormat) 873 * @param parseError 874 * @param errorCode 875 */ 876 void parseDouble(int32_t start, int32_t limit, UBool allowInfinity, 877 UParseError *parseError, UErrorCode &errorCode); 878 879 // Java has package-private appendReducedApostrophes() here. 880 // In C++, this is declared in the MessageImpl class. 881 882 int32_t skipWhiteSpace(int32_t index); 883 884 int32_t skipIdentifier(int32_t index); 885 886 /** 887 * Skips a sequence of characters that could occur in a double value. 888 * Does not fully parse or validate the value. 889 */ 890 int32_t skipDouble(int32_t index); 891 892 static UBool isArgTypeChar(UChar32 c); 893 894 UBool isChoice(int32_t index); 895 896 UBool isPlural(int32_t index); 897 898 UBool isSelect(int32_t index); 899 900 UBool isOrdinal(int32_t index); 901 902 /** 903 * @return TRUE if we are inside a MessageFormat (sub-)pattern, 904 * as opposed to inside a top-level choice/plural/select pattern. 905 */ 906 UBool inMessageFormatPattern(int32_t nestingLevel); 907 908 /** 909 * @return TRUE if we are in a MessageFormat sub-pattern 910 * of a top-level ChoiceFormat pattern. 911 */ 912 UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType); 913 914 void addPart(UMessagePatternPartType type, int32_t index, int32_t length, 915 int32_t value, UErrorCode &errorCode); 916 917 void addLimitPart(int32_t start, 918 UMessagePatternPartType type, int32_t index, int32_t length, 919 int32_t value, UErrorCode &errorCode); 920 921 void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode); 922 923 void setParseError(UParseError *parseError, int32_t index); 924 925 UBool init(UErrorCode &errorCode); 926 UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode); 927 928 UMessagePatternApostropheMode aposMode; 929 UnicodeString msg; 930 // ArrayList<Part> parts=new ArrayList<Part>(); 931 MessagePatternPartsList *partsList; 932 Part *parts; 933 int32_t partsLength; 934 // ArrayList<Double> numericValues; 935 MessagePatternDoubleList *numericValuesList; 936 double *numericValues; 937 int32_t numericValuesLength; 938 UBool hasArgNames; 939 UBool hasArgNumbers; 940 UBool needsAutoQuoting; 941 }; 942 943 U_NAMESPACE_END 944 945 #endif // !UCONFIG_NO_FORMATTING 946 947 #endif /* U_SHOW_CPLUSPLUS_API */ 948 949 #endif // __MESSAGEPATTERN_H__ 950