1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2011-2012, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * file name: messagepattern.cpp 9 * encoding: UTF-8 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2011mar14 14 * created by: Markus W. Scherer 15 */ 16 17 #include "unicode/utypes.h" 18 19 #if !UCONFIG_NO_FORMATTING 20 21 #include "unicode/messagepattern.h" 22 #include "unicode/unistr.h" 23 #include "unicode/utf16.h" 24 #include "cmemory.h" 25 #include "cstring.h" 26 #include "messageimpl.h" 27 #include "patternprops.h" 28 #include "putilimp.h" 29 #include "uassert.h" 30 31 U_NAMESPACE_BEGIN 32 33 // Unicode character/code point constants ---------------------------------- *** 34 35 static const UChar u_pound=0x23; 36 static const UChar u_apos=0x27; 37 static const UChar u_plus=0x2B; 38 static const UChar u_comma=0x2C; 39 static const UChar u_minus=0x2D; 40 static const UChar u_dot=0x2E; 41 static const UChar u_colon=0x3A; 42 static const UChar u_lessThan=0x3C; 43 static const UChar u_equal=0x3D; 44 static const UChar u_A=0x41; 45 static const UChar u_C=0x43; 46 static const UChar u_D=0x44; 47 static const UChar u_E=0x45; 48 static const UChar u_H=0x48; 49 static const UChar u_I=0x49; 50 static const UChar u_L=0x4C; 51 static const UChar u_N=0x4E; 52 static const UChar u_O=0x4F; 53 static const UChar u_P=0x50; 54 static const UChar u_R=0x52; 55 static const UChar u_S=0x53; 56 static const UChar u_T=0x54; 57 static const UChar u_U=0x55; 58 static const UChar u_Z=0x5A; 59 static const UChar u_a=0x61; 60 static const UChar u_c=0x63; 61 static const UChar u_d=0x64; 62 static const UChar u_e=0x65; 63 static const UChar u_f=0x66; 64 static const UChar u_h=0x68; 65 static const UChar u_i=0x69; 66 static const UChar u_l=0x6C; 67 static const UChar u_n=0x6E; 68 static const UChar u_o=0x6F; 69 static const UChar u_p=0x70; 70 static const UChar u_r=0x72; 71 static const UChar u_s=0x73; 72 static const UChar u_t=0x74; 73 static const UChar u_u=0x75; 74 static const UChar u_z=0x7A; 75 static const UChar u_leftCurlyBrace=0x7B; 76 static const UChar u_pipe=0x7C; 77 static const UChar u_rightCurlyBrace=0x7D; 78 static const UChar u_lessOrEqual=0x2264; // U+2264 is <= 79 80 static const UChar kOffsetColon[]={ // "offset:" 81 u_o, u_f, u_f, u_s, u_e, u_t, u_colon 82 }; 83 84 static const UChar kOther[]={ // "other" 85 u_o, u_t, u_h, u_e, u_r 86 }; 87 88 // MessagePatternList ------------------------------------------------------ *** 89 90 template<typename T, int32_t stackCapacity> 91 class MessagePatternList : public UMemory { 92 public: MessagePatternList()93 MessagePatternList() {} 94 void copyFrom(const MessagePatternList<T, stackCapacity> &other, 95 int32_t length, 96 UErrorCode &errorCode); 97 UBool ensureCapacityForOneMore(int32_t oldLength, UErrorCode &errorCode); equals(const MessagePatternList<T,stackCapacity> & other,int32_t length) const98 UBool equals(const MessagePatternList<T, stackCapacity> &other, int32_t length) const { 99 for(int32_t i=0; i<length; ++i) { 100 if(a[i]!=other.a[i]) { return FALSE; } 101 } 102 return TRUE; 103 } 104 105 MaybeStackArray<T, stackCapacity> a; 106 }; 107 108 template<typename T, int32_t stackCapacity> 109 void copyFrom(const MessagePatternList<T,stackCapacity> & other,int32_t length,UErrorCode & errorCode)110 MessagePatternList<T, stackCapacity>::copyFrom( 111 const MessagePatternList<T, stackCapacity> &other, 112 int32_t length, 113 UErrorCode &errorCode) { 114 if(U_SUCCESS(errorCode) && length>0) { 115 if(length>a.getCapacity() && NULL==a.resize(length)) { 116 errorCode=U_MEMORY_ALLOCATION_ERROR; 117 return; 118 } 119 uprv_memcpy(a.getAlias(), other.a.getAlias(), (size_t)length*sizeof(T)); 120 } 121 } 122 123 template<typename T, int32_t stackCapacity> 124 UBool ensureCapacityForOneMore(int32_t oldLength,UErrorCode & errorCode)125 MessagePatternList<T, stackCapacity>::ensureCapacityForOneMore(int32_t oldLength, UErrorCode &errorCode) { 126 if(U_FAILURE(errorCode)) { 127 return FALSE; 128 } 129 if(a.getCapacity()>oldLength || a.resize(2*oldLength, oldLength)!=NULL) { 130 return TRUE; 131 } 132 errorCode=U_MEMORY_ALLOCATION_ERROR; 133 return FALSE; 134 } 135 136 // MessagePatternList specializations -------------------------------------- *** 137 138 class MessagePatternDoubleList : public MessagePatternList<double, 8> { 139 }; 140 141 class MessagePatternPartsList : public MessagePatternList<MessagePattern::Part, 32> { 142 }; 143 144 // MessagePattern constructors etc. ---------------------------------------- *** 145 MessagePattern(UErrorCode & errorCode)146 MessagePattern::MessagePattern(UErrorCode &errorCode) 147 : aposMode(UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE), 148 partsList(NULL), parts(NULL), partsLength(0), 149 numericValuesList(NULL), numericValues(NULL), numericValuesLength(0), 150 hasArgNames(FALSE), hasArgNumbers(FALSE), needsAutoQuoting(FALSE) { 151 init(errorCode); 152 } 153 MessagePattern(UMessagePatternApostropheMode mode,UErrorCode & errorCode)154 MessagePattern::MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode) 155 : aposMode(mode), 156 partsList(NULL), parts(NULL), partsLength(0), 157 numericValuesList(NULL), numericValues(NULL), numericValuesLength(0), 158 hasArgNames(FALSE), hasArgNumbers(FALSE), needsAutoQuoting(FALSE) { 159 init(errorCode); 160 } 161 MessagePattern(const UnicodeString & pattern,UParseError * parseError,UErrorCode & errorCode)162 MessagePattern::MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode) 163 : aposMode(UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE), 164 partsList(NULL), parts(NULL), partsLength(0), 165 numericValuesList(NULL), numericValues(NULL), numericValuesLength(0), 166 hasArgNames(FALSE), hasArgNumbers(FALSE), needsAutoQuoting(FALSE) { 167 if(init(errorCode)) { 168 parse(pattern, parseError, errorCode); 169 } 170 } 171 172 UBool init(UErrorCode & errorCode)173 MessagePattern::init(UErrorCode &errorCode) { 174 if(U_FAILURE(errorCode)) { 175 return FALSE; 176 } 177 partsList=new MessagePatternPartsList(); 178 if(partsList==NULL) { 179 errorCode=U_MEMORY_ALLOCATION_ERROR; 180 return FALSE; 181 } 182 parts=partsList->a.getAlias(); 183 return TRUE; 184 } 185 MessagePattern(const MessagePattern & other)186 MessagePattern::MessagePattern(const MessagePattern &other) 187 : UObject(other), aposMode(other.aposMode), msg(other.msg), 188 partsList(NULL), parts(NULL), partsLength(0), 189 numericValuesList(NULL), numericValues(NULL), numericValuesLength(0), 190 hasArgNames(other.hasArgNames), hasArgNumbers(other.hasArgNumbers), 191 needsAutoQuoting(other.needsAutoQuoting) { 192 UErrorCode errorCode=U_ZERO_ERROR; 193 if(!copyStorage(other, errorCode)) { 194 clear(); 195 } 196 } 197 198 MessagePattern & operator =(const MessagePattern & other)199 MessagePattern::operator=(const MessagePattern &other) { 200 if(this==&other) { 201 return *this; 202 } 203 aposMode=other.aposMode; 204 msg=other.msg; 205 hasArgNames=other.hasArgNames; 206 hasArgNumbers=other.hasArgNumbers; 207 needsAutoQuoting=other.needsAutoQuoting; 208 UErrorCode errorCode=U_ZERO_ERROR; 209 if(!copyStorage(other, errorCode)) { 210 clear(); 211 } 212 return *this; 213 } 214 215 UBool copyStorage(const MessagePattern & other,UErrorCode & errorCode)216 MessagePattern::copyStorage(const MessagePattern &other, UErrorCode &errorCode) { 217 if(U_FAILURE(errorCode)) { 218 return FALSE; 219 } 220 parts=NULL; 221 partsLength=0; 222 numericValues=NULL; 223 numericValuesLength=0; 224 if(partsList==NULL) { 225 partsList=new MessagePatternPartsList(); 226 if(partsList==NULL) { 227 errorCode=U_MEMORY_ALLOCATION_ERROR; 228 return FALSE; 229 } 230 parts=partsList->a.getAlias(); 231 } 232 if(other.partsLength>0) { 233 partsList->copyFrom(*other.partsList, other.partsLength, errorCode); 234 if(U_FAILURE(errorCode)) { 235 return FALSE; 236 } 237 parts=partsList->a.getAlias(); 238 partsLength=other.partsLength; 239 } 240 if(other.numericValuesLength>0) { 241 if(numericValuesList==NULL) { 242 numericValuesList=new MessagePatternDoubleList(); 243 if(numericValuesList==NULL) { 244 errorCode=U_MEMORY_ALLOCATION_ERROR; 245 return FALSE; 246 } 247 numericValues=numericValuesList->a.getAlias(); 248 } 249 numericValuesList->copyFrom( 250 *other.numericValuesList, other.numericValuesLength, errorCode); 251 if(U_FAILURE(errorCode)) { 252 return FALSE; 253 } 254 numericValues=numericValuesList->a.getAlias(); 255 numericValuesLength=other.numericValuesLength; 256 } 257 return TRUE; 258 } 259 ~MessagePattern()260 MessagePattern::~MessagePattern() { 261 delete partsList; 262 delete numericValuesList; 263 } 264 265 // MessagePattern API ------------------------------------------------------ *** 266 267 MessagePattern & parse(const UnicodeString & pattern,UParseError * parseError,UErrorCode & errorCode)268 MessagePattern::parse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode) { 269 preParse(pattern, parseError, errorCode); 270 parseMessage(0, 0, 0, UMSGPAT_ARG_TYPE_NONE, parseError, errorCode); 271 postParse(); 272 return *this; 273 } 274 275 MessagePattern & parseChoiceStyle(const UnicodeString & pattern,UParseError * parseError,UErrorCode & errorCode)276 MessagePattern::parseChoiceStyle(const UnicodeString &pattern, 277 UParseError *parseError, UErrorCode &errorCode) { 278 preParse(pattern, parseError, errorCode); 279 parseChoiceStyle(0, 0, parseError, errorCode); 280 postParse(); 281 return *this; 282 } 283 284 MessagePattern & parsePluralStyle(const UnicodeString & pattern,UParseError * parseError,UErrorCode & errorCode)285 MessagePattern::parsePluralStyle(const UnicodeString &pattern, 286 UParseError *parseError, UErrorCode &errorCode) { 287 preParse(pattern, parseError, errorCode); 288 parsePluralOrSelectStyle(UMSGPAT_ARG_TYPE_PLURAL, 0, 0, parseError, errorCode); 289 postParse(); 290 return *this; 291 } 292 293 MessagePattern & parseSelectStyle(const UnicodeString & pattern,UParseError * parseError,UErrorCode & errorCode)294 MessagePattern::parseSelectStyle(const UnicodeString &pattern, 295 UParseError *parseError, UErrorCode &errorCode) { 296 preParse(pattern, parseError, errorCode); 297 parsePluralOrSelectStyle(UMSGPAT_ARG_TYPE_SELECT, 0, 0, parseError, errorCode); 298 postParse(); 299 return *this; 300 } 301 302 void clear()303 MessagePattern::clear() { 304 // Mostly the same as preParse(). 305 msg.remove(); 306 hasArgNames=hasArgNumbers=FALSE; 307 needsAutoQuoting=FALSE; 308 partsLength=0; 309 numericValuesLength=0; 310 } 311 312 UBool operator ==(const MessagePattern & other) const313 MessagePattern::operator==(const MessagePattern &other) const { 314 if(this==&other) { 315 return TRUE; 316 } 317 return 318 aposMode==other.aposMode && 319 msg==other.msg && 320 // parts.equals(o.parts) 321 partsLength==other.partsLength && 322 (partsLength==0 || partsList->equals(*other.partsList, partsLength)); 323 // No need to compare numericValues if msg and parts are the same. 324 } 325 326 int32_t hashCode() const327 MessagePattern::hashCode() const { 328 int32_t hash=(aposMode*37+msg.hashCode())*37+partsLength; 329 for(int32_t i=0; i<partsLength; ++i) { 330 hash=hash*37+parts[i].hashCode(); 331 } 332 return hash; 333 } 334 335 int32_t validateArgumentName(const UnicodeString & name)336 MessagePattern::validateArgumentName(const UnicodeString &name) { 337 if(!PatternProps::isIdentifier(name.getBuffer(), name.length())) { 338 return UMSGPAT_ARG_NAME_NOT_VALID; 339 } 340 return parseArgNumber(name, 0, name.length()); 341 } 342 343 UnicodeString autoQuoteApostropheDeep() const344 MessagePattern::autoQuoteApostropheDeep() const { 345 if(!needsAutoQuoting) { 346 return msg; 347 } 348 UnicodeString modified(msg); 349 // Iterate backward so that the insertion indexes do not change. 350 int32_t count=countParts(); 351 for(int32_t i=count; i>0;) { 352 const Part &part=getPart(--i); 353 if(part.getType()==UMSGPAT_PART_TYPE_INSERT_CHAR) { 354 modified.insert(part.index, (UChar)part.value); 355 } 356 } 357 return modified; 358 } 359 360 double getNumericValue(const Part & part) const361 MessagePattern::getNumericValue(const Part &part) const { 362 UMessagePatternPartType type=part.type; 363 if(type==UMSGPAT_PART_TYPE_ARG_INT) { 364 return part.value; 365 } else if(type==UMSGPAT_PART_TYPE_ARG_DOUBLE) { 366 return numericValues[part.value]; 367 } else { 368 return UMSGPAT_NO_NUMERIC_VALUE; 369 } 370 } 371 372 /** 373 * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified. 374 * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1) 375 * @return the "offset:" value. 376 * @draft ICU 4.8 377 */ 378 double getPluralOffset(int32_t pluralStart) const379 MessagePattern::getPluralOffset(int32_t pluralStart) const { 380 const Part &part=getPart(pluralStart); 381 if(Part::hasNumericValue(part.type)) { 382 return getNumericValue(part); 383 } else { 384 return 0; 385 } 386 } 387 388 // MessagePattern::Part ---------------------------------------------------- *** 389 390 UBool operator ==(const Part & other) const391 MessagePattern::Part::operator==(const Part &other) const { 392 if(this==&other) { 393 return TRUE; 394 } 395 return 396 type==other.type && 397 index==other.index && 398 length==other.length && 399 value==other.value && 400 limitPartIndex==other.limitPartIndex; 401 } 402 403 // MessagePattern parser --------------------------------------------------- *** 404 405 void preParse(const UnicodeString & pattern,UParseError * parseError,UErrorCode & errorCode)406 MessagePattern::preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode) { 407 if(U_FAILURE(errorCode)) { 408 return; 409 } 410 if(parseError!=NULL) { 411 parseError->line=0; 412 parseError->offset=0; 413 parseError->preContext[0]=0; 414 parseError->postContext[0]=0; 415 } 416 msg=pattern; 417 hasArgNames=hasArgNumbers=FALSE; 418 needsAutoQuoting=FALSE; 419 partsLength=0; 420 numericValuesLength=0; 421 } 422 423 void postParse()424 MessagePattern::postParse() { 425 if(partsList!=NULL) { 426 parts=partsList->a.getAlias(); 427 } 428 if(numericValuesList!=NULL) { 429 numericValues=numericValuesList->a.getAlias(); 430 } 431 } 432 433 int32_t parseMessage(int32_t index,int32_t msgStartLength,int32_t nestingLevel,UMessagePatternArgType parentType,UParseError * parseError,UErrorCode & errorCode)434 MessagePattern::parseMessage(int32_t index, int32_t msgStartLength, 435 int32_t nestingLevel, UMessagePatternArgType parentType, 436 UParseError *parseError, UErrorCode &errorCode) { 437 if(U_FAILURE(errorCode)) { 438 return 0; 439 } 440 if(nestingLevel>Part::MAX_VALUE) { 441 errorCode=U_INDEX_OUTOFBOUNDS_ERROR; 442 return 0; 443 } 444 int32_t msgStart=partsLength; 445 addPart(UMSGPAT_PART_TYPE_MSG_START, index, msgStartLength, nestingLevel, errorCode); 446 index+=msgStartLength; 447 for(;;) { // while(index<msg.length()) with U_FAILURE(errorCode) check 448 if(U_FAILURE(errorCode)) { 449 return 0; 450 } 451 if(index>=msg.length()) { 452 break; 453 } 454 UChar c=msg.charAt(index++); 455 if(c==u_apos) { 456 if(index==msg.length()) { 457 // The apostrophe is the last character in the pattern. 458 // Add a Part for auto-quoting. 459 addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0, 460 u_apos, errorCode); // value=char to be inserted 461 needsAutoQuoting=TRUE; 462 } else { 463 c=msg.charAt(index); 464 if(c==u_apos) { 465 // double apostrophe, skip the second one 466 addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index++, 1, 0, errorCode); 467 } else if( 468 aposMode==UMSGPAT_APOS_DOUBLE_REQUIRED || 469 c==u_leftCurlyBrace || c==u_rightCurlyBrace || 470 (parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_pipe) || 471 (UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(parentType) && c==u_pound) 472 ) { 473 // skip the quote-starting apostrophe 474 addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index-1, 1, 0, errorCode); 475 // find the end of the quoted literal text 476 for(;;) { 477 index=msg.indexOf(u_apos, index+1); 478 if(index>=0) { 479 if(/*(index+1)<msg.length() &&*/ msg.charAt(index+1)==u_apos) { 480 // double apostrophe inside quoted literal text 481 // still encodes a single apostrophe, skip the second one 482 addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, ++index, 1, 0, errorCode); 483 } else { 484 // skip the quote-ending apostrophe 485 addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index++, 1, 0, errorCode); 486 break; 487 } 488 } else { 489 // The quoted text reaches to the end of the of the message. 490 index=msg.length(); 491 // Add a Part for auto-quoting. 492 addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0, 493 u_apos, errorCode); // value=char to be inserted 494 needsAutoQuoting=TRUE; 495 break; 496 } 497 } 498 } else { 499 // Interpret the apostrophe as literal text. 500 // Add a Part for auto-quoting. 501 addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0, 502 u_apos, errorCode); // value=char to be inserted 503 needsAutoQuoting=TRUE; 504 } 505 } 506 } else if(UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(parentType) && c==u_pound) { 507 // The unquoted # in a plural message fragment will be replaced 508 // with the (number-offset). 509 addPart(UMSGPAT_PART_TYPE_REPLACE_NUMBER, index-1, 1, 0, errorCode); 510 } else if(c==u_leftCurlyBrace) { 511 index=parseArg(index-1, 1, nestingLevel, parseError, errorCode); 512 } else if((nestingLevel>0 && c==u_rightCurlyBrace) || 513 (parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_pipe)) { 514 // Finish the message before the terminator. 515 // In a choice style, report the "}" substring only for the following ARG_LIMIT, 516 // not for this MSG_LIMIT. 517 int32_t limitLength=(parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_rightCurlyBrace) ? 0 : 1; 518 addLimitPart(msgStart, UMSGPAT_PART_TYPE_MSG_LIMIT, index-1, limitLength, 519 nestingLevel, errorCode); 520 if(parentType==UMSGPAT_ARG_TYPE_CHOICE) { 521 // Let the choice style parser see the '}' or '|'. 522 return index-1; 523 } else { 524 // continue parsing after the '}' 525 return index; 526 } 527 } // else: c is part of literal text 528 } 529 if(nestingLevel>0 && !inTopLevelChoiceMessage(nestingLevel, parentType)) { 530 setParseError(parseError, 0); // Unmatched '{' braces in message. 531 errorCode=U_UNMATCHED_BRACES; 532 return 0; 533 } 534 addLimitPart(msgStart, UMSGPAT_PART_TYPE_MSG_LIMIT, index, 0, nestingLevel, errorCode); 535 return index; 536 } 537 538 int32_t parseArg(int32_t index,int32_t argStartLength,int32_t nestingLevel,UParseError * parseError,UErrorCode & errorCode)539 MessagePattern::parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel, 540 UParseError *parseError, UErrorCode &errorCode) { 541 int32_t argStart=partsLength; 542 UMessagePatternArgType argType=UMSGPAT_ARG_TYPE_NONE; 543 addPart(UMSGPAT_PART_TYPE_ARG_START, index, argStartLength, argType, errorCode); 544 if(U_FAILURE(errorCode)) { 545 return 0; 546 } 547 int32_t nameIndex=index=skipWhiteSpace(index+argStartLength); 548 if(index==msg.length()) { 549 setParseError(parseError, 0); // Unmatched '{' braces in message. 550 errorCode=U_UNMATCHED_BRACES; 551 return 0; 552 } 553 // parse argument name or number 554 index=skipIdentifier(index); 555 int32_t number=parseArgNumber(nameIndex, index); 556 if(number>=0) { 557 int32_t length=index-nameIndex; 558 if(length>Part::MAX_LENGTH || number>Part::MAX_VALUE) { 559 setParseError(parseError, nameIndex); // Argument number too large. 560 errorCode=U_INDEX_OUTOFBOUNDS_ERROR; 561 return 0; 562 } 563 hasArgNumbers=TRUE; 564 addPart(UMSGPAT_PART_TYPE_ARG_NUMBER, nameIndex, length, number, errorCode); 565 } else if(number==UMSGPAT_ARG_NAME_NOT_NUMBER) { 566 int32_t length=index-nameIndex; 567 if(length>Part::MAX_LENGTH) { 568 setParseError(parseError, nameIndex); // Argument name too long. 569 errorCode=U_INDEX_OUTOFBOUNDS_ERROR; 570 return 0; 571 } 572 hasArgNames=TRUE; 573 addPart(UMSGPAT_PART_TYPE_ARG_NAME, nameIndex, length, 0, errorCode); 574 } else { // number<-1 (ARG_NAME_NOT_VALID) 575 setParseError(parseError, nameIndex); // Bad argument syntax. 576 errorCode=U_PATTERN_SYNTAX_ERROR; 577 return 0; 578 } 579 index=skipWhiteSpace(index); 580 if(index==msg.length()) { 581 setParseError(parseError, 0); // Unmatched '{' braces in message. 582 errorCode=U_UNMATCHED_BRACES; 583 return 0; 584 } 585 UChar c=msg.charAt(index); 586 if(c==u_rightCurlyBrace) { 587 // all done 588 } else if(c!=u_comma) { 589 setParseError(parseError, nameIndex); // Bad argument syntax. 590 errorCode=U_PATTERN_SYNTAX_ERROR; 591 return 0; 592 } else /* ',' */ { 593 // parse argument type: case-sensitive a-zA-Z 594 int32_t typeIndex=index=skipWhiteSpace(index+1); 595 while(index<msg.length() && isArgTypeChar(msg.charAt(index))) { 596 ++index; 597 } 598 int32_t length=index-typeIndex; 599 index=skipWhiteSpace(index); 600 if(index==msg.length()) { 601 setParseError(parseError, 0); // Unmatched '{' braces in message. 602 errorCode=U_UNMATCHED_BRACES; 603 return 0; 604 } 605 if(length==0 || ((c=msg.charAt(index))!=u_comma && c!=u_rightCurlyBrace)) { 606 setParseError(parseError, nameIndex); // Bad argument syntax. 607 errorCode=U_PATTERN_SYNTAX_ERROR; 608 return 0; 609 } 610 if(length>Part::MAX_LENGTH) { 611 setParseError(parseError, nameIndex); // Argument type name too long. 612 errorCode=U_INDEX_OUTOFBOUNDS_ERROR; 613 return 0; 614 } 615 argType=UMSGPAT_ARG_TYPE_SIMPLE; 616 if(length==6) { 617 // case-insensitive comparisons for complex-type names 618 if(isChoice(typeIndex)) { 619 argType=UMSGPAT_ARG_TYPE_CHOICE; 620 } else if(isPlural(typeIndex)) { 621 argType=UMSGPAT_ARG_TYPE_PLURAL; 622 } else if(isSelect(typeIndex)) { 623 argType=UMSGPAT_ARG_TYPE_SELECT; 624 } 625 } else if(length==13) { 626 if(isSelect(typeIndex) && isOrdinal(typeIndex+6)) { 627 argType=UMSGPAT_ARG_TYPE_SELECTORDINAL; 628 } 629 } 630 // change the ARG_START type from NONE to argType 631 partsList->a[argStart].value=(int16_t)argType; 632 if(argType==UMSGPAT_ARG_TYPE_SIMPLE) { 633 addPart(UMSGPAT_PART_TYPE_ARG_TYPE, typeIndex, length, 0, errorCode); 634 } 635 // look for an argument style (pattern) 636 if(c==u_rightCurlyBrace) { 637 if(argType!=UMSGPAT_ARG_TYPE_SIMPLE) { 638 setParseError(parseError, nameIndex); // No style field for complex argument. 639 errorCode=U_PATTERN_SYNTAX_ERROR; 640 return 0; 641 } 642 } else /* ',' */ { 643 ++index; 644 if(argType==UMSGPAT_ARG_TYPE_SIMPLE) { 645 index=parseSimpleStyle(index, parseError, errorCode); 646 } else if(argType==UMSGPAT_ARG_TYPE_CHOICE) { 647 index=parseChoiceStyle(index, nestingLevel, parseError, errorCode); 648 } else { 649 index=parsePluralOrSelectStyle(argType, index, nestingLevel, parseError, errorCode); 650 } 651 } 652 } 653 // Argument parsing stopped on the '}'. 654 addLimitPart(argStart, UMSGPAT_PART_TYPE_ARG_LIMIT, index, 1, argType, errorCode); 655 return index+1; 656 } 657 658 int32_t parseSimpleStyle(int32_t index,UParseError * parseError,UErrorCode & errorCode)659 MessagePattern::parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode) { 660 if(U_FAILURE(errorCode)) { 661 return 0; 662 } 663 int32_t start=index; 664 int32_t nestedBraces=0; 665 while(index<msg.length()) { 666 UChar c=msg.charAt(index++); 667 if(c==u_apos) { 668 // Treat apostrophe as quoting but include it in the style part. 669 // Find the end of the quoted literal text. 670 index=msg.indexOf(u_apos, index); 671 if(index<0) { 672 // Quoted literal argument style text reaches to the end of the message. 673 setParseError(parseError, start); 674 errorCode=U_PATTERN_SYNTAX_ERROR; 675 return 0; 676 } 677 // skip the quote-ending apostrophe 678 ++index; 679 } else if(c==u_leftCurlyBrace) { 680 ++nestedBraces; 681 } else if(c==u_rightCurlyBrace) { 682 if(nestedBraces>0) { 683 --nestedBraces; 684 } else { 685 int32_t length=--index-start; 686 if(length>Part::MAX_LENGTH) { 687 setParseError(parseError, start); // Argument style text too long. 688 errorCode=U_INDEX_OUTOFBOUNDS_ERROR; 689 return 0; 690 } 691 addPart(UMSGPAT_PART_TYPE_ARG_STYLE, start, length, 0, errorCode); 692 return index; 693 } 694 } // c is part of literal text 695 } 696 setParseError(parseError, 0); // Unmatched '{' braces in message. 697 errorCode=U_UNMATCHED_BRACES; 698 return 0; 699 } 700 701 int32_t parseChoiceStyle(int32_t index,int32_t nestingLevel,UParseError * parseError,UErrorCode & errorCode)702 MessagePattern::parseChoiceStyle(int32_t index, int32_t nestingLevel, 703 UParseError *parseError, UErrorCode &errorCode) { 704 if(U_FAILURE(errorCode)) { 705 return 0; 706 } 707 int32_t start=index; 708 index=skipWhiteSpace(index); 709 if(index==msg.length() || msg.charAt(index)==u_rightCurlyBrace) { 710 setParseError(parseError, 0); // Missing choice argument pattern. 711 errorCode=U_PATTERN_SYNTAX_ERROR; 712 return 0; 713 } 714 for(;;) { 715 // The choice argument style contains |-separated (number, separator, message) triples. 716 // Parse the number. 717 int32_t numberIndex=index; 718 index=skipDouble(index); 719 int32_t length=index-numberIndex; 720 if(length==0) { 721 setParseError(parseError, start); // Bad choice pattern syntax. 722 errorCode=U_PATTERN_SYNTAX_ERROR; 723 return 0; 724 } 725 if(length>Part::MAX_LENGTH) { 726 setParseError(parseError, numberIndex); // Choice number too long. 727 errorCode=U_INDEX_OUTOFBOUNDS_ERROR; 728 return 0; 729 } 730 parseDouble(numberIndex, index, TRUE, parseError, errorCode); // adds ARG_INT or ARG_DOUBLE 731 if(U_FAILURE(errorCode)) { 732 return 0; 733 } 734 // Parse the separator. 735 index=skipWhiteSpace(index); 736 if(index==msg.length()) { 737 setParseError(parseError, start); // Bad choice pattern syntax. 738 errorCode=U_PATTERN_SYNTAX_ERROR; 739 return 0; 740 } 741 UChar c=msg.charAt(index); 742 if(!(c==u_pound || c==u_lessThan || c==u_lessOrEqual)) { // U+2264 is <= 743 setParseError(parseError, start); // Expected choice separator (#<\u2264) instead of c. 744 errorCode=U_PATTERN_SYNTAX_ERROR; 745 return 0; 746 } 747 addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, index, 1, 0, errorCode); 748 // Parse the message fragment. 749 index=parseMessage(++index, 0, nestingLevel+1, UMSGPAT_ARG_TYPE_CHOICE, parseError, errorCode); 750 if(U_FAILURE(errorCode)) { 751 return 0; 752 } 753 // parseMessage(..., CHOICE) returns the index of the terminator, or msg.length(). 754 if(index==msg.length()) { 755 return index; 756 } 757 if(msg.charAt(index)==u_rightCurlyBrace) { 758 if(!inMessageFormatPattern(nestingLevel)) { 759 setParseError(parseError, start); // Bad choice pattern syntax. 760 errorCode=U_PATTERN_SYNTAX_ERROR; 761 return 0; 762 } 763 return index; 764 } // else the terminator is '|' 765 index=skipWhiteSpace(index+1); 766 } 767 } 768 769 int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType,int32_t index,int32_t nestingLevel,UParseError * parseError,UErrorCode & errorCode)770 MessagePattern::parsePluralOrSelectStyle(UMessagePatternArgType argType, 771 int32_t index, int32_t nestingLevel, 772 UParseError *parseError, UErrorCode &errorCode) { 773 if(U_FAILURE(errorCode)) { 774 return 0; 775 } 776 int32_t start=index; 777 UBool isEmpty=TRUE; 778 UBool hasOther=FALSE; 779 for(;;) { 780 // First, collect the selector looking for a small set of terminators. 781 // It would be a little faster to consider the syntax of each possible 782 // token right here, but that makes the code too complicated. 783 index=skipWhiteSpace(index); 784 UBool eos=index==msg.length(); 785 if(eos || msg.charAt(index)==u_rightCurlyBrace) { 786 if(eos==inMessageFormatPattern(nestingLevel)) { 787 setParseError(parseError, start); // Bad plural/select pattern syntax. 788 errorCode=U_PATTERN_SYNTAX_ERROR; 789 return 0; 790 } 791 if(!hasOther) { 792 setParseError(parseError, 0); // Missing 'other' keyword in plural/select pattern. 793 errorCode=U_DEFAULT_KEYWORD_MISSING; 794 return 0; 795 } 796 return index; 797 } 798 int32_t selectorIndex=index; 799 if(UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) && msg.charAt(selectorIndex)==u_equal) { 800 // explicit-value plural selector: =double 801 index=skipDouble(index+1); 802 int32_t length=index-selectorIndex; 803 if(length==1) { 804 setParseError(parseError, start); // Bad plural/select pattern syntax. 805 errorCode=U_PATTERN_SYNTAX_ERROR; 806 return 0; 807 } 808 if(length>Part::MAX_LENGTH) { 809 setParseError(parseError, selectorIndex); // Argument selector too long. 810 errorCode=U_INDEX_OUTOFBOUNDS_ERROR; 811 return 0; 812 } 813 addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, selectorIndex, length, 0, errorCode); 814 parseDouble(selectorIndex+1, index, FALSE, 815 parseError, errorCode); // adds ARG_INT or ARG_DOUBLE 816 } else { 817 index=skipIdentifier(index); 818 int32_t length=index-selectorIndex; 819 if(length==0) { 820 setParseError(parseError, start); // Bad plural/select pattern syntax. 821 errorCode=U_PATTERN_SYNTAX_ERROR; 822 return 0; 823 } 824 // Note: The ':' in "offset:" is just beyond the skipIdentifier() range. 825 if( UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) && length==6 && index<msg.length() && 826 0==msg.compare(selectorIndex, 7, kOffsetColon, 0, 7) 827 ) { 828 // plural offset, not a selector 829 if(!isEmpty) { 830 // Plural argument 'offset:' (if present) must precede key-message pairs. 831 setParseError(parseError, start); 832 errorCode=U_PATTERN_SYNTAX_ERROR; 833 return 0; 834 } 835 // allow whitespace between offset: and its value 836 int32_t valueIndex=skipWhiteSpace(index+1); // The ':' is at index. 837 index=skipDouble(valueIndex); 838 if(index==valueIndex) { 839 setParseError(parseError, start); // Missing value for plural 'offset:'. 840 errorCode=U_PATTERN_SYNTAX_ERROR; 841 return 0; 842 } 843 if((index-valueIndex)>Part::MAX_LENGTH) { 844 setParseError(parseError, valueIndex); // Plural offset value too long. 845 errorCode=U_INDEX_OUTOFBOUNDS_ERROR; 846 return 0; 847 } 848 parseDouble(valueIndex, index, FALSE, 849 parseError, errorCode); // adds ARG_INT or ARG_DOUBLE 850 if(U_FAILURE(errorCode)) { 851 return 0; 852 } 853 isEmpty=FALSE; 854 continue; // no message fragment after the offset 855 } else { 856 // normal selector word 857 if(length>Part::MAX_LENGTH) { 858 setParseError(parseError, selectorIndex); // Argument selector too long. 859 errorCode=U_INDEX_OUTOFBOUNDS_ERROR; 860 return 0; 861 } 862 addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, selectorIndex, length, 0, errorCode); 863 if(0==msg.compare(selectorIndex, length, kOther, 0, 5)) { 864 hasOther=TRUE; 865 } 866 } 867 } 868 if(U_FAILURE(errorCode)) { 869 return 0; 870 } 871 872 // parse the message fragment following the selector 873 index=skipWhiteSpace(index); 874 if(index==msg.length() || msg.charAt(index)!=u_leftCurlyBrace) { 875 setParseError(parseError, selectorIndex); // No message fragment after plural/select selector. 876 errorCode=U_PATTERN_SYNTAX_ERROR; 877 return 0; 878 } 879 index=parseMessage(index, 1, nestingLevel+1, argType, parseError, errorCode); 880 if(U_FAILURE(errorCode)) { 881 return 0; 882 } 883 isEmpty=FALSE; 884 } 885 } 886 887 int32_t parseArgNumber(const UnicodeString & s,int32_t start,int32_t limit)888 MessagePattern::parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit) { 889 // If the identifier contains only ASCII digits, then it is an argument _number_ 890 // and must not have leading zeros (except "0" itself). 891 // Otherwise it is an argument _name_. 892 if(start>=limit) { 893 return UMSGPAT_ARG_NAME_NOT_VALID; 894 } 895 int32_t number; 896 // Defer numeric errors until we know there are only digits. 897 UBool badNumber; 898 UChar c=s.charAt(start++); 899 if(c==0x30) { 900 if(start==limit) { 901 return 0; 902 } else { 903 number=0; 904 badNumber=TRUE; // leading zero 905 } 906 } else if(0x31<=c && c<=0x39) { 907 number=c-0x30; 908 badNumber=FALSE; 909 } else { 910 return UMSGPAT_ARG_NAME_NOT_NUMBER; 911 } 912 while(start<limit) { 913 c=s.charAt(start++); 914 if(0x30<=c && c<=0x39) { 915 if(number>=INT32_MAX/10) { 916 badNumber=TRUE; // overflow 917 } 918 number=number*10+(c-0x30); 919 } else { 920 return UMSGPAT_ARG_NAME_NOT_NUMBER; 921 } 922 } 923 // There are only ASCII digits. 924 if(badNumber) { 925 return UMSGPAT_ARG_NAME_NOT_VALID; 926 } else { 927 return number; 928 } 929 } 930 931 void parseDouble(int32_t start,int32_t limit,UBool allowInfinity,UParseError * parseError,UErrorCode & errorCode)932 MessagePattern::parseDouble(int32_t start, int32_t limit, UBool allowInfinity, 933 UParseError *parseError, UErrorCode &errorCode) { 934 if(U_FAILURE(errorCode)) { 935 return; 936 } 937 U_ASSERT(start<limit); 938 // fake loop for easy exit and single throw statement 939 for(;;) { /*loop doesn't iterate*/ 940 // fast path for small integers and infinity 941 int32_t value=0; 942 int32_t isNegative=0; // not boolean so that we can easily add it to value 943 int32_t index=start; 944 UChar c=msg.charAt(index++); 945 if(c==u_minus) { 946 isNegative=1; 947 if(index==limit) { 948 break; // no number 949 } 950 c=msg.charAt(index++); 951 } else if(c==u_plus) { 952 if(index==limit) { 953 break; // no number 954 } 955 c=msg.charAt(index++); 956 } 957 if(c==0x221e) { // infinity 958 if(allowInfinity && index==limit) { 959 double infinity=uprv_getInfinity(); 960 addArgDoublePart( 961 isNegative!=0 ? -infinity : infinity, 962 start, limit-start, errorCode); 963 return; 964 } else { 965 break; 966 } 967 } 968 // try to parse the number as a small integer but fall back to a double 969 while('0'<=c && c<='9') { 970 value=value*10+(c-'0'); 971 if(value>(Part::MAX_VALUE+isNegative)) { 972 break; // not a small-enough integer 973 } 974 if(index==limit) { 975 addPart(UMSGPAT_PART_TYPE_ARG_INT, start, limit-start, 976 isNegative!=0 ? -value : value, errorCode); 977 return; 978 } 979 c=msg.charAt(index++); 980 } 981 // Let Double.parseDouble() throw a NumberFormatException. 982 char numberChars[128]; 983 int32_t capacity=(int32_t)sizeof(numberChars); 984 int32_t length=limit-start; 985 if(length>=capacity) { 986 break; // number too long 987 } 988 msg.extract(start, length, numberChars, capacity, US_INV); 989 if((int32_t)uprv_strlen(numberChars)<length) { 990 break; // contains non-invariant character that was turned into NUL 991 } 992 char *end; 993 double numericValue=uprv_strtod(numberChars, &end); 994 if(end!=(numberChars+length)) { 995 break; // parsing error 996 } 997 addArgDoublePart(numericValue, start, length, errorCode); 998 return; 999 } 1000 setParseError(parseError, start /*, limit*/); // Bad syntax for numeric value. 1001 errorCode=U_PATTERN_SYNTAX_ERROR; 1002 return; 1003 } 1004 1005 int32_t skipWhiteSpace(int32_t index)1006 MessagePattern::skipWhiteSpace(int32_t index) { 1007 const UChar *s=msg.getBuffer(); 1008 int32_t msgLength=msg.length(); 1009 const UChar *t=PatternProps::skipWhiteSpace(s+index, msgLength-index); 1010 return (int32_t)(t-s); 1011 } 1012 1013 int32_t skipIdentifier(int32_t index)1014 MessagePattern::skipIdentifier(int32_t index) { 1015 const UChar *s=msg.getBuffer(); 1016 int32_t msgLength=msg.length(); 1017 const UChar *t=PatternProps::skipIdentifier(s+index, msgLength-index); 1018 return (int32_t)(t-s); 1019 } 1020 1021 int32_t skipDouble(int32_t index)1022 MessagePattern::skipDouble(int32_t index) { 1023 int32_t msgLength=msg.length(); 1024 while(index<msgLength) { 1025 UChar c=msg.charAt(index); 1026 // U+221E: Allow the infinity symbol, for ChoiceFormat patterns. 1027 if((c<0x30 && c!=u_plus && c!=u_minus && c!=u_dot) || (c>0x39 && c!=u_e && c!=u_E && c!=0x221e)) { 1028 break; 1029 } 1030 ++index; 1031 } 1032 return index; 1033 } 1034 1035 UBool isArgTypeChar(UChar32 c)1036 MessagePattern::isArgTypeChar(UChar32 c) { 1037 return (u_a<=c && c<=u_z) || (u_A<=c && c<=u_Z); 1038 } 1039 1040 UBool isChoice(int32_t index)1041 MessagePattern::isChoice(int32_t index) { 1042 UChar c; 1043 return 1044 ((c=msg.charAt(index++))==u_c || c==u_C) && 1045 ((c=msg.charAt(index++))==u_h || c==u_H) && 1046 ((c=msg.charAt(index++))==u_o || c==u_O) && 1047 ((c=msg.charAt(index++))==u_i || c==u_I) && 1048 ((c=msg.charAt(index++))==u_c || c==u_C) && 1049 ((c=msg.charAt(index))==u_e || c==u_E); 1050 } 1051 1052 UBool isPlural(int32_t index)1053 MessagePattern::isPlural(int32_t index) { 1054 UChar c; 1055 return 1056 ((c=msg.charAt(index++))==u_p || c==u_P) && 1057 ((c=msg.charAt(index++))==u_l || c==u_L) && 1058 ((c=msg.charAt(index++))==u_u || c==u_U) && 1059 ((c=msg.charAt(index++))==u_r || c==u_R) && 1060 ((c=msg.charAt(index++))==u_a || c==u_A) && 1061 ((c=msg.charAt(index))==u_l || c==u_L); 1062 } 1063 1064 UBool isSelect(int32_t index)1065 MessagePattern::isSelect(int32_t index) { 1066 UChar c; 1067 return 1068 ((c=msg.charAt(index++))==u_s || c==u_S) && 1069 ((c=msg.charAt(index++))==u_e || c==u_E) && 1070 ((c=msg.charAt(index++))==u_l || c==u_L) && 1071 ((c=msg.charAt(index++))==u_e || c==u_E) && 1072 ((c=msg.charAt(index++))==u_c || c==u_C) && 1073 ((c=msg.charAt(index))==u_t || c==u_T); 1074 } 1075 1076 UBool isOrdinal(int32_t index)1077 MessagePattern::isOrdinal(int32_t index) { 1078 UChar c; 1079 return 1080 ((c=msg.charAt(index++))==u_o || c==u_O) && 1081 ((c=msg.charAt(index++))==u_r || c==u_R) && 1082 ((c=msg.charAt(index++))==u_d || c==u_D) && 1083 ((c=msg.charAt(index++))==u_i || c==u_I) && 1084 ((c=msg.charAt(index++))==u_n || c==u_N) && 1085 ((c=msg.charAt(index++))==u_a || c==u_A) && 1086 ((c=msg.charAt(index))==u_l || c==u_L); 1087 } 1088 1089 UBool inMessageFormatPattern(int32_t nestingLevel)1090 MessagePattern::inMessageFormatPattern(int32_t nestingLevel) { 1091 return nestingLevel>0 || partsList->a[0].type==UMSGPAT_PART_TYPE_MSG_START; 1092 } 1093 1094 UBool inTopLevelChoiceMessage(int32_t nestingLevel,UMessagePatternArgType parentType)1095 MessagePattern::inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType) { 1096 return 1097 nestingLevel==1 && 1098 parentType==UMSGPAT_ARG_TYPE_CHOICE && 1099 partsList->a[0].type!=UMSGPAT_PART_TYPE_MSG_START; 1100 } 1101 1102 void addPart(UMessagePatternPartType type,int32_t index,int32_t length,int32_t value,UErrorCode & errorCode)1103 MessagePattern::addPart(UMessagePatternPartType type, int32_t index, int32_t length, 1104 int32_t value, UErrorCode &errorCode) { 1105 if(partsList->ensureCapacityForOneMore(partsLength, errorCode)) { 1106 Part &part=partsList->a[partsLength++]; 1107 part.type=type; 1108 part.index=index; 1109 part.length=(uint16_t)length; 1110 part.value=(int16_t)value; 1111 part.limitPartIndex=0; 1112 } 1113 } 1114 1115 void addLimitPart(int32_t start,UMessagePatternPartType type,int32_t index,int32_t length,int32_t value,UErrorCode & errorCode)1116 MessagePattern::addLimitPart(int32_t start, 1117 UMessagePatternPartType type, int32_t index, int32_t length, 1118 int32_t value, UErrorCode &errorCode) { 1119 partsList->a[start].limitPartIndex=partsLength; 1120 addPart(type, index, length, value, errorCode); 1121 } 1122 1123 void addArgDoublePart(double numericValue,int32_t start,int32_t length,UErrorCode & errorCode)1124 MessagePattern::addArgDoublePart(double numericValue, int32_t start, int32_t length, 1125 UErrorCode &errorCode) { 1126 if(U_FAILURE(errorCode)) { 1127 return; 1128 } 1129 int32_t numericIndex=numericValuesLength; 1130 if(numericValuesList==NULL) { 1131 numericValuesList=new MessagePatternDoubleList(); 1132 if(numericValuesList==NULL) { 1133 errorCode=U_MEMORY_ALLOCATION_ERROR; 1134 return; 1135 } 1136 } else if(!numericValuesList->ensureCapacityForOneMore(numericValuesLength, errorCode)) { 1137 return; 1138 } else { 1139 if(numericIndex>Part::MAX_VALUE) { 1140 errorCode=U_INDEX_OUTOFBOUNDS_ERROR; 1141 return; 1142 } 1143 } 1144 numericValuesList->a[numericValuesLength++]=numericValue; 1145 addPart(UMSGPAT_PART_TYPE_ARG_DOUBLE, start, length, numericIndex, errorCode); 1146 } 1147 1148 void setParseError(UParseError * parseError,int32_t index)1149 MessagePattern::setParseError(UParseError *parseError, int32_t index) { 1150 if(parseError==NULL) { 1151 return; 1152 } 1153 parseError->offset=index; 1154 1155 // Set preContext to some of msg before index. 1156 // Avoid splitting a surrogate pair. 1157 int32_t length=index; 1158 if(length>=U_PARSE_CONTEXT_LEN) { 1159 length=U_PARSE_CONTEXT_LEN-1; 1160 if(length>0 && U16_IS_TRAIL(msg[index-length])) { 1161 --length; 1162 } 1163 } 1164 msg.extract(index-length, length, parseError->preContext); 1165 parseError->preContext[length]=0; 1166 1167 // Set postContext to some of msg starting at index. 1168 length=msg.length()-index; 1169 if(length>=U_PARSE_CONTEXT_LEN) { 1170 length=U_PARSE_CONTEXT_LEN-1; 1171 if(length>0 && U16_IS_LEAD(msg[index+length-1])) { 1172 --length; 1173 } 1174 } 1175 msg.extract(index, length, parseError->postContext); 1176 parseError->postContext[length]=0; 1177 } 1178 1179 // MessageImpl ------------------------------------------------------------- *** 1180 1181 void appendReducedApostrophes(const UnicodeString & s,int32_t start,int32_t limit,UnicodeString & sb)1182 MessageImpl::appendReducedApostrophes(const UnicodeString &s, int32_t start, int32_t limit, 1183 UnicodeString &sb) { 1184 int32_t doubleApos=-1; 1185 for(;;) { 1186 int32_t i=s.indexOf(u_apos, start); 1187 if(i<0 || i>=limit) { 1188 sb.append(s, start, limit-start); 1189 break; 1190 } 1191 if(i==doubleApos) { 1192 // Double apostrophe at start-1 and start==i, append one. 1193 sb.append(u_apos); 1194 ++start; 1195 doubleApos=-1; 1196 } else { 1197 // Append text between apostrophes and skip this one. 1198 sb.append(s, start, i-start); 1199 doubleApos=start=i+1; 1200 } 1201 } 1202 } 1203 1204 // Ported from second half of ICU4J SelectFormat.format(String). 1205 UnicodeString & appendSubMessageWithoutSkipSyntax(const MessagePattern & msgPattern,int32_t msgStart,UnicodeString & result)1206 MessageImpl::appendSubMessageWithoutSkipSyntax(const MessagePattern &msgPattern, 1207 int32_t msgStart, 1208 UnicodeString &result) { 1209 const UnicodeString &msgString=msgPattern.getPatternString(); 1210 int32_t prevIndex=msgPattern.getPart(msgStart).getLimit(); 1211 for(int32_t i=msgStart;;) { 1212 const MessagePattern::Part &part=msgPattern.getPart(++i); 1213 UMessagePatternPartType type=part.getType(); 1214 int32_t index=part.getIndex(); 1215 if(type==UMSGPAT_PART_TYPE_MSG_LIMIT) { 1216 return result.append(msgString, prevIndex, index-prevIndex); 1217 } else if(type==UMSGPAT_PART_TYPE_SKIP_SYNTAX) { 1218 result.append(msgString, prevIndex, index-prevIndex); 1219 prevIndex=part.getLimit(); 1220 } else if(type==UMSGPAT_PART_TYPE_ARG_START) { 1221 result.append(msgString, prevIndex, index-prevIndex); 1222 prevIndex=index; 1223 i=msgPattern.getLimitPartIndex(i); 1224 index=msgPattern.getPart(i).getLimit(); 1225 appendReducedApostrophes(msgString, prevIndex, index, result); 1226 prevIndex=index; 1227 } 1228 } 1229 } 1230 1231 U_NAMESPACE_END 1232 1233 #endif // !UCONFIG_NO_FORMATTING 1234