1 /* 2 ******************************************************************************* 3 * Copyright (C) 2009-2015, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ******************************************************************************* 6 * 7 * File PLURFMT.CPP 8 ******************************************************************************* 9 */ 10 11 #include "unicode/decimfmt.h" 12 #include "unicode/messagepattern.h" 13 #include "unicode/plurfmt.h" 14 #include "unicode/plurrule.h" 15 #include "unicode/utypes.h" 16 #include "cmemory.h" 17 #include "messageimpl.h" 18 #include "nfrule.h" 19 #include "plurrule_impl.h" 20 #include "uassert.h" 21 #include "uhash.h" 22 #include "precision.h" 23 #include "visibledigits.h" 24 25 #if !UCONFIG_NO_FORMATTING 26 27 U_NAMESPACE_BEGIN 28 29 static const UChar OTHER_STRING[] = { 30 0x6F, 0x74, 0x68, 0x65, 0x72, 0 // "other" 31 }; 32 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat)33 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat) 34 35 PluralFormat::PluralFormat(UErrorCode& status) 36 : locale(Locale::getDefault()), 37 msgPattern(status), 38 numberFormat(NULL), 39 offset(0) { 40 init(NULL, UPLURAL_TYPE_CARDINAL, status); 41 } 42 PluralFormat(const Locale & loc,UErrorCode & status)43 PluralFormat::PluralFormat(const Locale& loc, UErrorCode& status) 44 : locale(loc), 45 msgPattern(status), 46 numberFormat(NULL), 47 offset(0) { 48 init(NULL, UPLURAL_TYPE_CARDINAL, status); 49 } 50 PluralFormat(const PluralRules & rules,UErrorCode & status)51 PluralFormat::PluralFormat(const PluralRules& rules, UErrorCode& status) 52 : locale(Locale::getDefault()), 53 msgPattern(status), 54 numberFormat(NULL), 55 offset(0) { 56 init(&rules, UPLURAL_TYPE_COUNT, status); 57 } 58 PluralFormat(const Locale & loc,const PluralRules & rules,UErrorCode & status)59 PluralFormat::PluralFormat(const Locale& loc, 60 const PluralRules& rules, 61 UErrorCode& status) 62 : locale(loc), 63 msgPattern(status), 64 numberFormat(NULL), 65 offset(0) { 66 init(&rules, UPLURAL_TYPE_COUNT, status); 67 } 68 PluralFormat(const Locale & loc,UPluralType type,UErrorCode & status)69 PluralFormat::PluralFormat(const Locale& loc, 70 UPluralType type, 71 UErrorCode& status) 72 : locale(loc), 73 msgPattern(status), 74 numberFormat(NULL), 75 offset(0) { 76 init(NULL, type, status); 77 } 78 PluralFormat(const UnicodeString & pat,UErrorCode & status)79 PluralFormat::PluralFormat(const UnicodeString& pat, 80 UErrorCode& status) 81 : locale(Locale::getDefault()), 82 msgPattern(status), 83 numberFormat(NULL), 84 offset(0) { 85 init(NULL, UPLURAL_TYPE_CARDINAL, status); 86 applyPattern(pat, status); 87 } 88 PluralFormat(const Locale & loc,const UnicodeString & pat,UErrorCode & status)89 PluralFormat::PluralFormat(const Locale& loc, 90 const UnicodeString& pat, 91 UErrorCode& status) 92 : locale(loc), 93 msgPattern(status), 94 numberFormat(NULL), 95 offset(0) { 96 init(NULL, UPLURAL_TYPE_CARDINAL, status); 97 applyPattern(pat, status); 98 } 99 PluralFormat(const PluralRules & rules,const UnicodeString & pat,UErrorCode & status)100 PluralFormat::PluralFormat(const PluralRules& rules, 101 const UnicodeString& pat, 102 UErrorCode& status) 103 : locale(Locale::getDefault()), 104 msgPattern(status), 105 numberFormat(NULL), 106 offset(0) { 107 init(&rules, UPLURAL_TYPE_COUNT, status); 108 applyPattern(pat, status); 109 } 110 PluralFormat(const Locale & loc,const PluralRules & rules,const UnicodeString & pat,UErrorCode & status)111 PluralFormat::PluralFormat(const Locale& loc, 112 const PluralRules& rules, 113 const UnicodeString& pat, 114 UErrorCode& status) 115 : locale(loc), 116 msgPattern(status), 117 numberFormat(NULL), 118 offset(0) { 119 init(&rules, UPLURAL_TYPE_COUNT, status); 120 applyPattern(pat, status); 121 } 122 PluralFormat(const Locale & loc,UPluralType type,const UnicodeString & pat,UErrorCode & status)123 PluralFormat::PluralFormat(const Locale& loc, 124 UPluralType type, 125 const UnicodeString& pat, 126 UErrorCode& status) 127 : locale(loc), 128 msgPattern(status), 129 numberFormat(NULL), 130 offset(0) { 131 init(NULL, type, status); 132 applyPattern(pat, status); 133 } 134 PluralFormat(const PluralFormat & other)135 PluralFormat::PluralFormat(const PluralFormat& other) 136 : Format(other), 137 locale(other.locale), 138 msgPattern(other.msgPattern), 139 numberFormat(NULL), 140 offset(other.offset) { 141 copyObjects(other); 142 } 143 144 void copyObjects(const PluralFormat & other)145 PluralFormat::copyObjects(const PluralFormat& other) { 146 UErrorCode status = U_ZERO_ERROR; 147 if (numberFormat != NULL) { 148 delete numberFormat; 149 } 150 if (pluralRulesWrapper.pluralRules != NULL) { 151 delete pluralRulesWrapper.pluralRules; 152 } 153 154 if (other.numberFormat == NULL) { 155 numberFormat = NumberFormat::createInstance(locale, status); 156 } else { 157 numberFormat = (NumberFormat*)other.numberFormat->clone(); 158 } 159 if (other.pluralRulesWrapper.pluralRules == NULL) { 160 pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status); 161 } else { 162 pluralRulesWrapper.pluralRules = other.pluralRulesWrapper.pluralRules->clone(); 163 } 164 } 165 166 ~PluralFormat()167 PluralFormat::~PluralFormat() { 168 delete numberFormat; 169 } 170 171 void init(const PluralRules * rules,UPluralType type,UErrorCode & status)172 PluralFormat::init(const PluralRules* rules, UPluralType type, UErrorCode& status) { 173 if (U_FAILURE(status)) { 174 return; 175 } 176 177 if (rules==NULL) { 178 pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, type, status); 179 } else { 180 pluralRulesWrapper.pluralRules = rules->clone(); 181 if (pluralRulesWrapper.pluralRules == NULL) { 182 status = U_MEMORY_ALLOCATION_ERROR; 183 return; 184 } 185 } 186 187 numberFormat= NumberFormat::createInstance(locale, status); 188 } 189 190 void applyPattern(const UnicodeString & newPattern,UErrorCode & status)191 PluralFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) { 192 msgPattern.parsePluralStyle(newPattern, NULL, status); 193 if (U_FAILURE(status)) { 194 msgPattern.clear(); 195 offset = 0; 196 return; 197 } 198 offset = msgPattern.getPluralOffset(0); 199 } 200 201 UnicodeString& format(const Formattable & obj,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const202 PluralFormat::format(const Formattable& obj, 203 UnicodeString& appendTo, 204 FieldPosition& pos, 205 UErrorCode& status) const 206 { 207 if (U_FAILURE(status)) return appendTo; 208 209 if (obj.isNumeric()) { 210 return format(obj, obj.getDouble(), appendTo, pos, status); 211 } else { 212 status = U_ILLEGAL_ARGUMENT_ERROR; 213 return appendTo; 214 } 215 } 216 217 UnicodeString format(int32_t number,UErrorCode & status) const218 PluralFormat::format(int32_t number, UErrorCode& status) const { 219 FieldPosition fpos(0); 220 UnicodeString result; 221 return format(Formattable(number), number, result, fpos, status); 222 } 223 224 UnicodeString format(double number,UErrorCode & status) const225 PluralFormat::format(double number, UErrorCode& status) const { 226 FieldPosition fpos(0); 227 UnicodeString result; 228 return format(Formattable(number), number, result, fpos, status); 229 } 230 231 232 UnicodeString& format(int32_t number,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const233 PluralFormat::format(int32_t number, 234 UnicodeString& appendTo, 235 FieldPosition& pos, 236 UErrorCode& status) const { 237 return format(Formattable(number), (double)number, appendTo, pos, status); 238 } 239 240 UnicodeString& format(double number,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const241 PluralFormat::format(double number, 242 UnicodeString& appendTo, 243 FieldPosition& pos, 244 UErrorCode& status) const { 245 return format(Formattable(number), (double)number, appendTo, pos, status); 246 } 247 248 UnicodeString& format(const Formattable & numberObject,double number,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const249 PluralFormat::format(const Formattable& numberObject, double number, 250 UnicodeString& appendTo, 251 FieldPosition& pos, 252 UErrorCode& status) const { 253 if (U_FAILURE(status)) { 254 return appendTo; 255 } 256 if (msgPattern.countParts() == 0) { 257 return numberFormat->format(numberObject, appendTo, pos, status); 258 } 259 // Get the appropriate sub-message. 260 // Select it based on the formatted number-offset. 261 double numberMinusOffset = number - offset; 262 UnicodeString numberString; 263 FieldPosition ignorePos; 264 FixedPrecision fp; 265 VisibleDigitsWithExponent dec; 266 fp.initVisibleDigitsWithExponent(numberMinusOffset, dec, status); 267 if (U_FAILURE(status)) { 268 return appendTo; 269 } 270 if (offset == 0) { 271 DecimalFormat *decFmt = dynamic_cast<DecimalFormat *>(numberFormat); 272 if(decFmt != NULL) { 273 decFmt->initVisibleDigitsWithExponent( 274 numberObject, dec, status); 275 if (U_FAILURE(status)) { 276 return appendTo; 277 } 278 decFmt->format(dec, numberString, ignorePos, status); 279 } else { 280 numberFormat->format( 281 numberObject, numberString, ignorePos, status); // could be BigDecimal etc. 282 } 283 } else { 284 DecimalFormat *decFmt = dynamic_cast<DecimalFormat *>(numberFormat); 285 if(decFmt != NULL) { 286 decFmt->initVisibleDigitsWithExponent( 287 numberMinusOffset, dec, status); 288 if (U_FAILURE(status)) { 289 return appendTo; 290 } 291 decFmt->format(dec, numberString, ignorePos, status); 292 } else { 293 numberFormat->format( 294 numberMinusOffset, numberString, ignorePos, status); 295 } 296 } 297 int32_t partIndex = findSubMessage(msgPattern, 0, pluralRulesWrapper, &dec, number, status); 298 if (U_FAILURE(status)) { return appendTo; } 299 // Replace syntactic # signs in the top level of this sub-message 300 // (not in nested arguments) with the formatted number-offset. 301 const UnicodeString& pattern = msgPattern.getPatternString(); 302 int32_t prevIndex = msgPattern.getPart(partIndex).getLimit(); 303 for (;;) { 304 const MessagePattern::Part& part = msgPattern.getPart(++partIndex); 305 const UMessagePatternPartType type = part.getType(); 306 int32_t index = part.getIndex(); 307 if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) { 308 return appendTo.append(pattern, prevIndex, index - prevIndex); 309 } else if ((type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) || 310 (type == UMSGPAT_PART_TYPE_SKIP_SYNTAX && MessageImpl::jdkAposMode(msgPattern))) { 311 appendTo.append(pattern, prevIndex, index - prevIndex); 312 if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) { 313 appendTo.append(numberString); 314 } 315 prevIndex = part.getLimit(); 316 } else if (type == UMSGPAT_PART_TYPE_ARG_START) { 317 appendTo.append(pattern, prevIndex, index - prevIndex); 318 prevIndex = index; 319 partIndex = msgPattern.getLimitPartIndex(partIndex); 320 index = msgPattern.getPart(partIndex).getLimit(); 321 MessageImpl::appendReducedApostrophes(pattern, prevIndex, index, appendTo); 322 prevIndex = index; 323 } 324 } 325 } 326 327 UnicodeString& toPattern(UnicodeString & appendTo)328 PluralFormat::toPattern(UnicodeString& appendTo) { 329 if (0 == msgPattern.countParts()) { 330 appendTo.setToBogus(); 331 } else { 332 appendTo.append(msgPattern.getPatternString()); 333 } 334 return appendTo; 335 } 336 337 void setLocale(const Locale & loc,UErrorCode & status)338 PluralFormat::setLocale(const Locale& loc, UErrorCode& status) { 339 if (U_FAILURE(status)) { 340 return; 341 } 342 locale = loc; 343 msgPattern.clear(); 344 delete numberFormat; 345 offset = 0; 346 numberFormat = NULL; 347 pluralRulesWrapper.reset(); 348 init(NULL, UPLURAL_TYPE_CARDINAL, status); 349 } 350 351 void setNumberFormat(const NumberFormat * format,UErrorCode & status)352 PluralFormat::setNumberFormat(const NumberFormat* format, UErrorCode& status) { 353 if (U_FAILURE(status)) { 354 return; 355 } 356 NumberFormat* nf = (NumberFormat*)format->clone(); 357 if (nf != NULL) { 358 delete numberFormat; 359 numberFormat = nf; 360 } else { 361 status = U_MEMORY_ALLOCATION_ERROR; 362 } 363 } 364 365 Format* clone() const366 PluralFormat::clone() const 367 { 368 return new PluralFormat(*this); 369 } 370 371 372 PluralFormat& operator =(const PluralFormat & other)373 PluralFormat::operator=(const PluralFormat& other) { 374 if (this != &other) { 375 locale = other.locale; 376 msgPattern = other.msgPattern; 377 offset = other.offset; 378 copyObjects(other); 379 } 380 381 return *this; 382 } 383 384 UBool operator ==(const Format & other) const385 PluralFormat::operator==(const Format& other) const { 386 if (this == &other) { 387 return TRUE; 388 } 389 if (!Format::operator==(other)) { 390 return FALSE; 391 } 392 const PluralFormat& o = (const PluralFormat&)other; 393 return 394 locale == o.locale && 395 msgPattern == o.msgPattern && // implies same offset 396 (numberFormat == NULL) == (o.numberFormat == NULL) && 397 (numberFormat == NULL || *numberFormat == *o.numberFormat) && 398 (pluralRulesWrapper.pluralRules == NULL) == (o.pluralRulesWrapper.pluralRules == NULL) && 399 (pluralRulesWrapper.pluralRules == NULL || 400 *pluralRulesWrapper.pluralRules == *o.pluralRulesWrapper.pluralRules); 401 } 402 403 UBool operator !=(const Format & other) const404 PluralFormat::operator!=(const Format& other) const { 405 return !operator==(other); 406 } 407 408 void parseObject(const UnicodeString &,Formattable &,ParsePosition & pos) const409 PluralFormat::parseObject(const UnicodeString& /*source*/, 410 Formattable& /*result*/, 411 ParsePosition& pos) const 412 { 413 // Parsing not supported. 414 pos.setErrorIndex(pos.getIndex()); 415 } 416 findSubMessage(const MessagePattern & pattern,int32_t partIndex,const PluralSelector & selector,void * context,double number,UErrorCode & ec)417 int32_t PluralFormat::findSubMessage(const MessagePattern& pattern, int32_t partIndex, 418 const PluralSelector& selector, void *context, 419 double number, UErrorCode& ec) { 420 if (U_FAILURE(ec)) { 421 return 0; 422 } 423 int32_t count=pattern.countParts(); 424 double offset; 425 const MessagePattern::Part* part=&pattern.getPart(partIndex); 426 if (MessagePattern::Part::hasNumericValue(part->getType())) { 427 offset=pattern.getNumericValue(*part); 428 ++partIndex; 429 } else { 430 offset=0; 431 } 432 // The keyword is empty until we need to match against a non-explicit, not-"other" value. 433 // Then we get the keyword from the selector. 434 // (In other words, we never call the selector if we match against an explicit value, 435 // or if the only non-explicit keyword is "other".) 436 UnicodeString keyword; 437 UnicodeString other(FALSE, OTHER_STRING, 5); 438 // When we find a match, we set msgStart>0 and also set this boolean to true 439 // to avoid matching the keyword again (duplicates are allowed) 440 // while we continue to look for an explicit-value match. 441 UBool haveKeywordMatch=FALSE; 442 // msgStart is 0 until we find any appropriate sub-message. 443 // We remember the first "other" sub-message if we have not seen any 444 // appropriate sub-message before. 445 // We remember the first matching-keyword sub-message if we have not seen 446 // one of those before. 447 // (The parser allows [does not check for] duplicate keywords. 448 // We just have to make sure to take the first one.) 449 // We avoid matching the keyword twice by also setting haveKeywordMatch=true 450 // at the first keyword match. 451 // We keep going until we find an explicit-value match or reach the end of the plural style. 452 int32_t msgStart=0; 453 // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples 454 // until ARG_LIMIT or end of plural-only pattern. 455 do { 456 part=&pattern.getPart(partIndex++); 457 const UMessagePatternPartType type = part->getType(); 458 if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) { 459 break; 460 } 461 U_ASSERT (type==UMSGPAT_PART_TYPE_ARG_SELECTOR); 462 // part is an ARG_SELECTOR followed by an optional explicit value, and then a message 463 if(MessagePattern::Part::hasNumericValue(pattern.getPartType(partIndex))) { 464 // explicit value like "=2" 465 part=&pattern.getPart(partIndex++); 466 if(number==pattern.getNumericValue(*part)) { 467 // matches explicit value 468 return partIndex; 469 } 470 } else if(!haveKeywordMatch) { 471 // plural keyword like "few" or "other" 472 // Compare "other" first and call the selector if this is not "other". 473 if(pattern.partSubstringMatches(*part, other)) { 474 if(msgStart==0) { 475 msgStart=partIndex; 476 if(0 == keyword.compare(other)) { 477 // This is the first "other" sub-message, 478 // and the selected keyword is also "other". 479 // Do not match "other" again. 480 haveKeywordMatch=TRUE; 481 } 482 } 483 } else { 484 if(keyword.isEmpty()) { 485 keyword=selector.select(context, number-offset, ec); 486 if(msgStart!=0 && (0 == keyword.compare(other))) { 487 // We have already seen an "other" sub-message. 488 // Do not match "other" again. 489 haveKeywordMatch=TRUE; 490 // Skip keyword matching but do getLimitPartIndex(). 491 } 492 } 493 if(!haveKeywordMatch && pattern.partSubstringMatches(*part, keyword)) { 494 // keyword matches 495 msgStart=partIndex; 496 // Do not match this keyword again. 497 haveKeywordMatch=TRUE; 498 } 499 } 500 } 501 partIndex=pattern.getLimitPartIndex(partIndex); 502 } while(++partIndex<count); 503 return msgStart; 504 } 505 parseType(const UnicodeString & source,const NFRule * rbnfLenientScanner,Formattable & result,FieldPosition & pos) const506 void PluralFormat::parseType(const UnicodeString& source, const NFRule *rbnfLenientScanner, Formattable& result, FieldPosition& pos) const { 507 // If no pattern was applied, return null. 508 if (msgPattern.countParts() == 0) { 509 pos.setBeginIndex(-1); 510 pos.setEndIndex(-1); 511 return; 512 } 513 int partIndex = 0; 514 int currMatchIndex; 515 int count=msgPattern.countParts(); 516 int startingAt = pos.getBeginIndex(); 517 if (startingAt < 0) { 518 startingAt = 0; 519 } 520 521 // The keyword is null until we need to match against a non-explicit, not-"other" value. 522 // Then we get the keyword from the selector. 523 // (In other words, we never call the selector if we match against an explicit value, 524 // or if the only non-explicit keyword is "other".) 525 UnicodeString keyword; 526 UnicodeString matchedWord; 527 const UnicodeString& pattern = msgPattern.getPatternString(); 528 int matchedIndex = -1; 529 // Iterate over (ARG_SELECTOR ARG_START message ARG_LIMIT) tuples 530 // until the end of the plural-only pattern. 531 while (partIndex < count) { 532 const MessagePattern::Part* partSelector = &msgPattern.getPart(partIndex++); 533 if (partSelector->getType() != UMSGPAT_PART_TYPE_ARG_SELECTOR) { 534 // Bad format 535 continue; 536 } 537 538 const MessagePattern::Part* partStart = &msgPattern.getPart(partIndex++); 539 if (partStart->getType() != UMSGPAT_PART_TYPE_MSG_START) { 540 // Bad format 541 continue; 542 } 543 544 const MessagePattern::Part* partLimit = &msgPattern.getPart(partIndex++); 545 if (partLimit->getType() != UMSGPAT_PART_TYPE_MSG_LIMIT) { 546 // Bad format 547 continue; 548 } 549 550 UnicodeString currArg = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit()); 551 if (rbnfLenientScanner != NULL) { 552 // If lenient parsing is turned ON, we've got some time consuming parsing ahead of us. 553 int32_t length = -1; 554 currMatchIndex = rbnfLenientScanner->findTextLenient(source, currArg, startingAt, &length); 555 } 556 else { 557 currMatchIndex = source.indexOf(currArg, startingAt); 558 } 559 if (currMatchIndex >= 0 && currMatchIndex >= matchedIndex && currArg.length() > matchedWord.length()) { 560 matchedIndex = currMatchIndex; 561 matchedWord = currArg; 562 keyword = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit()); 563 } 564 } 565 if (matchedIndex >= 0) { 566 pos.setBeginIndex(matchedIndex); 567 pos.setEndIndex(matchedIndex + matchedWord.length()); 568 result.setString(keyword); 569 return; 570 } 571 572 // Not found! 573 pos.setBeginIndex(-1); 574 pos.setEndIndex(-1); 575 } 576 ~PluralSelector()577 PluralFormat::PluralSelector::~PluralSelector() {} 578 ~PluralSelectorAdapter()579 PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter() { 580 delete pluralRules; 581 } 582 select(void * context,double number,UErrorCode &) const583 UnicodeString PluralFormat::PluralSelectorAdapter::select(void *context, double number, 584 UErrorCode& /*ec*/) const { 585 (void)number; // unused except in the assertion 586 VisibleDigitsWithExponent *dec=static_cast<VisibleDigitsWithExponent *>(context); 587 return pluralRules->select(*dec); 588 } 589 reset()590 void PluralFormat::PluralSelectorAdapter::reset() { 591 delete pluralRules; 592 pluralRules = NULL; 593 } 594 595 596 U_NAMESPACE_END 597 598 599 #endif /* #if !UCONFIG_NO_FORMATTING */ 600 601 //eof 602