1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2009-2015, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 *******************************************************************************
8 *
9 * File PLURFMT.CPP
10 *******************************************************************************
11 */
12
13 #include "unicode/decimfmt.h"
14 #include "unicode/messagepattern.h"
15 #include "unicode/plurfmt.h"
16 #include "unicode/plurrule.h"
17 #include "unicode/utypes.h"
18 #include "cmemory.h"
19 #include "messageimpl.h"
20 #include "nfrule.h"
21 #include "plurrule_impl.h"
22 #include "uassert.h"
23 #include "uhash.h"
24 #include "number_decimalquantity.h"
25 #include "number_utils.h"
26 #include "number_utypes.h"
27
28 #if !UCONFIG_NO_FORMATTING
29
30 U_NAMESPACE_BEGIN
31
32 using number::impl::DecimalQuantity;
33
// UTF-16 spelling of the plural keyword "other" (5 code units plus a terminating NUL).
static const char16_t OTHER_STRING[] = u"other";
37
// Expands the ICU RTTI boilerplate for PluralFormat.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat)
39
40 PluralFormat::PluralFormat(UErrorCode& status)
41 : locale(Locale::getDefault()),
42 msgPattern(status),
43 numberFormat(nullptr),
44 offset(0) {
45 init(nullptr, UPLURAL_TYPE_CARDINAL, status);
46 }
47
PluralFormat(const Locale & loc,UErrorCode & status)48 PluralFormat::PluralFormat(const Locale& loc, UErrorCode& status)
49 : locale(loc),
50 msgPattern(status),
51 numberFormat(nullptr),
52 offset(0) {
53 init(nullptr, UPLURAL_TYPE_CARDINAL, status);
54 }
55
PluralFormat(const PluralRules & rules,UErrorCode & status)56 PluralFormat::PluralFormat(const PluralRules& rules, UErrorCode& status)
57 : locale(Locale::getDefault()),
58 msgPattern(status),
59 numberFormat(nullptr),
60 offset(0) {
61 init(&rules, UPLURAL_TYPE_COUNT, status);
62 }
63
PluralFormat(const Locale & loc,const PluralRules & rules,UErrorCode & status)64 PluralFormat::PluralFormat(const Locale& loc,
65 const PluralRules& rules,
66 UErrorCode& status)
67 : locale(loc),
68 msgPattern(status),
69 numberFormat(nullptr),
70 offset(0) {
71 init(&rules, UPLURAL_TYPE_COUNT, status);
72 }
73
PluralFormat(const Locale & loc,UPluralType type,UErrorCode & status)74 PluralFormat::PluralFormat(const Locale& loc,
75 UPluralType type,
76 UErrorCode& status)
77 : locale(loc),
78 msgPattern(status),
79 numberFormat(nullptr),
80 offset(0) {
81 init(nullptr, type, status);
82 }
83
PluralFormat(const UnicodeString & pat,UErrorCode & status)84 PluralFormat::PluralFormat(const UnicodeString& pat,
85 UErrorCode& status)
86 : locale(Locale::getDefault()),
87 msgPattern(status),
88 numberFormat(nullptr),
89 offset(0) {
90 init(nullptr, UPLURAL_TYPE_CARDINAL, status);
91 applyPattern(pat, status);
92 }
93
PluralFormat(const Locale & loc,const UnicodeString & pat,UErrorCode & status)94 PluralFormat::PluralFormat(const Locale& loc,
95 const UnicodeString& pat,
96 UErrorCode& status)
97 : locale(loc),
98 msgPattern(status),
99 numberFormat(nullptr),
100 offset(0) {
101 init(nullptr, UPLURAL_TYPE_CARDINAL, status);
102 applyPattern(pat, status);
103 }
104
PluralFormat(const PluralRules & rules,const UnicodeString & pat,UErrorCode & status)105 PluralFormat::PluralFormat(const PluralRules& rules,
106 const UnicodeString& pat,
107 UErrorCode& status)
108 : locale(Locale::getDefault()),
109 msgPattern(status),
110 numberFormat(nullptr),
111 offset(0) {
112 init(&rules, UPLURAL_TYPE_COUNT, status);
113 applyPattern(pat, status);
114 }
115
PluralFormat(const Locale & loc,const PluralRules & rules,const UnicodeString & pat,UErrorCode & status)116 PluralFormat::PluralFormat(const Locale& loc,
117 const PluralRules& rules,
118 const UnicodeString& pat,
119 UErrorCode& status)
120 : locale(loc),
121 msgPattern(status),
122 numberFormat(nullptr),
123 offset(0) {
124 init(&rules, UPLURAL_TYPE_COUNT, status);
125 applyPattern(pat, status);
126 }
127
PluralFormat(const Locale & loc,UPluralType type,const UnicodeString & pat,UErrorCode & status)128 PluralFormat::PluralFormat(const Locale& loc,
129 UPluralType type,
130 const UnicodeString& pat,
131 UErrorCode& status)
132 : locale(loc),
133 msgPattern(status),
134 numberFormat(nullptr),
135 offset(0) {
136 init(nullptr, type, status);
137 applyPattern(pat, status);
138 }
139
// Copy constructor. Value members are copied in the initializer list;
// numberFormat starts out null so the delete in copyObjects() is a no-op,
// and copyObjects() then installs copies of the owned objects.
PluralFormat::PluralFormat(const PluralFormat& other)
        : Format(other),
          locale(other.locale),
          msgPattern(other.msgPattern),
          numberFormat(nullptr),
          offset(other.offset) {
    copyObjects(other);
}
148
149 void
copyObjects(const PluralFormat & other)150 PluralFormat::copyObjects(const PluralFormat& other) {
151 UErrorCode status = U_ZERO_ERROR;
152 delete numberFormat;
153 delete pluralRulesWrapper.pluralRules;
154 if (other.numberFormat == nullptr) {
155 numberFormat = NumberFormat::createInstance(locale, status);
156 } else {
157 numberFormat = other.numberFormat->clone();
158 }
159 if (other.pluralRulesWrapper.pluralRules == nullptr) {
160 pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status);
161 } else {
162 pluralRulesWrapper.pluralRules = other.pluralRulesWrapper.pluralRules->clone();
163 }
164 }
165
166
~PluralFormat()167 PluralFormat::~PluralFormat() {
168 delete numberFormat;
169 }
170
171 void
init(const PluralRules * rules,UPluralType type,UErrorCode & status)172 PluralFormat::init(const PluralRules* rules, UPluralType type, UErrorCode& status) {
173 if (U_FAILURE(status)) {
174 return;
175 }
176
177 if (rules==nullptr) {
178 pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, type, status);
179 } else {
180 pluralRulesWrapper.pluralRules = rules->clone();
181 if (pluralRulesWrapper.pluralRules == nullptr) {
182 status = U_MEMORY_ALLOCATION_ERROR;
183 return;
184 }
185 }
186
187 numberFormat= NumberFormat::createInstance(locale, status);
188 }
189
190 void
applyPattern(const UnicodeString & newPattern,UErrorCode & status)191 PluralFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) {
192 msgPattern.parsePluralStyle(newPattern, nullptr, status);
193 if (U_FAILURE(status)) {
194 msgPattern.clear();
195 offset = 0;
196 return;
197 }
198 offset = msgPattern.getPluralOffset(0);
199 }
200
201 UnicodeString&
format(const Formattable & obj,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const202 PluralFormat::format(const Formattable& obj,
203 UnicodeString& appendTo,
204 FieldPosition& pos,
205 UErrorCode& status) const
206 {
207 if (U_FAILURE(status)) return appendTo;
208
209 if (obj.isNumeric()) {
210 return format(obj, obj.getDouble(), appendTo, pos, status);
211 } else {
212 status = U_ILLEGAL_ARGUMENT_ERROR;
213 return appendTo;
214 }
215 }
216
217 UnicodeString
format(int32_t number,UErrorCode & status) const218 PluralFormat::format(int32_t number, UErrorCode& status) const {
219 FieldPosition fpos(FieldPosition::DONT_CARE);
220 UnicodeString result;
221 return format(Formattable(number), number, result, fpos, status);
222 }
223
224 UnicodeString
format(double number,UErrorCode & status) const225 PluralFormat::format(double number, UErrorCode& status) const {
226 FieldPosition fpos(FieldPosition::DONT_CARE);
227 UnicodeString result;
228 return format(Formattable(number), number, result, fpos, status);
229 }
230
231
232 UnicodeString&
format(int32_t number,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const233 PluralFormat::format(int32_t number,
234 UnicodeString& appendTo,
235 FieldPosition& pos,
236 UErrorCode& status) const {
237 return format(Formattable(number), static_cast<double>(number), appendTo, pos, status);
238 }
239
240 UnicodeString&
format(double number,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const241 PluralFormat::format(double number,
242 UnicodeString& appendTo,
243 FieldPosition& pos,
244 UErrorCode& status) const {
245 return format(Formattable(number), number, appendTo, pos, status);
246 }
247
248 UnicodeString&
format(const Formattable & numberObject,double number,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const249 PluralFormat::format(const Formattable& numberObject, double number,
250 UnicodeString& appendTo,
251 FieldPosition& pos,
252 UErrorCode& status) const {
253 if (U_FAILURE(status)) {
254 return appendTo;
255 }
256 if (msgPattern.countParts() == 0) {
257 return numberFormat->format(numberObject, appendTo, pos, status);
258 }
259
260 // Get the appropriate sub-message.
261 // Select it based on the formatted number-offset.
262 double numberMinusOffset = number - offset;
263 // Call NumberFormatter to get both the DecimalQuantity and the string.
264 // This call site needs to use more internal APIs than the Java equivalent.
265 number::impl::UFormattedNumberData data;
266 if (offset == 0) {
267 // could be BigDecimal etc.
268 numberObject.populateDecimalQuantity(data.quantity, status);
269 } else {
270 data.quantity.setToDouble(numberMinusOffset);
271 }
272 UnicodeString numberString;
273 auto *decFmt = dynamic_cast<DecimalFormat *>(numberFormat);
274 if(decFmt != nullptr) {
275 const number::LocalizedNumberFormatter* lnf = decFmt->toNumberFormatter(status);
276 if (U_FAILURE(status)) {
277 return appendTo;
278 }
279 lnf->formatImpl(&data, status); // mutates &data
280 if (U_FAILURE(status)) {
281 return appendTo;
282 }
283 numberString = data.getStringRef().toUnicodeString();
284 } else {
285 if (offset == 0) {
286 numberFormat->format(numberObject, numberString, status);
287 } else {
288 numberFormat->format(numberMinusOffset, numberString, status);
289 }
290 }
291
292 int32_t partIndex = findSubMessage(msgPattern, 0, pluralRulesWrapper, &data.quantity, number, status);
293 if (U_FAILURE(status)) { return appendTo; }
294 // Replace syntactic # signs in the top level of this sub-message
295 // (not in nested arguments) with the formatted number-offset.
296 const UnicodeString& pattern = msgPattern.getPatternString();
297 int32_t prevIndex = msgPattern.getPart(partIndex).getLimit();
298 for (;;) {
299 const MessagePattern::Part& part = msgPattern.getPart(++partIndex);
300 const UMessagePatternPartType type = part.getType();
301 int32_t index = part.getIndex();
302 if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) {
303 return appendTo.append(pattern, prevIndex, index - prevIndex);
304 } else if ((type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) ||
305 (type == UMSGPAT_PART_TYPE_SKIP_SYNTAX && MessageImpl::jdkAposMode(msgPattern))) {
306 appendTo.append(pattern, prevIndex, index - prevIndex);
307 if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
308 appendTo.append(numberString);
309 }
310 prevIndex = part.getLimit();
311 } else if (type == UMSGPAT_PART_TYPE_ARG_START) {
312 appendTo.append(pattern, prevIndex, index - prevIndex);
313 prevIndex = index;
314 partIndex = msgPattern.getLimitPartIndex(partIndex);
315 index = msgPattern.getPart(partIndex).getLimit();
316 MessageImpl::appendReducedApostrophes(pattern, prevIndex, index, appendTo);
317 prevIndex = index;
318 }
319 }
320 }
321
322 UnicodeString&
toPattern(UnicodeString & appendTo)323 PluralFormat::toPattern(UnicodeString& appendTo) {
324 if (0 == msgPattern.countParts()) {
325 appendTo.setToBogus();
326 } else {
327 appendTo.append(msgPattern.getPatternString());
328 }
329 return appendTo;
330 }
331
332 void
setLocale(const Locale & loc,UErrorCode & status)333 PluralFormat::setLocale(const Locale& loc, UErrorCode& status) {
334 if (U_FAILURE(status)) {
335 return;
336 }
337 locale = loc;
338 msgPattern.clear();
339 delete numberFormat;
340 offset = 0;
341 numberFormat = nullptr;
342 pluralRulesWrapper.reset();
343 init(nullptr, UPLURAL_TYPE_CARDINAL, status);
344 }
345
346 void
setNumberFormat(const NumberFormat * format,UErrorCode & status)347 PluralFormat::setNumberFormat(const NumberFormat* format, UErrorCode& status) {
348 if (U_FAILURE(status)) {
349 return;
350 }
351 NumberFormat* nf = format->clone();
352 if (nf != nullptr) {
353 delete numberFormat;
354 numberFormat = nf;
355 } else {
356 status = U_MEMORY_ALLOCATION_ERROR;
357 }
358 }
359
360 PluralFormat*
clone() const361 PluralFormat::clone() const
362 {
363 return new PluralFormat(*this);
364 }
365
366
367 PluralFormat&
operator =(const PluralFormat & other)368 PluralFormat::operator=(const PluralFormat& other) {
369 if (this != &other) {
370 locale = other.locale;
371 msgPattern = other.msgPattern;
372 offset = other.offset;
373 copyObjects(other);
374 }
375
376 return *this;
377 }
378
379 bool
operator ==(const Format & other) const380 PluralFormat::operator==(const Format& other) const {
381 if (this == &other) {
382 return true;
383 }
384 if (!Format::operator==(other)) {
385 return false;
386 }
387 const PluralFormat& o = (const PluralFormat&)other;
388 return
389 locale == o.locale &&
390 msgPattern == o.msgPattern && // implies same offset
391 (numberFormat == nullptr) == (o.numberFormat == nullptr) &&
392 (numberFormat == nullptr || *numberFormat == *o.numberFormat) &&
393 (pluralRulesWrapper.pluralRules == nullptr) == (o.pluralRulesWrapper.pluralRules == nullptr) &&
394 (pluralRulesWrapper.pluralRules == nullptr ||
395 *pluralRulesWrapper.pluralRules == *o.pluralRulesWrapper.pluralRules);
396 }
397
398 bool
operator !=(const Format & other) const399 PluralFormat::operator!=(const Format& other) const {
400 return !operator==(other);
401 }
402
403 void
parseObject(const UnicodeString &,Formattable &,ParsePosition & pos) const404 PluralFormat::parseObject(const UnicodeString& /*source*/,
405 Formattable& /*result*/,
406 ParsePosition& pos) const
407 {
408 // Parsing not supported.
409 pos.setErrorIndex(pos.getIndex());
410 }
411
findSubMessage(const MessagePattern & pattern,int32_t partIndex,const PluralSelector & selector,void * context,double number,UErrorCode & ec)412 int32_t PluralFormat::findSubMessage(const MessagePattern& pattern, int32_t partIndex,
413 const PluralSelector& selector, void *context,
414 double number, UErrorCode& ec) {
415 if (U_FAILURE(ec)) {
416 return 0;
417 }
418 int32_t count=pattern.countParts();
419 double offset;
420 const MessagePattern::Part* part=&pattern.getPart(partIndex);
421 if (MessagePattern::Part::hasNumericValue(part->getType())) {
422 offset=pattern.getNumericValue(*part);
423 ++partIndex;
424 } else {
425 offset=0;
426 }
427 // The keyword is empty until we need to match against a non-explicit, not-"other" value.
428 // Then we get the keyword from the selector.
429 // (In other words, we never call the selector if we match against an explicit value,
430 // or if the only non-explicit keyword is "other".)
431 UnicodeString keyword;
432 UnicodeString other(false, OTHER_STRING, 5);
433 // When we find a match, we set msgStart>0 and also set this boolean to true
434 // to avoid matching the keyword again (duplicates are allowed)
435 // while we continue to look for an explicit-value match.
436 UBool haveKeywordMatch=false;
437 // msgStart is 0 until we find any appropriate sub-message.
438 // We remember the first "other" sub-message if we have not seen any
439 // appropriate sub-message before.
440 // We remember the first matching-keyword sub-message if we have not seen
441 // one of those before.
442 // (The parser allows [does not check for] duplicate keywords.
443 // We just have to make sure to take the first one.)
444 // We avoid matching the keyword twice by also setting haveKeywordMatch=true
445 // at the first keyword match.
446 // We keep going until we find an explicit-value match or reach the end of the plural style.
447 int32_t msgStart=0;
448 // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples
449 // until ARG_LIMIT or end of plural-only pattern.
450 do {
451 part=&pattern.getPart(partIndex++);
452 const UMessagePatternPartType type = part->getType();
453 if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
454 break;
455 }
456 U_ASSERT (type==UMSGPAT_PART_TYPE_ARG_SELECTOR);
457 // part is an ARG_SELECTOR followed by an optional explicit value, and then a message
458 if(MessagePattern::Part::hasNumericValue(pattern.getPartType(partIndex))) {
459 // explicit value like "=2"
460 part=&pattern.getPart(partIndex++);
461 if(number==pattern.getNumericValue(*part)) {
462 // matches explicit value
463 return partIndex;
464 }
465 } else if(!haveKeywordMatch) {
466 // plural keyword like "few" or "other"
467 // Compare "other" first and call the selector if this is not "other".
468 if(pattern.partSubstringMatches(*part, other)) {
469 if(msgStart==0) {
470 msgStart=partIndex;
471 if(0 == keyword.compare(other)) {
472 // This is the first "other" sub-message,
473 // and the selected keyword is also "other".
474 // Do not match "other" again.
475 haveKeywordMatch=true;
476 }
477 }
478 } else {
479 if(keyword.isEmpty()) {
480 keyword=selector.select(context, number-offset, ec);
481 if(msgStart!=0 && (0 == keyword.compare(other))) {
482 // We have already seen an "other" sub-message.
483 // Do not match "other" again.
484 haveKeywordMatch=true;
485 // Skip keyword matching but do getLimitPartIndex().
486 }
487 }
488 if(!haveKeywordMatch && pattern.partSubstringMatches(*part, keyword)) {
489 // keyword matches
490 msgStart=partIndex;
491 // Do not match this keyword again.
492 haveKeywordMatch=true;
493 }
494 }
495 }
496 partIndex=pattern.getLimitPartIndex(partIndex);
497 } while(++partIndex<count);
498 return msgStart;
499 }
500
parseType(const UnicodeString & source,const NFRule * rbnfLenientScanner,Formattable & result,FieldPosition & pos) const501 void PluralFormat::parseType(const UnicodeString& source, const NFRule *rbnfLenientScanner, Formattable& result, FieldPosition& pos) const {
502 // If no pattern was applied, return null.
503 if (msgPattern.countParts() == 0) {
504 pos.setBeginIndex(-1);
505 pos.setEndIndex(-1);
506 return;
507 }
508 int partIndex = 0;
509 int currMatchIndex;
510 int count=msgPattern.countParts();
511 int startingAt = pos.getBeginIndex();
512 if (startingAt < 0) {
513 startingAt = 0;
514 }
515
516 // The keyword is null until we need to match against a non-explicit, not-"other" value.
517 // Then we get the keyword from the selector.
518 // (In other words, we never call the selector if we match against an explicit value,
519 // or if the only non-explicit keyword is "other".)
520 UnicodeString keyword;
521 UnicodeString matchedWord;
522 const UnicodeString& pattern = msgPattern.getPatternString();
523 int matchedIndex = -1;
524 // Iterate over (ARG_SELECTOR ARG_START message ARG_LIMIT) tuples
525 // until the end of the plural-only pattern.
526 while (partIndex < count) {
527 const MessagePattern::Part* partSelector = &msgPattern.getPart(partIndex++);
528 if (partSelector->getType() != UMSGPAT_PART_TYPE_ARG_SELECTOR) {
529 // Bad format
530 continue;
531 }
532
533 const MessagePattern::Part* partStart = &msgPattern.getPart(partIndex++);
534 if (partStart->getType() != UMSGPAT_PART_TYPE_MSG_START) {
535 // Bad format
536 continue;
537 }
538
539 const MessagePattern::Part* partLimit = &msgPattern.getPart(partIndex++);
540 if (partLimit->getType() != UMSGPAT_PART_TYPE_MSG_LIMIT) {
541 // Bad format
542 continue;
543 }
544
545 UnicodeString currArg = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit());
546 if (rbnfLenientScanner != nullptr) {
547 // Check if non-lenient rule finds the text before call lenient parsing
548 int32_t tempIndex = source.indexOf(currArg, startingAt);
549 if (tempIndex >= 0) {
550 currMatchIndex = tempIndex;
551 } else {
552 // If lenient parsing is turned ON, we've got some time consuming parsing ahead of us.
553 int32_t length = -1;
554 currMatchIndex = rbnfLenientScanner->findTextLenient(source, currArg, startingAt, &length);
555 }
556 }
557 else {
558 currMatchIndex = source.indexOf(currArg, startingAt);
559 }
560 if (currMatchIndex >= 0 && currMatchIndex >= matchedIndex && currArg.length() > matchedWord.length()) {
561 matchedIndex = currMatchIndex;
562 matchedWord = currArg;
563 keyword = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit());
564 }
565 }
566 if (matchedIndex >= 0) {
567 pos.setBeginIndex(matchedIndex);
568 pos.setEndIndex(matchedIndex + matchedWord.length());
569 result.setString(keyword);
570 return;
571 }
572
573 // Not found!
574 pos.setBeginIndex(-1);
575 pos.setEndIndex(-1);
576 }
577
~PluralSelector()578 PluralFormat::PluralSelector::~PluralSelector() {}
579
~PluralSelectorAdapter()580 PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter() {
581 delete pluralRules;
582 }
583
select(void * context,double number,UErrorCode &) const584 UnicodeString PluralFormat::PluralSelectorAdapter::select(void *context, double number,
585 UErrorCode& /*ec*/) const {
586 (void)number; // unused except in the assertion
587 IFixedDecimal *dec=static_cast<IFixedDecimal *>(context);
588 return pluralRules->select(*dec);
589 }
590
reset()591 void PluralFormat::PluralSelectorAdapter::reset() {
592 delete pluralRules;
593 pluralRules = nullptr;
594 }
595
596
597 U_NAMESPACE_END
598
599
600 #endif /* #if !UCONFIG_NO_FORMATTING */
601
602 //eof
603