1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2009-2015, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 *******************************************************************************
8 *
9 * File PLURFMT.CPP
10 *******************************************************************************
11 */
12
13 #include "unicode/decimfmt.h"
14 #include "unicode/messagepattern.h"
15 #include "unicode/plurfmt.h"
16 #include "unicode/plurrule.h"
17 #include "unicode/utypes.h"
18 #include "cmemory.h"
19 #include "messageimpl.h"
20 #include "nfrule.h"
21 #include "plurrule_impl.h"
22 #include "uassert.h"
23 #include "uhash.h"
24 #include "number_decimalquantity.h"
25 #include "number_utils.h"
26 #include "number_utypes.h"
27
28 #if !UCONFIG_NO_FORMATTING
29
30 U_NAMESPACE_BEGIN
31
32 using number::impl::DecimalQuantity;
33
34 static const UChar OTHER_STRING[] = {
35 0x6F, 0x74, 0x68, 0x65, 0x72, 0 // "other"
36 };
37
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat)38 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat)
39
40 PluralFormat::PluralFormat(UErrorCode& status)
41 : locale(Locale::getDefault()),
42 msgPattern(status),
43 numberFormat(NULL),
44 offset(0) {
45 init(NULL, UPLURAL_TYPE_CARDINAL, status);
46 }
47
PluralFormat(const Locale & loc,UErrorCode & status)48 PluralFormat::PluralFormat(const Locale& loc, UErrorCode& status)
49 : locale(loc),
50 msgPattern(status),
51 numberFormat(NULL),
52 offset(0) {
53 init(NULL, UPLURAL_TYPE_CARDINAL, status);
54 }
55
PluralFormat(const PluralRules & rules,UErrorCode & status)56 PluralFormat::PluralFormat(const PluralRules& rules, UErrorCode& status)
57 : locale(Locale::getDefault()),
58 msgPattern(status),
59 numberFormat(NULL),
60 offset(0) {
61 init(&rules, UPLURAL_TYPE_COUNT, status);
62 }
63
PluralFormat(const Locale & loc,const PluralRules & rules,UErrorCode & status)64 PluralFormat::PluralFormat(const Locale& loc,
65 const PluralRules& rules,
66 UErrorCode& status)
67 : locale(loc),
68 msgPattern(status),
69 numberFormat(NULL),
70 offset(0) {
71 init(&rules, UPLURAL_TYPE_COUNT, status);
72 }
73
PluralFormat(const Locale & loc,UPluralType type,UErrorCode & status)74 PluralFormat::PluralFormat(const Locale& loc,
75 UPluralType type,
76 UErrorCode& status)
77 : locale(loc),
78 msgPattern(status),
79 numberFormat(NULL),
80 offset(0) {
81 init(NULL, type, status);
82 }
83
PluralFormat(const UnicodeString & pat,UErrorCode & status)84 PluralFormat::PluralFormat(const UnicodeString& pat,
85 UErrorCode& status)
86 : locale(Locale::getDefault()),
87 msgPattern(status),
88 numberFormat(NULL),
89 offset(0) {
90 init(NULL, UPLURAL_TYPE_CARDINAL, status);
91 applyPattern(pat, status);
92 }
93
PluralFormat(const Locale & loc,const UnicodeString & pat,UErrorCode & status)94 PluralFormat::PluralFormat(const Locale& loc,
95 const UnicodeString& pat,
96 UErrorCode& status)
97 : locale(loc),
98 msgPattern(status),
99 numberFormat(NULL),
100 offset(0) {
101 init(NULL, UPLURAL_TYPE_CARDINAL, status);
102 applyPattern(pat, status);
103 }
104
PluralFormat(const PluralRules & rules,const UnicodeString & pat,UErrorCode & status)105 PluralFormat::PluralFormat(const PluralRules& rules,
106 const UnicodeString& pat,
107 UErrorCode& status)
108 : locale(Locale::getDefault()),
109 msgPattern(status),
110 numberFormat(NULL),
111 offset(0) {
112 init(&rules, UPLURAL_TYPE_COUNT, status);
113 applyPattern(pat, status);
114 }
115
PluralFormat(const Locale & loc,const PluralRules & rules,const UnicodeString & pat,UErrorCode & status)116 PluralFormat::PluralFormat(const Locale& loc,
117 const PluralRules& rules,
118 const UnicodeString& pat,
119 UErrorCode& status)
120 : locale(loc),
121 msgPattern(status),
122 numberFormat(NULL),
123 offset(0) {
124 init(&rules, UPLURAL_TYPE_COUNT, status);
125 applyPattern(pat, status);
126 }
127
PluralFormat(const Locale & loc,UPluralType type,const UnicodeString & pat,UErrorCode & status)128 PluralFormat::PluralFormat(const Locale& loc,
129 UPluralType type,
130 const UnicodeString& pat,
131 UErrorCode& status)
132 : locale(loc),
133 msgPattern(status),
134 numberFormat(NULL),
135 offset(0) {
136 init(NULL, type, status);
137 applyPattern(pat, status);
138 }
139
// Copy constructor: copies the Format base, locale, parsed pattern and
// offset, then lets copyObjects() duplicate the owned number format and
// plural rules.
PluralFormat::PluralFormat(const PluralFormat& other)
        : Format(other),
          locale(other.locale),
          msgPattern(other.msgPattern),
          numberFormat(nullptr),
          offset(other.offset) {
    copyObjects(other);
}
148
149 void
copyObjects(const PluralFormat & other)150 PluralFormat::copyObjects(const PluralFormat& other) {
151 UErrorCode status = U_ZERO_ERROR;
152 if (numberFormat != NULL) {
153 delete numberFormat;
154 }
155 if (pluralRulesWrapper.pluralRules != NULL) {
156 delete pluralRulesWrapper.pluralRules;
157 }
158
159 if (other.numberFormat == NULL) {
160 numberFormat = NumberFormat::createInstance(locale, status);
161 } else {
162 numberFormat = (NumberFormat*)other.numberFormat->clone();
163 }
164 if (other.pluralRulesWrapper.pluralRules == NULL) {
165 pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status);
166 } else {
167 pluralRulesWrapper.pluralRules = other.pluralRulesWrapper.pluralRules->clone();
168 }
169 }
170
171
~PluralFormat()172 PluralFormat::~PluralFormat() {
173 delete numberFormat;
174 }
175
176 void
init(const PluralRules * rules,UPluralType type,UErrorCode & status)177 PluralFormat::init(const PluralRules* rules, UPluralType type, UErrorCode& status) {
178 if (U_FAILURE(status)) {
179 return;
180 }
181
182 if (rules==NULL) {
183 pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, type, status);
184 } else {
185 pluralRulesWrapper.pluralRules = rules->clone();
186 if (pluralRulesWrapper.pluralRules == NULL) {
187 status = U_MEMORY_ALLOCATION_ERROR;
188 return;
189 }
190 }
191
192 numberFormat= NumberFormat::createInstance(locale, status);
193 }
194
195 void
applyPattern(const UnicodeString & newPattern,UErrorCode & status)196 PluralFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) {
197 msgPattern.parsePluralStyle(newPattern, NULL, status);
198 if (U_FAILURE(status)) {
199 msgPattern.clear();
200 offset = 0;
201 return;
202 }
203 offset = msgPattern.getPluralOffset(0);
204 }
205
206 UnicodeString&
format(const Formattable & obj,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const207 PluralFormat::format(const Formattable& obj,
208 UnicodeString& appendTo,
209 FieldPosition& pos,
210 UErrorCode& status) const
211 {
212 if (U_FAILURE(status)) return appendTo;
213
214 if (obj.isNumeric()) {
215 return format(obj, obj.getDouble(), appendTo, pos, status);
216 } else {
217 status = U_ILLEGAL_ARGUMENT_ERROR;
218 return appendTo;
219 }
220 }
221
222 UnicodeString
format(int32_t number,UErrorCode & status) const223 PluralFormat::format(int32_t number, UErrorCode& status) const {
224 FieldPosition fpos(FieldPosition::DONT_CARE);
225 UnicodeString result;
226 return format(Formattable(number), number, result, fpos, status);
227 }
228
229 UnicodeString
format(double number,UErrorCode & status) const230 PluralFormat::format(double number, UErrorCode& status) const {
231 FieldPosition fpos(FieldPosition::DONT_CARE);
232 UnicodeString result;
233 return format(Formattable(number), number, result, fpos, status);
234 }
235
236
237 UnicodeString&
format(int32_t number,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const238 PluralFormat::format(int32_t number,
239 UnicodeString& appendTo,
240 FieldPosition& pos,
241 UErrorCode& status) const {
242 return format(Formattable(number), (double)number, appendTo, pos, status);
243 }
244
245 UnicodeString&
format(double number,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const246 PluralFormat::format(double number,
247 UnicodeString& appendTo,
248 FieldPosition& pos,
249 UErrorCode& status) const {
250 return format(Formattable(number), (double)number, appendTo, pos, status);
251 }
252
253 UnicodeString&
format(const Formattable & numberObject,double number,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const254 PluralFormat::format(const Formattable& numberObject, double number,
255 UnicodeString& appendTo,
256 FieldPosition& pos,
257 UErrorCode& status) const {
258 if (U_FAILURE(status)) {
259 return appendTo;
260 }
261 if (msgPattern.countParts() == 0) {
262 return numberFormat->format(numberObject, appendTo, pos, status);
263 }
264
265 // Get the appropriate sub-message.
266 // Select it based on the formatted number-offset.
267 double numberMinusOffset = number - offset;
268 // Call NumberFormatter to get both the DecimalQuantity and the string.
269 // This call site needs to use more internal APIs than the Java equivalent.
270 number::impl::UFormattedNumberData data;
271 if (offset == 0) {
272 // could be BigDecimal etc.
273 numberObject.populateDecimalQuantity(data.quantity, status);
274 } else {
275 data.quantity.setToDouble(numberMinusOffset);
276 }
277 UnicodeString numberString;
278 auto *decFmt = dynamic_cast<DecimalFormat *>(numberFormat);
279 if(decFmt != nullptr) {
280 decFmt->toNumberFormatter().formatImpl(&data, status); // mutates &data
281 numberString = data.string.toUnicodeString();
282 } else {
283 if (offset == 0) {
284 numberFormat->format(numberObject, numberString, status);
285 } else {
286 numberFormat->format(numberMinusOffset, numberString, status);
287 }
288 }
289
290 int32_t partIndex = findSubMessage(msgPattern, 0, pluralRulesWrapper, &data.quantity, number, status);
291 if (U_FAILURE(status)) { return appendTo; }
292 // Replace syntactic # signs in the top level of this sub-message
293 // (not in nested arguments) with the formatted number-offset.
294 const UnicodeString& pattern = msgPattern.getPatternString();
295 int32_t prevIndex = msgPattern.getPart(partIndex).getLimit();
296 for (;;) {
297 const MessagePattern::Part& part = msgPattern.getPart(++partIndex);
298 const UMessagePatternPartType type = part.getType();
299 int32_t index = part.getIndex();
300 if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) {
301 return appendTo.append(pattern, prevIndex, index - prevIndex);
302 } else if ((type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) ||
303 (type == UMSGPAT_PART_TYPE_SKIP_SYNTAX && MessageImpl::jdkAposMode(msgPattern))) {
304 appendTo.append(pattern, prevIndex, index - prevIndex);
305 if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
306 appendTo.append(numberString);
307 }
308 prevIndex = part.getLimit();
309 } else if (type == UMSGPAT_PART_TYPE_ARG_START) {
310 appendTo.append(pattern, prevIndex, index - prevIndex);
311 prevIndex = index;
312 partIndex = msgPattern.getLimitPartIndex(partIndex);
313 index = msgPattern.getPart(partIndex).getLimit();
314 MessageImpl::appendReducedApostrophes(pattern, prevIndex, index, appendTo);
315 prevIndex = index;
316 }
317 }
318 }
319
320 UnicodeString&
toPattern(UnicodeString & appendTo)321 PluralFormat::toPattern(UnicodeString& appendTo) {
322 if (0 == msgPattern.countParts()) {
323 appendTo.setToBogus();
324 } else {
325 appendTo.append(msgPattern.getPatternString());
326 }
327 return appendTo;
328 }
329
330 void
setLocale(const Locale & loc,UErrorCode & status)331 PluralFormat::setLocale(const Locale& loc, UErrorCode& status) {
332 if (U_FAILURE(status)) {
333 return;
334 }
335 locale = loc;
336 msgPattern.clear();
337 delete numberFormat;
338 offset = 0;
339 numberFormat = NULL;
340 pluralRulesWrapper.reset();
341 init(NULL, UPLURAL_TYPE_CARDINAL, status);
342 }
343
344 void
setNumberFormat(const NumberFormat * format,UErrorCode & status)345 PluralFormat::setNumberFormat(const NumberFormat* format, UErrorCode& status) {
346 if (U_FAILURE(status)) {
347 return;
348 }
349 NumberFormat* nf = (NumberFormat*)format->clone();
350 if (nf != NULL) {
351 delete numberFormat;
352 numberFormat = nf;
353 } else {
354 status = U_MEMORY_ALLOCATION_ERROR;
355 }
356 }
357
358 Format*
clone() const359 PluralFormat::clone() const
360 {
361 return new PluralFormat(*this);
362 }
363
364
365 PluralFormat&
operator =(const PluralFormat & other)366 PluralFormat::operator=(const PluralFormat& other) {
367 if (this != &other) {
368 locale = other.locale;
369 msgPattern = other.msgPattern;
370 offset = other.offset;
371 copyObjects(other);
372 }
373
374 return *this;
375 }
376
377 UBool
operator ==(const Format & other) const378 PluralFormat::operator==(const Format& other) const {
379 if (this == &other) {
380 return TRUE;
381 }
382 if (!Format::operator==(other)) {
383 return FALSE;
384 }
385 const PluralFormat& o = (const PluralFormat&)other;
386 return
387 locale == o.locale &&
388 msgPattern == o.msgPattern && // implies same offset
389 (numberFormat == NULL) == (o.numberFormat == NULL) &&
390 (numberFormat == NULL || *numberFormat == *o.numberFormat) &&
391 (pluralRulesWrapper.pluralRules == NULL) == (o.pluralRulesWrapper.pluralRules == NULL) &&
392 (pluralRulesWrapper.pluralRules == NULL ||
393 *pluralRulesWrapper.pluralRules == *o.pluralRulesWrapper.pluralRules);
394 }
395
396 UBool
operator !=(const Format & other) const397 PluralFormat::operator!=(const Format& other) const {
398 return !operator==(other);
399 }
400
401 void
parseObject(const UnicodeString &,Formattable &,ParsePosition & pos) const402 PluralFormat::parseObject(const UnicodeString& /*source*/,
403 Formattable& /*result*/,
404 ParsePosition& pos) const
405 {
406 // Parsing not supported.
407 pos.setErrorIndex(pos.getIndex());
408 }
409
findSubMessage(const MessagePattern & pattern,int32_t partIndex,const PluralSelector & selector,void * context,double number,UErrorCode & ec)410 int32_t PluralFormat::findSubMessage(const MessagePattern& pattern, int32_t partIndex,
411 const PluralSelector& selector, void *context,
412 double number, UErrorCode& ec) {
413 if (U_FAILURE(ec)) {
414 return 0;
415 }
416 int32_t count=pattern.countParts();
417 double offset;
418 const MessagePattern::Part* part=&pattern.getPart(partIndex);
419 if (MessagePattern::Part::hasNumericValue(part->getType())) {
420 offset=pattern.getNumericValue(*part);
421 ++partIndex;
422 } else {
423 offset=0;
424 }
425 // The keyword is empty until we need to match against a non-explicit, not-"other" value.
426 // Then we get the keyword from the selector.
427 // (In other words, we never call the selector if we match against an explicit value,
428 // or if the only non-explicit keyword is "other".)
429 UnicodeString keyword;
430 UnicodeString other(FALSE, OTHER_STRING, 5);
431 // When we find a match, we set msgStart>0 and also set this boolean to true
432 // to avoid matching the keyword again (duplicates are allowed)
433 // while we continue to look for an explicit-value match.
434 UBool haveKeywordMatch=FALSE;
435 // msgStart is 0 until we find any appropriate sub-message.
436 // We remember the first "other" sub-message if we have not seen any
437 // appropriate sub-message before.
438 // We remember the first matching-keyword sub-message if we have not seen
439 // one of those before.
440 // (The parser allows [does not check for] duplicate keywords.
441 // We just have to make sure to take the first one.)
442 // We avoid matching the keyword twice by also setting haveKeywordMatch=true
443 // at the first keyword match.
444 // We keep going until we find an explicit-value match or reach the end of the plural style.
445 int32_t msgStart=0;
446 // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples
447 // until ARG_LIMIT or end of plural-only pattern.
448 do {
449 part=&pattern.getPart(partIndex++);
450 const UMessagePatternPartType type = part->getType();
451 if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
452 break;
453 }
454 U_ASSERT (type==UMSGPAT_PART_TYPE_ARG_SELECTOR);
455 // part is an ARG_SELECTOR followed by an optional explicit value, and then a message
456 if(MessagePattern::Part::hasNumericValue(pattern.getPartType(partIndex))) {
457 // explicit value like "=2"
458 part=&pattern.getPart(partIndex++);
459 if(number==pattern.getNumericValue(*part)) {
460 // matches explicit value
461 return partIndex;
462 }
463 } else if(!haveKeywordMatch) {
464 // plural keyword like "few" or "other"
465 // Compare "other" first and call the selector if this is not "other".
466 if(pattern.partSubstringMatches(*part, other)) {
467 if(msgStart==0) {
468 msgStart=partIndex;
469 if(0 == keyword.compare(other)) {
470 // This is the first "other" sub-message,
471 // and the selected keyword is also "other".
472 // Do not match "other" again.
473 haveKeywordMatch=TRUE;
474 }
475 }
476 } else {
477 if(keyword.isEmpty()) {
478 keyword=selector.select(context, number-offset, ec);
479 if(msgStart!=0 && (0 == keyword.compare(other))) {
480 // We have already seen an "other" sub-message.
481 // Do not match "other" again.
482 haveKeywordMatch=TRUE;
483 // Skip keyword matching but do getLimitPartIndex().
484 }
485 }
486 if(!haveKeywordMatch && pattern.partSubstringMatches(*part, keyword)) {
487 // keyword matches
488 msgStart=partIndex;
489 // Do not match this keyword again.
490 haveKeywordMatch=TRUE;
491 }
492 }
493 }
494 partIndex=pattern.getLimitPartIndex(partIndex);
495 } while(++partIndex<count);
496 return msgStart;
497 }
498
parseType(const UnicodeString & source,const NFRule * rbnfLenientScanner,Formattable & result,FieldPosition & pos) const499 void PluralFormat::parseType(const UnicodeString& source, const NFRule *rbnfLenientScanner, Formattable& result, FieldPosition& pos) const {
500 // If no pattern was applied, return null.
501 if (msgPattern.countParts() == 0) {
502 pos.setBeginIndex(-1);
503 pos.setEndIndex(-1);
504 return;
505 }
506 int partIndex = 0;
507 int currMatchIndex;
508 int count=msgPattern.countParts();
509 int startingAt = pos.getBeginIndex();
510 if (startingAt < 0) {
511 startingAt = 0;
512 }
513
514 // The keyword is null until we need to match against a non-explicit, not-"other" value.
515 // Then we get the keyword from the selector.
516 // (In other words, we never call the selector if we match against an explicit value,
517 // or if the only non-explicit keyword is "other".)
518 UnicodeString keyword;
519 UnicodeString matchedWord;
520 const UnicodeString& pattern = msgPattern.getPatternString();
521 int matchedIndex = -1;
522 // Iterate over (ARG_SELECTOR ARG_START message ARG_LIMIT) tuples
523 // until the end of the plural-only pattern.
524 while (partIndex < count) {
525 const MessagePattern::Part* partSelector = &msgPattern.getPart(partIndex++);
526 if (partSelector->getType() != UMSGPAT_PART_TYPE_ARG_SELECTOR) {
527 // Bad format
528 continue;
529 }
530
531 const MessagePattern::Part* partStart = &msgPattern.getPart(partIndex++);
532 if (partStart->getType() != UMSGPAT_PART_TYPE_MSG_START) {
533 // Bad format
534 continue;
535 }
536
537 const MessagePattern::Part* partLimit = &msgPattern.getPart(partIndex++);
538 if (partLimit->getType() != UMSGPAT_PART_TYPE_MSG_LIMIT) {
539 // Bad format
540 continue;
541 }
542
543 UnicodeString currArg = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit());
544 if (rbnfLenientScanner != NULL) {
545 // If lenient parsing is turned ON, we've got some time consuming parsing ahead of us.
546 int32_t length = -1;
547 currMatchIndex = rbnfLenientScanner->findTextLenient(source, currArg, startingAt, &length);
548 }
549 else {
550 currMatchIndex = source.indexOf(currArg, startingAt);
551 }
552 if (currMatchIndex >= 0 && currMatchIndex >= matchedIndex && currArg.length() > matchedWord.length()) {
553 matchedIndex = currMatchIndex;
554 matchedWord = currArg;
555 keyword = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit());
556 }
557 }
558 if (matchedIndex >= 0) {
559 pos.setBeginIndex(matchedIndex);
560 pos.setEndIndex(matchedIndex + matchedWord.length());
561 result.setString(keyword);
562 return;
563 }
564
565 // Not found!
566 pos.setBeginIndex(-1);
567 pos.setEndIndex(-1);
568 }
569
~PluralSelector()570 PluralFormat::PluralSelector::~PluralSelector() {}
571
~PluralSelectorAdapter()572 PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter() {
573 delete pluralRules;
574 }
575
select(void * context,double number,UErrorCode &) const576 UnicodeString PluralFormat::PluralSelectorAdapter::select(void *context, double number,
577 UErrorCode& /*ec*/) const {
578 (void)number; // unused except in the assertion
579 IFixedDecimal *dec=static_cast<IFixedDecimal *>(context);
580 return pluralRules->select(*dec);
581 }
582
reset()583 void PluralFormat::PluralSelectorAdapter::reset() {
584 delete pluralRules;
585 pluralRules = NULL;
586 }
587
588
589 U_NAMESPACE_END
590
591
592 #endif /* #if !UCONFIG_NO_FORMATTING */
593
594 //eof
595