• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2009-2015, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 *******************************************************************************
8 *
9 * File PLURFMT.CPP
10 *******************************************************************************
11 */
12 
13 #include "unicode/decimfmt.h"
14 #include "unicode/messagepattern.h"
15 #include "unicode/plurfmt.h"
16 #include "unicode/plurrule.h"
17 #include "unicode/utypes.h"
18 #include "cmemory.h"
19 #include "messageimpl.h"
20 #include "nfrule.h"
21 #include "plurrule_impl.h"
22 #include "uassert.h"
23 #include "uhash.h"
24 #include "number_decimalquantity.h"
25 #include "number_utils.h"
26 #include "number_utypes.h"
27 
28 #if !UCONFIG_NO_FORMATTING
29 
30 U_NAMESPACE_BEGIN
31 
32 using number::impl::DecimalQuantity;
33 
34 static const UChar OTHER_STRING[] = {
35     0x6F, 0x74, 0x68, 0x65, 0x72, 0  // "other"
36 };
37 
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat)38 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat)
39 
40 PluralFormat::PluralFormat(UErrorCode& status)
41         : locale(Locale::getDefault()),
42           msgPattern(status),
43           numberFormat(NULL),
44           offset(0) {
45     init(NULL, UPLURAL_TYPE_CARDINAL, status);
46 }
47 
PluralFormat(const Locale & loc,UErrorCode & status)48 PluralFormat::PluralFormat(const Locale& loc, UErrorCode& status)
49         : locale(loc),
50           msgPattern(status),
51           numberFormat(NULL),
52           offset(0) {
53     init(NULL, UPLURAL_TYPE_CARDINAL, status);
54 }
55 
PluralFormat(const PluralRules & rules,UErrorCode & status)56 PluralFormat::PluralFormat(const PluralRules& rules, UErrorCode& status)
57         : locale(Locale::getDefault()),
58           msgPattern(status),
59           numberFormat(NULL),
60           offset(0) {
61     init(&rules, UPLURAL_TYPE_COUNT, status);
62 }
63 
PluralFormat(const Locale & loc,const PluralRules & rules,UErrorCode & status)64 PluralFormat::PluralFormat(const Locale& loc,
65                            const PluralRules& rules,
66                            UErrorCode& status)
67         : locale(loc),
68           msgPattern(status),
69           numberFormat(NULL),
70           offset(0) {
71     init(&rules, UPLURAL_TYPE_COUNT, status);
72 }
73 
PluralFormat(const Locale & loc,UPluralType type,UErrorCode & status)74 PluralFormat::PluralFormat(const Locale& loc,
75                            UPluralType type,
76                            UErrorCode& status)
77         : locale(loc),
78           msgPattern(status),
79           numberFormat(NULL),
80           offset(0) {
81     init(NULL, type, status);
82 }
83 
PluralFormat(const UnicodeString & pat,UErrorCode & status)84 PluralFormat::PluralFormat(const UnicodeString& pat,
85                            UErrorCode& status)
86         : locale(Locale::getDefault()),
87           msgPattern(status),
88           numberFormat(NULL),
89           offset(0) {
90     init(NULL, UPLURAL_TYPE_CARDINAL, status);
91     applyPattern(pat, status);
92 }
93 
PluralFormat(const Locale & loc,const UnicodeString & pat,UErrorCode & status)94 PluralFormat::PluralFormat(const Locale& loc,
95                            const UnicodeString& pat,
96                            UErrorCode& status)
97         : locale(loc),
98           msgPattern(status),
99           numberFormat(NULL),
100           offset(0) {
101     init(NULL, UPLURAL_TYPE_CARDINAL, status);
102     applyPattern(pat, status);
103 }
104 
PluralFormat(const PluralRules & rules,const UnicodeString & pat,UErrorCode & status)105 PluralFormat::PluralFormat(const PluralRules& rules,
106                            const UnicodeString& pat,
107                            UErrorCode& status)
108         : locale(Locale::getDefault()),
109           msgPattern(status),
110           numberFormat(NULL),
111           offset(0) {
112     init(&rules, UPLURAL_TYPE_COUNT, status);
113     applyPattern(pat, status);
114 }
115 
PluralFormat(const Locale & loc,const PluralRules & rules,const UnicodeString & pat,UErrorCode & status)116 PluralFormat::PluralFormat(const Locale& loc,
117                            const PluralRules& rules,
118                            const UnicodeString& pat,
119                            UErrorCode& status)
120         : locale(loc),
121           msgPattern(status),
122           numberFormat(NULL),
123           offset(0) {
124     init(&rules, UPLURAL_TYPE_COUNT, status);
125     applyPattern(pat, status);
126 }
127 
PluralFormat(const Locale & loc,UPluralType type,const UnicodeString & pat,UErrorCode & status)128 PluralFormat::PluralFormat(const Locale& loc,
129                            UPluralType type,
130                            const UnicodeString& pat,
131                            UErrorCode& status)
132         : locale(loc),
133           msgPattern(status),
134           numberFormat(NULL),
135           offset(0) {
136     init(NULL, type, status);
137     applyPattern(pat, status);
138 }
139 
// Copy constructor. Locale, parsed pattern and offset are copied directly;
// the owned number format and plural rules are duplicated by copyObjects().
PluralFormat::PluralFormat(const PluralFormat& other)
    : Format(other),
      locale(other.locale),
      msgPattern(other.msgPattern),
      numberFormat(nullptr),
      offset(other.offset)
{
    copyObjects(other);
}
148 
149 void
copyObjects(const PluralFormat & other)150 PluralFormat::copyObjects(const PluralFormat& other) {
151     UErrorCode status = U_ZERO_ERROR;
152     if (numberFormat != NULL) {
153         delete numberFormat;
154     }
155     if (pluralRulesWrapper.pluralRules != NULL) {
156         delete pluralRulesWrapper.pluralRules;
157     }
158 
159     if (other.numberFormat == NULL) {
160         numberFormat = NumberFormat::createInstance(locale, status);
161     } else {
162         numberFormat = (NumberFormat*)other.numberFormat->clone();
163     }
164     if (other.pluralRulesWrapper.pluralRules == NULL) {
165         pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status);
166     } else {
167         pluralRulesWrapper.pluralRules = other.pluralRulesWrapper.pluralRules->clone();
168     }
169 }
170 
171 
~PluralFormat()172 PluralFormat::~PluralFormat() {
173     delete numberFormat;
174 }
175 
176 void
init(const PluralRules * rules,UPluralType type,UErrorCode & status)177 PluralFormat::init(const PluralRules* rules, UPluralType type, UErrorCode& status) {
178     if (U_FAILURE(status)) {
179         return;
180     }
181 
182     if (rules==NULL) {
183         pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, type, status);
184     } else {
185         pluralRulesWrapper.pluralRules = rules->clone();
186         if (pluralRulesWrapper.pluralRules == NULL) {
187             status = U_MEMORY_ALLOCATION_ERROR;
188             return;
189         }
190     }
191 
192     numberFormat= NumberFormat::createInstance(locale, status);
193 }
194 
195 void
applyPattern(const UnicodeString & newPattern,UErrorCode & status)196 PluralFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) {
197     msgPattern.parsePluralStyle(newPattern, NULL, status);
198     if (U_FAILURE(status)) {
199         msgPattern.clear();
200         offset = 0;
201         return;
202     }
203     offset = msgPattern.getPluralOffset(0);
204 }
205 
206 UnicodeString&
format(const Formattable & obj,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const207 PluralFormat::format(const Formattable& obj,
208                    UnicodeString& appendTo,
209                    FieldPosition& pos,
210                    UErrorCode& status) const
211 {
212     if (U_FAILURE(status)) return appendTo;
213 
214     if (obj.isNumeric()) {
215         return format(obj, obj.getDouble(), appendTo, pos, status);
216     } else {
217         status = U_ILLEGAL_ARGUMENT_ERROR;
218         return appendTo;
219     }
220 }
221 
222 UnicodeString
format(int32_t number,UErrorCode & status) const223 PluralFormat::format(int32_t number, UErrorCode& status) const {
224     FieldPosition fpos(FieldPosition::DONT_CARE);
225     UnicodeString result;
226     return format(Formattable(number), number, result, fpos, status);
227 }
228 
229 UnicodeString
format(double number,UErrorCode & status) const230 PluralFormat::format(double number, UErrorCode& status) const {
231     FieldPosition fpos(FieldPosition::DONT_CARE);
232     UnicodeString result;
233     return format(Formattable(number), number, result, fpos, status);
234 }
235 
236 
237 UnicodeString&
format(int32_t number,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const238 PluralFormat::format(int32_t number,
239                      UnicodeString& appendTo,
240                      FieldPosition& pos,
241                      UErrorCode& status) const {
242     return format(Formattable(number), (double)number, appendTo, pos, status);
243 }
244 
245 UnicodeString&
format(double number,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const246 PluralFormat::format(double number,
247                      UnicodeString& appendTo,
248                      FieldPosition& pos,
249                      UErrorCode& status) const {
250     return format(Formattable(number), (double)number, appendTo, pos, status);
251 }
252 
253 UnicodeString&
format(const Formattable & numberObject,double number,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const254 PluralFormat::format(const Formattable& numberObject, double number,
255                      UnicodeString& appendTo,
256                      FieldPosition& pos,
257                      UErrorCode& status) const {
258     if (U_FAILURE(status)) {
259         return appendTo;
260     }
261     if (msgPattern.countParts() == 0) {
262         return numberFormat->format(numberObject, appendTo, pos, status);
263     }
264 
265     // Get the appropriate sub-message.
266     // Select it based on the formatted number-offset.
267     double numberMinusOffset = number - offset;
268     // Call NumberFormatter to get both the DecimalQuantity and the string.
269     // This call site needs to use more internal APIs than the Java equivalent.
270     number::impl::UFormattedNumberData data;
271     if (offset == 0) {
272         // could be BigDecimal etc.
273         numberObject.populateDecimalQuantity(data.quantity, status);
274     } else {
275         data.quantity.setToDouble(numberMinusOffset);
276     }
277     UnicodeString numberString;
278     auto *decFmt = dynamic_cast<DecimalFormat *>(numberFormat);
279     if(decFmt != nullptr) {
280         decFmt->toNumberFormatter().formatImpl(&data, status); // mutates &data
281         numberString = data.string.toUnicodeString();
282     } else {
283         if (offset == 0) {
284             numberFormat->format(numberObject, numberString, status);
285         } else {
286             numberFormat->format(numberMinusOffset, numberString, status);
287         }
288     }
289 
290     int32_t partIndex = findSubMessage(msgPattern, 0, pluralRulesWrapper, &data.quantity, number, status);
291     if (U_FAILURE(status)) { return appendTo; }
292     // Replace syntactic # signs in the top level of this sub-message
293     // (not in nested arguments) with the formatted number-offset.
294     const UnicodeString& pattern = msgPattern.getPatternString();
295     int32_t prevIndex = msgPattern.getPart(partIndex).getLimit();
296     for (;;) {
297         const MessagePattern::Part& part = msgPattern.getPart(++partIndex);
298         const UMessagePatternPartType type = part.getType();
299         int32_t index = part.getIndex();
300         if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) {
301             return appendTo.append(pattern, prevIndex, index - prevIndex);
302         } else if ((type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) ||
303             (type == UMSGPAT_PART_TYPE_SKIP_SYNTAX && MessageImpl::jdkAposMode(msgPattern))) {
304             appendTo.append(pattern, prevIndex, index - prevIndex);
305             if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
306                 appendTo.append(numberString);
307             }
308             prevIndex = part.getLimit();
309         } else if (type == UMSGPAT_PART_TYPE_ARG_START) {
310             appendTo.append(pattern, prevIndex, index - prevIndex);
311             prevIndex = index;
312             partIndex = msgPattern.getLimitPartIndex(partIndex);
313             index = msgPattern.getPart(partIndex).getLimit();
314             MessageImpl::appendReducedApostrophes(pattern, prevIndex, index, appendTo);
315             prevIndex = index;
316         }
317     }
318 }
319 
320 UnicodeString&
toPattern(UnicodeString & appendTo)321 PluralFormat::toPattern(UnicodeString& appendTo) {
322     if (0 == msgPattern.countParts()) {
323         appendTo.setToBogus();
324     } else {
325         appendTo.append(msgPattern.getPatternString());
326     }
327     return appendTo;
328 }
329 
330 void
setLocale(const Locale & loc,UErrorCode & status)331 PluralFormat::setLocale(const Locale& loc, UErrorCode& status) {
332     if (U_FAILURE(status)) {
333         return;
334     }
335     locale = loc;
336     msgPattern.clear();
337     delete numberFormat;
338     offset = 0;
339     numberFormat = NULL;
340     pluralRulesWrapper.reset();
341     init(NULL, UPLURAL_TYPE_CARDINAL, status);
342 }
343 
344 void
setNumberFormat(const NumberFormat * format,UErrorCode & status)345 PluralFormat::setNumberFormat(const NumberFormat* format, UErrorCode& status) {
346     if (U_FAILURE(status)) {
347         return;
348     }
349     NumberFormat* nf = (NumberFormat*)format->clone();
350     if (nf != NULL) {
351         delete numberFormat;
352         numberFormat = nf;
353     } else {
354         status = U_MEMORY_ALLOCATION_ERROR;
355     }
356 }
357 
358 Format*
clone() const359 PluralFormat::clone() const
360 {
361     return new PluralFormat(*this);
362 }
363 
364 
365 PluralFormat&
operator =(const PluralFormat & other)366 PluralFormat::operator=(const PluralFormat& other) {
367     if (this != &other) {
368         locale = other.locale;
369         msgPattern = other.msgPattern;
370         offset = other.offset;
371         copyObjects(other);
372     }
373 
374     return *this;
375 }
376 
377 UBool
operator ==(const Format & other) const378 PluralFormat::operator==(const Format& other) const {
379     if (this == &other) {
380         return TRUE;
381     }
382     if (!Format::operator==(other)) {
383         return FALSE;
384     }
385     const PluralFormat& o = (const PluralFormat&)other;
386     return
387         locale == o.locale &&
388         msgPattern == o.msgPattern &&  // implies same offset
389         (numberFormat == NULL) == (o.numberFormat == NULL) &&
390         (numberFormat == NULL || *numberFormat == *o.numberFormat) &&
391         (pluralRulesWrapper.pluralRules == NULL) == (o.pluralRulesWrapper.pluralRules == NULL) &&
392         (pluralRulesWrapper.pluralRules == NULL ||
393             *pluralRulesWrapper.pluralRules == *o.pluralRulesWrapper.pluralRules);
394 }
395 
396 UBool
operator !=(const Format & other) const397 PluralFormat::operator!=(const Format& other) const {
398     return  !operator==(other);
399 }
400 
401 void
parseObject(const UnicodeString &,Formattable &,ParsePosition & pos) const402 PluralFormat::parseObject(const UnicodeString& /*source*/,
403                         Formattable& /*result*/,
404                         ParsePosition& pos) const
405 {
406     // Parsing not supported.
407     pos.setErrorIndex(pos.getIndex());
408 }
409 
findSubMessage(const MessagePattern & pattern,int32_t partIndex,const PluralSelector & selector,void * context,double number,UErrorCode & ec)410 int32_t PluralFormat::findSubMessage(const MessagePattern& pattern, int32_t partIndex,
411                                      const PluralSelector& selector, void *context,
412                                      double number, UErrorCode& ec) {
413     if (U_FAILURE(ec)) {
414         return 0;
415     }
416     int32_t count=pattern.countParts();
417     double offset;
418     const MessagePattern::Part* part=&pattern.getPart(partIndex);
419     if (MessagePattern::Part::hasNumericValue(part->getType())) {
420         offset=pattern.getNumericValue(*part);
421         ++partIndex;
422     } else {
423         offset=0;
424     }
425     // The keyword is empty until we need to match against a non-explicit, not-"other" value.
426     // Then we get the keyword from the selector.
427     // (In other words, we never call the selector if we match against an explicit value,
428     // or if the only non-explicit keyword is "other".)
429     UnicodeString keyword;
430     UnicodeString other(FALSE, OTHER_STRING, 5);
431     // When we find a match, we set msgStart>0 and also set this boolean to true
432     // to avoid matching the keyword again (duplicates are allowed)
433     // while we continue to look for an explicit-value match.
434     UBool haveKeywordMatch=FALSE;
435     // msgStart is 0 until we find any appropriate sub-message.
436     // We remember the first "other" sub-message if we have not seen any
437     // appropriate sub-message before.
438     // We remember the first matching-keyword sub-message if we have not seen
439     // one of those before.
440     // (The parser allows [does not check for] duplicate keywords.
441     // We just have to make sure to take the first one.)
442     // We avoid matching the keyword twice by also setting haveKeywordMatch=true
443     // at the first keyword match.
444     // We keep going until we find an explicit-value match or reach the end of the plural style.
445     int32_t msgStart=0;
446     // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples
447     // until ARG_LIMIT or end of plural-only pattern.
448     do {
449         part=&pattern.getPart(partIndex++);
450         const UMessagePatternPartType type = part->getType();
451         if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
452             break;
453         }
454         U_ASSERT (type==UMSGPAT_PART_TYPE_ARG_SELECTOR);
455         // part is an ARG_SELECTOR followed by an optional explicit value, and then a message
456         if(MessagePattern::Part::hasNumericValue(pattern.getPartType(partIndex))) {
457             // explicit value like "=2"
458             part=&pattern.getPart(partIndex++);
459             if(number==pattern.getNumericValue(*part)) {
460                 // matches explicit value
461                 return partIndex;
462             }
463         } else if(!haveKeywordMatch) {
464             // plural keyword like "few" or "other"
465             // Compare "other" first and call the selector if this is not "other".
466             if(pattern.partSubstringMatches(*part, other)) {
467                 if(msgStart==0) {
468                     msgStart=partIndex;
469                     if(0 == keyword.compare(other)) {
470                         // This is the first "other" sub-message,
471                         // and the selected keyword is also "other".
472                         // Do not match "other" again.
473                         haveKeywordMatch=TRUE;
474                     }
475                 }
476             } else {
477                 if(keyword.isEmpty()) {
478                     keyword=selector.select(context, number-offset, ec);
479                     if(msgStart!=0 && (0 == keyword.compare(other))) {
480                         // We have already seen an "other" sub-message.
481                         // Do not match "other" again.
482                         haveKeywordMatch=TRUE;
483                         // Skip keyword matching but do getLimitPartIndex().
484                     }
485                 }
486                 if(!haveKeywordMatch && pattern.partSubstringMatches(*part, keyword)) {
487                     // keyword matches
488                     msgStart=partIndex;
489                     // Do not match this keyword again.
490                     haveKeywordMatch=TRUE;
491                 }
492             }
493         }
494         partIndex=pattern.getLimitPartIndex(partIndex);
495     } while(++partIndex<count);
496     return msgStart;
497 }
498 
parseType(const UnicodeString & source,const NFRule * rbnfLenientScanner,Formattable & result,FieldPosition & pos) const499 void PluralFormat::parseType(const UnicodeString& source, const NFRule *rbnfLenientScanner, Formattable& result, FieldPosition& pos) const {
500     // If no pattern was applied, return null.
501     if (msgPattern.countParts() == 0) {
502         pos.setBeginIndex(-1);
503         pos.setEndIndex(-1);
504         return;
505     }
506     int partIndex = 0;
507     int currMatchIndex;
508     int count=msgPattern.countParts();
509     int startingAt = pos.getBeginIndex();
510     if (startingAt < 0) {
511         startingAt = 0;
512     }
513 
514     // The keyword is null until we need to match against a non-explicit, not-"other" value.
515     // Then we get the keyword from the selector.
516     // (In other words, we never call the selector if we match against an explicit value,
517     // or if the only non-explicit keyword is "other".)
518     UnicodeString keyword;
519     UnicodeString matchedWord;
520     const UnicodeString& pattern = msgPattern.getPatternString();
521     int matchedIndex = -1;
522     // Iterate over (ARG_SELECTOR ARG_START message ARG_LIMIT) tuples
523     // until the end of the plural-only pattern.
524     while (partIndex < count) {
525         const MessagePattern::Part* partSelector = &msgPattern.getPart(partIndex++);
526         if (partSelector->getType() != UMSGPAT_PART_TYPE_ARG_SELECTOR) {
527             // Bad format
528             continue;
529         }
530 
531         const MessagePattern::Part* partStart = &msgPattern.getPart(partIndex++);
532         if (partStart->getType() != UMSGPAT_PART_TYPE_MSG_START) {
533             // Bad format
534             continue;
535         }
536 
537         const MessagePattern::Part* partLimit = &msgPattern.getPart(partIndex++);
538         if (partLimit->getType() != UMSGPAT_PART_TYPE_MSG_LIMIT) {
539             // Bad format
540             continue;
541         }
542 
543         UnicodeString currArg = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit());
544         if (rbnfLenientScanner != NULL) {
545             // If lenient parsing is turned ON, we've got some time consuming parsing ahead of us.
546             int32_t length = -1;
547             currMatchIndex = rbnfLenientScanner->findTextLenient(source, currArg, startingAt, &length);
548         }
549         else {
550             currMatchIndex = source.indexOf(currArg, startingAt);
551         }
552         if (currMatchIndex >= 0 && currMatchIndex >= matchedIndex && currArg.length() > matchedWord.length()) {
553             matchedIndex = currMatchIndex;
554             matchedWord = currArg;
555             keyword = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit());
556         }
557     }
558     if (matchedIndex >= 0) {
559         pos.setBeginIndex(matchedIndex);
560         pos.setEndIndex(matchedIndex + matchedWord.length());
561         result.setString(keyword);
562         return;
563     }
564 
565     // Not found!
566     pos.setBeginIndex(-1);
567     pos.setEndIndex(-1);
568 }
569 
~PluralSelector()570 PluralFormat::PluralSelector::~PluralSelector() {}
571 
~PluralSelectorAdapter()572 PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter() {
573     delete pluralRules;
574 }
575 
select(void * context,double number,UErrorCode &) const576 UnicodeString PluralFormat::PluralSelectorAdapter::select(void *context, double number,
577                                                           UErrorCode& /*ec*/) const {
578     (void)number;  // unused except in the assertion
579     IFixedDecimal *dec=static_cast<IFixedDecimal *>(context);
580     return pluralRules->select(*dec);
581 }
582 
reset()583 void PluralFormat::PluralSelectorAdapter::reset() {
584     delete pluralRules;
585     pluralRules = NULL;
586 }
587 
588 
589 U_NAMESPACE_END
590 
591 
592 #endif /* #if !UCONFIG_NO_FORMATTING */
593 
594 //eof
595