• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2009-2015, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 *******************************************************************************
8 *
9 * File PLURFMT.CPP
10 *******************************************************************************
11 */
12 
13 #include "unicode/decimfmt.h"
14 #include "unicode/messagepattern.h"
15 #include "unicode/plurfmt.h"
16 #include "unicode/plurrule.h"
17 #include "unicode/utypes.h"
18 #include "cmemory.h"
19 #include "messageimpl.h"
20 #include "nfrule.h"
21 #include "plurrule_impl.h"
22 #include "uassert.h"
23 #include "uhash.h"
24 #include "number_decimalquantity.h"
25 #include "number_utils.h"
26 #include "number_utypes.h"
27 
28 #if !UCONFIG_NO_FORMATTING
29 
30 U_NAMESPACE_BEGIN
31 
32 using number::impl::DecimalQuantity;
33 
// The plural keyword "other" as NUL-terminated UTF-16 (five letters plus the terminator).
static const char16_t OTHER_STRING[] = u"other";
37 
// ICU run-time type identification boilerplate for PluralFormat.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat)
39 
40 PluralFormat::PluralFormat(UErrorCode& status)
41         : locale(Locale::getDefault()),
42           msgPattern(status),
43           numberFormat(nullptr),
44           offset(0) {
45     init(nullptr, UPLURAL_TYPE_CARDINAL, status);
46 }
47 
PluralFormat(const Locale & loc,UErrorCode & status)48 PluralFormat::PluralFormat(const Locale& loc, UErrorCode& status)
49         : locale(loc),
50           msgPattern(status),
51           numberFormat(nullptr),
52           offset(0) {
53     init(nullptr, UPLURAL_TYPE_CARDINAL, status);
54 }
55 
PluralFormat(const PluralRules & rules,UErrorCode & status)56 PluralFormat::PluralFormat(const PluralRules& rules, UErrorCode& status)
57         : locale(Locale::getDefault()),
58           msgPattern(status),
59           numberFormat(nullptr),
60           offset(0) {
61     init(&rules, UPLURAL_TYPE_COUNT, status);
62 }
63 
PluralFormat(const Locale & loc,const PluralRules & rules,UErrorCode & status)64 PluralFormat::PluralFormat(const Locale& loc,
65                            const PluralRules& rules,
66                            UErrorCode& status)
67         : locale(loc),
68           msgPattern(status),
69           numberFormat(nullptr),
70           offset(0) {
71     init(&rules, UPLURAL_TYPE_COUNT, status);
72 }
73 
PluralFormat(const Locale & loc,UPluralType type,UErrorCode & status)74 PluralFormat::PluralFormat(const Locale& loc,
75                            UPluralType type,
76                            UErrorCode& status)
77         : locale(loc),
78           msgPattern(status),
79           numberFormat(nullptr),
80           offset(0) {
81     init(nullptr, type, status);
82 }
83 
PluralFormat(const UnicodeString & pat,UErrorCode & status)84 PluralFormat::PluralFormat(const UnicodeString& pat,
85                            UErrorCode& status)
86         : locale(Locale::getDefault()),
87           msgPattern(status),
88           numberFormat(nullptr),
89           offset(0) {
90     init(nullptr, UPLURAL_TYPE_CARDINAL, status);
91     applyPattern(pat, status);
92 }
93 
PluralFormat(const Locale & loc,const UnicodeString & pat,UErrorCode & status)94 PluralFormat::PluralFormat(const Locale& loc,
95                            const UnicodeString& pat,
96                            UErrorCode& status)
97         : locale(loc),
98           msgPattern(status),
99           numberFormat(nullptr),
100           offset(0) {
101     init(nullptr, UPLURAL_TYPE_CARDINAL, status);
102     applyPattern(pat, status);
103 }
104 
PluralFormat(const PluralRules & rules,const UnicodeString & pat,UErrorCode & status)105 PluralFormat::PluralFormat(const PluralRules& rules,
106                            const UnicodeString& pat,
107                            UErrorCode& status)
108         : locale(Locale::getDefault()),
109           msgPattern(status),
110           numberFormat(nullptr),
111           offset(0) {
112     init(&rules, UPLURAL_TYPE_COUNT, status);
113     applyPattern(pat, status);
114 }
115 
PluralFormat(const Locale & loc,const PluralRules & rules,const UnicodeString & pat,UErrorCode & status)116 PluralFormat::PluralFormat(const Locale& loc,
117                            const PluralRules& rules,
118                            const UnicodeString& pat,
119                            UErrorCode& status)
120         : locale(loc),
121           msgPattern(status),
122           numberFormat(nullptr),
123           offset(0) {
124     init(&rules, UPLURAL_TYPE_COUNT, status);
125     applyPattern(pat, status);
126 }
127 
PluralFormat(const Locale & loc,UPluralType type,const UnicodeString & pat,UErrorCode & status)128 PluralFormat::PluralFormat(const Locale& loc,
129                            UPluralType type,
130                            const UnicodeString& pat,
131                            UErrorCode& status)
132         : locale(loc),
133           msgPattern(status),
134           numberFormat(nullptr),
135           offset(0) {
136     init(nullptr, type, status);
137     applyPattern(pat, status);
138 }
139 
/**
 * Copy constructor. Value members are copied directly; the owned
 * NumberFormat and PluralRules are duplicated by copyObjects().
 *
 * @param other  The object to copy from.
 */
PluralFormat::PluralFormat(const PluralFormat& other)
    : Format(other),
      locale(other.locale),
      msgPattern(other.msgPattern),
      numberFormat(nullptr),   // copyObjects() deletes this before replacing it
      offset(other.offset)
{
    copyObjects(other);
}
148 
149 void
copyObjects(const PluralFormat & other)150 PluralFormat::copyObjects(const PluralFormat& other) {
151     UErrorCode status = U_ZERO_ERROR;
152     delete numberFormat;
153     delete pluralRulesWrapper.pluralRules;
154     if (other.numberFormat == nullptr) {
155         numberFormat = NumberFormat::createInstance(locale, status);
156     } else {
157         numberFormat = other.numberFormat->clone();
158     }
159     if (other.pluralRulesWrapper.pluralRules == nullptr) {
160         pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status);
161     } else {
162         pluralRulesWrapper.pluralRules = other.pluralRulesWrapper.pluralRules->clone();
163     }
164 }
165 
166 
~PluralFormat()167 PluralFormat::~PluralFormat() {
168     delete numberFormat;
169 }
170 
171 void
init(const PluralRules * rules,UPluralType type,UErrorCode & status)172 PluralFormat::init(const PluralRules* rules, UPluralType type, UErrorCode& status) {
173     if (U_FAILURE(status)) {
174         return;
175     }
176 
177     if (rules==nullptr) {
178         pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, type, status);
179     } else {
180         pluralRulesWrapper.pluralRules = rules->clone();
181         if (pluralRulesWrapper.pluralRules == nullptr) {
182             status = U_MEMORY_ALLOCATION_ERROR;
183             return;
184         }
185     }
186 
187     numberFormat= NumberFormat::createInstance(locale, status);
188 }
189 
190 void
applyPattern(const UnicodeString & newPattern,UErrorCode & status)191 PluralFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) {
192     msgPattern.parsePluralStyle(newPattern, nullptr, status);
193     if (U_FAILURE(status)) {
194         msgPattern.clear();
195         offset = 0;
196         return;
197     }
198     offset = msgPattern.getPluralOffset(0);
199 }
200 
201 UnicodeString&
format(const Formattable & obj,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const202 PluralFormat::format(const Formattable& obj,
203                    UnicodeString& appendTo,
204                    FieldPosition& pos,
205                    UErrorCode& status) const
206 {
207     if (U_FAILURE(status)) return appendTo;
208 
209     if (obj.isNumeric()) {
210         return format(obj, obj.getDouble(), appendTo, pos, status);
211     } else {
212         status = U_ILLEGAL_ARGUMENT_ERROR;
213         return appendTo;
214     }
215 }
216 
217 UnicodeString
format(int32_t number,UErrorCode & status) const218 PluralFormat::format(int32_t number, UErrorCode& status) const {
219     FieldPosition fpos(FieldPosition::DONT_CARE);
220     UnicodeString result;
221     return format(Formattable(number), number, result, fpos, status);
222 }
223 
224 UnicodeString
format(double number,UErrorCode & status) const225 PluralFormat::format(double number, UErrorCode& status) const {
226     FieldPosition fpos(FieldPosition::DONT_CARE);
227     UnicodeString result;
228     return format(Formattable(number), number, result, fpos, status);
229 }
230 
231 
232 UnicodeString&
format(int32_t number,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const233 PluralFormat::format(int32_t number,
234                      UnicodeString& appendTo,
235                      FieldPosition& pos,
236                      UErrorCode& status) const {
237     return format(Formattable(number), static_cast<double>(number), appendTo, pos, status);
238 }
239 
240 UnicodeString&
format(double number,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const241 PluralFormat::format(double number,
242                      UnicodeString& appendTo,
243                      FieldPosition& pos,
244                      UErrorCode& status) const {
245     return format(Formattable(number), number, appendTo, pos, status);
246 }
247 
248 UnicodeString&
format(const Formattable & numberObject,double number,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const249 PluralFormat::format(const Formattable& numberObject, double number,
250                      UnicodeString& appendTo,
251                      FieldPosition& pos,
252                      UErrorCode& status) const {
253     if (U_FAILURE(status)) {
254         return appendTo;
255     }
256     if (msgPattern.countParts() == 0) {
257         return numberFormat->format(numberObject, appendTo, pos, status);
258     }
259 
260     // Get the appropriate sub-message.
261     // Select it based on the formatted number-offset.
262     double numberMinusOffset = number - offset;
263     // Call NumberFormatter to get both the DecimalQuantity and the string.
264     // This call site needs to use more internal APIs than the Java equivalent.
265     number::impl::UFormattedNumberData data;
266     if (offset == 0) {
267         // could be BigDecimal etc.
268         numberObject.populateDecimalQuantity(data.quantity, status);
269     } else {
270         data.quantity.setToDouble(numberMinusOffset);
271     }
272     UnicodeString numberString;
273     auto *decFmt = dynamic_cast<DecimalFormat *>(numberFormat);
274     if(decFmt != nullptr) {
275         const number::LocalizedNumberFormatter* lnf = decFmt->toNumberFormatter(status);
276         if (U_FAILURE(status)) {
277             return appendTo;
278         }
279         lnf->formatImpl(&data, status); // mutates &data
280         if (U_FAILURE(status)) {
281             return appendTo;
282         }
283         numberString = data.getStringRef().toUnicodeString();
284     } else {
285         if (offset == 0) {
286             numberFormat->format(numberObject, numberString, status);
287         } else {
288             numberFormat->format(numberMinusOffset, numberString, status);
289         }
290     }
291 
292     int32_t partIndex = findSubMessage(msgPattern, 0, pluralRulesWrapper, &data.quantity, number, status);
293     if (U_FAILURE(status)) { return appendTo; }
294     // Replace syntactic # signs in the top level of this sub-message
295     // (not in nested arguments) with the formatted number-offset.
296     const UnicodeString& pattern = msgPattern.getPatternString();
297     int32_t prevIndex = msgPattern.getPart(partIndex).getLimit();
298     for (;;) {
299         const MessagePattern::Part& part = msgPattern.getPart(++partIndex);
300         const UMessagePatternPartType type = part.getType();
301         int32_t index = part.getIndex();
302         if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) {
303             return appendTo.append(pattern, prevIndex, index - prevIndex);
304         } else if ((type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) ||
305             (type == UMSGPAT_PART_TYPE_SKIP_SYNTAX && MessageImpl::jdkAposMode(msgPattern))) {
306             appendTo.append(pattern, prevIndex, index - prevIndex);
307             if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
308                 appendTo.append(numberString);
309             }
310             prevIndex = part.getLimit();
311         } else if (type == UMSGPAT_PART_TYPE_ARG_START) {
312             appendTo.append(pattern, prevIndex, index - prevIndex);
313             prevIndex = index;
314             partIndex = msgPattern.getLimitPartIndex(partIndex);
315             index = msgPattern.getPart(partIndex).getLimit();
316             MessageImpl::appendReducedApostrophes(pattern, prevIndex, index, appendTo);
317             prevIndex = index;
318         }
319     }
320 }
321 
322 UnicodeString&
toPattern(UnicodeString & appendTo)323 PluralFormat::toPattern(UnicodeString& appendTo) {
324     if (0 == msgPattern.countParts()) {
325         appendTo.setToBogus();
326     } else {
327         appendTo.append(msgPattern.getPatternString());
328     }
329     return appendTo;
330 }
331 
332 void
setLocale(const Locale & loc,UErrorCode & status)333 PluralFormat::setLocale(const Locale& loc, UErrorCode& status) {
334     if (U_FAILURE(status)) {
335         return;
336     }
337     locale = loc;
338     msgPattern.clear();
339     delete numberFormat;
340     offset = 0;
341     numberFormat = nullptr;
342     pluralRulesWrapper.reset();
343     init(nullptr, UPLURAL_TYPE_CARDINAL, status);
344 }
345 
346 void
setNumberFormat(const NumberFormat * format,UErrorCode & status)347 PluralFormat::setNumberFormat(const NumberFormat* format, UErrorCode& status) {
348     if (U_FAILURE(status)) {
349         return;
350     }
351     NumberFormat* nf = format->clone();
352     if (nf != nullptr) {
353         delete numberFormat;
354         numberFormat = nf;
355     } else {
356         status = U_MEMORY_ALLOCATION_ERROR;
357     }
358 }
359 
360 PluralFormat*
clone() const361 PluralFormat::clone() const
362 {
363     return new PluralFormat(*this);
364 }
365 
366 
367 PluralFormat&
operator =(const PluralFormat & other)368 PluralFormat::operator=(const PluralFormat& other) {
369     if (this != &other) {
370         locale = other.locale;
371         msgPattern = other.msgPattern;
372         offset = other.offset;
373         copyObjects(other);
374     }
375 
376     return *this;
377 }
378 
379 bool
operator ==(const Format & other) const380 PluralFormat::operator==(const Format& other) const {
381     if (this == &other) {
382         return true;
383     }
384     if (!Format::operator==(other)) {
385         return false;
386     }
387     const PluralFormat& o = (const PluralFormat&)other;
388     return
389         locale == o.locale &&
390         msgPattern == o.msgPattern &&  // implies same offset
391         (numberFormat == nullptr) == (o.numberFormat == nullptr) &&
392         (numberFormat == nullptr || *numberFormat == *o.numberFormat) &&
393         (pluralRulesWrapper.pluralRules == nullptr) == (o.pluralRulesWrapper.pluralRules == nullptr) &&
394         (pluralRulesWrapper.pluralRules == nullptr ||
395             *pluralRulesWrapper.pluralRules == *o.pluralRulesWrapper.pluralRules);
396 }
397 
398 bool
operator !=(const Format & other) const399 PluralFormat::operator!=(const Format& other) const {
400     return  !operator==(other);
401 }
402 
403 void
parseObject(const UnicodeString &,Formattable &,ParsePosition & pos) const404 PluralFormat::parseObject(const UnicodeString& /*source*/,
405                         Formattable& /*result*/,
406                         ParsePosition& pos) const
407 {
408     // Parsing not supported.
409     pos.setErrorIndex(pos.getIndex());
410 }
411 
findSubMessage(const MessagePattern & pattern,int32_t partIndex,const PluralSelector & selector,void * context,double number,UErrorCode & ec)412 int32_t PluralFormat::findSubMessage(const MessagePattern& pattern, int32_t partIndex,
413                                      const PluralSelector& selector, void *context,
414                                      double number, UErrorCode& ec) {
415     if (U_FAILURE(ec)) {
416         return 0;
417     }
418     int32_t count=pattern.countParts();
419     double offset;
420     const MessagePattern::Part* part=&pattern.getPart(partIndex);
421     if (MessagePattern::Part::hasNumericValue(part->getType())) {
422         offset=pattern.getNumericValue(*part);
423         ++partIndex;
424     } else {
425         offset=0;
426     }
427     // The keyword is empty until we need to match against a non-explicit, not-"other" value.
428     // Then we get the keyword from the selector.
429     // (In other words, we never call the selector if we match against an explicit value,
430     // or if the only non-explicit keyword is "other".)
431     UnicodeString keyword;
432     UnicodeString other(false, OTHER_STRING, 5);
433     // When we find a match, we set msgStart>0 and also set this boolean to true
434     // to avoid matching the keyword again (duplicates are allowed)
435     // while we continue to look for an explicit-value match.
436     UBool haveKeywordMatch=false;
437     // msgStart is 0 until we find any appropriate sub-message.
438     // We remember the first "other" sub-message if we have not seen any
439     // appropriate sub-message before.
440     // We remember the first matching-keyword sub-message if we have not seen
441     // one of those before.
442     // (The parser allows [does not check for] duplicate keywords.
443     // We just have to make sure to take the first one.)
444     // We avoid matching the keyword twice by also setting haveKeywordMatch=true
445     // at the first keyword match.
446     // We keep going until we find an explicit-value match or reach the end of the plural style.
447     int32_t msgStart=0;
448     // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples
449     // until ARG_LIMIT or end of plural-only pattern.
450     do {
451         part=&pattern.getPart(partIndex++);
452         const UMessagePatternPartType type = part->getType();
453         if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
454             break;
455         }
456         U_ASSERT (type==UMSGPAT_PART_TYPE_ARG_SELECTOR);
457         // part is an ARG_SELECTOR followed by an optional explicit value, and then a message
458         if(MessagePattern::Part::hasNumericValue(pattern.getPartType(partIndex))) {
459             // explicit value like "=2"
460             part=&pattern.getPart(partIndex++);
461             if(number==pattern.getNumericValue(*part)) {
462                 // matches explicit value
463                 return partIndex;
464             }
465         } else if(!haveKeywordMatch) {
466             // plural keyword like "few" or "other"
467             // Compare "other" first and call the selector if this is not "other".
468             if(pattern.partSubstringMatches(*part, other)) {
469                 if(msgStart==0) {
470                     msgStart=partIndex;
471                     if(0 == keyword.compare(other)) {
472                         // This is the first "other" sub-message,
473                         // and the selected keyword is also "other".
474                         // Do not match "other" again.
475                         haveKeywordMatch=true;
476                     }
477                 }
478             } else {
479                 if(keyword.isEmpty()) {
480                     keyword=selector.select(context, number-offset, ec);
481                     if(msgStart!=0 && (0 == keyword.compare(other))) {
482                         // We have already seen an "other" sub-message.
483                         // Do not match "other" again.
484                         haveKeywordMatch=true;
485                         // Skip keyword matching but do getLimitPartIndex().
486                     }
487                 }
488                 if(!haveKeywordMatch && pattern.partSubstringMatches(*part, keyword)) {
489                     // keyword matches
490                     msgStart=partIndex;
491                     // Do not match this keyword again.
492                     haveKeywordMatch=true;
493                 }
494             }
495         }
496         partIndex=pattern.getLimitPartIndex(partIndex);
497     } while(++partIndex<count);
498     return msgStart;
499 }
500 
parseType(const UnicodeString & source,const NFRule * rbnfLenientScanner,Formattable & result,FieldPosition & pos) const501 void PluralFormat::parseType(const UnicodeString& source, const NFRule *rbnfLenientScanner, Formattable& result, FieldPosition& pos) const {
502     // If no pattern was applied, return null.
503     if (msgPattern.countParts() == 0) {
504         pos.setBeginIndex(-1);
505         pos.setEndIndex(-1);
506         return;
507     }
508     int partIndex = 0;
509     int currMatchIndex;
510     int count=msgPattern.countParts();
511     int startingAt = pos.getBeginIndex();
512     if (startingAt < 0) {
513         startingAt = 0;
514     }
515 
516     // The keyword is null until we need to match against a non-explicit, not-"other" value.
517     // Then we get the keyword from the selector.
518     // (In other words, we never call the selector if we match against an explicit value,
519     // or if the only non-explicit keyword is "other".)
520     UnicodeString keyword;
521     UnicodeString matchedWord;
522     const UnicodeString& pattern = msgPattern.getPatternString();
523     int matchedIndex = -1;
524     // Iterate over (ARG_SELECTOR ARG_START message ARG_LIMIT) tuples
525     // until the end of the plural-only pattern.
526     while (partIndex < count) {
527         const MessagePattern::Part* partSelector = &msgPattern.getPart(partIndex++);
528         if (partSelector->getType() != UMSGPAT_PART_TYPE_ARG_SELECTOR) {
529             // Bad format
530             continue;
531         }
532 
533         const MessagePattern::Part* partStart = &msgPattern.getPart(partIndex++);
534         if (partStart->getType() != UMSGPAT_PART_TYPE_MSG_START) {
535             // Bad format
536             continue;
537         }
538 
539         const MessagePattern::Part* partLimit = &msgPattern.getPart(partIndex++);
540         if (partLimit->getType() != UMSGPAT_PART_TYPE_MSG_LIMIT) {
541             // Bad format
542             continue;
543         }
544 
545         UnicodeString currArg = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit());
546         if (rbnfLenientScanner != nullptr) {
547             // Check if non-lenient rule finds the text before call lenient parsing
548             int32_t tempIndex = source.indexOf(currArg, startingAt);
549             if (tempIndex >= 0) {
550                 currMatchIndex = tempIndex;
551             } else {
552                 // If lenient parsing is turned ON, we've got some time consuming parsing ahead of us.
553                 int32_t length = -1;
554                 currMatchIndex = rbnfLenientScanner->findTextLenient(source, currArg, startingAt, &length);
555             }
556         }
557         else {
558             currMatchIndex = source.indexOf(currArg, startingAt);
559         }
560         if (currMatchIndex >= 0 && currMatchIndex >= matchedIndex && currArg.length() > matchedWord.length()) {
561             matchedIndex = currMatchIndex;
562             matchedWord = currArg;
563             keyword = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit());
564         }
565     }
566     if (matchedIndex >= 0) {
567         pos.setBeginIndex(matchedIndex);
568         pos.setEndIndex(matchedIndex + matchedWord.length());
569         result.setString(keyword);
570         return;
571     }
572 
573     // Not found!
574     pos.setBeginIndex(-1);
575     pos.setEndIndex(-1);
576 }
577 
~PluralSelector()578 PluralFormat::PluralSelector::~PluralSelector() {}
579 
~PluralSelectorAdapter()580 PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter() {
581     delete pluralRules;
582 }
583 
select(void * context,double number,UErrorCode &) const584 UnicodeString PluralFormat::PluralSelectorAdapter::select(void *context, double number,
585                                                           UErrorCode& /*ec*/) const {
586     (void)number;  // unused except in the assertion
587     IFixedDecimal *dec=static_cast<IFixedDecimal *>(context);
588     return pluralRules->select(*dec);
589 }
590 
reset()591 void PluralFormat::PluralSelectorAdapter::reset() {
592     delete pluralRules;
593     pluralRules = nullptr;
594 }
595 
596 
597 U_NAMESPACE_END
598 
599 
600 #endif /* #if !UCONFIG_NO_FORMATTING */
601 
602 //eof
603