• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  /*
2  *******************************************************************************
3  * Copyright (C) 2009-2015, International Business Machines Corporation and
4  * others. All Rights Reserved.
5  *******************************************************************************
6  *
7  * File PLURFMT.CPP
8  *******************************************************************************
9  */
10  
11  #include "unicode/decimfmt.h"
12  #include "unicode/messagepattern.h"
13  #include "unicode/plurfmt.h"
14  #include "unicode/plurrule.h"
15  #include "unicode/utypes.h"
16  #include "cmemory.h"
17  #include "messageimpl.h"
18  #include "nfrule.h"
19  #include "plurrule_impl.h"
20  #include "uassert.h"
21  #include "uhash.h"
22  #include "precision.h"
23  #include "visibledigits.h"
24  
25  #if !UCONFIG_NO_FORMATTING
26  
27  U_NAMESPACE_BEGIN
28  
29  static const UChar OTHER_STRING[] = {
30      0x6F, 0x74, 0x68, 0x65, 0x72, 0  // "other"
31  };
32  
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat)33  UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat)
34  
35  PluralFormat::PluralFormat(UErrorCode& status)
36          : locale(Locale::getDefault()),
37            msgPattern(status),
38            numberFormat(NULL),
39            offset(0) {
40      init(NULL, UPLURAL_TYPE_CARDINAL, status);
41  }
42  
PluralFormat(const Locale & loc,UErrorCode & status)43  PluralFormat::PluralFormat(const Locale& loc, UErrorCode& status)
44          : locale(loc),
45            msgPattern(status),
46            numberFormat(NULL),
47            offset(0) {
48      init(NULL, UPLURAL_TYPE_CARDINAL, status);
49  }
50  
PluralFormat(const PluralRules & rules,UErrorCode & status)51  PluralFormat::PluralFormat(const PluralRules& rules, UErrorCode& status)
52          : locale(Locale::getDefault()),
53            msgPattern(status),
54            numberFormat(NULL),
55            offset(0) {
56      init(&rules, UPLURAL_TYPE_COUNT, status);
57  }
58  
PluralFormat(const Locale & loc,const PluralRules & rules,UErrorCode & status)59  PluralFormat::PluralFormat(const Locale& loc,
60                             const PluralRules& rules,
61                             UErrorCode& status)
62          : locale(loc),
63            msgPattern(status),
64            numberFormat(NULL),
65            offset(0) {
66      init(&rules, UPLURAL_TYPE_COUNT, status);
67  }
68  
PluralFormat(const Locale & loc,UPluralType type,UErrorCode & status)69  PluralFormat::PluralFormat(const Locale& loc,
70                             UPluralType type,
71                             UErrorCode& status)
72          : locale(loc),
73            msgPattern(status),
74            numberFormat(NULL),
75            offset(0) {
76      init(NULL, type, status);
77  }
78  
PluralFormat(const UnicodeString & pat,UErrorCode & status)79  PluralFormat::PluralFormat(const UnicodeString& pat,
80                             UErrorCode& status)
81          : locale(Locale::getDefault()),
82            msgPattern(status),
83            numberFormat(NULL),
84            offset(0) {
85      init(NULL, UPLURAL_TYPE_CARDINAL, status);
86      applyPattern(pat, status);
87  }
88  
PluralFormat(const Locale & loc,const UnicodeString & pat,UErrorCode & status)89  PluralFormat::PluralFormat(const Locale& loc,
90                             const UnicodeString& pat,
91                             UErrorCode& status)
92          : locale(loc),
93            msgPattern(status),
94            numberFormat(NULL),
95            offset(0) {
96      init(NULL, UPLURAL_TYPE_CARDINAL, status);
97      applyPattern(pat, status);
98  }
99  
PluralFormat(const PluralRules & rules,const UnicodeString & pat,UErrorCode & status)100  PluralFormat::PluralFormat(const PluralRules& rules,
101                             const UnicodeString& pat,
102                             UErrorCode& status)
103          : locale(Locale::getDefault()),
104            msgPattern(status),
105            numberFormat(NULL),
106            offset(0) {
107      init(&rules, UPLURAL_TYPE_COUNT, status);
108      applyPattern(pat, status);
109  }
110  
PluralFormat(const Locale & loc,const PluralRules & rules,const UnicodeString & pat,UErrorCode & status)111  PluralFormat::PluralFormat(const Locale& loc,
112                             const PluralRules& rules,
113                             const UnicodeString& pat,
114                             UErrorCode& status)
115          : locale(loc),
116            msgPattern(status),
117            numberFormat(NULL),
118            offset(0) {
119      init(&rules, UPLURAL_TYPE_COUNT, status);
120      applyPattern(pat, status);
121  }
122  
PluralFormat(const Locale & loc,UPluralType type,const UnicodeString & pat,UErrorCode & status)123  PluralFormat::PluralFormat(const Locale& loc,
124                             UPluralType type,
125                             const UnicodeString& pat,
126                             UErrorCode& status)
127          : locale(loc),
128            msgPattern(status),
129            numberFormat(NULL),
130            offset(0) {
131      init(NULL, type, status);
132      applyPattern(pat, status);
133  }
134  
// Copy constructor. Value members are copied in the initializer list;
// the owned number format and plural rules are duplicated by copyObjects().
PluralFormat::PluralFormat(const PluralFormat& other)
    : Format(other)
    , locale(other.locale)
    , msgPattern(other.msgPattern)
    , numberFormat(NULL)
    , offset(other.offset)
{
    copyObjects(other);
}
143  
144  void
copyObjects(const PluralFormat & other)145  PluralFormat::copyObjects(const PluralFormat& other) {
146      UErrorCode status = U_ZERO_ERROR;
147      if (numberFormat != NULL) {
148          delete numberFormat;
149      }
150      if (pluralRulesWrapper.pluralRules != NULL) {
151          delete pluralRulesWrapper.pluralRules;
152      }
153  
154      if (other.numberFormat == NULL) {
155          numberFormat = NumberFormat::createInstance(locale, status);
156      } else {
157          numberFormat = (NumberFormat*)other.numberFormat->clone();
158      }
159      if (other.pluralRulesWrapper.pluralRules == NULL) {
160          pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status);
161      } else {
162          pluralRulesWrapper.pluralRules = other.pluralRulesWrapper.pluralRules->clone();
163      }
164  }
165  
166  
~PluralFormat()167  PluralFormat::~PluralFormat() {
168      delete numberFormat;
169  }
170  
171  void
init(const PluralRules * rules,UPluralType type,UErrorCode & status)172  PluralFormat::init(const PluralRules* rules, UPluralType type, UErrorCode& status) {
173      if (U_FAILURE(status)) {
174          return;
175      }
176  
177      if (rules==NULL) {
178          pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, type, status);
179      } else {
180          pluralRulesWrapper.pluralRules = rules->clone();
181          if (pluralRulesWrapper.pluralRules == NULL) {
182              status = U_MEMORY_ALLOCATION_ERROR;
183              return;
184          }
185      }
186  
187      numberFormat= NumberFormat::createInstance(locale, status);
188  }
189  
190  void
applyPattern(const UnicodeString & newPattern,UErrorCode & status)191  PluralFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) {
192      msgPattern.parsePluralStyle(newPattern, NULL, status);
193      if (U_FAILURE(status)) {
194          msgPattern.clear();
195          offset = 0;
196          return;
197      }
198      offset = msgPattern.getPluralOffset(0);
199  }
200  
201  UnicodeString&
format(const Formattable & obj,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const202  PluralFormat::format(const Formattable& obj,
203                     UnicodeString& appendTo,
204                     FieldPosition& pos,
205                     UErrorCode& status) const
206  {
207      if (U_FAILURE(status)) return appendTo;
208  
209      if (obj.isNumeric()) {
210          return format(obj, obj.getDouble(), appendTo, pos, status);
211      } else {
212          status = U_ILLEGAL_ARGUMENT_ERROR;
213          return appendTo;
214      }
215  }
216  
217  UnicodeString
format(int32_t number,UErrorCode & status) const218  PluralFormat::format(int32_t number, UErrorCode& status) const {
219      FieldPosition fpos(0);
220      UnicodeString result;
221      return format(Formattable(number), number, result, fpos, status);
222  }
223  
224  UnicodeString
format(double number,UErrorCode & status) const225  PluralFormat::format(double number, UErrorCode& status) const {
226      FieldPosition fpos(0);
227      UnicodeString result;
228      return format(Formattable(number), number, result, fpos, status);
229  }
230  
231  
232  UnicodeString&
format(int32_t number,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const233  PluralFormat::format(int32_t number,
234                       UnicodeString& appendTo,
235                       FieldPosition& pos,
236                       UErrorCode& status) const {
237      return format(Formattable(number), (double)number, appendTo, pos, status);
238  }
239  
240  UnicodeString&
format(double number,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const241  PluralFormat::format(double number,
242                       UnicodeString& appendTo,
243                       FieldPosition& pos,
244                       UErrorCode& status) const {
245      return format(Formattable(number), (double)number, appendTo, pos, status);
246  }
247  
248  UnicodeString&
format(const Formattable & numberObject,double number,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const249  PluralFormat::format(const Formattable& numberObject, double number,
250                       UnicodeString& appendTo,
251                       FieldPosition& pos,
252                       UErrorCode& status) const {
253      if (U_FAILURE(status)) {
254          return appendTo;
255      }
256      if (msgPattern.countParts() == 0) {
257          return numberFormat->format(numberObject, appendTo, pos, status);
258      }
259      // Get the appropriate sub-message.
260      // Select it based on the formatted number-offset.
261      double numberMinusOffset = number - offset;
262      UnicodeString numberString;
263      FieldPosition ignorePos;
264      FixedPrecision fp;
265      VisibleDigitsWithExponent dec;
266      fp.initVisibleDigitsWithExponent(numberMinusOffset, dec, status);
267      if (U_FAILURE(status)) {
268          return appendTo;
269      }
270      if (offset == 0) {
271          DecimalFormat *decFmt = dynamic_cast<DecimalFormat *>(numberFormat);
272          if(decFmt != NULL) {
273              decFmt->initVisibleDigitsWithExponent(
274                      numberObject, dec, status);
275              if (U_FAILURE(status)) {
276                  return appendTo;
277              }
278              decFmt->format(dec, numberString, ignorePos, status);
279          } else {
280              numberFormat->format(
281                      numberObject, numberString, ignorePos, status);  // could be BigDecimal etc.
282          }
283      } else {
284          DecimalFormat *decFmt = dynamic_cast<DecimalFormat *>(numberFormat);
285          if(decFmt != NULL) {
286              decFmt->initVisibleDigitsWithExponent(
287                      numberMinusOffset, dec, status);
288              if (U_FAILURE(status)) {
289                  return appendTo;
290              }
291              decFmt->format(dec, numberString, ignorePos, status);
292          } else {
293              numberFormat->format(
294                      numberMinusOffset, numberString, ignorePos, status);
295          }
296      }
297      int32_t partIndex = findSubMessage(msgPattern, 0, pluralRulesWrapper, &dec, number, status);
298      if (U_FAILURE(status)) { return appendTo; }
299      // Replace syntactic # signs in the top level of this sub-message
300      // (not in nested arguments) with the formatted number-offset.
301      const UnicodeString& pattern = msgPattern.getPatternString();
302      int32_t prevIndex = msgPattern.getPart(partIndex).getLimit();
303      for (;;) {
304          const MessagePattern::Part& part = msgPattern.getPart(++partIndex);
305          const UMessagePatternPartType type = part.getType();
306          int32_t index = part.getIndex();
307          if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) {
308              return appendTo.append(pattern, prevIndex, index - prevIndex);
309          } else if ((type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) ||
310              (type == UMSGPAT_PART_TYPE_SKIP_SYNTAX && MessageImpl::jdkAposMode(msgPattern))) {
311              appendTo.append(pattern, prevIndex, index - prevIndex);
312              if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
313                  appendTo.append(numberString);
314              }
315              prevIndex = part.getLimit();
316          } else if (type == UMSGPAT_PART_TYPE_ARG_START) {
317              appendTo.append(pattern, prevIndex, index - prevIndex);
318              prevIndex = index;
319              partIndex = msgPattern.getLimitPartIndex(partIndex);
320              index = msgPattern.getPart(partIndex).getLimit();
321              MessageImpl::appendReducedApostrophes(pattern, prevIndex, index, appendTo);
322              prevIndex = index;
323          }
324      }
325  }
326  
327  UnicodeString&
toPattern(UnicodeString & appendTo)328  PluralFormat::toPattern(UnicodeString& appendTo) {
329      if (0 == msgPattern.countParts()) {
330          appendTo.setToBogus();
331      } else {
332          appendTo.append(msgPattern.getPatternString());
333      }
334      return appendTo;
335  }
336  
337  void
setLocale(const Locale & loc,UErrorCode & status)338  PluralFormat::setLocale(const Locale& loc, UErrorCode& status) {
339      if (U_FAILURE(status)) {
340          return;
341      }
342      locale = loc;
343      msgPattern.clear();
344      delete numberFormat;
345      offset = 0;
346      numberFormat = NULL;
347      pluralRulesWrapper.reset();
348      init(NULL, UPLURAL_TYPE_CARDINAL, status);
349  }
350  
351  void
setNumberFormat(const NumberFormat * format,UErrorCode & status)352  PluralFormat::setNumberFormat(const NumberFormat* format, UErrorCode& status) {
353      if (U_FAILURE(status)) {
354          return;
355      }
356      NumberFormat* nf = (NumberFormat*)format->clone();
357      if (nf != NULL) {
358          delete numberFormat;
359          numberFormat = nf;
360      } else {
361          status = U_MEMORY_ALLOCATION_ERROR;
362      }
363  }
364  
365  Format*
clone() const366  PluralFormat::clone() const
367  {
368      return new PluralFormat(*this);
369  }
370  
371  
372  PluralFormat&
operator =(const PluralFormat & other)373  PluralFormat::operator=(const PluralFormat& other) {
374      if (this != &other) {
375          locale = other.locale;
376          msgPattern = other.msgPattern;
377          offset = other.offset;
378          copyObjects(other);
379      }
380  
381      return *this;
382  }
383  
384  UBool
operator ==(const Format & other) const385  PluralFormat::operator==(const Format& other) const {
386      if (this == &other) {
387          return TRUE;
388      }
389      if (!Format::operator==(other)) {
390          return FALSE;
391      }
392      const PluralFormat& o = (const PluralFormat&)other;
393      return
394          locale == o.locale &&
395          msgPattern == o.msgPattern &&  // implies same offset
396          (numberFormat == NULL) == (o.numberFormat == NULL) &&
397          (numberFormat == NULL || *numberFormat == *o.numberFormat) &&
398          (pluralRulesWrapper.pluralRules == NULL) == (o.pluralRulesWrapper.pluralRules == NULL) &&
399          (pluralRulesWrapper.pluralRules == NULL ||
400              *pluralRulesWrapper.pluralRules == *o.pluralRulesWrapper.pluralRules);
401  }
402  
403  UBool
operator !=(const Format & other) const404  PluralFormat::operator!=(const Format& other) const {
405      return  !operator==(other);
406  }
407  
408  void
parseObject(const UnicodeString &,Formattable &,ParsePosition & pos) const409  PluralFormat::parseObject(const UnicodeString& /*source*/,
410                          Formattable& /*result*/,
411                          ParsePosition& pos) const
412  {
413      // Parsing not supported.
414      pos.setErrorIndex(pos.getIndex());
415  }
416  
findSubMessage(const MessagePattern & pattern,int32_t partIndex,const PluralSelector & selector,void * context,double number,UErrorCode & ec)417  int32_t PluralFormat::findSubMessage(const MessagePattern& pattern, int32_t partIndex,
418                                       const PluralSelector& selector, void *context,
419                                       double number, UErrorCode& ec) {
420      if (U_FAILURE(ec)) {
421          return 0;
422      }
423      int32_t count=pattern.countParts();
424      double offset;
425      const MessagePattern::Part* part=&pattern.getPart(partIndex);
426      if (MessagePattern::Part::hasNumericValue(part->getType())) {
427          offset=pattern.getNumericValue(*part);
428          ++partIndex;
429      } else {
430          offset=0;
431      }
432      // The keyword is empty until we need to match against a non-explicit, not-"other" value.
433      // Then we get the keyword from the selector.
434      // (In other words, we never call the selector if we match against an explicit value,
435      // or if the only non-explicit keyword is "other".)
436      UnicodeString keyword;
437      UnicodeString other(FALSE, OTHER_STRING, 5);
438      // When we find a match, we set msgStart>0 and also set this boolean to true
439      // to avoid matching the keyword again (duplicates are allowed)
440      // while we continue to look for an explicit-value match.
441      UBool haveKeywordMatch=FALSE;
442      // msgStart is 0 until we find any appropriate sub-message.
443      // We remember the first "other" sub-message if we have not seen any
444      // appropriate sub-message before.
445      // We remember the first matching-keyword sub-message if we have not seen
446      // one of those before.
447      // (The parser allows [does not check for] duplicate keywords.
448      // We just have to make sure to take the first one.)
449      // We avoid matching the keyword twice by also setting haveKeywordMatch=true
450      // at the first keyword match.
451      // We keep going until we find an explicit-value match or reach the end of the plural style.
452      int32_t msgStart=0;
453      // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples
454      // until ARG_LIMIT or end of plural-only pattern.
455      do {
456          part=&pattern.getPart(partIndex++);
457          const UMessagePatternPartType type = part->getType();
458          if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
459              break;
460          }
461          U_ASSERT (type==UMSGPAT_PART_TYPE_ARG_SELECTOR);
462          // part is an ARG_SELECTOR followed by an optional explicit value, and then a message
463          if(MessagePattern::Part::hasNumericValue(pattern.getPartType(partIndex))) {
464              // explicit value like "=2"
465              part=&pattern.getPart(partIndex++);
466              if(number==pattern.getNumericValue(*part)) {
467                  // matches explicit value
468                  return partIndex;
469              }
470          } else if(!haveKeywordMatch) {
471              // plural keyword like "few" or "other"
472              // Compare "other" first and call the selector if this is not "other".
473              if(pattern.partSubstringMatches(*part, other)) {
474                  if(msgStart==0) {
475                      msgStart=partIndex;
476                      if(0 == keyword.compare(other)) {
477                          // This is the first "other" sub-message,
478                          // and the selected keyword is also "other".
479                          // Do not match "other" again.
480                          haveKeywordMatch=TRUE;
481                      }
482                  }
483              } else {
484                  if(keyword.isEmpty()) {
485                      keyword=selector.select(context, number-offset, ec);
486                      if(msgStart!=0 && (0 == keyword.compare(other))) {
487                          // We have already seen an "other" sub-message.
488                          // Do not match "other" again.
489                          haveKeywordMatch=TRUE;
490                          // Skip keyword matching but do getLimitPartIndex().
491                      }
492                  }
493                  if(!haveKeywordMatch && pattern.partSubstringMatches(*part, keyword)) {
494                      // keyword matches
495                      msgStart=partIndex;
496                      // Do not match this keyword again.
497                      haveKeywordMatch=TRUE;
498                  }
499              }
500          }
501          partIndex=pattern.getLimitPartIndex(partIndex);
502      } while(++partIndex<count);
503      return msgStart;
504  }
505  
parseType(const UnicodeString & source,const NFRule * rbnfLenientScanner,Formattable & result,FieldPosition & pos) const506  void PluralFormat::parseType(const UnicodeString& source, const NFRule *rbnfLenientScanner, Formattable& result, FieldPosition& pos) const {
507      // If no pattern was applied, return null.
508      if (msgPattern.countParts() == 0) {
509          pos.setBeginIndex(-1);
510          pos.setEndIndex(-1);
511          return;
512      }
513      int partIndex = 0;
514      int currMatchIndex;
515      int count=msgPattern.countParts();
516      int startingAt = pos.getBeginIndex();
517      if (startingAt < 0) {
518          startingAt = 0;
519      }
520  
521      // The keyword is null until we need to match against a non-explicit, not-"other" value.
522      // Then we get the keyword from the selector.
523      // (In other words, we never call the selector if we match against an explicit value,
524      // or if the only non-explicit keyword is "other".)
525      UnicodeString keyword;
526      UnicodeString matchedWord;
527      const UnicodeString& pattern = msgPattern.getPatternString();
528      int matchedIndex = -1;
529      // Iterate over (ARG_SELECTOR ARG_START message ARG_LIMIT) tuples
530      // until the end of the plural-only pattern.
531      while (partIndex < count) {
532          const MessagePattern::Part* partSelector = &msgPattern.getPart(partIndex++);
533          if (partSelector->getType() != UMSGPAT_PART_TYPE_ARG_SELECTOR) {
534              // Bad format
535              continue;
536          }
537  
538          const MessagePattern::Part* partStart = &msgPattern.getPart(partIndex++);
539          if (partStart->getType() != UMSGPAT_PART_TYPE_MSG_START) {
540              // Bad format
541              continue;
542          }
543  
544          const MessagePattern::Part* partLimit = &msgPattern.getPart(partIndex++);
545          if (partLimit->getType() != UMSGPAT_PART_TYPE_MSG_LIMIT) {
546              // Bad format
547              continue;
548          }
549  
550          UnicodeString currArg = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit());
551          if (rbnfLenientScanner != NULL) {
552              // If lenient parsing is turned ON, we've got some time consuming parsing ahead of us.
553              int32_t length = -1;
554              currMatchIndex = rbnfLenientScanner->findTextLenient(source, currArg, startingAt, &length);
555          }
556          else {
557              currMatchIndex = source.indexOf(currArg, startingAt);
558          }
559          if (currMatchIndex >= 0 && currMatchIndex >= matchedIndex && currArg.length() > matchedWord.length()) {
560              matchedIndex = currMatchIndex;
561              matchedWord = currArg;
562              keyword = pattern.tempSubString(partStart->getLimit(), partLimit->getIndex() - partStart->getLimit());
563          }
564      }
565      if (matchedIndex >= 0) {
566          pos.setBeginIndex(matchedIndex);
567          pos.setEndIndex(matchedIndex + matchedWord.length());
568          result.setString(keyword);
569          return;
570      }
571  
572      // Not found!
573      pos.setBeginIndex(-1);
574      pos.setEndIndex(-1);
575  }
576  
~PluralSelector()577  PluralFormat::PluralSelector::~PluralSelector() {}
578  
~PluralSelectorAdapter()579  PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter() {
580      delete pluralRules;
581  }
582  
select(void * context,double number,UErrorCode &) const583  UnicodeString PluralFormat::PluralSelectorAdapter::select(void *context, double number,
584                                                            UErrorCode& /*ec*/) const {
585      (void)number;  // unused except in the assertion
586      VisibleDigitsWithExponent *dec=static_cast<VisibleDigitsWithExponent *>(context);
587      return pluralRules->select(*dec);
588  }
589  
reset()590  void PluralFormat::PluralSelectorAdapter::reset() {
591      delete pluralRules;
592      pluralRules = NULL;
593  }
594  
595  
596  U_NAMESPACE_END
597  
598  
599  #endif /* #if !UCONFIG_NO_FORMATTING */
600  
601  //eof
602