1 /*
2 *******************************************************************************
3 * Copyright (C) 2009-2012, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 *******************************************************************************
6 *
7 * File PLURFMT.CPP
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 *******************************************************************************
13 */
14
15 #include "unicode/messagepattern.h"
16 #include "unicode/plurfmt.h"
17 #include "unicode/plurrule.h"
18 #include "unicode/utypes.h"
19 #include "cmemory.h"
20 #include "messageimpl.h"
21 #include "plurrule_impl.h"
22 #include "uassert.h"
23 #include "uhash.h"
24
25 #if !UCONFIG_NO_FORMATTING
26
27 U_NAMESPACE_BEGIN
28
29 static const UChar OTHER_STRING[] = {
30 0x6F, 0x74, 0x68, 0x65, 0x72, 0 // "other"
31 };
32
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat)33 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralFormat)
34
35 PluralFormat::PluralFormat(UErrorCode& status)
36 : locale(Locale::getDefault()),
37 msgPattern(status),
38 numberFormat(NULL),
39 offset(0) {
40 init(NULL, UPLURAL_TYPE_CARDINAL, status);
41 }
42
PluralFormat(const Locale & loc,UErrorCode & status)43 PluralFormat::PluralFormat(const Locale& loc, UErrorCode& status)
44 : locale(loc),
45 msgPattern(status),
46 numberFormat(NULL),
47 offset(0) {
48 init(NULL, UPLURAL_TYPE_CARDINAL, status);
49 }
50
PluralFormat(const PluralRules & rules,UErrorCode & status)51 PluralFormat::PluralFormat(const PluralRules& rules, UErrorCode& status)
52 : locale(Locale::getDefault()),
53 msgPattern(status),
54 numberFormat(NULL),
55 offset(0) {
56 init(&rules, UPLURAL_TYPE_COUNT, status);
57 }
58
PluralFormat(const Locale & loc,const PluralRules & rules,UErrorCode & status)59 PluralFormat::PluralFormat(const Locale& loc,
60 const PluralRules& rules,
61 UErrorCode& status)
62 : locale(loc),
63 msgPattern(status),
64 numberFormat(NULL),
65 offset(0) {
66 init(&rules, UPLURAL_TYPE_COUNT, status);
67 }
68
PluralFormat(const Locale & loc,UPluralType type,UErrorCode & status)69 PluralFormat::PluralFormat(const Locale& loc,
70 UPluralType type,
71 UErrorCode& status)
72 : locale(loc),
73 msgPattern(status),
74 numberFormat(NULL),
75 offset(0) {
76 init(NULL, type, status);
77 }
78
PluralFormat(const UnicodeString & pat,UErrorCode & status)79 PluralFormat::PluralFormat(const UnicodeString& pat,
80 UErrorCode& status)
81 : locale(Locale::getDefault()),
82 msgPattern(status),
83 numberFormat(NULL),
84 offset(0) {
85 init(NULL, UPLURAL_TYPE_CARDINAL, status);
86 applyPattern(pat, status);
87 }
88
PluralFormat(const Locale & loc,const UnicodeString & pat,UErrorCode & status)89 PluralFormat::PluralFormat(const Locale& loc,
90 const UnicodeString& pat,
91 UErrorCode& status)
92 : locale(loc),
93 msgPattern(status),
94 numberFormat(NULL),
95 offset(0) {
96 init(NULL, UPLURAL_TYPE_CARDINAL, status);
97 applyPattern(pat, status);
98 }
99
PluralFormat(const PluralRules & rules,const UnicodeString & pat,UErrorCode & status)100 PluralFormat::PluralFormat(const PluralRules& rules,
101 const UnicodeString& pat,
102 UErrorCode& status)
103 : locale(Locale::getDefault()),
104 msgPattern(status),
105 numberFormat(NULL),
106 offset(0) {
107 init(&rules, UPLURAL_TYPE_COUNT, status);
108 applyPattern(pat, status);
109 }
110
PluralFormat(const Locale & loc,const PluralRules & rules,const UnicodeString & pat,UErrorCode & status)111 PluralFormat::PluralFormat(const Locale& loc,
112 const PluralRules& rules,
113 const UnicodeString& pat,
114 UErrorCode& status)
115 : locale(loc),
116 msgPattern(status),
117 numberFormat(NULL),
118 offset(0) {
119 init(&rules, UPLURAL_TYPE_COUNT, status);
120 applyPattern(pat, status);
121 }
122
PluralFormat(const Locale & loc,UPluralType type,const UnicodeString & pat,UErrorCode & status)123 PluralFormat::PluralFormat(const Locale& loc,
124 UPluralType type,
125 const UnicodeString& pat,
126 UErrorCode& status)
127 : locale(loc),
128 msgPattern(status),
129 numberFormat(NULL),
130 offset(0) {
131 init(NULL, type, status);
132 applyPattern(pat, status);
133 }
134
// Copy constructor. Value members are copied in the initializer list;
// numberFormat starts out NULL and the owned objects (number format and
// plural rules) are then duplicated by copyObjects().
PluralFormat::PluralFormat(const PluralFormat& other)
        : Format(other),
          locale(other.locale),
          msgPattern(other.msgPattern),
          numberFormat(NULL),
          offset(other.offset)
{
    copyObjects(other);
}
143
144 void
copyObjects(const PluralFormat & other)145 PluralFormat::copyObjects(const PluralFormat& other) {
146 UErrorCode status = U_ZERO_ERROR;
147 if (numberFormat != NULL) {
148 delete numberFormat;
149 }
150 if (pluralRulesWrapper.pluralRules != NULL) {
151 delete pluralRulesWrapper.pluralRules;
152 }
153
154 if (other.numberFormat == NULL) {
155 numberFormat = NumberFormat::createInstance(locale, status);
156 } else {
157 numberFormat = (NumberFormat*)other.numberFormat->clone();
158 }
159 if (other.pluralRulesWrapper.pluralRules == NULL) {
160 pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, status);
161 } else {
162 pluralRulesWrapper.pluralRules = other.pluralRulesWrapper.pluralRules->clone();
163 }
164 }
165
166
~PluralFormat()167 PluralFormat::~PluralFormat() {
168 delete numberFormat;
169 }
170
171 void
init(const PluralRules * rules,UPluralType type,UErrorCode & status)172 PluralFormat::init(const PluralRules* rules, UPluralType type, UErrorCode& status) {
173 if (U_FAILURE(status)) {
174 return;
175 }
176
177 if (rules==NULL) {
178 pluralRulesWrapper.pluralRules = PluralRules::forLocale(locale, type, status);
179 } else {
180 pluralRulesWrapper.pluralRules = rules->clone();
181 if (pluralRulesWrapper.pluralRules == NULL) {
182 status = U_MEMORY_ALLOCATION_ERROR;
183 return;
184 }
185 }
186
187 numberFormat= NumberFormat::createInstance(locale, status);
188 }
189
190 void
applyPattern(const UnicodeString & newPattern,UErrorCode & status)191 PluralFormat::applyPattern(const UnicodeString& newPattern, UErrorCode& status) {
192 msgPattern.parsePluralStyle(newPattern, NULL, status);
193 if (U_FAILURE(status)) {
194 msgPattern.clear();
195 offset = 0;
196 return;
197 }
198 offset = msgPattern.getPluralOffset(0);
199 }
200
201 UnicodeString&
format(const Formattable & obj,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const202 PluralFormat::format(const Formattable& obj,
203 UnicodeString& appendTo,
204 FieldPosition& pos,
205 UErrorCode& status) const
206 {
207 if (U_FAILURE(status)) return appendTo;
208
209 if (obj.isNumeric()) {
210 return format(obj.getDouble(), appendTo, pos, status);
211 } else {
212 status = U_ILLEGAL_ARGUMENT_ERROR;
213 return appendTo;
214 }
215 }
216
217 UnicodeString
format(int32_t number,UErrorCode & status) const218 PluralFormat::format(int32_t number, UErrorCode& status) const {
219 FieldPosition fpos(0);
220 UnicodeString result;
221 return format(number, result, fpos, status);
222 }
223
224 UnicodeString
format(double number,UErrorCode & status) const225 PluralFormat::format(double number, UErrorCode& status) const {
226 FieldPosition fpos(0);
227 UnicodeString result;
228 return format(number, result, fpos, status);
229 }
230
231
232 UnicodeString&
format(int32_t number,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const233 PluralFormat::format(int32_t number,
234 UnicodeString& appendTo,
235 FieldPosition& pos,
236 UErrorCode& status) const {
237 return format((double)number, appendTo, pos, status);
238 }
239
240 UnicodeString&
format(double number,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const241 PluralFormat::format(double number,
242 UnicodeString& appendTo,
243 FieldPosition& pos,
244 UErrorCode& status) const {
245 if (U_FAILURE(status)) {
246 return appendTo;
247 }
248 if (msgPattern.countParts() == 0) {
249 return numberFormat->format(number, appendTo, pos);
250 }
251 // Get the appropriate sub-message.
252 int32_t partIndex = findSubMessage(msgPattern, 0, pluralRulesWrapper, number, status);
253 // Replace syntactic # signs in the top level of this sub-message
254 // (not in nested arguments) with the formatted number-offset.
255 const UnicodeString& pattern = msgPattern.getPatternString();
256 number -= offset;
257 int32_t prevIndex = msgPattern.getPart(partIndex).getLimit();
258 for (;;) {
259 const MessagePattern::Part& part = msgPattern.getPart(++partIndex);
260 const UMessagePatternPartType type = part.getType();
261 int32_t index = part.getIndex();
262 if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) {
263 return appendTo.append(pattern, prevIndex, index - prevIndex);
264 } else if ((type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) ||
265 (type == UMSGPAT_PART_TYPE_SKIP_SYNTAX && MessageImpl::jdkAposMode(msgPattern))) {
266 appendTo.append(pattern, prevIndex, index - prevIndex);
267 if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
268 numberFormat->format(number, appendTo);
269 }
270 prevIndex = part.getLimit();
271 } else if (type == UMSGPAT_PART_TYPE_ARG_START) {
272 appendTo.append(pattern, prevIndex, index - prevIndex);
273 prevIndex = index;
274 partIndex = msgPattern.getLimitPartIndex(partIndex);
275 index = msgPattern.getPart(partIndex).getLimit();
276 MessageImpl::appendReducedApostrophes(pattern, prevIndex, index, appendTo);
277 prevIndex = index;
278 }
279 }
280 }
281
282 UnicodeString&
toPattern(UnicodeString & appendTo)283 PluralFormat::toPattern(UnicodeString& appendTo) {
284 if (0 == msgPattern.countParts()) {
285 appendTo.setToBogus();
286 } else {
287 appendTo.append(msgPattern.getPatternString());
288 }
289 return appendTo;
290 }
291
292 void
setLocale(const Locale & loc,UErrorCode & status)293 PluralFormat::setLocale(const Locale& loc, UErrorCode& status) {
294 if (U_FAILURE(status)) {
295 return;
296 }
297 locale = loc;
298 msgPattern.clear();
299 delete numberFormat;
300 offset = 0;
301 numberFormat = NULL;
302 pluralRulesWrapper.reset();
303 init(NULL, UPLURAL_TYPE_CARDINAL, status);
304 }
305
306 void
setNumberFormat(const NumberFormat * format,UErrorCode & status)307 PluralFormat::setNumberFormat(const NumberFormat* format, UErrorCode& status) {
308 if (U_FAILURE(status)) {
309 return;
310 }
311 NumberFormat* nf = (NumberFormat*)format->clone();
312 if (nf != NULL) {
313 delete numberFormat;
314 numberFormat = nf;
315 } else {
316 status = U_MEMORY_ALLOCATION_ERROR;
317 }
318 }
319
320 Format*
clone() const321 PluralFormat::clone() const
322 {
323 return new PluralFormat(*this);
324 }
325
326
327 PluralFormat&
operator =(const PluralFormat & other)328 PluralFormat::operator=(const PluralFormat& other) {
329 if (this != &other) {
330 locale = other.locale;
331 msgPattern = other.msgPattern;
332 offset = other.offset;
333 copyObjects(other);
334 }
335
336 return *this;
337 }
338
339 UBool
operator ==(const Format & other) const340 PluralFormat::operator==(const Format& other) const {
341 if (this == &other) {
342 return TRUE;
343 }
344 if (!Format::operator==(other)) {
345 return FALSE;
346 }
347 const PluralFormat& o = (const PluralFormat&)other;
348 return
349 locale == o.locale &&
350 msgPattern == o.msgPattern && // implies same offset
351 (numberFormat == NULL) == (o.numberFormat == NULL) &&
352 (numberFormat == NULL || *numberFormat == *o.numberFormat) &&
353 (pluralRulesWrapper.pluralRules == NULL) == (o.pluralRulesWrapper.pluralRules == NULL) &&
354 (pluralRulesWrapper.pluralRules == NULL ||
355 *pluralRulesWrapper.pluralRules == *o.pluralRulesWrapper.pluralRules);
356 }
357
358 UBool
operator !=(const Format & other) const359 PluralFormat::operator!=(const Format& other) const {
360 return !operator==(other);
361 }
362
363 void
parseObject(const UnicodeString &,Formattable &,ParsePosition & pos) const364 PluralFormat::parseObject(const UnicodeString& /*source*/,
365 Formattable& /*result*/,
366 ParsePosition& pos) const
367 {
368 // Parsing not supported.
369 pos.setErrorIndex(pos.getIndex());
370 }
371
findSubMessage(const MessagePattern & pattern,int32_t partIndex,const PluralSelector & selector,double number,UErrorCode & ec)372 int32_t PluralFormat::findSubMessage(const MessagePattern& pattern, int32_t partIndex,
373 const PluralSelector& selector, double number, UErrorCode& ec) {
374 if (U_FAILURE(ec)) {
375 return 0;
376 }
377 int32_t count=pattern.countParts();
378 double offset;
379 const MessagePattern::Part* part=&pattern.getPart(partIndex);
380 if (MessagePattern::Part::hasNumericValue(part->getType())) {
381 offset=pattern.getNumericValue(*part);
382 ++partIndex;
383 } else {
384 offset=0;
385 }
386 // The keyword is empty until we need to match against non-explicit, not-"other" value.
387 // Then we get the keyword from the selector.
388 // (In other words, we never call the selector if we match against an explicit value,
389 // or if the only non-explicit keyword is "other".)
390 UnicodeString keyword;
391 UnicodeString other(FALSE, OTHER_STRING, 5);
392 // When we find a match, we set msgStart>0 and also set this boolean to true
393 // to avoid matching the keyword again (duplicates are allowed)
394 // while we continue to look for an explicit-value match.
395 UBool haveKeywordMatch=FALSE;
396 // msgStart is 0 until we find any appropriate sub-message.
397 // We remember the first "other" sub-message if we have not seen any
398 // appropriate sub-message before.
399 // We remember the first matching-keyword sub-message if we have not seen
400 // one of those before.
401 // (The parser allows [does not check for] duplicate keywords.
402 // We just have to make sure to take the first one.)
403 // We avoid matching the keyword twice by also setting haveKeywordMatch=true
404 // at the first keyword match.
405 // We keep going until we find an explicit-value match or reach the end of the plural style.
406 int32_t msgStart=0;
407 // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples
408 // until ARG_LIMIT or end of plural-only pattern.
409 do {
410 part=&pattern.getPart(partIndex++);
411 const UMessagePatternPartType type = part->getType();
412 if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
413 break;
414 }
415 U_ASSERT (type==UMSGPAT_PART_TYPE_ARG_SELECTOR);
416 // part is an ARG_SELECTOR followed by an optional explicit value, and then a message
417 if(MessagePattern::Part::hasNumericValue(pattern.getPartType(partIndex))) {
418 // explicit value like "=2"
419 part=&pattern.getPart(partIndex++);
420 if(number==pattern.getNumericValue(*part)) {
421 // matches explicit value
422 return partIndex;
423 }
424 } else if(!haveKeywordMatch) {
425 // plural keyword like "few" or "other"
426 // Compare "other" first and call the selector if this is not "other".
427 if(pattern.partSubstringMatches(*part, other)) {
428 if(msgStart==0) {
429 msgStart=partIndex;
430 if(0 == keyword.compare(other)) {
431 // This is the first "other" sub-message,
432 // and the selected keyword is also "other".
433 // Do not match "other" again.
434 haveKeywordMatch=TRUE;
435 }
436 }
437 } else {
438 if(keyword.isEmpty()) {
439 keyword=selector.select(number-offset, ec);
440 if(msgStart!=0 && (0 == keyword.compare(other))) {
441 // We have already seen an "other" sub-message.
442 // Do not match "other" again.
443 haveKeywordMatch=TRUE;
444 // Skip keyword matching but do getLimitPartIndex().
445 }
446 }
447 if(!haveKeywordMatch && pattern.partSubstringMatches(*part, keyword)) {
448 // keyword matches
449 msgStart=partIndex;
450 // Do not match this keyword again.
451 haveKeywordMatch=TRUE;
452 }
453 }
454 }
455 partIndex=pattern.getLimitPartIndex(partIndex);
456 } while(++partIndex<count);
457 return msgStart;
458 }
459
~PluralSelector()460 PluralFormat::PluralSelector::~PluralSelector() {}
461
~PluralSelectorAdapter()462 PluralFormat::PluralSelectorAdapter::~PluralSelectorAdapter() {
463 delete pluralRules;
464 }
465
select(double number,UErrorCode &) const466 UnicodeString PluralFormat::PluralSelectorAdapter::select(double number,
467 UErrorCode& /*ec*/) const {
468 return pluralRules->select(number);
469 }
470
reset()471 void PluralFormat::PluralSelectorAdapter::reset() {
472 delete pluralRules;
473 pluralRules = NULL;
474 }
475
476
477 U_NAMESPACE_END
478
479
480 #endif /* #if !UCONFIG_NO_FORMATTING */
481
482 //eof
483