1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2007-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 *******************************************************************************
8 *
9 * File plurrule.cpp
10 */
11
12 #include <math.h>
13 #include <stdio.h>
14
15 #include "unicode/utypes.h"
16 #include "unicode/localpointer.h"
17 #include "unicode/plurrule.h"
18 #include "unicode/upluralrules.h"
19 #include "unicode/ures.h"
20 #include "unicode/numfmt.h"
21 #include "unicode/decimfmt.h"
22 #include "charstr.h"
23 #include "cmemory.h"
24 #include "cstring.h"
25 #include "hash.h"
26 #include "locutil.h"
27 #include "mutex.h"
28 #include "patternprops.h"
29 #include "plurrule_impl.h"
30 #include "putilimp.h"
31 #include "ucln_in.h"
32 #include "ustrfmt.h"
33 #include "uassert.h"
34 #include "uvectr32.h"
35 #include "sharedpluralrules.h"
36 #include "unifiedcache.h"
37 #include "number_decimalquantity.h"
38
39 #if !UCONFIG_NO_FORMATTING
40
41 U_NAMESPACE_BEGIN
42
43 using namespace icu::pluralimpl;
44 using icu::number::impl::DecimalQuantity;
45
46 static const UChar PLURAL_KEYWORD_OTHER[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,0};
47 static const UChar PLURAL_DEFAULT_RULE[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,COLON,SPACE,LOW_N,0};
48 static const UChar PK_IN[]={LOW_I,LOW_N,0};
49 static const UChar PK_NOT[]={LOW_N,LOW_O,LOW_T,0};
50 static const UChar PK_IS[]={LOW_I,LOW_S,0};
51 static const UChar PK_MOD[]={LOW_M,LOW_O,LOW_D,0};
52 static const UChar PK_AND[]={LOW_A,LOW_N,LOW_D,0};
53 static const UChar PK_OR[]={LOW_O,LOW_R,0};
54 static const UChar PK_VAR_N[]={LOW_N,0};
55 static const UChar PK_VAR_I[]={LOW_I,0};
56 static const UChar PK_VAR_F[]={LOW_F,0};
57 static const UChar PK_VAR_T[]={LOW_T,0};
58 static const UChar PK_VAR_V[]={LOW_V,0};
59 static const UChar PK_WITHIN[]={LOW_W,LOW_I,LOW_T,LOW_H,LOW_I,LOW_N,0};
60 static const UChar PK_DECIMAL[]={LOW_D,LOW_E,LOW_C,LOW_I,LOW_M,LOW_A,LOW_L,0};
61 static const UChar PK_INTEGER[]={LOW_I,LOW_N,LOW_T,LOW_E,LOW_G,LOW_E,LOW_R,0};
62
63 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralRules)
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration)64 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration)
65
66 PluralRules::PluralRules(UErrorCode& /*status*/)
67 : UObject(),
68 mRules(nullptr),
69 mInternalStatus(U_ZERO_ERROR)
70 {
71 }
72
// Copy constructor: starts from an empty, valid object and delegates the deep
// copy (and any error bookkeeping in mInternalStatus) to the assignment operator.
PluralRules::PluralRules(const PluralRules& other)
    : UObject(other), mRules(nullptr), mInternalStatus(U_ZERO_ERROR) {
    operator=(other);
}
80
~PluralRules()81 PluralRules::~PluralRules() {
82 delete mRules;
83 }
84
~SharedPluralRules()85 SharedPluralRules::~SharedPluralRules() {
86 delete ptr;
87 }
88
89 PluralRules*
clone() const90 PluralRules::clone() const {
91 PluralRules* newObj = new PluralRules(*this);
92 // Since clone doesn't have a 'status' parameter, the best we can do is return nullptr if
93 // the newly created object was not fully constructed properly (an error occurred).
94 if (newObj != nullptr && U_FAILURE(newObj->mInternalStatus)) {
95 delete newObj;
96 newObj = nullptr;
97 }
98 return newObj;
99 }
100
101 PluralRules&
operator =(const PluralRules & other)102 PluralRules::operator=(const PluralRules& other) {
103 if (this != &other) {
104 delete mRules;
105 mRules = nullptr;
106 mInternalStatus = other.mInternalStatus;
107 if (U_FAILURE(mInternalStatus)) {
108 // bail out early if the object we were copying from was already 'invalid'.
109 return *this;
110 }
111 if (other.mRules != nullptr) {
112 mRules = new RuleChain(*other.mRules);
113 if (mRules == nullptr) {
114 mInternalStatus = U_MEMORY_ALLOCATION_ERROR;
115 }
116 else if (U_FAILURE(mRules->fInternalStatus)) {
117 // If the RuleChain wasn't fully copied, then set our status to failure as well.
118 mInternalStatus = mRules->fInternalStatus;
119 }
120 }
121 }
122 return *this;
123 }
124
getAvailableLocales(UErrorCode & status)125 StringEnumeration* PluralRules::getAvailableLocales(UErrorCode &status) {
126 if (U_FAILURE(status)) {
127 return nullptr;
128 }
129 LocalPointer<StringEnumeration> result(new PluralAvailableLocalesEnumeration(status), status);
130 if (U_FAILURE(status)) {
131 return nullptr;
132 }
133 return result.orphan();
134 }
135
136
137 PluralRules* U_EXPORT2
createRules(const UnicodeString & description,UErrorCode & status)138 PluralRules::createRules(const UnicodeString& description, UErrorCode& status) {
139 if (U_FAILURE(status)) {
140 return nullptr;
141 }
142 PluralRuleParser parser;
143 LocalPointer<PluralRules> newRules(new PluralRules(status), status);
144 if (U_FAILURE(status)) {
145 return nullptr;
146 }
147 parser.parse(description, newRules.getAlias(), status);
148 if (U_FAILURE(status)) {
149 newRules.adoptInstead(nullptr);
150 }
151 return newRules.orphan();
152 }
153
154
155 PluralRules* U_EXPORT2
createDefaultRules(UErrorCode & status)156 PluralRules::createDefaultRules(UErrorCode& status) {
157 return createRules(UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1), status);
158 }
159
160 /******************************************************************************/
161 /* Create PluralRules cache */
162
163 template<> U_I18N_API
createObject(const void *,UErrorCode & status) const164 const SharedPluralRules *LocaleCacheKey<SharedPluralRules>::createObject(
165 const void * /*unused*/, UErrorCode &status) const {
166 const char *localeId = fLoc.getName();
167 LocalPointer<PluralRules> pr(PluralRules::internalForLocale(localeId, UPLURAL_TYPE_CARDINAL, status), status);
168 if (U_FAILURE(status)) {
169 return nullptr;
170 }
171 LocalPointer<SharedPluralRules> result(new SharedPluralRules(pr.getAlias()), status);
172 if (U_FAILURE(status)) {
173 return nullptr;
174 }
175 pr.orphan(); // result was successfully created so it nows pr.
176 result->addRef();
177 return result.orphan();
178 }
179
180 /* end plural rules cache */
181 /******************************************************************************/
182
183 const SharedPluralRules* U_EXPORT2
createSharedInstance(const Locale & locale,UPluralType type,UErrorCode & status)184 PluralRules::createSharedInstance(
185 const Locale& locale, UPluralType type, UErrorCode& status) {
186 if (U_FAILURE(status)) {
187 return nullptr;
188 }
189 if (type != UPLURAL_TYPE_CARDINAL) {
190 status = U_UNSUPPORTED_ERROR;
191 return nullptr;
192 }
193 const SharedPluralRules *result = nullptr;
194 UnifiedCache::getByLocale(locale, result, status);
195 return result;
196 }
197
198 PluralRules* U_EXPORT2
forLocale(const Locale & locale,UErrorCode & status)199 PluralRules::forLocale(const Locale& locale, UErrorCode& status) {
200 return forLocale(locale, UPLURAL_TYPE_CARDINAL, status);
201 }
202
203 PluralRules* U_EXPORT2
forLocale(const Locale & locale,UPluralType type,UErrorCode & status)204 PluralRules::forLocale(const Locale& locale, UPluralType type, UErrorCode& status) {
205 if (type != UPLURAL_TYPE_CARDINAL) {
206 return internalForLocale(locale, type, status);
207 }
208 const SharedPluralRules *shared = createSharedInstance(
209 locale, type, status);
210 if (U_FAILURE(status)) {
211 return nullptr;
212 }
213 PluralRules *result = (*shared)->clone();
214 shared->removeRef();
215 if (result == nullptr) {
216 status = U_MEMORY_ALLOCATION_ERROR;
217 }
218 return result;
219 }
220
221 PluralRules* U_EXPORT2
internalForLocale(const Locale & locale,UPluralType type,UErrorCode & status)222 PluralRules::internalForLocale(const Locale& locale, UPluralType type, UErrorCode& status) {
223 if (U_FAILURE(status)) {
224 return nullptr;
225 }
226 if (type >= UPLURAL_TYPE_COUNT) {
227 status = U_ILLEGAL_ARGUMENT_ERROR;
228 return nullptr;
229 }
230 LocalPointer<PluralRules> newObj(new PluralRules(status), status);
231 if (U_FAILURE(status)) {
232 return nullptr;
233 }
234 UnicodeString locRule = newObj->getRuleFromResource(locale, type, status);
235 // TODO: which other errors, if any, should be returned?
236 if (locRule.length() == 0) {
237 // If an out-of-memory error occurred, then stop and report the failure.
238 if (status == U_MEMORY_ALLOCATION_ERROR) {
239 return nullptr;
240 }
241 // Locales with no specific rules (all numbers have the "other" category
242 // will return a U_MISSING_RESOURCE_ERROR at this point. This is not
243 // an error.
244 locRule = UnicodeString(PLURAL_DEFAULT_RULE);
245 status = U_ZERO_ERROR;
246 }
247 PluralRuleParser parser;
248 parser.parse(locRule, newObj.getAlias(), status);
249 // TODO: should rule parse errors be returned, or
250 // should we silently use default rules?
251 // Original impl used default rules.
252 // Ask the question to ICU Core.
253
254 return newObj.orphan();
255 }
256
257 UnicodeString
select(int32_t number) const258 PluralRules::select(int32_t number) const {
259 return select(FixedDecimal(number));
260 }
261
262 UnicodeString
select(double number) const263 PluralRules::select(double number) const {
264 return select(FixedDecimal(number));
265 }
266
267 UnicodeString
select(const IFixedDecimal & number) const268 PluralRules::select(const IFixedDecimal &number) const {
269 if (mRules == nullptr) {
270 return UnicodeString(TRUE, PLURAL_DEFAULT_RULE, -1);
271 }
272 else {
273 return mRules->select(number);
274 }
275 }
276
277
278
279 StringEnumeration*
getKeywords(UErrorCode & status) const280 PluralRules::getKeywords(UErrorCode& status) const {
281 if (U_FAILURE(status)) {
282 return nullptr;
283 }
284 if (U_FAILURE(mInternalStatus)) {
285 status = mInternalStatus;
286 return nullptr;
287 }
288 LocalPointer<StringEnumeration> nameEnumerator(new PluralKeywordEnumeration(mRules, status), status);
289 if (U_FAILURE(status)) {
290 return nullptr;
291 }
292 return nameEnumerator.orphan();
293 }
294
295 double
getUniqueKeywordValue(const UnicodeString &)296 PluralRules::getUniqueKeywordValue(const UnicodeString& /* keyword */) {
297 // Not Implemented.
298 return UPLRULES_NO_UNIQUE_VALUE;
299 }
300
301 int32_t
getAllKeywordValues(const UnicodeString &,double *,int32_t,UErrorCode & error)302 PluralRules::getAllKeywordValues(const UnicodeString & /* keyword */, double * /* dest */,
303 int32_t /* destCapacity */, UErrorCode& error) {
304 error = U_UNSUPPORTED_ERROR;
305 return 0;
306 }
307
308
// Returns the smallest power of ten (1, 10, 100, ...) that, multiplied
// step by step into d, turns d into a whole number. Returns 1 for values that
// are already integral, for infinities, and for NaN.
static double scaleForInt(double d) {
    // NaN never equals floor(NaN), so without this guard the loop below
    // would spin forever if a sample ever parsed to NaN.
    if (d != d) {
        return 1.0;
    }
    double scale = 1.0;
    while (d != floor(d)) {
        d = d * 10.0;
        scale = scale * 10.0;
    }
    return scale;
}
317
318 static int32_t
getSamplesFromString(const UnicodeString & samples,double * dest,int32_t destCapacity,UErrorCode & status)319 getSamplesFromString(const UnicodeString &samples, double *dest,
320 int32_t destCapacity, UErrorCode& status) {
321 int32_t sampleCount = 0;
322 int32_t sampleStartIdx = 0;
323 int32_t sampleEndIdx = 0;
324
325 //std::string ss; // TODO: debugging.
326 // std::cout << "PluralRules::getSamples(), samples = \"" << samples.toUTF8String(ss) << "\"\n";
327 for (sampleCount = 0; sampleCount < destCapacity && sampleStartIdx < samples.length(); ) {
328 sampleEndIdx = samples.indexOf(COMMA, sampleStartIdx);
329 if (sampleEndIdx == -1) {
330 sampleEndIdx = samples.length();
331 }
332 const UnicodeString &sampleRange = samples.tempSubStringBetween(sampleStartIdx, sampleEndIdx);
333 // ss.erase();
334 // std::cout << "PluralRules::getSamples(), samplesRange = \"" << sampleRange.toUTF8String(ss) << "\"\n";
335 int32_t tildeIndex = sampleRange.indexOf(TILDE);
336 if (tildeIndex < 0) {
337 FixedDecimal fixed(sampleRange, status);
338 double sampleValue = fixed.source;
339 if (fixed.visibleDecimalDigitCount == 0 || sampleValue != floor(sampleValue)) {
340 dest[sampleCount++] = sampleValue;
341 }
342 } else {
343
344 FixedDecimal fixedLo(sampleRange.tempSubStringBetween(0, tildeIndex), status);
345 FixedDecimal fixedHi(sampleRange.tempSubStringBetween(tildeIndex+1), status);
346 double rangeLo = fixedLo.source;
347 double rangeHi = fixedHi.source;
348 if (U_FAILURE(status)) {
349 break;
350 }
351 if (rangeHi < rangeLo) {
352 status = U_INVALID_FORMAT_ERROR;
353 break;
354 }
355
356 // For ranges of samples with fraction decimal digits, scale the number up so that we
357 // are adding one in the units place. Avoids roundoffs from repetitive adds of tenths.
358
359 double scale = scaleForInt(rangeLo);
360 double t = scaleForInt(rangeHi);
361 if (t > scale) {
362 scale = t;
363 }
364 rangeLo *= scale;
365 rangeHi *= scale;
366 for (double n=rangeLo; n<=rangeHi; n+=1) {
367 // Hack Alert: don't return any decimal samples with integer values that
368 // originated from a format with trailing decimals.
369 // This API is returning doubles, which can't distinguish having displayed
370 // zeros to the right of the decimal.
371 // This results in test failures with values mapping back to a different keyword.
372 double sampleValue = n/scale;
373 if (!(sampleValue == floor(sampleValue) && fixedLo.visibleDecimalDigitCount > 0)) {
374 dest[sampleCount++] = sampleValue;
375 }
376 if (sampleCount >= destCapacity) {
377 break;
378 }
379 }
380 }
381 sampleStartIdx = sampleEndIdx + 1;
382 }
383 return sampleCount;
384 }
385
386
387 int32_t
getSamples(const UnicodeString & keyword,double * dest,int32_t destCapacity,UErrorCode & status)388 PluralRules::getSamples(const UnicodeString &keyword, double *dest,
389 int32_t destCapacity, UErrorCode& status) {
390 if (destCapacity == 0 || U_FAILURE(status)) {
391 return 0;
392 }
393 if (U_FAILURE(mInternalStatus)) {
394 status = mInternalStatus;
395 return 0;
396 }
397 RuleChain *rc = rulesForKeyword(keyword);
398 if (rc == nullptr) {
399 return 0;
400 }
401 int32_t numSamples = getSamplesFromString(rc->fIntegerSamples, dest, destCapacity, status);
402 if (numSamples == 0) {
403 numSamples = getSamplesFromString(rc->fDecimalSamples, dest, destCapacity, status);
404 }
405 return numSamples;
406 }
407
408
rulesForKeyword(const UnicodeString & keyword) const409 RuleChain *PluralRules::rulesForKeyword(const UnicodeString &keyword) const {
410 RuleChain *rc;
411 for (rc = mRules; rc != nullptr; rc = rc->fNext) {
412 if (rc->fKeyword == keyword) {
413 break;
414 }
415 }
416 return rc;
417 }
418
419
420 UBool
isKeyword(const UnicodeString & keyword) const421 PluralRules::isKeyword(const UnicodeString& keyword) const {
422 if (0 == keyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
423 return true;
424 }
425 return rulesForKeyword(keyword) != nullptr;
426 }
427
428 UnicodeString
getKeywordOther() const429 PluralRules::getKeywordOther() const {
430 return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5);
431 }
432
433 UBool
operator ==(const PluralRules & other) const434 PluralRules::operator==(const PluralRules& other) const {
435 const UnicodeString *ptrKeyword;
436 UErrorCode status= U_ZERO_ERROR;
437
438 if ( this == &other ) {
439 return TRUE;
440 }
441 LocalPointer<StringEnumeration> myKeywordList(getKeywords(status));
442 LocalPointer<StringEnumeration> otherKeywordList(other.getKeywords(status));
443 if (U_FAILURE(status)) {
444 return FALSE;
445 }
446
447 if (myKeywordList->count(status)!=otherKeywordList->count(status)) {
448 return FALSE;
449 }
450 myKeywordList->reset(status);
451 while ((ptrKeyword=myKeywordList->snext(status))!=nullptr) {
452 if (!other.isKeyword(*ptrKeyword)) {
453 return FALSE;
454 }
455 }
456 otherKeywordList->reset(status);
457 while ((ptrKeyword=otherKeywordList->snext(status))!=nullptr) {
458 if (!this->isKeyword(*ptrKeyword)) {
459 return FALSE;
460 }
461 }
462 if (U_FAILURE(status)) {
463 return FALSE;
464 }
465
466 return TRUE;
467 }
468
469
// Parses the rule description ruleData and appends the resulting rule chains
// to prules->mRules.
//
// The function is a token-driven loop: each iteration reads one token with
// getNextToken(), validates it with checkSyntax(), and then updates the
// constraint currently under construction (curAndConstraint) or the chain
// currently under construction (currentChain).
//
// @param ruleData  rule text; the parser keeps a pointer to it in ruleSrc
// @param prules    receives the parsed rule chains
// @param status    set on allocation failure or on a syntax error; the
//                  function returns as soon as it is a failure, leaving
//                  whatever has been built so far attached to prules
void
PluralRuleParser::parse(const UnicodeString& ruleData, PluralRules *prules, UErrorCode &status)
{
    if (U_FAILURE(status)) {
        return;
    }
    U_ASSERT(ruleIndex == 0);    // Parsers are good for a single use only!
    ruleSrc = &ruleData;

    while (ruleIndex< ruleSrc->length()) {
        getNextToken(status);
        if (U_FAILURE(status)) {
            return;
        }
        checkSyntax(status);
        if (U_FAILURE(status)) {
            return;
        }
        switch (type) {
        case tAnd:
            // "and": chain a new AndConstraint after the current one.
            U_ASSERT(curAndConstraint != nullptr);
            curAndConstraint = curAndConstraint->add(status);
            break;
        case tOr:
            {
                // "or": append a new OrConstraint at the end of the current chain's
                // list and start its first AndConstraint.
                U_ASSERT(currentChain != nullptr);
                OrConstraint *orNode=currentChain->ruleHeader;
                while (orNode->next != nullptr) {
                    orNode = orNode->next;
                }
                orNode->next= new OrConstraint();
                if (orNode->next == nullptr) {
                    status = U_MEMORY_ALLOCATION_ERROR;
                    break;
                }
                orNode=orNode->next;
                orNode->next=nullptr;
                curAndConstraint = orNode->add(status);
            }
            break;
        case tIs:
            // "is": nothing to record; only verify that no value or range has been set yet.
            U_ASSERT(curAndConstraint != nullptr);
            U_ASSERT(curAndConstraint->value == -1);
            U_ASSERT(curAndConstraint->rangeList == nullptr);
            break;
        case tNot:
            U_ASSERT(curAndConstraint != nullptr);
            curAndConstraint->negated=TRUE;
            break;

        case tNotEqual:
            // Mark the constraint as negated, then share the range setup below.
            curAndConstraint->negated=TRUE;
            U_FALLTHROUGH;
        case tIn:
        case tWithin:
        case tEqual:
            {
                // Start a range list holding one placeholder (low, high) pair of -1, -1;
                // the following tNumber tokens fill it in.
                U_ASSERT(curAndConstraint != nullptr);
                LocalPointer<UVector32> newRangeList(new UVector32(status), status);
                if (U_FAILURE(status)) {
                    break;
                }
                curAndConstraint->rangeList = newRangeList.orphan();
                curAndConstraint->rangeList->addElement(-1, status);  // range Low
                curAndConstraint->rangeList->addElement(-1, status);  // range Hi
                rangeLowIdx = 0;
                rangeHiIdx  = 1;
                curAndConstraint->value=PLURAL_RANGE_HIGH;
                // Every relation handled here except "within" is integer-only.
                curAndConstraint->integerOnly = (type != tWithin);
            }
            break;
        case tNumber:
            U_ASSERT(curAndConstraint != nullptr);
            if ( (curAndConstraint->op==AndConstraint::MOD)&&
                 (curAndConstraint->opNum == -1 ) ) {
                // First number after "mod": this is the modulus.
                curAndConstraint->opNum=getNumberValue(token);
            }
            else {
                if (curAndConstraint->rangeList == nullptr) {
                    // this is for an 'is' rule
                    curAndConstraint->value = getNumberValue(token);
                } else {
                    // this is for an 'in' or 'within' rule
                    if (curAndConstraint->rangeList->elementAti(rangeLowIdx) == -1) {
                        // First number of the pair: use it as both bounds, so a single
                        // value behaves like the range value..value.
                        curAndConstraint->rangeList->setElementAt(getNumberValue(token), rangeLowIdx);
                        curAndConstraint->rangeList->setElementAt(getNumberValue(token), rangeHiIdx);
                    }
                    else {
                        // Second number of the pair: it replaces the upper bound.
                        curAndConstraint->rangeList->setElementAt(getNumberValue(token), rangeHiIdx);
                        if (curAndConstraint->rangeList->elementAti(rangeLowIdx) >
                                curAndConstraint->rangeList->elementAti(rangeHiIdx)) {
                            // Range Lower bound > Range Upper bound.
                            // U_UNEXPECTED_TOKEN seems a little funny, but it is consistently
                            // used for all plural rule parse errors.
                            status = U_UNEXPECTED_TOKEN;
                            break;
                        }
                    }
                }
            }
            break;
        case tComma:
            // TODO: rule syntax checking is inadequate, can happen with badly formed rules.
            //       Catch cases like "n mod 10, is 1" here instead.
            if (curAndConstraint == nullptr || curAndConstraint->rangeList == nullptr) {
                status = U_UNEXPECTED_TOKEN;
                break;
            }
            // Open a new placeholder (low, high) pair at the end of the range list.
            U_ASSERT(curAndConstraint->rangeList->size() >= 2);
            rangeLowIdx = curAndConstraint->rangeList->size();
            curAndConstraint->rangeList->addElement(-1, status);  // range Low
            rangeHiIdx = curAndConstraint->rangeList->size();
            curAndConstraint->rangeList->addElement(-1, status);  // range Hi
            break;
        case tMod:
            U_ASSERT(curAndConstraint != nullptr);
            curAndConstraint->op=AndConstraint::MOD;
            break;
        case tVariableN:
        case tVariableI:
        case tVariableF:
        case tVariableT:
        case tVariableV:
            // Record which operand the constraint tests.
            U_ASSERT(curAndConstraint != nullptr);
            curAndConstraint->digitsType = type;
            break;
        case tKeyword:
            {
            // A keyword starts a new rule chain.
            RuleChain *newChain = new RuleChain;
            if (newChain == nullptr) {
                status = U_MEMORY_ALLOCATION_ERROR;
                break;
            }
            newChain->fKeyword = token;
            if (prules->mRules == nullptr) {
                prules->mRules = newChain;
            } else {
                // The new rule chain goes at the end of the linked list of rule chains,
                //   unless there is an "other" keyword & chain. "other" must remain last.
                RuleChain *insertAfter = prules->mRules;
                while (insertAfter->fNext!=nullptr &&
                       insertAfter->fNext->fKeyword.compare(PLURAL_KEYWORD_OTHER, 5) != 0 ){
                    insertAfter=insertAfter->fNext;
                }
                newChain->fNext = insertAfter->fNext;
                insertAfter->fNext = newChain;
            }
            // Give the chain its first OrConstraint and an empty AndConstraint to fill in.
            OrConstraint *orNode = new OrConstraint();
            if (orNode == nullptr) {
                status = U_MEMORY_ALLOCATION_ERROR;
                break;
            }
            newChain->ruleHeader = orNode;
            curAndConstraint = orNode->add(status);
            currentChain = newChain;
            }
            break;

        case tInteger:
            // Integer samples: append the text of every following token to
            // fIntegerSamples until ';', end of input or '@'. An ellipsis is not
            // appended; it only sets the unbounded flag.
            for (;;) {
                getNextToken(status);
                if (U_FAILURE(status) || type == tSemiColon || type == tEOF || type == tAt) {
                    break;
                }
                if (type == tEllipsis) {
                    currentChain->fIntegerSamplesUnbounded = TRUE;
                    continue;
                }
                currentChain->fIntegerSamples.append(token);
            }
            break;

        case tDecimal:
            // Decimal samples: same handling as tInteger, but collected in fDecimalSamples.
            for (;;) {
                getNextToken(status);
                if (U_FAILURE(status) || type == tSemiColon || type == tEOF || type == tAt) {
                    break;
                }
                if (type == tEllipsis) {
                    currentChain->fDecimalSamplesUnbounded = TRUE;
                    continue;
                }
                currentChain->fDecimalSamples.append(token);
            }
            break;

        default:
            break;
        }
        // Remember the token type just handled for the next iteration.
        prevType=type;
        if (U_FAILURE(status)) {
            break;
        }
    }
}
665
666 UnicodeString
getRuleFromResource(const Locale & locale,UPluralType type,UErrorCode & errCode)667 PluralRules::getRuleFromResource(const Locale& locale, UPluralType type, UErrorCode& errCode) {
668 UnicodeString emptyStr;
669
670 if (U_FAILURE(errCode)) {
671 return emptyStr;
672 }
673 LocalUResourceBundlePointer rb(ures_openDirect(nullptr, "plurals", &errCode));
674 if(U_FAILURE(errCode)) {
675 return emptyStr;
676 }
677 const char *typeKey;
678 switch (type) {
679 case UPLURAL_TYPE_CARDINAL:
680 typeKey = "locales";
681 break;
682 case UPLURAL_TYPE_ORDINAL:
683 typeKey = "locales_ordinals";
684 break;
685 default:
686 // Must not occur: The caller should have checked for valid types.
687 errCode = U_ILLEGAL_ARGUMENT_ERROR;
688 return emptyStr;
689 }
690 LocalUResourceBundlePointer locRes(ures_getByKey(rb.getAlias(), typeKey, nullptr, &errCode));
691 if(U_FAILURE(errCode)) {
692 return emptyStr;
693 }
694 int32_t resLen=0;
695 const char *curLocaleName=locale.getName();
696 const UChar* s = ures_getStringByKey(locRes.getAlias(), curLocaleName, &resLen, &errCode);
697
698 if (s == nullptr) {
699 // Check parent locales.
700 UErrorCode status = U_ZERO_ERROR;
701 char parentLocaleName[ULOC_FULLNAME_CAPACITY];
702 const char *curLocaleName2=locale.getName();
703 uprv_strcpy(parentLocaleName, curLocaleName2);
704
705 while (uloc_getParent(parentLocaleName, parentLocaleName,
706 ULOC_FULLNAME_CAPACITY, &status) > 0) {
707 resLen=0;
708 s = ures_getStringByKey(locRes.getAlias(), parentLocaleName, &resLen, &status);
709 if (s != nullptr) {
710 errCode = U_ZERO_ERROR;
711 break;
712 }
713 status = U_ZERO_ERROR;
714 }
715 }
716 if (s==nullptr) {
717 return emptyStr;
718 }
719
720 char setKey[256];
721 u_UCharsToChars(s, setKey, resLen + 1);
722 // printf("\n PluralRule: %s\n", setKey);
723
724 LocalUResourceBundlePointer ruleRes(ures_getByKey(rb.getAlias(), "rules", nullptr, &errCode));
725 if(U_FAILURE(errCode)) {
726 return emptyStr;
727 }
728 LocalUResourceBundlePointer setRes(ures_getByKey(ruleRes.getAlias(), setKey, nullptr, &errCode));
729 if (U_FAILURE(errCode)) {
730 return emptyStr;
731 }
732
733 int32_t numberKeys = ures_getSize(setRes.getAlias());
734 UnicodeString result;
735 const char *key=nullptr;
736 for(int32_t i=0; i<numberKeys; ++i) { // Keys are zero, one, few, ...
737 UnicodeString rules = ures_getNextUnicodeString(setRes.getAlias(), &key, &errCode);
738 UnicodeString uKey(key, -1, US_INV);
739 result.append(uKey);
740 result.append(COLON);
741 result.append(rules);
742 result.append(SEMI_COLON);
743 }
744 return result;
745 }
746
747
748 UnicodeString
getRules() const749 PluralRules::getRules() const {
750 UnicodeString rules;
751 if (mRules != nullptr) {
752 mRules->dumpRules(rules);
753 }
754 return rules;
755 }
756
// Deep-copies other, including its range list and every constraint linked
// through next. Any failure is recorded in fInternalStatus, so the owner of
// the copy can detect an incomplete chain.
AndConstraint::AndConstraint(const AndConstraint& other) {
    this->fInternalStatus = other.fInternalStatus;
    if (U_FAILURE(fInternalStatus)) {
        return; // stop early if the object we are copying from is invalid.
    }
    this->op = other.op;
    this->opNum=other.opNum;
    this->value=other.value;
    if (other.rangeList != nullptr) {
        LocalPointer<UVector32> newRangeList(new UVector32(fInternalStatus), fInternalStatus);
        if (U_FAILURE(fInternalStatus)) {
            return;
        }
        this->rangeList = newRangeList.orphan();
        this->rangeList->assign(*other.rangeList, fInternalStatus);
    }
    this->integerOnly=other.integerOnly;
    this->negated=other.negated;
    this->digitsType = other.digitsType;
    if (other.next != nullptr) {
        this->next = new AndConstraint(*other.next);
        if (this->next == nullptr) {
            fInternalStatus = U_MEMORY_ALLOCATION_ERROR;
        }
        else if (U_FAILURE(this->next->fInternalStatus)) {
            // The rest of the chain was only partially copied; without this the
            // truncated copy would be reported as valid.
            fInternalStatus = this->next->fInternalStatus;
        }
    }
}
783
~AndConstraint()784 AndConstraint::~AndConstraint() {
785 delete rangeList;
786 rangeList = nullptr;
787 delete next;
788 next = nullptr;
789 }
790
791 UBool
isFulfilled(const IFixedDecimal & number)792 AndConstraint::isFulfilled(const IFixedDecimal &number) {
793 UBool result = TRUE;
794 if (digitsType == none) {
795 // An empty AndConstraint, created by a rule with a keyword but no following expression.
796 return TRUE;
797 }
798
799 PluralOperand operand = tokenTypeToPluralOperand(digitsType);
800 double n = number.getPluralOperand(operand); // pulls n | i | v | f value for the number.
801 // Will always be positive.
802 // May be non-integer (n option only)
803 do {
804 if (integerOnly && n != uprv_floor(n)) {
805 result = FALSE;
806 break;
807 }
808
809 if (op == MOD) {
810 n = fmod(n, opNum);
811 }
812 if (rangeList == nullptr) {
813 result = value == -1 || // empty rule
814 n == value; // 'is' rule
815 break;
816 }
817 result = FALSE; // 'in' or 'within' rule
818 for (int32_t r=0; r<rangeList->size(); r+=2) {
819 if (rangeList->elementAti(r) <= n && n <= rangeList->elementAti(r+1)) {
820 result = TRUE;
821 break;
822 }
823 }
824 } while (FALSE);
825
826 if (negated) {
827 result = !result;
828 }
829 return result;
830 }
831
832 AndConstraint*
add(UErrorCode & status)833 AndConstraint::add(UErrorCode& status) {
834 if (U_FAILURE(fInternalStatus)) {
835 status = fInternalStatus;
836 return nullptr;
837 }
838 this->next = new AndConstraint();
839 if (this->next == nullptr) {
840 status = U_MEMORY_ALLOCATION_ERROR;
841 }
842 return this->next;
843 }
844
845
// Deep-copies other: its AndConstraint chain (childNode) and every following
// OrConstraint (next). Any failure is recorded in fInternalStatus, so the
// owner of the copy can detect an incomplete rule.
OrConstraint::OrConstraint(const OrConstraint& other) {
    this->fInternalStatus = other.fInternalStatus;
    if (U_FAILURE(fInternalStatus)) {
        return; // stop early if the object we are copying from is invalid.
    }
    if ( other.childNode != nullptr ) {
        this->childNode = new AndConstraint(*(other.childNode));
        if (this->childNode == nullptr) {
            fInternalStatus = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        if (U_FAILURE(this->childNode->fInternalStatus)) {
            // The AndConstraint chain was only partially copied; propagate the
            // failure instead of presenting a truncated rule as valid.
            fInternalStatus = this->childNode->fInternalStatus;
            return;
        }
    }
    if (other.next != nullptr ) {
        this->next = new OrConstraint(*(other.next));
        if (this->next == nullptr) {
            fInternalStatus = U_MEMORY_ALLOCATION_ERROR;
            return;
        }
        if (U_FAILURE(this->next->fInternalStatus)) {
            this->fInternalStatus = this->next->fInternalStatus;
        }
    }
}
869
~OrConstraint()870 OrConstraint::~OrConstraint() {
871 delete childNode;
872 childNode = nullptr;
873 delete next;
874 next = nullptr;
875 }
876
877 AndConstraint*
add(UErrorCode & status)878 OrConstraint::add(UErrorCode& status) {
879 if (U_FAILURE(fInternalStatus)) {
880 status = fInternalStatus;
881 return nullptr;
882 }
883 OrConstraint *curOrConstraint=this;
884 {
885 while (curOrConstraint->next!=nullptr) {
886 curOrConstraint = curOrConstraint->next;
887 }
888 U_ASSERT(curOrConstraint->childNode == nullptr);
889 curOrConstraint->childNode = new AndConstraint();
890 if (curOrConstraint->childNode == nullptr) {
891 status = U_MEMORY_ALLOCATION_ERROR;
892 }
893 }
894 return curOrConstraint->childNode;
895 }
896
897 UBool
isFulfilled(const IFixedDecimal & number)898 OrConstraint::isFulfilled(const IFixedDecimal &number) {
899 OrConstraint* orRule=this;
900 UBool result=FALSE;
901
902 while (orRule!=nullptr && !result) {
903 result=TRUE;
904 AndConstraint* andRule = orRule->childNode;
905 while (andRule!=nullptr && result) {
906 result = andRule->isFulfilled(number);
907 andRule=andRule->next;
908 }
909 orRule = orRule->next;
910 }
911
912 return result;
913 }
914
915
// Deep-copies other: keyword, sample strings and flags, the constraint tree
// (ruleHeader) and every following chain (fNext). Failures are recorded in
// fInternalStatus.
RuleChain::RuleChain(const RuleChain& other) :
        fKeyword(other.fKeyword), fDecimalSamples(other.fDecimalSamples),
        fIntegerSamples(other.fIntegerSamples), fDecimalSamplesUnbounded(other.fDecimalSamplesUnbounded),
        fIntegerSamplesUnbounded(other.fIntegerSamplesUnbounded), fInternalStatus(other.fInternalStatus) {
    if (U_FAILURE(fInternalStatus)) {
        return; // the source is invalid; copy nothing further.
    }

    if (other.ruleHeader != nullptr) {
        ruleHeader = new OrConstraint(*other.ruleHeader);
        if (ruleHeader == nullptr) {
            fInternalStatus = U_MEMORY_ALLOCATION_ERROR;
        } else if (U_FAILURE(ruleHeader->fInternalStatus)) {
            // The OrConstraint was only partially copied: fail and stop here.
            fInternalStatus = ruleHeader->fInternalStatus;
            return;
        }
    }

    if (other.fNext == nullptr) {
        return;
    }
    fNext = new RuleChain(*other.fNext);
    if (fNext == nullptr) {
        fInternalStatus = U_MEMORY_ALLOCATION_ERROR;
    } else if (U_FAILURE(fNext->fInternalStatus)) {
        // The rest of the list was only partially copied: fail as well.
        fInternalStatus = fNext->fInternalStatus;
    }
}
945
~RuleChain()946 RuleChain::~RuleChain() {
947 delete fNext;
948 delete ruleHeader;
949 }
950
951 UnicodeString
select(const IFixedDecimal & number) const952 RuleChain::select(const IFixedDecimal &number) const {
953 if (!number.isNaN() && !number.isInfinite()) {
954 for (const RuleChain *rules = this; rules != nullptr; rules = rules->fNext) {
955 if (rules->ruleHeader->isFulfilled(number)) {
956 return rules->fKeyword;
957 }
958 }
959 }
960 return UnicodeString(TRUE, PLURAL_KEYWORD_OTHER, 5);
961 }
962
tokenString(tokenType tok)963 static UnicodeString tokenString(tokenType tok) {
964 UnicodeString s;
965 switch (tok) {
966 case tVariableN:
967 s.append(LOW_N); break;
968 case tVariableI:
969 s.append(LOW_I); break;
970 case tVariableF:
971 s.append(LOW_F); break;
972 case tVariableV:
973 s.append(LOW_V); break;
974 case tVariableT:
975 s.append(LOW_T); break;
976 default:
977 s.append(TILDE);
978 }
979 return s;
980 }
981
982 void
dumpRules(UnicodeString & result)983 RuleChain::dumpRules(UnicodeString& result) {
984 UChar digitString[16];
985
986 if ( ruleHeader != nullptr ) {
987 result += fKeyword;
988 result += COLON;
989 result += SPACE;
990 OrConstraint* orRule=ruleHeader;
991 while ( orRule != nullptr ) {
992 AndConstraint* andRule=orRule->childNode;
993 while ( andRule != nullptr ) {
994 if ((andRule->op==AndConstraint::NONE) && (andRule->rangeList==nullptr) && (andRule->value == -1)) {
995 // Empty Rules.
996 } else if ( (andRule->op==AndConstraint::NONE) && (andRule->rangeList==nullptr) ) {
997 result += tokenString(andRule->digitsType);
998 result += UNICODE_STRING_SIMPLE(" is ");
999 if (andRule->negated) {
1000 result += UNICODE_STRING_SIMPLE("not ");
1001 }
1002 uprv_itou(digitString,16, andRule->value,10,0);
1003 result += UnicodeString(digitString);
1004 }
1005 else {
1006 result += tokenString(andRule->digitsType);
1007 result += SPACE;
1008 if (andRule->op==AndConstraint::MOD) {
1009 result += UNICODE_STRING_SIMPLE("mod ");
1010 uprv_itou(digitString,16, andRule->opNum,10,0);
1011 result += UnicodeString(digitString);
1012 }
1013 if (andRule->rangeList==nullptr) {
1014 if (andRule->negated) {
1015 result += UNICODE_STRING_SIMPLE(" is not ");
1016 uprv_itou(digitString,16, andRule->value,10,0);
1017 result += UnicodeString(digitString);
1018 }
1019 else {
1020 result += UNICODE_STRING_SIMPLE(" is ");
1021 uprv_itou(digitString,16, andRule->value,10,0);
1022 result += UnicodeString(digitString);
1023 }
1024 }
1025 else {
1026 if (andRule->negated) {
1027 if ( andRule->integerOnly ) {
1028 result += UNICODE_STRING_SIMPLE(" not in ");
1029 }
1030 else {
1031 result += UNICODE_STRING_SIMPLE(" not within ");
1032 }
1033 }
1034 else {
1035 if ( andRule->integerOnly ) {
1036 result += UNICODE_STRING_SIMPLE(" in ");
1037 }
1038 else {
1039 result += UNICODE_STRING_SIMPLE(" within ");
1040 }
1041 }
1042 for (int32_t r=0; r<andRule->rangeList->size(); r+=2) {
1043 int32_t rangeLo = andRule->rangeList->elementAti(r);
1044 int32_t rangeHi = andRule->rangeList->elementAti(r+1);
1045 uprv_itou(digitString,16, rangeLo, 10, 0);
1046 result += UnicodeString(digitString);
1047 result += UNICODE_STRING_SIMPLE("..");
1048 uprv_itou(digitString,16, rangeHi, 10,0);
1049 result += UnicodeString(digitString);
1050 if (r+2 < andRule->rangeList->size()) {
1051 result += UNICODE_STRING_SIMPLE(", ");
1052 }
1053 }
1054 }
1055 }
1056 if ( (andRule=andRule->next) != nullptr) {
1057 result += UNICODE_STRING_SIMPLE(" and ");
1058 }
1059 }
1060 if ( (orRule = orRule->next) != nullptr ) {
1061 result += UNICODE_STRING_SIMPLE(" or ");
1062 }
1063 }
1064 }
1065 if ( fNext != nullptr ) {
1066 result += UNICODE_STRING_SIMPLE("; ");
1067 fNext->dumpRules(result);
1068 }
1069 }
1070
1071
1072 UErrorCode
getKeywords(int32_t capacityOfKeywords,UnicodeString * keywords,int32_t & arraySize) const1073 RuleChain::getKeywords(int32_t capacityOfKeywords, UnicodeString* keywords, int32_t& arraySize) const {
1074 if (U_FAILURE(fInternalStatus)) {
1075 return fInternalStatus;
1076 }
1077 if ( arraySize < capacityOfKeywords-1 ) {
1078 keywords[arraySize++]=fKeyword;
1079 }
1080 else {
1081 return U_BUFFER_OVERFLOW_ERROR;
1082 }
1083
1084 if ( fNext != nullptr ) {
1085 return fNext->getKeywords(capacityOfKeywords, keywords, arraySize);
1086 }
1087 else {
1088 return U_ZERO_ERROR;
1089 }
1090 }
1091
1092 UBool
isKeyword(const UnicodeString & keywordParam) const1093 RuleChain::isKeyword(const UnicodeString& keywordParam) const {
1094 if ( fKeyword == keywordParam ) {
1095 return TRUE;
1096 }
1097
1098 if ( fNext != nullptr ) {
1099 return fNext->isKeyword(keywordParam);
1100 }
1101 else {
1102 return FALSE;
1103 }
1104 }
1105
1106
PluralRuleParser()1107 PluralRuleParser::PluralRuleParser() :
1108 ruleIndex(0), token(), type(none), prevType(none),
1109 curAndConstraint(nullptr), currentChain(nullptr), rangeLowIdx(-1), rangeHiIdx(-1)
1110 {
1111 }
1112
~PluralRuleParser()1113 PluralRuleParser::~PluralRuleParser() {
1114 }
1115
1116
1117 int32_t
getNumberValue(const UnicodeString & token)1118 PluralRuleParser::getNumberValue(const UnicodeString& token) {
1119 int32_t i;
1120 char digits[128];
1121
1122 i = token.extract(0, token.length(), digits, UPRV_LENGTHOF(digits), US_INV);
1123 digits[i]='\0';
1124
1125 return((int32_t)atoi(digits));
1126 }
1127
1128
1129 void
checkSyntax(UErrorCode & status)1130 PluralRuleParser::checkSyntax(UErrorCode &status)
1131 {
1132 if (U_FAILURE(status)) {
1133 return;
1134 }
1135 if (!(prevType==none || prevType==tSemiColon)) {
1136 type = getKeyType(token, type); // Switch token type from tKeyword if we scanned a reserved word,
1137 // and we are not at the start of a rule, where a
1138 // keyword is expected.
1139 }
1140
1141 switch(prevType) {
1142 case none:
1143 case tSemiColon:
1144 if (type!=tKeyword && type != tEOF) {
1145 status = U_UNEXPECTED_TOKEN;
1146 }
1147 break;
1148 case tVariableN:
1149 case tVariableI:
1150 case tVariableF:
1151 case tVariableT:
1152 case tVariableV:
1153 if (type != tIs && type != tMod && type != tIn &&
1154 type != tNot && type != tWithin && type != tEqual && type != tNotEqual) {
1155 status = U_UNEXPECTED_TOKEN;
1156 }
1157 break;
1158 case tKeyword:
1159 if (type != tColon) {
1160 status = U_UNEXPECTED_TOKEN;
1161 }
1162 break;
1163 case tColon:
1164 if (!(type == tVariableN ||
1165 type == tVariableI ||
1166 type == tVariableF ||
1167 type == tVariableT ||
1168 type == tVariableV ||
1169 type == tAt)) {
1170 status = U_UNEXPECTED_TOKEN;
1171 }
1172 break;
1173 case tIs:
1174 if ( type != tNumber && type != tNot) {
1175 status = U_UNEXPECTED_TOKEN;
1176 }
1177 break;
1178 case tNot:
1179 if (type != tNumber && type != tIn && type != tWithin) {
1180 status = U_UNEXPECTED_TOKEN;
1181 }
1182 break;
1183 case tMod:
1184 case tDot2:
1185 case tIn:
1186 case tWithin:
1187 case tEqual:
1188 case tNotEqual:
1189 if (type != tNumber) {
1190 status = U_UNEXPECTED_TOKEN;
1191 }
1192 break;
1193 case tAnd:
1194 case tOr:
1195 if ( type != tVariableN &&
1196 type != tVariableI &&
1197 type != tVariableF &&
1198 type != tVariableT &&
1199 type != tVariableV) {
1200 status = U_UNEXPECTED_TOKEN;
1201 }
1202 break;
1203 case tComma:
1204 if (type != tNumber) {
1205 status = U_UNEXPECTED_TOKEN;
1206 }
1207 break;
1208 case tNumber:
1209 if (type != tDot2 && type != tSemiColon && type != tIs && type != tNot &&
1210 type != tIn && type != tEqual && type != tNotEqual && type != tWithin &&
1211 type != tAnd && type != tOr && type != tComma && type != tAt &&
1212 type != tEOF)
1213 {
1214 status = U_UNEXPECTED_TOKEN;
1215 }
1216 // TODO: a comma following a number that is not part of a range will be allowed.
1217 // It's not the only case of this sort of thing. Parser needs a re-write.
1218 break;
1219 case tAt:
1220 if (type != tDecimal && type != tInteger) {
1221 status = U_UNEXPECTED_TOKEN;
1222 }
1223 break;
1224 default:
1225 status = U_UNEXPECTED_TOKEN;
1226 break;
1227 }
1228 }
1229
1230
1231 /*
1232 * Scan the next token from the input rules.
1233 * rules and returned token type are in the parser state variables.
1234 */
1235 void
getNextToken(UErrorCode & status)1236 PluralRuleParser::getNextToken(UErrorCode &status)
1237 {
1238 if (U_FAILURE(status)) {
1239 return;
1240 }
1241
1242 UChar ch;
1243 while (ruleIndex < ruleSrc->length()) {
1244 ch = ruleSrc->charAt(ruleIndex);
1245 type = charType(ch);
1246 if (type != tSpace) {
1247 break;
1248 }
1249 ++(ruleIndex);
1250 }
1251 if (ruleIndex >= ruleSrc->length()) {
1252 type = tEOF;
1253 return;
1254 }
1255 int32_t curIndex= ruleIndex;
1256
1257 switch (type) {
1258 case tColon:
1259 case tSemiColon:
1260 case tComma:
1261 case tEllipsis:
1262 case tTilde: // scanned '~'
1263 case tAt: // scanned '@'
1264 case tEqual: // scanned '='
1265 case tMod: // scanned '%'
1266 // Single character tokens.
1267 ++curIndex;
1268 break;
1269
1270 case tNotEqual: // scanned '!'
1271 if (ruleSrc->charAt(curIndex+1) == EQUALS) {
1272 curIndex += 2;
1273 } else {
1274 type = none;
1275 curIndex += 1;
1276 }
1277 break;
1278
1279 case tKeyword:
1280 while (type == tKeyword && ++curIndex < ruleSrc->length()) {
1281 ch = ruleSrc->charAt(curIndex);
1282 type = charType(ch);
1283 }
1284 type = tKeyword;
1285 break;
1286
1287 case tNumber:
1288 while (type == tNumber && ++curIndex < ruleSrc->length()) {
1289 ch = ruleSrc->charAt(curIndex);
1290 type = charType(ch);
1291 }
1292 type = tNumber;
1293 break;
1294
1295 case tDot:
1296 // We could be looking at either ".." in a range, or "..." at the end of a sample.
1297 if (curIndex+1 >= ruleSrc->length() || ruleSrc->charAt(curIndex+1) != DOT) {
1298 ++curIndex;
1299 break; // Single dot
1300 }
1301 if (curIndex+2 >= ruleSrc->length() || ruleSrc->charAt(curIndex+2) != DOT) {
1302 curIndex += 2;
1303 type = tDot2;
1304 break; // double dot
1305 }
1306 type = tEllipsis;
1307 curIndex += 3;
1308 break; // triple dot
1309
1310 default:
1311 status = U_UNEXPECTED_TOKEN;
1312 ++curIndex;
1313 break;
1314 }
1315
1316 U_ASSERT(ruleIndex <= ruleSrc->length());
1317 U_ASSERT(curIndex <= ruleSrc->length());
1318 token=UnicodeString(*ruleSrc, ruleIndex, curIndex-ruleIndex);
1319 ruleIndex = curIndex;
1320 }
1321
1322 tokenType
charType(UChar ch)1323 PluralRuleParser::charType(UChar ch) {
1324 if ((ch>=U_ZERO) && (ch<=U_NINE)) {
1325 return tNumber;
1326 }
1327 if (ch>=LOW_A && ch<=LOW_Z) {
1328 return tKeyword;
1329 }
1330 switch (ch) {
1331 case COLON:
1332 return tColon;
1333 case SPACE:
1334 return tSpace;
1335 case SEMI_COLON:
1336 return tSemiColon;
1337 case DOT:
1338 return tDot;
1339 case COMMA:
1340 return tComma;
1341 case EXCLAMATION:
1342 return tNotEqual;
1343 case EQUALS:
1344 return tEqual;
1345 case PERCENT_SIGN:
1346 return tMod;
1347 case AT:
1348 return tAt;
1349 case ELLIPSIS:
1350 return tEllipsis;
1351 case TILDE:
1352 return tTilde;
1353 default :
1354 return none;
1355 }
1356 }
1357
1358
1359 // Set token type for reserved words in the Plural Rule syntax.
1360
1361 tokenType
getKeyType(const UnicodeString & token,tokenType keyType)1362 PluralRuleParser::getKeyType(const UnicodeString &token, tokenType keyType)
1363 {
1364 if (keyType != tKeyword) {
1365 return keyType;
1366 }
1367
1368 if (0 == token.compare(PK_VAR_N, 1)) {
1369 keyType = tVariableN;
1370 } else if (0 == token.compare(PK_VAR_I, 1)) {
1371 keyType = tVariableI;
1372 } else if (0 == token.compare(PK_VAR_F, 1)) {
1373 keyType = tVariableF;
1374 } else if (0 == token.compare(PK_VAR_T, 1)) {
1375 keyType = tVariableT;
1376 } else if (0 == token.compare(PK_VAR_V, 1)) {
1377 keyType = tVariableV;
1378 } else if (0 == token.compare(PK_IS, 2)) {
1379 keyType = tIs;
1380 } else if (0 == token.compare(PK_AND, 3)) {
1381 keyType = tAnd;
1382 } else if (0 == token.compare(PK_IN, 2)) {
1383 keyType = tIn;
1384 } else if (0 == token.compare(PK_WITHIN, 6)) {
1385 keyType = tWithin;
1386 } else if (0 == token.compare(PK_NOT, 3)) {
1387 keyType = tNot;
1388 } else if (0 == token.compare(PK_MOD, 3)) {
1389 keyType = tMod;
1390 } else if (0 == token.compare(PK_OR, 2)) {
1391 keyType = tOr;
1392 } else if (0 == token.compare(PK_DECIMAL, 7)) {
1393 keyType = tDecimal;
1394 } else if (0 == token.compare(PK_INTEGER, 7)) {
1395 keyType = tInteger;
1396 }
1397 return keyType;
1398 }
1399
1400
PluralKeywordEnumeration(RuleChain * header,UErrorCode & status)1401 PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode& status)
1402 : pos(0), fKeywordNames(status) {
1403 if (U_FAILURE(status)) {
1404 return;
1405 }
1406 fKeywordNames.setDeleter(uprv_deleteUObject);
1407 UBool addKeywordOther = TRUE;
1408 RuleChain *node = header;
1409 while (node != nullptr) {
1410 auto newElem = new UnicodeString(node->fKeyword);
1411 if (newElem == nullptr) {
1412 status = U_MEMORY_ALLOCATION_ERROR;
1413 return;
1414 }
1415 fKeywordNames.addElement(newElem, status);
1416 if (U_FAILURE(status)) {
1417 delete newElem;
1418 return;
1419 }
1420 if (0 == node->fKeyword.compare(PLURAL_KEYWORD_OTHER, 5)) {
1421 addKeywordOther = FALSE;
1422 }
1423 node = node->fNext;
1424 }
1425
1426 if (addKeywordOther) {
1427 auto newElem = new UnicodeString(PLURAL_KEYWORD_OTHER);
1428 if (newElem == nullptr) {
1429 status = U_MEMORY_ALLOCATION_ERROR;
1430 return;
1431 }
1432 fKeywordNames.addElement(newElem, status);
1433 if (U_FAILURE(status)) {
1434 delete newElem;
1435 return;
1436 }
1437 }
1438 }
1439
1440 const UnicodeString*
snext(UErrorCode & status)1441 PluralKeywordEnumeration::snext(UErrorCode& status) {
1442 if (U_SUCCESS(status) && pos < fKeywordNames.size()) {
1443 return (const UnicodeString*)fKeywordNames.elementAt(pos++);
1444 }
1445 return nullptr;
1446 }
1447
1448 void
reset(UErrorCode &)1449 PluralKeywordEnumeration::reset(UErrorCode& /*status*/) {
1450 pos=0;
1451 }
1452
1453 int32_t
count(UErrorCode &) const1454 PluralKeywordEnumeration::count(UErrorCode& /*status*/) const {
1455 return fKeywordNames.size();
1456 }
1457
~PluralKeywordEnumeration()1458 PluralKeywordEnumeration::~PluralKeywordEnumeration() {
1459 }
1460
tokenTypeToPluralOperand(tokenType tt)1461 PluralOperand tokenTypeToPluralOperand(tokenType tt) {
1462 switch(tt) {
1463 case tVariableN:
1464 return PLURAL_OPERAND_N;
1465 case tVariableI:
1466 return PLURAL_OPERAND_I;
1467 case tVariableF:
1468 return PLURAL_OPERAND_F;
1469 case tVariableV:
1470 return PLURAL_OPERAND_V;
1471 case tVariableT:
1472 return PLURAL_OPERAND_T;
1473 default:
1474 U_ASSERT(FALSE); // unexpected.
1475 return PLURAL_OPERAND_N;
1476 }
1477 }
1478
FixedDecimal(double n,int32_t v,int64_t f)1479 FixedDecimal::FixedDecimal(double n, int32_t v, int64_t f) {
1480 init(n, v, f);
1481 // check values. TODO make into unit test.
1482 //
1483 // long visiblePower = (int) Math.pow(10, v);
1484 // if (decimalDigits > visiblePower) {
1485 // throw new IllegalArgumentException();
1486 // }
1487 // double fraction = intValue + (decimalDigits / (double) visiblePower);
1488 // if (fraction != source) {
1489 // double diff = Math.abs(fraction - source)/(Math.abs(fraction) + Math.abs(source));
1490 // if (diff > 0.00000001d) {
1491 // throw new IllegalArgumentException();
1492 // }
1493 // }
1494 }
1495
FixedDecimal(double n,int32_t v)1496 FixedDecimal::FixedDecimal(double n, int32_t v) {
1497 // Ugly, but for samples we don't care.
1498 init(n, v, getFractionalDigits(n, v));
1499 }
1500
FixedDecimal(double n)1501 FixedDecimal::FixedDecimal(double n) {
1502 init(n);
1503 }
1504
FixedDecimal()1505 FixedDecimal::FixedDecimal() {
1506 init(0, 0, 0);
1507 }
1508
1509
1510 // Create a FixedDecimal from a UnicodeString containing a number.
1511 // Inefficient, but only used for samples, so simplicity trumps efficiency.
1512
FixedDecimal(const UnicodeString & num,UErrorCode & status)1513 FixedDecimal::FixedDecimal(const UnicodeString &num, UErrorCode &status) {
1514 CharString cs;
1515 cs.appendInvariantChars(num, status);
1516 DecimalQuantity dl;
1517 dl.setToDecNumber(cs.toStringPiece(), status);
1518 if (U_FAILURE(status)) {
1519 init(0, 0, 0);
1520 return;
1521 }
1522 int32_t decimalPoint = num.indexOf(DOT);
1523 double n = dl.toDouble();
1524 if (decimalPoint == -1) {
1525 init(n, 0, 0);
1526 } else {
1527 int32_t v = num.length() - decimalPoint - 1;
1528 init(n, v, getFractionalDigits(n, v));
1529 }
1530 }
1531
1532
// Copy constructor: duplicates every field assigned by init() from `other`.
FixedDecimal::FixedDecimal(const FixedDecimal &other) {
    // Numeric value and its decomposition.
    source = other.source;
    intValue = other.intValue;
    visibleDecimalDigitCount = other.visibleDecimalDigitCount;
    decimalDigits = other.decimalDigits;
    decimalDigitsWithoutTrailingZeros = other.decimalDigitsWithoutTrailingZeros;

    // Classification flags.
    isNegative = other.isNegative;
    _hasIntegerValue = other._hasIntegerValue;
    _isNaN = other._isNaN;
    _isInfinite = other._isInfinite;
}

// The destructor is compiler-generated.
FixedDecimal::~FixedDecimal() = default;
1546
1547
init(double n)1548 void FixedDecimal::init(double n) {
1549 int32_t numFractionDigits = decimals(n);
1550 init(n, numFractionDigits, getFractionalDigits(n, numFractionDigits));
1551 }
1552
1553
init(double n,int32_t v,int64_t f)1554 void FixedDecimal::init(double n, int32_t v, int64_t f) {
1555 isNegative = n < 0.0;
1556 source = fabs(n);
1557 _isNaN = uprv_isNaN(source);
1558 _isInfinite = uprv_isInfinite(source);
1559 if (_isNaN || _isInfinite) {
1560 v = 0;
1561 f = 0;
1562 intValue = 0;
1563 _hasIntegerValue = FALSE;
1564 } else {
1565 intValue = (int64_t)source;
1566 _hasIntegerValue = (source == intValue);
1567 }
1568
1569 visibleDecimalDigitCount = v;
1570 decimalDigits = f;
1571 if (f == 0) {
1572 decimalDigitsWithoutTrailingZeros = 0;
1573 } else {
1574 int64_t fdwtz = f;
1575 while ((fdwtz%10) == 0) {
1576 fdwtz /= 10;
1577 }
1578 decimalDigitsWithoutTrailingZeros = fdwtz;
1579 }
1580 }
1581
1582
1583 // Fast path only exact initialization. Return true if successful.
1584 // Note: Do not multiply by 10 each time through loop, rounding cruft can build
1585 // up that makes the check for an integer result fail.
1586 // A single multiply of the original number works more reliably.
1587 static int32_t p10[] = {1, 10, 100, 1000, 10000};
quickInit(double n)1588 UBool FixedDecimal::quickInit(double n) {
1589 UBool success = FALSE;
1590 n = fabs(n);
1591 int32_t numFractionDigits;
1592 for (numFractionDigits = 0; numFractionDigits <= 3; numFractionDigits++) {
1593 double scaledN = n * p10[numFractionDigits];
1594 if (scaledN == floor(scaledN)) {
1595 success = TRUE;
1596 break;
1597 }
1598 }
1599 if (success) {
1600 init(n, numFractionDigits, getFractionalDigits(n, numFractionDigits));
1601 }
1602 return success;
1603 }
1604
1605
1606
decimals(double n)1607 int32_t FixedDecimal::decimals(double n) {
1608 // Count the number of decimal digits in the fraction part of the number, excluding trailing zeros.
1609 // fastpath the common cases, integers or fractions with 3 or fewer digits
1610 n = fabs(n);
1611 for (int ndigits=0; ndigits<=3; ndigits++) {
1612 double scaledN = n * p10[ndigits];
1613 if (scaledN == floor(scaledN)) {
1614 return ndigits;
1615 }
1616 }
1617
1618 // Slow path, convert with sprintf, parse converted output.
1619 char buf[30] = {0};
1620 sprintf(buf, "%1.15e", n);
1621 // formatted number looks like this: 1.234567890123457e-01
1622 int exponent = atoi(buf+18);
1623 int numFractionDigits = 15;
1624 for (int i=16; ; --i) {
1625 if (buf[i] != '0') {
1626 break;
1627 }
1628 --numFractionDigits;
1629 }
1630 numFractionDigits -= exponent; // Fraction part of fixed point representation.
1631 return numFractionDigits;
1632 }
1633
1634
1635 // Get the fraction digits of a double, represented as an integer.
1636 // v is the number of visible fraction digits in the displayed form of the number.
1637 // Example: n = 1001.234, v = 6, result = 234000
1638 // TODO: need to think through how this is used in the plural rule context.
1639 // This function can easily encounter integer overflow,
1640 // and can easily return noise digits when the precision of a double is exceeded.
1641
getFractionalDigits(double n,int32_t v)1642 int64_t FixedDecimal::getFractionalDigits(double n, int32_t v) {
1643 if (v == 0 || n == floor(n) || uprv_isNaN(n) || uprv_isPositiveInfinity(n)) {
1644 return 0;
1645 }
1646 n = fabs(n);
1647 double fract = n - floor(n);
1648 switch (v) {
1649 case 1: return (int64_t)(fract*10.0 + 0.5);
1650 case 2: return (int64_t)(fract*100.0 + 0.5);
1651 case 3: return (int64_t)(fract*1000.0 + 0.5);
1652 default:
1653 double scaled = floor(fract * pow(10.0, (double)v) + 0.5);
1654 if (scaled > U_INT64_MAX) {
1655 return U_INT64_MAX;
1656 } else {
1657 return (int64_t)scaled;
1658 }
1659 }
1660 }
1661
1662
adjustForMinFractionDigits(int32_t minFractionDigits)1663 void FixedDecimal::adjustForMinFractionDigits(int32_t minFractionDigits) {
1664 int32_t numTrailingFractionZeros = minFractionDigits - visibleDecimalDigitCount;
1665 if (numTrailingFractionZeros > 0) {
1666 for (int32_t i=0; i<numTrailingFractionZeros; i++) {
1667 // Do not let the decimalDigits value overflow if there are many trailing zeros.
1668 // Limit the value to 18 digits, the most that a 64 bit int can fully represent.
1669 if (decimalDigits >= 100000000000000000LL) {
1670 break;
1671 }
1672 decimalDigits *= 10;
1673 }
1674 visibleDecimalDigitCount += numTrailingFractionZeros;
1675 }
1676 }
1677
1678
getPluralOperand(PluralOperand operand) const1679 double FixedDecimal::getPluralOperand(PluralOperand operand) const {
1680 switch(operand) {
1681 case PLURAL_OPERAND_N: return source;
1682 case PLURAL_OPERAND_I: return static_cast<double>(intValue);
1683 case PLURAL_OPERAND_F: return static_cast<double>(decimalDigits);
1684 case PLURAL_OPERAND_T: return static_cast<double>(decimalDigitsWithoutTrailingZeros);
1685 case PLURAL_OPERAND_V: return visibleDecimalDigitCount;
1686 default:
1687 U_ASSERT(FALSE); // unexpected.
1688 return source;
1689 }
1690 }
1691
isNaN() const1692 bool FixedDecimal::isNaN() const {
1693 return _isNaN;
1694 }
1695
isInfinite() const1696 bool FixedDecimal::isInfinite() const {
1697 return _isInfinite;
1698 }
1699
hasIntegerValue() const1700 bool FixedDecimal::hasIntegerValue() const {
1701 return _hasIntegerValue;
1702 }
1703
isNanOrInfinity() const1704 bool FixedDecimal::isNanOrInfinity() const {
1705 return _isNaN || _isInfinite;
1706 }
1707
getVisibleFractionDigitCount() const1708 int32_t FixedDecimal::getVisibleFractionDigitCount() const {
1709 return visibleDecimalDigitCount;
1710 }
1711
1712
1713
PluralAvailableLocalesEnumeration(UErrorCode & status)1714 PluralAvailableLocalesEnumeration::PluralAvailableLocalesEnumeration(UErrorCode &status) {
1715 fOpenStatus = status;
1716 if (U_FAILURE(status)) {
1717 return;
1718 }
1719 fOpenStatus = U_ZERO_ERROR; // clear any warnings.
1720 LocalUResourceBundlePointer rb(ures_openDirect(nullptr, "plurals", &fOpenStatus));
1721 fLocales = ures_getByKey(rb.getAlias(), "locales", nullptr, &fOpenStatus);
1722 }
1723
~PluralAvailableLocalesEnumeration()1724 PluralAvailableLocalesEnumeration::~PluralAvailableLocalesEnumeration() {
1725 ures_close(fLocales);
1726 ures_close(fRes);
1727 fLocales = nullptr;
1728 fRes = nullptr;
1729 }
1730
next(int32_t * resultLength,UErrorCode & status)1731 const char *PluralAvailableLocalesEnumeration::next(int32_t *resultLength, UErrorCode &status) {
1732 if (U_FAILURE(status)) {
1733 return nullptr;
1734 }
1735 if (U_FAILURE(fOpenStatus)) {
1736 status = fOpenStatus;
1737 return nullptr;
1738 }
1739 fRes = ures_getNextResource(fLocales, fRes, &status);
1740 if (fRes == nullptr || U_FAILURE(status)) {
1741 if (status == U_INDEX_OUTOFBOUNDS_ERROR) {
1742 status = U_ZERO_ERROR;
1743 }
1744 return nullptr;
1745 }
1746 const char *result = ures_getKey(fRes);
1747 if (resultLength != nullptr) {
1748 *resultLength = static_cast<int32_t>(uprv_strlen(result));
1749 }
1750 return result;
1751 }
1752
1753
reset(UErrorCode & status)1754 void PluralAvailableLocalesEnumeration::reset(UErrorCode &status) {
1755 if (U_FAILURE(status)) {
1756 return;
1757 }
1758 if (U_FAILURE(fOpenStatus)) {
1759 status = fOpenStatus;
1760 return;
1761 }
1762 ures_resetIterator(fLocales);
1763 }
1764
count(UErrorCode & status) const1765 int32_t PluralAvailableLocalesEnumeration::count(UErrorCode &status) const {
1766 if (U_FAILURE(status)) {
1767 return 0;
1768 }
1769 if (U_FAILURE(fOpenStatus)) {
1770 status = fOpenStatus;
1771 return 0;
1772 }
1773 return ures_getSize(fLocales);
1774 }
1775
1776 U_NAMESPACE_END
1777
1778
1779 #endif /* #if !UCONFIG_NO_FORMATTING */
1780
1781 //eof
1782