1 /*
2 *******************************************************************************
3 * Copyright (C) 2007-2011, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 *******************************************************************************
6 *
7 * File PLURRULE.CPP
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 *******************************************************************************
13 */
14
15
16 #include "unicode/utypes.h"
17 #include "unicode/localpointer.h"
18 #include "unicode/plurrule.h"
19 #include "unicode/ures.h"
20 #include "cmemory.h"
21 #include "cstring.h"
22 #include "hash.h"
23 #include "mutex.h"
24 #include "patternprops.h"
25 #include "plurrule_impl.h"
26 #include "putilimp.h"
27 #include "ucln_in.h"
28 #include "uhash.h"
29 #include "ustrfmt.h"
30 #include "locutil.h"
31
32 #if !UCONFIG_NO_FORMATTING
33
34 U_NAMESPACE_BEGIN
35
// File-scope mutex; PluralRules::initSamples() in this file locks it while it
// checks for and builds the cached sample data.
36 // shared by all instances when lazy-initializing samples
37 static UMTX pluralMutex;
38
// Number of elements of a true array, cast to int32_t.
// Only meaningful for arrays; applied to a pointer it would not yield an element count.
39 #define ARRAY_SIZE(array) (int32_t)(sizeof array / sizeof array[0])
40
// NUL-terminated UChar strings: the "other" keyword, the default rule
// description "other: n", and the words "in", "not", "is", "mod", "and",
// "or", "n" and "within".
41 static const UChar PLURAL_KEYWORD_OTHER[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,0};
42 static const UChar PLURAL_DEFAULT_RULE[]={LOW_O,LOW_T,LOW_H,LOW_E,LOW_R,COLON,SPACE,LOW_N,0};
43 static const UChar PK_IN[]={LOW_I,LOW_N,0};
44 static const UChar PK_NOT[]={LOW_N,LOW_O,LOW_T,0};
45 static const UChar PK_IS[]={LOW_I,LOW_S,0};
46 static const UChar PK_MOD[]={LOW_M,LOW_O,LOW_D,0};
47 static const UChar PK_AND[]={LOW_A,LOW_N,LOW_D,0};
48 static const UChar PK_OR[]={LOW_O,LOW_R,0};
49 static const UChar PK_VAR_N[]={LOW_N,0};
50 static const UChar PK_WITHIN[]={LOW_W,LOW_I,LOW_T,LOW_H,LOW_I,LOW_N,0};
51
// RTTI boilerplate for the two UObject-derived classes implemented in this file.
52 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralRules)
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration)53 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(PluralKeywordEnumeration)
54
55 PluralRules::PluralRules(UErrorCode& status)
56 : UObject(),
57 mRules(NULL),
58 mParser(NULL),
59 mSamples(NULL),
60 mSampleInfo(NULL),
61 mSampleInfoCount(0)
62 {
63 if (U_FAILURE(status)) {
64 return;
65 }
66 mParser = new RuleParser();
67 if (mParser==NULL) {
68 status = U_MEMORY_ALLOCATION_ERROR;
69 }
70 }
71
/**
 * Copy constructor. Every owned pointer starts out NULL so that the
 * assignment operator, which does the actual copying, can release the
 * "old" state safely.
 */
PluralRules::PluralRules(const PluralRules& other)
:   UObject(other),
    mRules(NULL),
    mParser(NULL),
    mSamples(NULL),
    mSampleInfo(NULL),
    mSampleInfoCount(0)
{
    operator=(other);
}
82
~PluralRules()83 PluralRules::~PluralRules() {
84 delete mRules;
85 delete mParser;
86 uprv_free(mSamples);
87 uprv_free(mSampleInfo);
88 }
89
90 PluralRules*
clone() const91 PluralRules::clone() const {
92 return new PluralRules(*this);
93 }
94
95 PluralRules&
operator =(const PluralRules & other)96 PluralRules::operator=(const PluralRules& other) {
97 if (this != &other) {
98 delete mRules;
99 if (other.mRules==NULL) {
100 mRules = NULL;
101 }
102 else {
103 mRules = new RuleChain(*other.mRules);
104 }
105 delete mParser;
106 mParser = new RuleParser();
107
108 uprv_free(mSamples);
109 mSamples = NULL;
110
111 uprv_free(mSampleInfo);
112 mSampleInfo = NULL;
113 mSampleInfoCount = 0;
114 }
115
116 return *this;
117 }
118
119 PluralRules* U_EXPORT2
createRules(const UnicodeString & description,UErrorCode & status)120 PluralRules::createRules(const UnicodeString& description, UErrorCode& status) {
121 RuleChain rules;
122
123 if (U_FAILURE(status)) {
124 return NULL;
125 }
126 PluralRules *newRules = new PluralRules(status);
127 if ( (newRules != NULL)&& U_SUCCESS(status) ) {
128 newRules->parseDescription((UnicodeString &)description, rules, status);
129 if (U_SUCCESS(status)) {
130 newRules->addRules(rules);
131 }
132 }
133 if (U_FAILURE(status)) {
134 delete newRules;
135 return NULL;
136 }
137 else {
138 return newRules;
139 }
140 }
141
142 PluralRules* U_EXPORT2
createDefaultRules(UErrorCode & status)143 PluralRules::createDefaultRules(UErrorCode& status) {
144 return createRules(PLURAL_DEFAULT_RULE, status);
145 }
146
147 PluralRules* U_EXPORT2
forLocale(const Locale & locale,UErrorCode & status)148 PluralRules::forLocale(const Locale& locale, UErrorCode& status) {
149 RuleChain rChain;
150 if (U_FAILURE(status)) {
151 return NULL;
152 }
153 PluralRules *newObj = new PluralRules(status);
154 if (newObj==NULL || U_FAILURE(status)) {
155 delete newObj;
156 return NULL;
157 }
158 UnicodeString locRule = newObj->getRuleFromResource(locale, status);
159 if ((locRule.length() != 0) && U_SUCCESS(status)) {
160 newObj->parseDescription(locRule, rChain, status);
161 if (U_SUCCESS(status)) {
162 newObj->addRules(rChain);
163 }
164 }
165 if (U_FAILURE(status)||(locRule.length() == 0)) {
166 // use default plural rule
167 status = U_ZERO_ERROR;
168 UnicodeString defRule = UnicodeString(PLURAL_DEFAULT_RULE);
169 newObj->parseDescription(defRule, rChain, status);
170 newObj->addRules(rChain);
171 }
172
173 return newObj;
174 }
175
176 UnicodeString
select(int32_t number) const177 PluralRules::select(int32_t number) const {
178 if (mRules == NULL) {
179 return PLURAL_DEFAULT_RULE;
180 }
181 else {
182 return mRules->select(number);
183 }
184 }
185
186 UnicodeString
select(double number) const187 PluralRules::select(double number) const {
188 if (mRules == NULL) {
189 return PLURAL_DEFAULT_RULE;
190 }
191 else {
192 return mRules->select(number);
193 }
194 }
195
196 StringEnumeration*
getKeywords(UErrorCode & status) const197 PluralRules::getKeywords(UErrorCode& status) const {
198 if (U_FAILURE(status)) return NULL;
199 StringEnumeration* nameEnumerator = new PluralKeywordEnumeration(mRules, status);
200 if (U_FAILURE(status)) {
201 delete nameEnumerator;
202 return NULL;
203 }
204
205 return nameEnumerator;
206 }
207
208 double
getUniqueKeywordValue(const UnicodeString & keyword)209 PluralRules::getUniqueKeywordValue(const UnicodeString& keyword) {
210 double val = 0.0;
211 UErrorCode status = U_ZERO_ERROR;
212 int32_t count = getSamplesInternal(keyword, &val, 1, FALSE, status);
213 return count == 1 ? val : UPLRULES_NO_UNIQUE_VALUE;
214 }
215
216 int32_t
getAllKeywordValues(const UnicodeString & keyword,double * dest,int32_t destCapacity,UErrorCode & error)217 PluralRules::getAllKeywordValues(const UnicodeString &keyword, double *dest,
218 int32_t destCapacity, UErrorCode& error) {
219 return getSamplesInternal(keyword, dest, destCapacity, FALSE, error);
220 }
221
222 int32_t
getSamples(const UnicodeString & keyword,double * dest,int32_t destCapacity,UErrorCode & status)223 PluralRules::getSamples(const UnicodeString &keyword, double *dest,
224 int32_t destCapacity, UErrorCode& status) {
225 return getSamplesInternal(keyword, dest, destCapacity, TRUE, status);
226 }
227
228 int32_t
getSamplesInternal(const UnicodeString & keyword,double * dest,int32_t destCapacity,UBool includeUnlimited,UErrorCode & status)229 PluralRules::getSamplesInternal(const UnicodeString &keyword, double *dest,
230 int32_t destCapacity, UBool includeUnlimited,
231 UErrorCode& status) {
232 initSamples(status);
233 if (U_FAILURE(status)) {
234 return -1;
235 }
236 if (destCapacity < 0 || (dest == NULL && destCapacity > 0)) {
237 status = U_ILLEGAL_ARGUMENT_ERROR;
238 return -1;
239 }
240
241 int32_t index = getKeywordIndex(keyword, status);
242 if (index == -1) {
243 return 0;
244 }
245
246 const int32_t LIMIT_MASK = 0x1 << 31;
247
248 if (!includeUnlimited) {
249 if ((mSampleInfo[index] & LIMIT_MASK) == 0) {
250 return -1;
251 }
252 }
253
254 int32_t start = index == 0 ? 0 : mSampleInfo[index - 1] & ~LIMIT_MASK;
255 int32_t limit = mSampleInfo[index] & ~LIMIT_MASK;
256 int32_t len = limit - start;
257 if (len <= destCapacity) {
258 destCapacity = len;
259 } else if (includeUnlimited) {
260 len = destCapacity; // no overflow, and don't report more than we copy
261 } else {
262 status = U_BUFFER_OVERFLOW_ERROR;
263 return len;
264 }
265 for (int32_t i = 0; i < destCapacity; ++i, ++start) {
266 dest[i] = mSamples[start];
267 }
268 return len;
269 }
270
271
272 UBool
isKeyword(const UnicodeString & keyword) const273 PluralRules::isKeyword(const UnicodeString& keyword) const {
274 if ( keyword == PLURAL_KEYWORD_OTHER ) {
275 return true;
276 }
277 else {
278 if (mRules==NULL) {
279 return false;
280 }
281 else {
282 return mRules->isKeyword(keyword);
283 }
284 }
285 }
286
287 UnicodeString
getKeywordOther() const288 PluralRules::getKeywordOther() const {
289 return PLURAL_KEYWORD_OTHER;
290 }
291
292 UBool
operator ==(const PluralRules & other) const293 PluralRules::operator==(const PluralRules& other) const {
294 int32_t limit;
295 const UnicodeString *ptrKeyword;
296 UErrorCode status= U_ZERO_ERROR;
297
298 if ( this == &other ) {
299 return TRUE;
300 }
301 LocalPointer<StringEnumeration> myKeywordList(getKeywords(status));
302 LocalPointer<StringEnumeration> otherKeywordList(other.getKeywords(status));
303 if (U_FAILURE(status)) {
304 return FALSE;
305 }
306
307 if (myKeywordList->count(status)!=otherKeywordList->count(status)) {
308 return FALSE;
309 }
310 myKeywordList->reset(status);
311 while ((ptrKeyword=myKeywordList->snext(status))!=NULL) {
312 if (!other.isKeyword(*ptrKeyword)) {
313 return FALSE;
314 }
315 }
316 otherKeywordList->reset(status);
317 while ((ptrKeyword=otherKeywordList->snext(status))!=NULL) {
318 if (!this->isKeyword(*ptrKeyword)) {
319 return FALSE;
320 }
321 }
322 if (U_FAILURE(status)) {
323 return FALSE;
324 }
325
326 if ((limit=this->getRepeatLimit()) != other.getRepeatLimit()) {
327 return FALSE;
328 }
329 UnicodeString myKeyword, otherKeyword;
330 for (int32_t i=0; i<limit; ++i) {
331 myKeyword = this->select(i);
332 otherKeyword = other.select(i);
333 if (myKeyword!=otherKeyword) {
334 return FALSE;
335 }
336 }
337 return TRUE;
338 }
339
340 void
parseDescription(UnicodeString & data,RuleChain & rules,UErrorCode & status)341 PluralRules::parseDescription(UnicodeString& data, RuleChain& rules, UErrorCode &status)
342 {
343 int32_t ruleIndex=0;
344 UnicodeString token;
345 tokenType type;
346 tokenType prevType=none;
347 RuleChain *ruleChain=NULL;
348 AndConstraint *curAndConstraint=NULL;
349 OrConstraint *orNode=NULL;
350 RuleChain *lastChain=NULL;
351
352 if (U_FAILURE(status)) {
353 return;
354 }
355 UnicodeString ruleData = data.toLower();
356 while (ruleIndex< ruleData.length()) {
357 mParser->getNextToken(ruleData, &ruleIndex, token, type, status);
358 if (U_FAILURE(status)) {
359 return;
360 }
361 mParser->checkSyntax(prevType, type, status);
362 if (U_FAILURE(status)) {
363 return;
364 }
365 switch (type) {
366 case tAnd:
367 curAndConstraint = curAndConstraint->add();
368 break;
369 case tOr:
370 lastChain = &rules;
371 while (lastChain->next !=NULL) {
372 lastChain = lastChain->next;
373 }
374 orNode=lastChain->ruleHeader;
375 while (orNode->next != NULL) {
376 orNode = orNode->next;
377 }
378 orNode->next= new OrConstraint();
379 orNode=orNode->next;
380 orNode->next=NULL;
381 curAndConstraint = orNode->add();
382 break;
383 case tIs:
384 curAndConstraint->rangeHigh=-1;
385 break;
386 case tNot:
387 curAndConstraint->notIn=TRUE;
388 break;
389 case tIn:
390 curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH;
391 curAndConstraint->integerOnly = TRUE;
392 break;
393 case tWithin:
394 curAndConstraint->rangeHigh=PLURAL_RANGE_HIGH;
395 break;
396 case tNumber:
397 if ( (curAndConstraint->op==AndConstraint::MOD)&&
398 (curAndConstraint->opNum == -1 ) ) {
399 curAndConstraint->opNum=getNumberValue(token);
400 }
401 else {
402 if (curAndConstraint->rangeLow == -1) {
403 curAndConstraint->rangeLow=getNumberValue(token);
404 }
405 else {
406 curAndConstraint->rangeHigh=getNumberValue(token);
407 }
408 }
409 break;
410 case tMod:
411 curAndConstraint->op=AndConstraint::MOD;
412 break;
413 case tKeyword:
414 if (ruleChain==NULL) {
415 ruleChain = &rules;
416 }
417 else {
418 while (ruleChain->next!=NULL){
419 ruleChain=ruleChain->next;
420 }
421 ruleChain=ruleChain->next=new RuleChain();
422 }
423 if (ruleChain->ruleHeader != NULL) {
424 delete ruleChain->ruleHeader;
425 }
426 orNode = ruleChain->ruleHeader = new OrConstraint();
427 curAndConstraint = orNode->add();
428 ruleChain->keyword = token;
429 break;
430 default:
431 break;
432 }
433 prevType=type;
434 }
435 }
436
437 int32_t
getNumberValue(const UnicodeString & token) const438 PluralRules::getNumberValue(const UnicodeString& token) const {
439 int32_t i;
440 char digits[128];
441
442 i = token.extract(0, token.length(), digits, ARRAY_SIZE(digits), US_INV);
443 digits[i]='\0';
444
445 return((int32_t)atoi(digits));
446 }
447
448
449 void
getNextLocale(const UnicodeString & localeData,int32_t * curIndex,UnicodeString & localeName)450 PluralRules::getNextLocale(const UnicodeString& localeData, int32_t* curIndex, UnicodeString& localeName) {
451 int32_t i=*curIndex;
452
453 localeName.remove();
454 while (i< localeData.length()) {
455 if ( (localeData.charAt(i)!= SPACE) && (localeData.charAt(i)!= COMMA) ) {
456 break;
457 }
458 i++;
459 }
460
461 while (i< localeData.length()) {
462 if ( (localeData.charAt(i)== SPACE) || (localeData.charAt(i)== COMMA) ) {
463 break;
464 }
465 localeName+=localeData.charAt(i++);
466 }
467 *curIndex=i;
468 }
469
470
471 int32_t
getRepeatLimit() const472 PluralRules::getRepeatLimit() const {
473 if (mRules!=NULL) {
474 return mRules->getRepeatLimit();
475 }
476 else {
477 return 0;
478 }
479 }
480
481 int32_t
getKeywordIndex(const UnicodeString & keyword,UErrorCode & status) const482 PluralRules::getKeywordIndex(const UnicodeString& keyword,
483 UErrorCode& status) const {
484 if (U_SUCCESS(status)) {
485 int32_t n = 0;
486 RuleChain* rc = mRules;
487 while (rc != NULL) {
488 if (rc->ruleHeader != NULL) {
489 if (rc->keyword == keyword) {
490 return n;
491 }
492 ++n;
493 }
494 rc = rc->next;
495 }
496 if (keyword == PLURAL_KEYWORD_OTHER) {
497 return n;
498 }
499 }
500 return -1;
501 }
502
/** A sample value together with the index of the rule it was assigned to. */
struct SampleRecord {
    int32_t ruleIndex;
    double value;
};
507
// Lazily builds the cached sample data (mSamples, mSampleInfo, mSampleInfoCount).
// Returns immediately if status is already a failure or if mSamples is already
// set; the check and the construction both run while pluralMutex is held.
// The members are only assigned at the very end, so on an allocation failure
// (status = U_MEMORY_ALLOCATION_ERROR) they are left as they were.
508 void
initSamples(UErrorCode & status)509 PluralRules::initSamples(UErrorCode& status) {
510 if (U_FAILURE(status)) {
511 return;
512 }
513 Mutex lock(&pluralMutex);
514
515 if (mSamples) {
516 return;
517 }
518
519 // Note, the original design let you have multiple rules with the same keyword. But
520 // we don't use that in our data and existing functions in this implementation don't
521 // fully support it (for example, the returned keywords is a list and not a set).
522 //
523 // So I don't support this here either. If you ask for samples, or for all values,
524 // you will get information about the first rule with that keyword, not all rules with
525 // that keyword.
526
// Count the rules that have a ruleHeader and remember the position of the
// first one whose keyword is "other".
527 int32_t maxIndex = 0;
528 int32_t otherIndex = -1; // the value -1 will indicate we added 'other' at end
529 RuleChain* rc = mRules;
530 while (rc != NULL) {
531 if (rc->ruleHeader != NULL) {
532 if (otherIndex == -1 && rc->keyword == PLURAL_KEYWORD_OTHER) {
533 otherIndex = maxIndex;
534 }
535 ++maxIndex;
536 }
537 rc = rc->next;
538 }
// No explicit "other" rule: reserve one extra trailing slot for it.
539 if (otherIndex == -1) {
540 ++maxIndex;
541 }
542
543 LocalMemory<int32_t> newSampleInfo;
544 if (NULL == newSampleInfo.allocateInsteadAndCopy(maxIndex)) {
545 status = U_MEMORY_ALLOCATION_ERROR;
546 return;
547 }
548
// Top bit of each info word = "limited" flag; the remaining bits are used
// first as a per-rule sample count and later as an end offset.
549 const int32_t LIMIT_MASK = 0x1 << 31;
550
// Initialise every slot with a count of 0, plus the flag when the rule
// reports isLimited().
551 rc = mRules;
552 int32_t n = 0;
553 while (rc != NULL) {
554 if (rc->ruleHeader != NULL) {
555 newSampleInfo[n++] = rc->ruleHeader->isLimited() ? LIMIT_MASK : 0;
556 }
557 rc = rc->next;
558 }
559 if (otherIndex == -1) {
560 newSampleInfo[maxIndex - 1] = 0; // unlimited
561 }
562
563 MaybeStackArray<SampleRecord, 10> newSamples;
564 int32_t sampleCount = 0;
565
// Number of candidate values to try; never fewer than 10.
566 int32_t limit = getRepeatLimit() * MAX_SAMPLES * 2;
567 if (limit < 10) {
568 limit = 10;
569 }
570
// Try the candidates 0, 0.5, 1, 1.5, ... (i / 2.0) and record each one under
// the first rule it fulfils, or under "other" when no rule matches. Stop early
// once every unflagged slot has collected MAX_SAMPLES values.
571 for (int i = 0, keywordsRemaining = maxIndex;
572 keywordsRemaining > 0 && i < limit;
573 ++i) {
574 double val = i / 2.0;
575
576 n = 0;
577 rc = mRules;
578 int32_t found = -1;
579 while (rc != NULL) {
580 if (rc->ruleHeader != NULL) {
581 if (rc->ruleHeader->isFulfilled(val)) {
582 found = n;
583 break;
584 }
585 ++n;
586 }
587 rc = rc->next;
588 }
589 if (found == -1) {
590 // 'other'. If there is an 'other' rule, the rule set is bad since nothing
591 // should leak through, but we don't bother to report that here.
592 found = otherIndex == -1 ? maxIndex - 1 : otherIndex;
593 }
// A slot compares equal to MAX_SAMPLES only when its flag bit is clear, so
// only unflagged slots are capped here.
594 if (newSampleInfo[found] == MAX_SAMPLES) { // limit flag not set
595 continue;
596 }
597 newSampleInfo[found] += 1; // won't impact limit flag
598
// Grow the record array when it is full.
599 if (sampleCount == newSamples.getCapacity()) {
600 int32_t newCapacity = sampleCount < 20 ? 128 : sampleCount * 2;
601 if (NULL == newSamples.resize(newCapacity, sampleCount)) {
602 status = U_MEMORY_ALLOCATION_ERROR;
603 return;
604 }
605 }
606 newSamples[sampleCount].ruleIndex = found;
607 newSamples[sampleCount].value = val;
608 ++sampleCount;
609
610 if (newSampleInfo[found] == MAX_SAMPLES) { // limit flag not set
611 --keywordsRemaining;
612 }
613 }
614
615 // sort the values by index, leaving order otherwise unchanged
616 // this is just a selection sort for simplicity
617 LocalMemory<double> values;
618 if (NULL == values.allocateInsteadAndCopy(sampleCount)) {
619 status = U_MEMORY_ALLOCATION_ERROR;
620 return;
621 }
622 for (int i = 0, j = 0; i < maxIndex; ++i) {
623 for (int k = 0; k < sampleCount; ++k) {
624 if (newSamples[k].ruleIndex == i) {
625 values[j++] = newSamples[k].value;
626 }
627 }
628 }
629
630 // convert array of mask/lengths to array of mask/limits
// (here "limit" is reused as the running end offset into the values array)
631 limit = 0;
632 for (int i = 0; i < maxIndex; ++i) {
633 int32_t info = newSampleInfo[i];
634 int32_t len = info & ~LIMIT_MASK;
635 limit += len;
636 // if a rule is 'unlimited' but has fewer than MAX_SAMPLES samples,
637 // it's not really unlimited, so mark it as limited
638 int32_t mask = len < MAX_SAMPLES ? LIMIT_MASK : info & LIMIT_MASK;
639 newSampleInfo[i] = limit | mask;
640 }
641
642 // ok, we've got good data
// orphan() hands the buffers over to the members; ~PluralRules releases them.
643 mSamples = values.orphan();
644 mSampleInfo = newSampleInfo.orphan();
645 mSampleInfoCount = maxIndex;
646 }
647
648 void
addRules(RuleChain & rules)649 PluralRules::addRules(RuleChain& rules) {
650 RuleChain *newRule = new RuleChain(rules);
651 this->mRules=newRule;
652 newRule->setRepeatLimit();
653 }
654
655 UnicodeString
getRuleFromResource(const Locale & locale,UErrorCode & errCode)656 PluralRules::getRuleFromResource(const Locale& locale, UErrorCode& errCode) {
657 UnicodeString emptyStr;
658
659 if (U_FAILURE(errCode)) {
660 return emptyStr;
661 }
662 UResourceBundle *rb=ures_openDirect(NULL, "plurals", &errCode);
663 if(U_FAILURE(errCode)) {
664 /* total failure, not even root could be opened */
665 return emptyStr;
666 }
667 UResourceBundle *locRes=ures_getByKey(rb, "locales", NULL, &errCode);
668 if(U_FAILURE(errCode)) {
669 ures_close(rb);
670 return emptyStr;
671 }
672 int32_t resLen=0;
673 const char *curLocaleName=locale.getName();
674 const UChar* s = ures_getStringByKey(locRes, curLocaleName, &resLen, &errCode);
675
676 if (s == NULL) {
677 // Check parent locales.
678 UErrorCode status = U_ZERO_ERROR;
679 char parentLocaleName[ULOC_FULLNAME_CAPACITY];
680 const char *curLocaleName=locale.getName();
681 int32_t localeNameLen=0;
682 uprv_strcpy(parentLocaleName, curLocaleName);
683
684 while ((localeNameLen=uloc_getParent(parentLocaleName, parentLocaleName,
685 ULOC_FULLNAME_CAPACITY, &status)) > 0) {
686 resLen=0;
687 s = ures_getStringByKey(locRes, parentLocaleName, &resLen, &status);
688 if (s != NULL) {
689 errCode = U_ZERO_ERROR;
690 break;
691 }
692 status = U_ZERO_ERROR;
693 }
694 }
695 if (s==NULL) {
696 ures_close(locRes);
697 ures_close(rb);
698 return emptyStr;
699 }
700
701 char setKey[256];
702 UChar result[256];
703 u_UCharsToChars(s, setKey, resLen + 1);
704 // printf("\n PluralRule: %s\n", setKey);
705
706
707 UResourceBundle *ruleRes=ures_getByKey(rb, "rules", NULL, &errCode);
708 if(U_FAILURE(errCode)) {
709 ures_close(locRes);
710 ures_close(rb);
711 return emptyStr;
712 }
713 resLen=0;
714 UResourceBundle *setRes = ures_getByKey(ruleRes, setKey, NULL, &errCode);
715 if (U_FAILURE(errCode)) {
716 ures_close(ruleRes);
717 ures_close(locRes);
718 ures_close(rb);
719 return emptyStr;
720 }
721
722 int32_t numberKeys = ures_getSize(setRes);
723 char *key=NULL;
724 int32_t len=0;
725 for(int32_t i=0; i<numberKeys; ++i) {
726 int32_t keyLen;
727 resLen=0;
728 s=ures_getNextString(setRes, &resLen, (const char**)&key, &errCode);
729 keyLen = (int32_t)uprv_strlen(key);
730 u_charsToUChars(key, result+len, keyLen);
731 len += keyLen;
732 result[len++]=COLON;
733 uprv_memcpy(result+len, s, resLen*sizeof(UChar));
734 len += resLen;
735 result[len++]=SEMI_COLON;
736 }
737 result[len++]=0;
738 u_UCharsToChars(result, setKey, len);
739 // printf(" Rule: %s\n", setKey);
740
741 ures_close(setRes);
742 ures_close(ruleRes);
743 ures_close(locRes);
744 ures_close(rb);
745 return UnicodeString(result);
746 }
747
AndConstraint()748 AndConstraint::AndConstraint() {
749 op = AndConstraint::NONE;
750 opNum=-1;
751 rangeLow=-1;
752 rangeHigh=-1;
753 notIn=FALSE;
754 integerOnly=FALSE;
755 next=NULL;
756 }
757
758
/** Copy constructor; recursively deep-copies the rest of the list. */
AndConstraint::AndConstraint(const AndConstraint& other) {
    op = other.op;
    opNum = other.opNum;
    rangeLow = other.rangeLow;
    rangeHigh = other.rangeHigh;
    integerOnly = other.integerOnly;
    notIn = other.notIn;
    next = (other.next != NULL) ? new AndConstraint(*other.next) : NULL;
}
773
~AndConstraint()774 AndConstraint::~AndConstraint() {
775 if (next!=NULL) {
776 delete next;
777 }
778 }
779
780
781 UBool
isFulfilled(double number)782 AndConstraint::isFulfilled(double number) {
783 UBool result=TRUE;
784 double value=number;
785
786 // arrrrrrgh
787 if ((rangeHigh == -1 || integerOnly) && number != uprv_floor(number)) {
788 return notIn;
789 }
790
791 if ( op == MOD ) {
792 value = (int32_t)value % opNum;
793 }
794 if ( rangeHigh == -1 ) {
795 if ( rangeLow == -1 ) {
796 result = TRUE; // empty rule
797 }
798 else {
799 if ( value == rangeLow ) {
800 result = TRUE;
801 }
802 else {
803 result = FALSE;
804 }
805 }
806 }
807 else {
808 if ((rangeLow <= value) && (value <= rangeHigh)) {
809 if (integerOnly) {
810 if ( value != (int32_t)value) {
811 result = FALSE;
812 }
813 else {
814 result = TRUE;
815 }
816 }
817 else {
818 result = TRUE;
819 }
820 }
821 else {
822 result = FALSE;
823 }
824 }
825 if (notIn) {
826 return !result;
827 }
828 else {
829 return result;
830 }
831 }
832
833 UBool
isLimited()834 AndConstraint::isLimited() {
835 return (rangeHigh == -1 || integerOnly) && !notIn && op != MOD;
836 }
837
838 int32_t
updateRepeatLimit(int32_t maxLimit)839 AndConstraint::updateRepeatLimit(int32_t maxLimit) {
840
841 if ( op == MOD ) {
842 return uprv_max(opNum, maxLimit);
843 }
844 else {
845 if ( rangeHigh == -1 ) {
846 return uprv_max(rangeLow, maxLimit);
847 }
848 else{
849 return uprv_max(rangeHigh, maxLimit);
850 }
851 }
852 }
853
854
855 AndConstraint*
add()856 AndConstraint::add()
857 {
858 this->next = new AndConstraint();
859 return this->next;
860 }
861
OrConstraint()862 OrConstraint::OrConstraint() {
863 childNode=NULL;
864 next=NULL;
865 }
866
/** Copy constructor; deep-copies the AND-list and the following alternatives. */
OrConstraint::OrConstraint(const OrConstraint& other) {
    childNode = (other.childNode != NULL) ? new AndConstraint(*(other.childNode)) : NULL;
    next = (other.next != NULL) ? new OrConstraint(*(other.next)) : NULL;
}
881
~OrConstraint()882 OrConstraint::~OrConstraint() {
883 if (childNode!=NULL) {
884 delete childNode;
885 }
886 if (next!=NULL) {
887 delete next;
888 }
889 }
890
891 AndConstraint*
add()892 OrConstraint::add()
893 {
894 OrConstraint *curOrConstraint=this;
895 {
896 while (curOrConstraint->next!=NULL) {
897 curOrConstraint = curOrConstraint->next;
898 }
899 curOrConstraint->next = NULL;
900 curOrConstraint->childNode = new AndConstraint();
901 }
902 return curOrConstraint->childNode;
903 }
904
905 UBool
isFulfilled(double number)906 OrConstraint::isFulfilled(double number) {
907 OrConstraint* orRule=this;
908 UBool result=FALSE;
909
910 while (orRule!=NULL && !result) {
911 result=TRUE;
912 AndConstraint* andRule = orRule->childNode;
913 while (andRule!=NULL && result) {
914 result = andRule->isFulfilled(number);
915 andRule=andRule->next;
916 }
917 orRule = orRule->next;
918 }
919
920 return result;
921 }
922
923 UBool
isLimited()924 OrConstraint::isLimited() {
925 for (OrConstraint *orc = this; orc != NULL; orc = orc->next) {
926 UBool result = FALSE;
927 for (AndConstraint *andc = orc->childNode; andc != NULL; andc = andc->next) {
928 if (andc->isLimited()) {
929 result = TRUE;
930 break;
931 }
932 }
933 if (result == FALSE) {
934 return FALSE;
935 }
936 }
937 return TRUE;
938 }
939
RuleChain()940 RuleChain::RuleChain() {
941 ruleHeader=NULL;
942 next = NULL;
943 repeatLimit=0;
944 }
945
/** Copy constructor; deep-copies this link's rule and all following links. */
RuleChain::RuleChain(const RuleChain& other) {
    repeatLimit = other.repeatLimit;
    keyword = other.keyword;
    ruleHeader = (other.ruleHeader != NULL) ? new OrConstraint(*(other.ruleHeader)) : NULL;
    next = (other.next != NULL) ? new RuleChain(*other.next) : NULL;
}
963
~RuleChain()964 RuleChain::~RuleChain() {
965 if (next != NULL) {
966 delete next;
967 }
968 if ( ruleHeader != NULL ) {
969 delete ruleHeader;
970 }
971 }
972
973 UnicodeString
select(double number) const974 RuleChain::select(double number) const {
975
976 if ( ruleHeader != NULL ) {
977 if (ruleHeader->isFulfilled(number)) {
978 return keyword;
979 }
980 }
981 if ( next != NULL ) {
982 return next->select(number);
983 }
984 else {
985 return PLURAL_KEYWORD_OTHER;
986 }
987
988 }
989
990 void
dumpRules(UnicodeString & result)991 RuleChain::dumpRules(UnicodeString& result) {
992 UChar digitString[16];
993
994 if ( ruleHeader != NULL ) {
995 result += keyword;
996 OrConstraint* orRule=ruleHeader;
997 while ( orRule != NULL ) {
998 AndConstraint* andRule=orRule->childNode;
999 while ( andRule != NULL ) {
1000 if ( (andRule->op==AndConstraint::NONE) && (andRule->rangeHigh==-1) ) {
1001 result += UNICODE_STRING_SIMPLE(" n is ");
1002 if (andRule->notIn) {
1003 result += UNICODE_STRING_SIMPLE("not ");
1004 }
1005 uprv_itou(digitString,16, andRule->rangeLow,10,0);
1006 result += UnicodeString(digitString);
1007 }
1008 else {
1009 if (andRule->op==AndConstraint::MOD) {
1010 result += UNICODE_STRING_SIMPLE(" n mod ");
1011 uprv_itou(digitString,16, andRule->opNum,10,0);
1012 result += UnicodeString(digitString);
1013 }
1014 else {
1015 result += UNICODE_STRING_SIMPLE(" n ");
1016 }
1017 if (andRule->rangeHigh==-1) {
1018 if (andRule->notIn) {
1019 result += UNICODE_STRING_SIMPLE(" is not ");
1020 uprv_itou(digitString,16, andRule->rangeLow,10,0);
1021 result += UnicodeString(digitString);
1022 }
1023 else {
1024 result += UNICODE_STRING_SIMPLE(" is ");
1025 uprv_itou(digitString,16, andRule->rangeLow,10,0);
1026 result += UnicodeString(digitString);
1027 }
1028 }
1029 else {
1030 if (andRule->notIn) {
1031 if ( andRule->integerOnly ) {
1032 result += UNICODE_STRING_SIMPLE(" not in ");
1033 }
1034 else {
1035 result += UNICODE_STRING_SIMPLE(" not within ");
1036 }
1037 uprv_itou(digitString,16, andRule->rangeLow,10,0);
1038 result += UnicodeString(digitString);
1039 result += UNICODE_STRING_SIMPLE(" .. ");
1040 uprv_itou(digitString,16, andRule->rangeHigh,10,0);
1041 result += UnicodeString(digitString);
1042 }
1043 else {
1044 if ( andRule->integerOnly ) {
1045 result += UNICODE_STRING_SIMPLE(" in ");
1046 }
1047 else {
1048 result += UNICODE_STRING_SIMPLE(" within ");
1049 }
1050 uprv_itou(digitString,16, andRule->rangeLow,10,0);
1051 result += UnicodeString(digitString);
1052 result += UNICODE_STRING_SIMPLE(" .. ");
1053 uprv_itou(digitString,16, andRule->rangeHigh,10,0);
1054 }
1055 }
1056 }
1057 if ( (andRule=andRule->next) != NULL) {
1058 result += PK_AND;
1059 }
1060 }
1061 if ( (orRule = orRule->next) != NULL ) {
1062 result += PK_OR;
1063 }
1064 }
1065 }
1066 if ( next != NULL ) {
1067 next->dumpRules(result);
1068 }
1069 }
1070
1071 int32_t
getRepeatLimit()1072 RuleChain::getRepeatLimit () {
1073 return repeatLimit;
1074 }
1075
1076 void
setRepeatLimit()1077 RuleChain::setRepeatLimit () {
1078 int32_t limit=0;
1079
1080 if ( next != NULL ) {
1081 next->setRepeatLimit();
1082 limit = next->repeatLimit;
1083 }
1084
1085 if ( ruleHeader != NULL ) {
1086 OrConstraint* orRule=ruleHeader;
1087 while ( orRule != NULL ) {
1088 AndConstraint* andRule=orRule->childNode;
1089 while ( andRule != NULL ) {
1090 limit = andRule->updateRepeatLimit(limit);
1091 andRule = andRule->next;
1092 }
1093 orRule = orRule->next;
1094 }
1095 }
1096 repeatLimit = limit;
1097 }
1098
1099 UErrorCode
getKeywords(int32_t capacityOfKeywords,UnicodeString * keywords,int32_t & arraySize) const1100 RuleChain::getKeywords(int32_t capacityOfKeywords, UnicodeString* keywords, int32_t& arraySize) const {
1101 if ( arraySize < capacityOfKeywords-1 ) {
1102 keywords[arraySize++]=keyword;
1103 }
1104 else {
1105 return U_BUFFER_OVERFLOW_ERROR;
1106 }
1107
1108 if ( next != NULL ) {
1109 return next->getKeywords(capacityOfKeywords, keywords, arraySize);
1110 }
1111 else {
1112 return U_ZERO_ERROR;
1113 }
1114 }
1115
1116 UBool
isKeyword(const UnicodeString & keywordParam) const1117 RuleChain::isKeyword(const UnicodeString& keywordParam) const {
1118 if ( keyword == keywordParam ) {
1119 return TRUE;
1120 }
1121
1122 if ( next != NULL ) {
1123 return next->isKeyword(keywordParam);
1124 }
1125 else {
1126 return FALSE;
1127 }
1128 }
1129
1130
RuleParser()1131 RuleParser::RuleParser() {
1132 }
1133
~RuleParser()1134 RuleParser::~RuleParser() {
1135 }
1136
1137 void
checkSyntax(tokenType prevType,tokenType curType,UErrorCode & status)1138 RuleParser::checkSyntax(tokenType prevType, tokenType curType, UErrorCode &status)
1139 {
1140 if (U_FAILURE(status)) {
1141 return;
1142 }
1143 switch(prevType) {
1144 case none:
1145 case tSemiColon:
1146 if (curType!=tKeyword) {
1147 status = U_UNEXPECTED_TOKEN;
1148 }
1149 break;
1150 case tVariableN :
1151 if (curType != tIs && curType != tMod && curType != tIn &&
1152 curType != tNot && curType != tWithin) {
1153 status = U_UNEXPECTED_TOKEN;
1154 }
1155 break;
1156 case tZero:
1157 case tOne:
1158 case tTwo:
1159 case tFew:
1160 case tMany:
1161 case tOther:
1162 case tKeyword:
1163 if (curType != tColon) {
1164 status = U_UNEXPECTED_TOKEN;
1165 }
1166 break;
1167 case tColon :
1168 if (curType != tVariableN) {
1169 status = U_UNEXPECTED_TOKEN;
1170 }
1171 break;
1172 case tIs:
1173 if ( curType != tNumber && curType != tNot) {
1174 status = U_UNEXPECTED_TOKEN;
1175 }
1176 break;
1177 case tNot:
1178 if (curType != tNumber && curType != tIn && curType != tWithin) {
1179 status = U_UNEXPECTED_TOKEN;
1180 }
1181 break;
1182 case tMod:
1183 case tDot:
1184 case tIn:
1185 case tWithin:
1186 case tAnd:
1187 case tOr:
1188 if (curType != tNumber && curType != tVariableN) {
1189 status = U_UNEXPECTED_TOKEN;
1190 }
1191 break;
1192 case tNumber:
1193 if (curType != tDot && curType != tSemiColon && curType != tIs && curType != tNot &&
1194 curType != tIn && curType != tWithin && curType != tAnd && curType != tOr)
1195 {
1196 status = U_UNEXPECTED_TOKEN;
1197 }
1198 break;
1199 default:
1200 status = U_UNEXPECTED_TOKEN;
1201 break;
1202 }
1203 }
1204
// Scans ruleData from *ruleIndex and reports the next token.
//
// Every character is classified by inRange(); if inRange() rejects it, status
// becomes U_ILLEGAL_CHARACTER. NOTE(review): inRange() is not fully visible
// here; this code relies on it storing the character class in `type`.
//
// A run of letters or digits ends at a space, colon, semicolon or dot (or at
// the end of the text). The run is then copied into `token`, *ruleIndex is
// moved to the delimiter, and getKeyType() refines `type`. A colon or
// semicolon that starts a token is consumed (*ruleIndex moves past it) and
// `token` is left unchanged. Leading spaces are skipped.
//
// Does nothing if status is already a failure.
1205 void
getNextToken(const UnicodeString & ruleData,int32_t * ruleIndex,UnicodeString & token,tokenType & type,UErrorCode & status)1206 RuleParser::getNextToken(const UnicodeString& ruleData,
1207 int32_t *ruleIndex,
1208 UnicodeString& token,
1209 tokenType& type,
1210 UErrorCode &status)
1211 {
1212 int32_t curIndex= *ruleIndex;
1213 UChar ch;
// Class of the run being collected (tLetter, tNumber or tDot); none until
// the first such character is seen.
1214 tokenType prevType=none;
1215
1216 if (U_FAILURE(status)) {
1217 return;
1218 }
1219 while (curIndex<ruleData.length()) {
1220 ch = ruleData.charAt(curIndex);
1221 if ( !inRange(ch, type) ) {
1222 status = U_ILLEGAL_CHARACTER;
1223 return;
1224 }
1225 switch (type) {
1226 case tSpace:
// A space ends a pending run; otherwise it is skipped by moving the token
// start forward.
1227 if ( *ruleIndex != curIndex ) { // letter
1228 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
1229 *ruleIndex=curIndex;
1230 type=prevType;
1231 getKeyType(token, type, status);
1232 return;
1233 }
1234 else {
1235 *ruleIndex=*ruleIndex+1;
1236 }
1237 break; // consective space
1238 case tColon:
1239 case tSemiColon:
// Ends a pending run (the delimiter itself is left for the next call), or is
// returned as its own token with *ruleIndex moved past it.
1240 if ( *ruleIndex != curIndex ) {
1241 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
1242 *ruleIndex=curIndex;
1243 type=prevType;
1244 getKeyType(token, type, status);
1245 return;
1246 }
1247 else {
1248 *ruleIndex=curIndex+1;
1249 return;
1250 }
1251 case tLetter:
// Letters only extend the run; prevType is recorded when the run starts.
1252 if ((type==prevType)||(prevType==none)) {
1253 prevType=type;
1254 break;
1255 }
1256 break;
1257 case tNumber:
// A digit extends a run of digits or starts one; after a run of a different
// class the function returns with *ruleIndex moved past this digit.
1258 if ((type==prevType)||(prevType==none)) {
1259 prevType=type;
1260 break;
1261 }
1262 else {
1263 *ruleIndex=curIndex+1;
1264 return;
1265 }
1266 case tDot:
// NOTE(review): the `continue` below does not advance curIndex, so the same
// dot is examined again with prevType == tDot and then takes the
// "two consecutive dots" branch, which moves *ruleIndex forward by two.
1267 if (prevType==none) { // first dot
1268 prevType=type;
1269 continue;
1270 }
1271 else {
1272 if ( *ruleIndex != curIndex ) {
1273 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
1274 *ruleIndex=curIndex; // letter
1275 type=prevType;
1276 getKeyType(token, type, status);
1277 return;
1278 }
1279 else { // two consective dots
1280 *ruleIndex=curIndex+2;
1281 return;
1282 }
1283 }
1284 break;
1285 default:
1286 status = U_UNEXPECTED_TOKEN;
1287 return;
1288 }
1289 curIndex++;
1290 }
// End of text: a run of letters or digits that reaches the end is the last token.
1291 if ( curIndex>=ruleData.length() ) {
1292 if ( (type == tLetter)||(type == tNumber) ) {
1293 token=UnicodeString(ruleData, *ruleIndex, curIndex-*ruleIndex);
1294 getKeyType(token, type, status);
1295 if (U_FAILURE(status)) {
1296 return;
1297 }
1298 }
1299 *ruleIndex = ruleData.length();
1300 }
1301 }
1302
1303 UBool
inRange(UChar ch,tokenType & type)1304 RuleParser::inRange(UChar ch, tokenType& type) {
1305 if ((ch>=CAP_A) && (ch<=CAP_Z)) {
1306 // we assume all characters are in lower case already.
1307 return FALSE;
1308 }
1309 if ((ch>=LOW_A) && (ch<=LOW_Z)) {
1310 type = tLetter;
1311 return TRUE;
1312 }
1313 if ((ch>=U_ZERO) && (ch<=U_NINE)) {
1314 type = tNumber;
1315 return TRUE;
1316 }
1317 switch (ch) {
1318 case COLON:
1319 type = tColon;
1320 return TRUE;
1321 case SPACE:
1322 type = tSpace;
1323 return TRUE;
1324 case SEMI_COLON:
1325 type = tSemiColon;
1326 return TRUE;
1327 case DOT:
1328 type = tDot;
1329 return TRUE;
1330 default :
1331 type = none;
1332 return FALSE;
1333 }
1334 }
1335
1336
1337 void
getKeyType(const UnicodeString & token,tokenType & keyType,UErrorCode & status)1338 RuleParser::getKeyType(const UnicodeString& token, tokenType& keyType, UErrorCode &status)
1339 {
1340 if (U_FAILURE(status)) {
1341 return;
1342 }
1343 if ( keyType==tNumber) {
1344 }
1345 else if (token==PK_VAR_N) {
1346 keyType = tVariableN;
1347 }
1348 else if (token==PK_IS) {
1349 keyType = tIs;
1350 }
1351 else if (token==PK_AND) {
1352 keyType = tAnd;
1353 }
1354 else if (token==PK_IN) {
1355 keyType = tIn;
1356 }
1357 else if (token==PK_WITHIN) {
1358 keyType = tWithin;
1359 }
1360 else if (token==PK_NOT) {
1361 keyType = tNot;
1362 }
1363 else if (token==PK_MOD) {
1364 keyType = tMod;
1365 }
1366 else if (token==PK_OR) {
1367 keyType = tOr;
1368 }
1369 else if ( isValidKeyword(token) ) {
1370 keyType = tKeyword;
1371 }
1372 else {
1373 status = U_UNEXPECTED_TOKEN;
1374 }
1375 }
1376
1377 UBool
isValidKeyword(const UnicodeString & token)1378 RuleParser::isValidKeyword(const UnicodeString& token) {
1379 return PatternProps::isIdentifier(token.getBuffer(), token.length());
1380 }
1381
PluralKeywordEnumeration(RuleChain * header,UErrorCode & status)1382 PluralKeywordEnumeration::PluralKeywordEnumeration(RuleChain *header, UErrorCode& status)
1383 : pos(0), fKeywordNames(status) {
1384 if (U_FAILURE(status)) {
1385 return;
1386 }
1387 fKeywordNames.setDeleter(uhash_deleteUObject);
1388 UBool addKeywordOther=TRUE;
1389 RuleChain *node=header;
1390 while(node!=NULL) {
1391 fKeywordNames.addElement(new UnicodeString(node->keyword), status);
1392 if (U_FAILURE(status)) {
1393 return;
1394 }
1395 if (node->keyword == PLURAL_KEYWORD_OTHER) {
1396 addKeywordOther= FALSE;
1397 }
1398 node=node->next;
1399 }
1400
1401 if (addKeywordOther) {
1402 fKeywordNames.addElement(new UnicodeString(PLURAL_KEYWORD_OTHER), status);
1403 }
1404 }
1405
1406 const UnicodeString*
snext(UErrorCode & status)1407 PluralKeywordEnumeration::snext(UErrorCode& status) {
1408 if (U_SUCCESS(status) && pos < fKeywordNames.size()) {
1409 return (const UnicodeString*)fKeywordNames.elementAt(pos++);
1410 }
1411 return NULL;
1412 }
1413
1414 void
reset(UErrorCode &)1415 PluralKeywordEnumeration::reset(UErrorCode& /*status*/) {
1416 pos=0;
1417 }
1418
1419 int32_t
count(UErrorCode &) const1420 PluralKeywordEnumeration::count(UErrorCode& /*status*/) const {
1421 return fKeywordNames.size();
1422 }
1423
~PluralKeywordEnumeration()1424 PluralKeywordEnumeration::~PluralKeywordEnumeration() {
1425 }
1426
1427 U_NAMESPACE_END
1428
1429
1430 #endif /* #if !UCONFIG_NO_FORMATTING */
1431
1432 //eof
1433