• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *   Copyright (C) 1997-2015, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 ******************************************************************************
8 *   file name:  nfrs.cpp
9 *   encoding:   UTF-8
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 * Modification history
14 * Date        Name      Comments
15 * 10/11/2001  Doug      Ported from ICU4J
16 */
17 
18 #include "nfrs.h"
19 
20 #if U_HAVE_RBNF
21 
22 #include "unicode/uchar.h"
23 #include "nfrule.h"
24 #include "nfrlist.h"
25 #include "patternprops.h"
26 #include "putilimp.h"
27 
28 #ifdef RBNF_DEBUG
29 #include "cmemory.h"
30 #endif
31 
32 enum {
33     /** -x */
34     NEGATIVE_RULE_INDEX = 0,
35     /** x.x */
36     IMPROPER_FRACTION_RULE_INDEX = 1,
37     /** 0.x */
38     PROPER_FRACTION_RULE_INDEX = 2,
39     /** x.0 */
40     DEFAULT_RULE_INDEX = 3,
41     /** Inf */
42     INFINITY_RULE_INDEX = 4,
43     /** NaN */
44     NAN_RULE_INDEX = 5,
45     NON_NUMERICAL_RULE_LENGTH = 6
46 };
47 
48 U_NAMESPACE_BEGIN
49 
50 #if 0
51 // euclid's algorithm works with doubles
52 // note, doubles only get us up to one quadrillion or so, which
53 // isn't as much range as we get with longs.  We probably still
54 // want either 64-bit math, or BigInteger.
55 
56 static int64_t
57 util_lcm(int64_t x, int64_t y)
58 {
59     x.abs();
60     y.abs();
61 
62     if (x == 0 || y == 0) {
63         return 0;
64     } else {
65         do {
66             if (x < y) {
67                 int64_t t = x; x = y; y = t;
68             }
69             x -= y * (x/y);
70         } while (x != 0);
71 
72         return y;
73     }
74 }
75 
76 #else
77 /**
78  * Calculates the least common multiple of x and y.
79  */
80 static int64_t
util_lcm(int64_t x,int64_t y)81 util_lcm(int64_t x, int64_t y)
82 {
83     // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
84     // vol. 2, 1st ed., pp. 298-299
85     int64_t x1 = x;
86     int64_t y1 = y;
87 
88     int p2 = 0;
89     while ((x1 & 1) == 0 && (y1 & 1) == 0) {
90         ++p2;
91         x1 >>= 1;
92         y1 >>= 1;
93     }
94 
95     int64_t t;
96     if ((x1 & 1) == 1) {
97         t = -y1;
98     } else {
99         t = x1;
100     }
101 
102     while (t != 0) {
103         while ((t & 1) == 0) {
104             t = t >> 1;
105         }
106         if (t > 0) {
107             x1 = t;
108         } else {
109             y1 = -t;
110         }
111         t = x1 - y1;
112     }
113 
114     int64_t gcd = x1 << p2;
115 
116     // x * y == gcd(x, y) * lcm(x, y)
117     return x / gcd * y;
118 }
119 #endif
120 
121 static const char16_t gPercent = 0x0025;
122 static const char16_t gColon = 0x003a;
123 static const char16_t gSemicolon = 0x003b;
124 static const char16_t gLineFeed = 0x000a;
125 
126 static const char16_t gPercentPercent[] =
127 {
128     0x25, 0x25, 0
129 }; /* "%%" */
130 
131 static const char16_t gNoparse[] =
132 {
133     0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0
134 }; /* "@noparse" */
135 
NFRuleSet(RuleBasedNumberFormat * _owner,UnicodeString * descriptions,int32_t index,UErrorCode & status)136 NFRuleSet::NFRuleSet(RuleBasedNumberFormat *_owner, UnicodeString* descriptions, int32_t index, UErrorCode& status)
137   : name()
138   , rules(0)
139   , owner(_owner)
140   , fractionRules()
141   , fIsFractionRuleSet(false)
142   , fIsPublic(false)
143   , fIsParseable(true)
144 {
145     for (int32_t i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
146         nonNumericalRules[i] = nullptr;
147     }
148 
149     if (U_FAILURE(status)) {
150         return;
151     }
152 
153     UnicodeString& description = descriptions[index]; // !!! make sure index is valid
154 
155     if (description.length() == 0) {
156         // throw new IllegalArgumentException("Empty rule set description");
157         status = U_PARSE_ERROR;
158         return;
159     }
160 
161     // if the description begins with a rule set name (the rule set
162     // name can be omitted in formatter descriptions that consist
163     // of only one rule set), copy it out into our "name" member
164     // and delete it from the description
165     if (description.charAt(0) == gPercent) {
166         int32_t pos = description.indexOf(gColon);
167         if (pos == -1) {
168             // throw new IllegalArgumentException("Rule set name doesn't end in colon");
169             status = U_PARSE_ERROR;
170         } else {
171             name.setTo(description, 0, pos);
172             while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) {
173             }
174             description.remove(0, pos);
175         }
176     } else {
177         name.setTo(UNICODE_STRING_SIMPLE("%default"));
178     }
179 
180     if (description.length() == 0) {
181         // throw new IllegalArgumentException("Empty rule set description");
182         status = U_PARSE_ERROR;
183     }
184 
185     fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0;
186 
187     if ( name.endsWith(gNoparse,8) ) {
188         fIsParseable = false;
189         name.truncate(name.length()-8); // remove the @noparse from the name
190     }
191 
192     // all of the other members of NFRuleSet are initialized
193     // by parseRules()
194 }
195 
196 void
parseRules(UnicodeString & description,UErrorCode & status)197 NFRuleSet::parseRules(UnicodeString& description, UErrorCode& status)
198 {
199     // start by creating a Vector whose elements are Strings containing
200     // the descriptions of the rules (one rule per element).  The rules
201     // are separated by semicolons (there's no escape facility: ALL
202     // semicolons are rule delimiters)
203 
204     if (U_FAILURE(status)) {
205         return;
206     }
207 
208     // ensure we are starting with an empty rule list
209     rules.deleteAll();
210 
211     // dlf - the original code kept a separate description array for no reason,
212     // so I got rid of it.  The loop was too complex so I simplified it.
213 
214     UnicodeString currentDescription;
215     int32_t oldP = 0;
216     while (oldP < description.length()) {
217         int32_t p = description.indexOf(gSemicolon, oldP);
218         if (p == -1) {
219             p = description.length();
220         }
221         currentDescription.setTo(description, oldP, p - oldP);
222         NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
223         oldP = p + 1;
224     }
225 
226     // for rules that didn't specify a base value, their base values
227     // were initialized to 0.  Make another pass through the list and
228     // set all those rules' base values.  We also remove any special
229     // rules from the list and put them into their own member variables
230     int64_t defaultBaseValue = 0;
231 
232     // (this isn't a for loop because we might be deleting items from
233     // the vector-- we want to make sure we only increment i when
234     // we _didn't_ delete anything from the vector)
235     int32_t rulesSize = rules.size();
236     for (int32_t i = 0; i < rulesSize; i++) {
237         NFRule* rule = rules[i];
238         int64_t baseValue = rule->getBaseValue();
239 
240         if (baseValue == 0) {
241             // if the rule's base value is 0, fill in a default
242             // base value (this will be 1 plus the preceding
243             // rule's base value for regular rule sets, and the
244             // same as the preceding rule's base value in fraction
245             // rule sets)
246             rule->setBaseValue(defaultBaseValue, status);
247         }
248         else {
249             // if it's a regular rule that already knows its base value,
250             // check to make sure the rules are in order, and update
251             // the default base value for the next rule
252             if (baseValue < defaultBaseValue) {
253                 // throw new IllegalArgumentException("Rules are not in order");
254                 status = U_PARSE_ERROR;
255                 return;
256             }
257             defaultBaseValue = baseValue;
258         }
259         if (!fIsFractionRuleSet) {
260             ++defaultBaseValue;
261         }
262     }
263 }
264 
265 /**
266  * Set one of the non-numerical rules.
267  * @param rule The rule to set.
268  */
setNonNumericalRule(NFRule * rule)269 void NFRuleSet::setNonNumericalRule(NFRule *rule) {
270     switch (rule->getBaseValue()) {
271         case NFRule::kNegativeNumberRule:
272             delete nonNumericalRules[NEGATIVE_RULE_INDEX];
273             nonNumericalRules[NEGATIVE_RULE_INDEX] = rule;
274             return;
275         case NFRule::kImproperFractionRule:
276             setBestFractionRule(IMPROPER_FRACTION_RULE_INDEX, rule, true);
277             return;
278         case NFRule::kProperFractionRule:
279             setBestFractionRule(PROPER_FRACTION_RULE_INDEX, rule, true);
280             return;
281         case NFRule::kDefaultRule:
282             setBestFractionRule(DEFAULT_RULE_INDEX, rule, true);
283             return;
284         case NFRule::kInfinityRule:
285             delete nonNumericalRules[INFINITY_RULE_INDEX];
286             nonNumericalRules[INFINITY_RULE_INDEX] = rule;
287             return;
288         case NFRule::kNaNRule:
289             delete nonNumericalRules[NAN_RULE_INDEX];
290             nonNumericalRules[NAN_RULE_INDEX] = rule;
291             return;
292         case NFRule::kNoBase:
293         case NFRule::kOtherRule:
294         default:
295             // If we do not remember the rule inside the object.
296             // delete it here to prevent memory leak.
297             delete rule;
298             return;
299     }
300 }
301 
302 /**
303  * Determine the best fraction rule to use. Rules matching the decimal point from
304  * DecimalFormatSymbols become the main set of rules to use.
305  * @param originalIndex The index into nonNumericalRules
306  * @param newRule The new rule to consider
307  * @param rememberRule Should the new rule be added to fractionRules.
308  */
setBestFractionRule(int32_t originalIndex,NFRule * newRule,UBool rememberRule)309 void NFRuleSet::setBestFractionRule(int32_t originalIndex, NFRule *newRule, UBool rememberRule) {
310     if (rememberRule) {
311         fractionRules.add(newRule);
312     }
313     NFRule *bestResult = nonNumericalRules[originalIndex];
314     if (bestResult == nullptr) {
315         nonNumericalRules[originalIndex] = newRule;
316     }
317     else {
318         // We have more than one. Which one is better?
319         const DecimalFormatSymbols *decimalFormatSymbols = owner->getDecimalFormatSymbols();
320         if (decimalFormatSymbols->getSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol).charAt(0)
321             == newRule->getDecimalPoint())
322         {
323             nonNumericalRules[originalIndex] = newRule;
324         }
325         // else leave it alone
326     }
327 }
328 
~NFRuleSet()329 NFRuleSet::~NFRuleSet()
330 {
331     for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
332         if (i != IMPROPER_FRACTION_RULE_INDEX
333             && i != PROPER_FRACTION_RULE_INDEX
334             && i != DEFAULT_RULE_INDEX)
335         {
336             delete nonNumericalRules[i];
337         }
338         // else it will be deleted via NFRuleList fractionRules
339     }
340 }
341 
342 static UBool
util_equalRules(const NFRule * rule1,const NFRule * rule2)343 util_equalRules(const NFRule* rule1, const NFRule* rule2)
344 {
345     if (rule1) {
346         if (rule2) {
347             return *rule1 == *rule2;
348         }
349     } else if (!rule2) {
350         return true;
351     }
352     return false;
353 }
354 
355 bool
operator ==(const NFRuleSet & rhs) const356 NFRuleSet::operator==(const NFRuleSet& rhs) const
357 {
358     if (rules.size() == rhs.rules.size() &&
359         fIsFractionRuleSet == rhs.fIsFractionRuleSet &&
360         name == rhs.name) {
361 
362         // ...then compare the non-numerical rule lists...
363         for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
364             if (!util_equalRules(nonNumericalRules[i], rhs.nonNumericalRules[i])) {
365                 return false;
366             }
367         }
368 
369         // ...then compare the rule lists...
370         for (uint32_t i = 0; i < rules.size(); ++i) {
371             if (*rules[i] != *rhs.rules[i]) {
372                 return false;
373             }
374         }
375         return true;
376     }
377     return false;
378 }
379 
380 void
setDecimalFormatSymbols(const DecimalFormatSymbols & newSymbols,UErrorCode & status)381 NFRuleSet::setDecimalFormatSymbols(const DecimalFormatSymbols &newSymbols, UErrorCode& status) {
382     for (uint32_t i = 0; i < rules.size(); ++i) {
383         rules[i]->setDecimalFormatSymbols(newSymbols, status);
384     }
385     // Switch the fraction rules to mirror the DecimalFormatSymbols.
386     for (int32_t nonNumericalIdx = IMPROPER_FRACTION_RULE_INDEX; nonNumericalIdx <= DEFAULT_RULE_INDEX; nonNumericalIdx++) {
387         if (nonNumericalRules[nonNumericalIdx]) {
388             for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
389                 NFRule *fractionRule = fractionRules[fIdx];
390                 if (nonNumericalRules[nonNumericalIdx]->getBaseValue() == fractionRule->getBaseValue()) {
391                     setBestFractionRule(nonNumericalIdx, fractionRule, false);
392                 }
393             }
394         }
395     }
396 
397     for (uint32_t nnrIdx = 0; nnrIdx < NON_NUMERICAL_RULE_LENGTH; nnrIdx++) {
398         NFRule *rule = nonNumericalRules[nnrIdx];
399         if (rule) {
400             rule->setDecimalFormatSymbols(newSymbols, status);
401         }
402     }
403 }
404 
405 #define RECURSION_LIMIT 64
406 
407 void
format(int64_t number,UnicodeString & toAppendTo,int32_t pos,int32_t recursionCount,UErrorCode & status) const408 NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
409 {
410     if (recursionCount >= RECURSION_LIMIT) {
411         // stop recursion
412         status = U_INVALID_STATE_ERROR;
413         return;
414     }
415     const NFRule *rule = findNormalRule(number);
416     if (rule) { // else error, but can't report it
417         rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
418     }
419 }
420 
421 void
format(double number,UnicodeString & toAppendTo,int32_t pos,int32_t recursionCount,UErrorCode & status) const422 NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
423 {
424     if (recursionCount >= RECURSION_LIMIT) {
425         // stop recursion
426         status = U_INVALID_STATE_ERROR;
427         return;
428     }
429     const NFRule *rule = findDoubleRule(number);
430     if (rule) { // else error, but can't report it
431         rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
432     }
433 }
434 
435 const NFRule*
findDoubleRule(double number) const436 NFRuleSet::findDoubleRule(double number) const
437 {
438     // if this is a fraction rule set, use findFractionRuleSetRule()
439     if (isFractionRuleSet()) {
440         return findFractionRuleSetRule(number);
441     }
442 
443     if (uprv_isNaN(number)) {
444         const NFRule *rule = nonNumericalRules[NAN_RULE_INDEX];
445         if (!rule) {
446             rule = owner->getDefaultNaNRule();
447         }
448         return rule;
449     }
450 
451     // if the number is negative, return the negative number rule
452     // (if there isn't a negative-number rule, we pretend it's a
453     // positive number)
454     if (number < 0) {
455         if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
456             return  nonNumericalRules[NEGATIVE_RULE_INDEX];
457         } else {
458             number = -number;
459         }
460     }
461 
462     if (uprv_isInfinite(number)) {
463         const NFRule *rule = nonNumericalRules[INFINITY_RULE_INDEX];
464         if (!rule) {
465             rule = owner->getDefaultInfinityRule();
466         }
467         return rule;
468     }
469 
470     // if the number isn't an integer, we use one of the fraction rules...
471     if (number != uprv_floor(number)) {
472         // if the number is between 0 and 1, return the proper
473         // fraction rule
474         if (number < 1 && nonNumericalRules[PROPER_FRACTION_RULE_INDEX]) {
475             return nonNumericalRules[PROPER_FRACTION_RULE_INDEX];
476         }
477         // otherwise, return the improper fraction rule
478         else if (nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX]) {
479             return nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX];
480         }
481     }
482 
483     // if there's a default rule, use it to format the number
484     if (nonNumericalRules[DEFAULT_RULE_INDEX]) {
485         return nonNumericalRules[DEFAULT_RULE_INDEX];
486     }
487 
488     // and if we haven't yet returned a rule, use findNormalRule()
489     // to find the applicable rule
490     int64_t r = util64_fromDouble(number + 0.5);
491     return findNormalRule(r);
492 }
493 
494 const NFRule *
findNormalRule(int64_t number) const495 NFRuleSet::findNormalRule(int64_t number) const
496 {
497     // if this is a fraction rule set, use findFractionRuleSetRule()
498     // to find the rule (we should only go into this clause if the
499     // value is 0)
500     if (fIsFractionRuleSet) {
501         return findFractionRuleSetRule(static_cast<double>(number));
502     }
503 
504     // if the number is negative, return the negative-number rule
505     // (if there isn't one, pretend the number is positive)
506     if (number < 0) {
507         if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
508             return nonNumericalRules[NEGATIVE_RULE_INDEX];
509         } else {
510             number = -number;
511         }
512     }
513 
514     // we have to repeat the preceding two checks, even though we
515     // do them in findRule(), because the version of format() that
516     // takes a long bypasses findRule() and goes straight to this
517     // function.  This function does skip the fraction rules since
518     // we know the value is an integer (it also skips the default
519     // rule, since it's considered a fraction rule.  Skipping the
520     // default rule in this function is also how we avoid infinite
521     // recursion)
522 
523     // {dlf} unfortunately this fails if there are no rules except
524     // special rules.  If there are no rules, use the default rule.
525 
526     // binary-search the rule list for the applicable rule
527     // (a rule is used for all values from its base value to
528     // the next rule's base value)
529     int32_t hi = rules.size();
530     if (hi > 0) {
531         int32_t lo = 0;
532 
533         while (lo < hi) {
534             int32_t mid = (lo + hi) / 2;
535             if (rules[mid]->getBaseValue() == number) {
536                 return rules[mid];
537             }
538             else if (rules[mid]->getBaseValue() > number) {
539                 hi = mid;
540             }
541             else {
542                 lo = mid + 1;
543             }
544         }
545         if (hi == 0) { // bad rule set, minimum base > 0
546             return nullptr; // want to throw exception here
547         }
548 
549         NFRule *result = rules[hi - 1];
550 
551         // use shouldRollBack() to see whether we need to invoke the
552         // rollback rule (see shouldRollBack()'s documentation for
553         // an explanation of the rollback rule).  If we do, roll back
554         // one rule and return that one instead of the one we'd normally
555         // return
556         if (result->shouldRollBack(number)) {
557             if (hi == 1) { // bad rule set, no prior rule to rollback to from this base
558                 return nullptr;
559             }
560             result = rules[hi - 2];
561         }
562         return result;
563     }
564     // else use the default rule
565     return nonNumericalRules[DEFAULT_RULE_INDEX];
566 }
567 
568 /**
569  * If this rule is a fraction rule set, this function is used by
570  * findRule() to select the most appropriate rule for formatting
571  * the number.  Basically, the base value of each rule in the rule
572  * set is treated as the denominator of a fraction.  Whichever
573  * denominator can produce the fraction closest in value to the
574  * number passed in is the result.  If there's a tie, the earlier
575  * one in the list wins.  (If there are two rules in a row with the
576  * same base value, the first one is used when the numerator of the
577  * fraction would be 1, and the second rule is used the rest of the
578  * time.
579  * @param number The number being formatted (which will always be
580  * a number between 0 and 1)
581  * @return The rule to use to format this number
582  */
583 const NFRule*
findFractionRuleSetRule(double number) const584 NFRuleSet::findFractionRuleSetRule(double number) const
585 {
586     // the obvious way to do this (multiply the value being formatted
587     // by each rule's base value until you get an integral result)
588     // doesn't work because of rounding error.  This method is more
589     // accurate
590 
591     // find the least common multiple of the rules' base values
592     // and multiply this by the number being formatted.  This is
593     // all the precision we need, and we can do all of the rest
594     // of the math using integer arithmetic
595     int64_t leastCommonMultiple = rules[0]->getBaseValue();
596     int64_t numerator;
597     {
598         for (uint32_t i = 1; i < rules.size(); ++i) {
599             leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue());
600         }
601         numerator = util64_fromDouble(number * static_cast<double>(leastCommonMultiple) + 0.5);
602     }
603     // for each rule, do the following...
604     int64_t tempDifference;
605     int64_t difference = util64_fromDouble(uprv_maxMantissa());
606     int32_t winner = 0;
607     for (uint32_t i = 0; i < rules.size(); ++i) {
608         // "numerator" is the numerator of the fraction if the
609         // denominator is the LCD.  The numerator if the rule's
610         // base value is the denominator is "numerator" times the
611         // base value divided bythe LCD.  Here we check to see if
612         // that's an integer, and if not, how close it is to being
613         // an integer.
614         tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple;
615 
616 
617         // normalize the result of the above calculation: we want
618         // the numerator's distance from the CLOSEST multiple
619         // of the LCD
620         if (leastCommonMultiple - tempDifference < tempDifference) {
621             tempDifference = leastCommonMultiple - tempDifference;
622         }
623 
624         // if this is as close as we've come, keep track of how close
625         // that is, and the line number of the rule that did it.  If
626         // we've scored a direct hit, we don't have to look at any more
627         // rules
628         if (tempDifference < difference) {
629             difference = tempDifference;
630             winner = i;
631             if (difference == 0) {
632                 break;
633             }
634         }
635     }
636 
637     // if we have two successive rules that both have the winning base
638     // value, then the first one (the one we found above) is used if
639     // the numerator of the fraction is 1 and the second one is used if
640     // the numerator of the fraction is anything else (this lets us
641     // do things like "one third"/"two thirds" without having to define
642     // a whole bunch of extra rule sets)
643     if (static_cast<unsigned>(winner + 1) < rules.size() &&
644         rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) {
645         double n = static_cast<double>(rules[winner]->getBaseValue()) * number;
646         if (n < 0.5 || n >= 2) {
647             ++winner;
648         }
649     }
650 
651     // finally, return the winning rule
652     return rules[winner];
653 }
654 
655 /**
656  * Parses a string.  Matches the string to be parsed against each
657  * of its rules (with a base value less than upperBound) and returns
658  * the value produced by the rule that matched the most characters
659  * in the source string.
660  * @param text The string to parse
661  * @param parsePosition The initial position is ignored and assumed
662  * to be 0.  On exit, this object has been updated to point to the
663  * first character position this rule set didn't consume.
664  * @param upperBound Limits the rules that can be allowed to match.
665  * Only rules whose base values are strictly less than upperBound
666  * are considered.
667  * @return The numerical result of parsing this string.  This will
668  * be the matching rule's base value, composed appropriately with
669  * the results of matching any of its substitutions.  The object
670  * will be an instance of Long if it's an integral value; otherwise,
671  * it will be an instance of Double.  This function always returns
672  * a valid object: If nothing matched the input string at all,
673  * this function returns new Long(0), and the parse position is
674  * left unchanged.
675  */
676 #ifdef RBNF_DEBUG
677 #include <stdio.h>
678 
dumpUS(FILE * f,const UnicodeString & us)679 static void dumpUS(FILE* f, const UnicodeString& us) {
680   int len = us.length();
681   char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1];
682   if (buf != nullptr) {
683 	  us.extract(0, len, buf);
684 	  buf[len] = 0;
685 	  fprintf(f, "%s", buf);
686 	  uprv_free(buf); //delete[] buf;
687   }
688 }
689 #endif
690 
691 UBool
parse(const UnicodeString & text,ParsePosition & pos,double upperBound,uint32_t nonNumericalExecutedRuleMask,int32_t recursionCount,Formattable & result) const692 NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, uint32_t nonNumericalExecutedRuleMask, int32_t recursionCount, Formattable& result) const
693 {
694     // try matching each rule in the rule set against the text being
695     // parsed.  Whichever one matches the most characters is the one
696     // that determines the value we return.
697 
698     result.setLong(0);
699 
700     // dump out if we've reached the recursion limit
701     if (recursionCount >= RECURSION_LIMIT) {
702         // stop recursion
703         return false;
704     }
705 
706     // dump out if there's no text to parse
707     if (text.length() == 0) {
708         return 0;
709     }
710 
711     ParsePosition highWaterMark;
712     ParsePosition workingPos = pos;
713 
714 #ifdef RBNF_DEBUG
715     fprintf(stderr, "<nfrs> %x '", this);
716     dumpUS(stderr, name);
717     fprintf(stderr, "' text '");
718     dumpUS(stderr, text);
719     fprintf(stderr, "'\n");
720     fprintf(stderr, "  parse negative: %d\n", this, negativeNumberRule != 0);
721 #endif
722     // Try each of the negative rules, fraction rules, infinity rules and NaN rules
723     for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
724         if (nonNumericalRules[i] && ((nonNumericalExecutedRuleMask >> i) & 1) == 0) {
725             // Mark this rule as being executed so that we don't try to execute it again.
726             nonNumericalExecutedRuleMask |= 1 << i;
727 
728             Formattable tempResult;
729             UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, nonNumericalExecutedRuleMask, recursionCount + 1, tempResult);
730             if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
731                 result = tempResult;
732                 highWaterMark = workingPos;
733             }
734             workingPos = pos;
735         }
736     }
737 #ifdef RBNF_DEBUG
738     fprintf(stderr, "<nfrs> continue other with text '");
739     dumpUS(stderr, text);
740     fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
741 #endif
742 
743     // finally, go through the regular rules one at a time.  We start
744     // at the end of the list because we want to try matching the most
745     // sigificant rule first (this helps ensure that we parse
746     // "five thousand three hundred six" as
747     // "(five thousand) (three hundred) (six)" rather than
748     // "((five thousand three) hundred) (six)").  Skip rules whose
749     // base values are higher than the upper bound (again, this helps
750     // limit ambiguity by making sure the rules that match a rule's
751     // are less significant than the rule containing the substitutions)/
752     {
753         int64_t ub = util64_fromDouble(upperBound);
754 #ifdef RBNF_DEBUG
755         {
756             char ubstr[64];
757             util64_toa(ub, ubstr, 64);
758             char ubstrhex[64];
759             util64_toa(ub, ubstrhex, 64, 16);
760             fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex);
761         }
762 #endif
763         for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
764             if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
765                 continue;
766             }
767             Formattable tempResult;
768             UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, nonNumericalExecutedRuleMask, recursionCount + 1, tempResult);
769             if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
770                 result = tempResult;
771                 highWaterMark = workingPos;
772             }
773             workingPos = pos;
774         }
775     }
776 #ifdef RBNF_DEBUG
777     fprintf(stderr, "<nfrs> exit\n");
778 #endif
779     // finally, update the parse position we were passed to point to the
780     // first character we didn't use, and return the result that
781     // corresponds to that string of characters
782     pos = highWaterMark;
783 
784     return 1;
785 }
786 
787 void
appendRules(UnicodeString & result) const788 NFRuleSet::appendRules(UnicodeString& result) const
789 {
790     uint32_t i;
791 
792     // the rule set name goes first...
793     result.append(name);
794     result.append(gColon);
795     result.append(gLineFeed);
796 
797     // followed by the regular rules...
798     for (i = 0; i < rules.size(); i++) {
799         rules[i]->_appendRuleText(result);
800         result.append(gLineFeed);
801     }
802 
803     // followed by the special rules (if they exist)
804     for (i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
805         NFRule *rule = nonNumericalRules[i];
806         if (nonNumericalRules[i]) {
807             if (rule->getBaseValue() == NFRule::kImproperFractionRule
808                 || rule->getBaseValue() == NFRule::kProperFractionRule
809                 || rule->getBaseValue() == NFRule::kDefaultRule)
810             {
811                 for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
812                     NFRule *fractionRule = fractionRules[fIdx];
813                     if (fractionRule->getBaseValue() == rule->getBaseValue()) {
814                         fractionRule->_appendRuleText(result);
815                         result.append(gLineFeed);
816                     }
817                 }
818             }
819             else {
820                 rule->_appendRuleText(result);
821                 result.append(gLineFeed);
822             }
823         }
824     }
825 }
826 
827 // utility functions
828 
util64_fromDouble(double d)829 int64_t util64_fromDouble(double d) {
830     int64_t result = 0;
831     if (!uprv_isNaN(d)) {
832         double mant = uprv_maxMantissa();
833         if (d < -mant) {
834             d = -mant;
835         } else if (d > mant) {
836             d = mant;
837         }
838         UBool neg = d < 0;
839         if (neg) {
840             d = -d;
841         }
842         result = static_cast<int64_t>(uprv_floor(d));
843         if (neg) {
844             result = -result;
845         }
846     }
847     return result;
848 }
849 
util64_pow(uint32_t base,uint16_t exponent)850 uint64_t util64_pow(uint32_t base, uint16_t exponent)  {
851     if (base == 0) {
852         return 0;
853     }
854     uint64_t result = 1;
855     uint64_t pow = base;
856     while (true) {
857         if ((exponent & 1) == 1) {
858             result *= pow;
859         }
860         exponent >>= 1;
861         if (exponent == 0) {
862             break;
863         }
864         pow *= pow;
865     }
866     return result;
867 }
868 
869 static const uint8_t asciiDigits[] = {
870     0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u,
871     0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u,
872     0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu,
873     0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u,
874     0x77u, 0x78u, 0x79u, 0x7au,
875 };
876 
877 static const char16_t kUMinus = static_cast<char16_t>(0x002d);
878 
879 #ifdef RBNF_DEBUG
880 static const char kMinus = '-';
881 
882 static const uint8_t digitInfo[] = {
883         0,     0,     0,     0,     0,     0,     0,     0,
884         0,     0,     0,     0,     0,     0,     0,     0,
885         0,     0,     0,     0,     0,     0,     0,     0,
886         0,     0,     0,     0,     0,     0,     0,     0,
887         0,     0,     0,     0,     0,     0,     0,     0,
888         0,     0,     0,     0,     0,     0,     0,     0,
889     0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u,
890     0x88u, 0x89u,     0,     0,     0,     0,     0,     0,
891         0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
892     0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
893     0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
894     0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
895         0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
896     0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
897     0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
898     0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
899 };
900 
util64_atoi(const char * str,uint32_t radix)901 int64_t util64_atoi(const char* str, uint32_t radix)
902 {
903     if (radix > 36) {
904         radix = 36;
905     } else if (radix < 2) {
906         radix = 2;
907     }
908     int64_t lradix = radix;
909 
910     int neg = 0;
911     if (*str == kMinus) {
912         ++str;
913         neg = 1;
914     }
915     int64_t result = 0;
916     uint8_t b;
917     while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) {
918         result *= lradix;
919         result += (int32_t)b;
920     }
921     if (neg) {
922         result = -result;
923     }
924     return result;
925 }
926 
util64_utoi(const char16_t * str,uint32_t radix)927 int64_t util64_utoi(const char16_t* str, uint32_t radix)
928 {
929     if (radix > 36) {
930         radix = 36;
931     } else if (radix < 2) {
932         radix = 2;
933     }
934     int64_t lradix = radix;
935 
936     int neg = 0;
937     if (*str == kUMinus) {
938         ++str;
939         neg = 1;
940     }
941     int64_t result = 0;
942     char16_t c;
943     uint8_t b;
944     while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) {
945         result *= lradix;
946         result += (int32_t)b;
947     }
948     if (neg) {
949         result = -result;
950     }
951     return result;
952 }
953 
util64_toa(int64_t w,char * buf,uint32_t len,uint32_t radix,UBool raw)954 uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw)
955 {
956     if (radix > 36) {
957         radix = 36;
958     } else if (radix < 2) {
959         radix = 2;
960     }
961     int64_t base = radix;
962 
963     char* p = buf;
964     if (len && (w < 0) && (radix == 10) && !raw) {
965         w = -w;
966         *p++ = kMinus;
967         --len;
968     } else if (len && (w == 0)) {
969         *p++ = (char)raw ? 0 : asciiDigits[0];
970         --len;
971     }
972 
973     while (len && w != 0) {
974         int64_t n = w / base;
975         int64_t m = n * base;
976         int32_t d = (int32_t)(w-m);
977         *p++ = raw ? (char)d : asciiDigits[d];
978         w = n;
979         --len;
980     }
981     if (len) {
982         *p = 0; // null terminate if room for caller convenience
983     }
984 
985     len = p - buf;
986     if (*buf == kMinus) {
987         ++buf;
988     }
989     while (--p > buf) {
990         char c = *p;
991         *p = *buf;
992         *buf = c;
993         ++buf;
994     }
995 
996     return len;
997 }
998 #endif
999 
util64_tou(int64_t w,char16_t * buf,uint32_t len,uint32_t radix,UBool raw)1000 uint32_t util64_tou(int64_t w, char16_t* buf, uint32_t len, uint32_t radix, UBool raw)
1001 {
1002     if (radix > 36) {
1003         radix = 36;
1004     } else if (radix < 2) {
1005         radix = 2;
1006     }
1007     int64_t base = radix;
1008 
1009     char16_t* p = buf;
1010     if (len && (w < 0) && (radix == 10) && !raw) {
1011         w = -w;
1012         *p++ = kUMinus;
1013         --len;
1014     } else if (len && (w == 0)) {
1015         *p++ = static_cast<char16_t>(raw) ? 0 : asciiDigits[0];
1016         --len;
1017     }
1018 
1019     while (len && (w != 0)) {
1020         int64_t n = w / base;
1021         int64_t m = n * base;
1022         int32_t d = static_cast<int32_t>(w - m);
1023         *p++ = static_cast<char16_t>(raw ? d : asciiDigits[d]);
1024         w = n;
1025         --len;
1026     }
1027     if (len) {
1028         *p = 0; // null terminate if room for caller convenience
1029     }
1030 
1031     len = static_cast<uint32_t>(p - buf);
1032     if (*buf == kUMinus) {
1033         ++buf;
1034     }
1035     while (--p > buf) {
1036         char16_t c = *p;
1037         *p = *buf;
1038         *buf = c;
1039         ++buf;
1040     }
1041 
1042     return len;
1043 }
1044 
1045 
1046 U_NAMESPACE_END
1047 
1048 /* U_HAVE_RBNF */
1049 #endif
1050