• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 ******************************************************************************
3 *   Copyright (C) 1997-2014, International Business Machines
4 *   Corporation and others.  All Rights Reserved.
5 ******************************************************************************
6 *   file name:  nfrs.cpp
7 *   encoding:   US-ASCII
8 *   tab size:   8 (not used)
9 *   indentation:4
10 *
11 * Modification history
12 * Date        Name      Comments
13 * 10/11/2001  Doug      Ported from ICU4J
14 */
15 
16 #include "nfrs.h"
17 
18 #if U_HAVE_RBNF
19 
20 #include "unicode/uchar.h"
21 #include "nfrule.h"
22 #include "nfrlist.h"
23 #include "patternprops.h"
24 
25 #ifdef RBNF_DEBUG
26 #include "cmemory.h"
27 #endif
28 
29 U_NAMESPACE_BEGIN
30 
31 #if 0
32 // euclid's algorithm works with doubles
33 // note, doubles only get us up to one quadrillion or so, which
34 // isn't as much range as we get with longs.  We probably still
35 // want either 64-bit math, or BigInteger.
36 
37 static int64_t
38 util_lcm(int64_t x, int64_t y)
39 {
40     x.abs();
41     y.abs();
42 
43     if (x == 0 || y == 0) {
44         return 0;
45     } else {
46         do {
47             if (x < y) {
48                 int64_t t = x; x = y; y = t;
49             }
50             x -= y * (x/y);
51         } while (x != 0);
52 
53         return y;
54     }
55 }
56 
57 #else
58 /**
59  * Calculates the least common multiple of x and y.
60  */
61 static int64_t
util_lcm(int64_t x,int64_t y)62 util_lcm(int64_t x, int64_t y)
63 {
64     // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
65     // vol. 2, 1st ed., pp. 298-299
66     int64_t x1 = x;
67     int64_t y1 = y;
68 
69     int p2 = 0;
70     while ((x1 & 1) == 0 && (y1 & 1) == 0) {
71         ++p2;
72         x1 >>= 1;
73         y1 >>= 1;
74     }
75 
76     int64_t t;
77     if ((x1 & 1) == 1) {
78         t = -y1;
79     } else {
80         t = x1;
81     }
82 
83     while (t != 0) {
84         while ((t & 1) == 0) {
85             t = t >> 1;
86         }
87         if (t > 0) {
88             x1 = t;
89         } else {
90             y1 = -t;
91         }
92         t = x1 - y1;
93     }
94 
95     int64_t gcd = x1 << p2;
96 
97     // x * y == gcd(x, y) * lcm(x, y)
98     return x / gcd * y;
99 }
100 #endif
101 
102 static const UChar gPercent = 0x0025;
103 static const UChar gColon = 0x003a;
104 static const UChar gSemicolon = 0x003b;
105 static const UChar gLineFeed = 0x000a;
106 
107 static const UChar gFourSpaces[] =
108 {
109     0x20, 0x20, 0x20, 0x20, 0
110 }; /* "    " */
111 static const UChar gPercentPercent[] =
112 {
113     0x25, 0x25, 0
114 }; /* "%%" */
115 
116 static const UChar gNoparse[] =
117 {
118     0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0
119 }; /* "@noparse" */
120 
NFRuleSet(UnicodeString * descriptions,int32_t index,UErrorCode & status)121 NFRuleSet::NFRuleSet(UnicodeString* descriptions, int32_t index, UErrorCode& status)
122   : name()
123   , rules(0)
124   , negativeNumberRule(NULL)
125   , fIsFractionRuleSet(FALSE)
126   , fIsPublic(FALSE)
127   , fIsParseable(TRUE)
128   , fRecursionCount(0)
129 {
130     for (int i = 0; i < 3; ++i) {
131         fractionRules[i] = NULL;
132     }
133 
134     if (U_FAILURE(status)) {
135         return;
136     }
137 
138     UnicodeString& description = descriptions[index]; // !!! make sure index is valid
139 
140     if (description.length() == 0) {
141         // throw new IllegalArgumentException("Empty rule set description");
142         status = U_PARSE_ERROR;
143         return;
144     }
145 
146     // if the description begins with a rule set name (the rule set
147     // name can be omitted in formatter descriptions that consist
148     // of only one rule set), copy it out into our "name" member
149     // and delete it from the description
150     if (description.charAt(0) == gPercent) {
151         int32_t pos = description.indexOf(gColon);
152         if (pos == -1) {
153             // throw new IllegalArgumentException("Rule set name doesn't end in colon");
154             status = U_PARSE_ERROR;
155         } else {
156             name.setTo(description, 0, pos);
157             while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) {
158             }
159             description.remove(0, pos);
160         }
161     } else {
162         name.setTo(UNICODE_STRING_SIMPLE("%default"));
163     }
164 
165     if (description.length() == 0) {
166         // throw new IllegalArgumentException("Empty rule set description");
167         status = U_PARSE_ERROR;
168     }
169 
170     fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0;
171 
172     if ( name.endsWith(gNoparse,8) ) {
173         fIsParseable = FALSE;
174         name.truncate(name.length()-8); // remove the @noparse from the name
175     }
176 
177     // all of the other members of NFRuleSet are initialized
178     // by parseRules()
179 }
180 
181 void
parseRules(UnicodeString & description,const RuleBasedNumberFormat * owner,UErrorCode & status)182 NFRuleSet::parseRules(UnicodeString& description, const RuleBasedNumberFormat* owner, UErrorCode& status)
183 {
184     // start by creating a Vector whose elements are Strings containing
185     // the descriptions of the rules (one rule per element).  The rules
186     // are separated by semicolons (there's no escape facility: ALL
187     // semicolons are rule delimiters)
188 
189     if (U_FAILURE(status)) {
190         return;
191     }
192 
193     // ensure we are starting with an empty rule list
194     rules.deleteAll();
195 
196     // dlf - the original code kept a separate description array for no reason,
197     // so I got rid of it.  The loop was too complex so I simplified it.
198 
199     UnicodeString currentDescription;
200     int32_t oldP = 0;
201     while (oldP < description.length()) {
202         int32_t p = description.indexOf(gSemicolon, oldP);
203         if (p == -1) {
204             p = description.length();
205         }
206         currentDescription.setTo(description, oldP, p - oldP);
207         NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
208         oldP = p + 1;
209     }
210 
211     // for rules that didn't specify a base value, their base values
212     // were initialized to 0.  Make another pass through the list and
213     // set all those rules' base values.  We also remove any special
214     // rules from the list and put them into their own member variables
215     int64_t defaultBaseValue = 0;
216 
217     // (this isn't a for loop because we might be deleting items from
218     // the vector-- we want to make sure we only increment i when
219     // we _didn't_ delete aything from the vector)
220     uint32_t i = 0;
221     while (i < rules.size()) {
222         NFRule* rule = rules[i];
223 
224         switch (rule->getType()) {
225             // if the rule's base value is 0, fill in a default
226             // base value (this will be 1 plus the preceding
227             // rule's base value for regular rule sets, and the
228             // same as the preceding rule's base value in fraction
229             // rule sets)
230         case NFRule::kNoBase:
231             rule->setBaseValue(defaultBaseValue, status);
232             if (!isFractionRuleSet()) {
233                 ++defaultBaseValue;
234             }
235             ++i;
236             break;
237 
238             // if it's the negative-number rule, copy it into its own
239             // data member and delete it from the list
240         case NFRule::kNegativeNumberRule:
241             if (negativeNumberRule) {
242                 delete negativeNumberRule;
243             }
244             negativeNumberRule = rules.remove(i);
245             break;
246 
247             // if it's the improper fraction rule, copy it into the
248             // correct element of fractionRules
249         case NFRule::kImproperFractionRule:
250             if (fractionRules[0]) {
251                 delete fractionRules[0];
252             }
253             fractionRules[0] = rules.remove(i);
254             break;
255 
256             // if it's the proper fraction rule, copy it into the
257             // correct element of fractionRules
258         case NFRule::kProperFractionRule:
259             if (fractionRules[1]) {
260                 delete fractionRules[1];
261             }
262             fractionRules[1] = rules.remove(i);
263             break;
264 
265             // if it's the master rule, copy it into the
266             // correct element of fractionRules
267         case NFRule::kMasterRule:
268             if (fractionRules[2]) {
269                 delete fractionRules[2];
270             }
271             fractionRules[2] = rules.remove(i);
272             break;
273 
274             // if it's a regular rule that already knows its base value,
275             // check to make sure the rules are in order, and update
276             // the default base value for the next rule
277         default:
278             if (rule->getBaseValue() < defaultBaseValue) {
279                 // throw new IllegalArgumentException("Rules are not in order");
280                 status = U_PARSE_ERROR;
281                 return;
282             }
283             defaultBaseValue = rule->getBaseValue();
284             if (!isFractionRuleSet()) {
285                 ++defaultBaseValue;
286             }
287             ++i;
288             break;
289         }
290     }
291 }
292 
~NFRuleSet()293 NFRuleSet::~NFRuleSet()
294 {
295     delete negativeNumberRule;
296     delete fractionRules[0];
297     delete fractionRules[1];
298     delete fractionRules[2];
299 }
300 
301 static UBool
util_equalRules(const NFRule * rule1,const NFRule * rule2)302 util_equalRules(const NFRule* rule1, const NFRule* rule2)
303 {
304     if (rule1) {
305         if (rule2) {
306             return *rule1 == *rule2;
307         }
308     } else if (!rule2) {
309         return TRUE;
310     }
311     return FALSE;
312 }
313 
314 UBool
operator ==(const NFRuleSet & rhs) const315 NFRuleSet::operator==(const NFRuleSet& rhs) const
316 {
317     if (rules.size() == rhs.rules.size() &&
318         fIsFractionRuleSet == rhs.fIsFractionRuleSet &&
319         name == rhs.name &&
320         util_equalRules(negativeNumberRule, rhs.negativeNumberRule) &&
321         util_equalRules(fractionRules[0], rhs.fractionRules[0]) &&
322         util_equalRules(fractionRules[1], rhs.fractionRules[1]) &&
323         util_equalRules(fractionRules[2], rhs.fractionRules[2])) {
324 
325         for (uint32_t i = 0; i < rules.size(); ++i) {
326             if (*rules[i] != *rhs.rules[i]) {
327                 return FALSE;
328             }
329         }
330         return TRUE;
331     }
332     return FALSE;
333 }
334 
335 #define RECURSION_LIMIT 50
336 
337 void
format(int64_t number,UnicodeString & toAppendTo,int32_t pos,UErrorCode & status) const338 NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos, UErrorCode& status) const
339 {
340     NFRule *rule = findNormalRule(number);
341     if (rule) { // else error, but can't report it
342         NFRuleSet* ncThis = (NFRuleSet*)this;
343         if (ncThis->fRecursionCount++ >= RECURSION_LIMIT) {
344             // stop recursion
345             ncThis->fRecursionCount = 0;
346         } else {
347             rule->doFormat(number, toAppendTo, pos, status);
348             ncThis->fRecursionCount--;
349         }
350     }
351 }
352 
353 void
format(double number,UnicodeString & toAppendTo,int32_t pos,UErrorCode & status) const354 NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos, UErrorCode& status) const
355 {
356     NFRule *rule = findDoubleRule(number);
357     if (rule) { // else error, but can't report it
358         NFRuleSet* ncThis = (NFRuleSet*)this;
359         if (ncThis->fRecursionCount++ >= RECURSION_LIMIT) {
360             // stop recursion
361             ncThis->fRecursionCount = 0;
362         } else {
363             rule->doFormat(number, toAppendTo, pos, status);
364             ncThis->fRecursionCount--;
365         }
366     }
367 }
368 
369 NFRule*
findDoubleRule(double number) const370 NFRuleSet::findDoubleRule(double number) const
371 {
372     // if this is a fraction rule set, use findFractionRuleSetRule()
373     if (isFractionRuleSet()) {
374         return findFractionRuleSetRule(number);
375     }
376 
377     // if the number is negative, return the negative number rule
378     // (if there isn't a negative-number rule, we pretend it's a
379     // positive number)
380     if (number < 0) {
381         if (negativeNumberRule) {
382             return  negativeNumberRule;
383         } else {
384             number = -number;
385         }
386     }
387 
388     // if the number isn't an integer, we use one of the fraction rules...
389     if (number != uprv_floor(number)) {
390         // if the number is between 0 and 1, return the proper
391         // fraction rule
392         if (number < 1 && fractionRules[1]) {
393             return fractionRules[1];
394         }
395         // otherwise, return the improper fraction rule
396         else if (fractionRules[0]) {
397             return fractionRules[0];
398         }
399     }
400 
401     // if there's a master rule, use it to format the number
402     if (fractionRules[2]) {
403         return fractionRules[2];
404     }
405 
406     // and if we haven't yet returned a rule, use findNormalRule()
407     // to find the applicable rule
408     int64_t r = util64_fromDouble(number + 0.5);
409     return findNormalRule(r);
410 }
411 
412 NFRule *
findNormalRule(int64_t number) const413 NFRuleSet::findNormalRule(int64_t number) const
414 {
415     // if this is a fraction rule set, use findFractionRuleSetRule()
416     // to find the rule (we should only go into this clause if the
417     // value is 0)
418     if (fIsFractionRuleSet) {
419         return findFractionRuleSetRule((double)number);
420     }
421 
422     // if the number is negative, return the negative-number rule
423     // (if there isn't one, pretend the number is positive)
424     if (number < 0) {
425         if (negativeNumberRule) {
426             return negativeNumberRule;
427         } else {
428             number = -number;
429         }
430     }
431 
432     // we have to repeat the preceding two checks, even though we
433     // do them in findRule(), because the version of format() that
434     // takes a long bypasses findRule() and goes straight to this
435     // function.  This function does skip the fraction rules since
436     // we know the value is an integer (it also skips the master
437     // rule, since it's considered a fraction rule.  Skipping the
438     // master rule in this function is also how we avoid infinite
439     // recursion)
440 
441     // {dlf} unfortunately this fails if there are no rules except
442     // special rules.  If there are no rules, use the master rule.
443 
444     // binary-search the rule list for the applicable rule
445     // (a rule is used for all values from its base value to
446     // the next rule's base value)
447     int32_t hi = rules.size();
448     if (hi > 0) {
449         int32_t lo = 0;
450 
451         while (lo < hi) {
452             int32_t mid = (lo + hi) / 2;
453             if (rules[mid]->getBaseValue() == number) {
454                 return rules[mid];
455             }
456             else if (rules[mid]->getBaseValue() > number) {
457                 hi = mid;
458             }
459             else {
460                 lo = mid + 1;
461             }
462         }
463         if (hi == 0) { // bad rule set, minimum base > 0
464             return NULL; // want to throw exception here
465         }
466 
467         NFRule *result = rules[hi - 1];
468 
469         // use shouldRollBack() to see whether we need to invoke the
470         // rollback rule (see shouldRollBack()'s documentation for
471         // an explanation of the rollback rule).  If we do, roll back
472         // one rule and return that one instead of the one we'd normally
473         // return
474         if (result->shouldRollBack((double)number)) {
475             if (hi == 1) { // bad rule set, no prior rule to rollback to from this base
476                 return NULL;
477             }
478             result = rules[hi - 2];
479         }
480         return result;
481     }
482     // else use the master rule
483     return fractionRules[2];
484 }
485 
486 /**
487  * If this rule is a fraction rule set, this function is used by
488  * findRule() to select the most appropriate rule for formatting
489  * the number.  Basically, the base value of each rule in the rule
490  * set is treated as the denominator of a fraction.  Whichever
491  * denominator can produce the fraction closest in value to the
492  * number passed in is the result.  If there's a tie, the earlier
493  * one in the list wins.  (If there are two rules in a row with the
494  * same base value, the first one is used when the numerator of the
495  * fraction would be 1, and the second rule is used the rest of the
496  * time.
497  * @param number The number being formatted (which will always be
498  * a number between 0 and 1)
499  * @return The rule to use to format this number
500  */
501 NFRule*
findFractionRuleSetRule(double number) const502 NFRuleSet::findFractionRuleSetRule(double number) const
503 {
504     // the obvious way to do this (multiply the value being formatted
505     // by each rule's base value until you get an integral result)
506     // doesn't work because of rounding error.  This method is more
507     // accurate
508 
509     // find the least common multiple of the rules' base values
510     // and multiply this by the number being formatted.  This is
511     // all the precision we need, and we can do all of the rest
512     // of the math using integer arithmetic
513     int64_t leastCommonMultiple = rules[0]->getBaseValue();
514     int64_t numerator;
515     {
516         for (uint32_t i = 1; i < rules.size(); ++i) {
517             leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue());
518         }
519         numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5);
520     }
521     // for each rule, do the following...
522     int64_t tempDifference;
523     int64_t difference = util64_fromDouble(uprv_maxMantissa());
524     int32_t winner = 0;
525     for (uint32_t i = 0; i < rules.size(); ++i) {
526         // "numerator" is the numerator of the fraction if the
527         // denominator is the LCD.  The numerator if the rule's
528         // base value is the denominator is "numerator" times the
529         // base value divided bythe LCD.  Here we check to see if
530         // that's an integer, and if not, how close it is to being
531         // an integer.
532         tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple;
533 
534 
535         // normalize the result of the above calculation: we want
536         // the numerator's distance from the CLOSEST multiple
537         // of the LCD
538         if (leastCommonMultiple - tempDifference < tempDifference) {
539             tempDifference = leastCommonMultiple - tempDifference;
540         }
541 
542         // if this is as close as we've come, keep track of how close
543         // that is, and the line number of the rule that did it.  If
544         // we've scored a direct hit, we don't have to look at any more
545         // rules
546         if (tempDifference < difference) {
547             difference = tempDifference;
548             winner = i;
549             if (difference == 0) {
550                 break;
551             }
552         }
553     }
554 
555     // if we have two successive rules that both have the winning base
556     // value, then the first one (the one we found above) is used if
557     // the numerator of the fraction is 1 and the second one is used if
558     // the numerator of the fraction is anything else (this lets us
559     // do things like "one third"/"two thirds" without haveing to define
560     // a whole bunch of extra rule sets)
561     if ((unsigned)(winner + 1) < rules.size() &&
562         rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) {
563         double n = ((double)rules[winner]->getBaseValue()) * number;
564         if (n < 0.5 || n >= 2) {
565             ++winner;
566         }
567     }
568 
569     // finally, return the winning rule
570     return rules[winner];
571 }
572 
573 /**
574  * Parses a string.  Matches the string to be parsed against each
575  * of its rules (with a base value less than upperBound) and returns
576  * the value produced by the rule that matched the most charcters
577  * in the source string.
578  * @param text The string to parse
579  * @param parsePosition The initial position is ignored and assumed
580  * to be 0.  On exit, this object has been updated to point to the
581  * first character position this rule set didn't consume.
582  * @param upperBound Limits the rules that can be allowed to match.
583  * Only rules whose base values are strictly less than upperBound
584  * are considered.
585  * @return The numerical result of parsing this string.  This will
586  * be the matching rule's base value, composed appropriately with
587  * the results of matching any of its substitutions.  The object
588  * will be an instance of Long if it's an integral value; otherwise,
589  * it will be an instance of Double.  This function always returns
590  * a valid object: If nothing matched the input string at all,
591  * this function returns new Long(0), and the parse position is
592  * left unchanged.
593  */
594 #ifdef RBNF_DEBUG
595 #include <stdio.h>
596 
dumpUS(FILE * f,const UnicodeString & us)597 static void dumpUS(FILE* f, const UnicodeString& us) {
598   int len = us.length();
599   char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1];
600   if (buf != NULL) {
601 	  us.extract(0, len, buf);
602 	  buf[len] = 0;
603 	  fprintf(f, "%s", buf);
604 	  uprv_free(buf); //delete[] buf;
605   }
606 }
607 #endif
608 
609 UBool
parse(const UnicodeString & text,ParsePosition & pos,double upperBound,Formattable & result) const610 NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const
611 {
612     // try matching each rule in the rule set against the text being
613     // parsed.  Whichever one matches the most characters is the one
614     // that determines the value we return.
615 
616     result.setLong(0);
617 
618     // dump out if there's no text to parse
619     if (text.length() == 0) {
620         return 0;
621     }
622 
623     ParsePosition highWaterMark;
624     ParsePosition workingPos = pos;
625 
626 #ifdef RBNF_DEBUG
627     fprintf(stderr, "<nfrs> %x '", this);
628     dumpUS(stderr, name);
629     fprintf(stderr, "' text '");
630     dumpUS(stderr, text);
631     fprintf(stderr, "'\n");
632     fprintf(stderr, "  parse negative: %d\n", this, negativeNumberRule != 0);
633 #endif
634 
635     // start by trying the negative number rule (if there is one)
636     if (negativeNumberRule) {
637         Formattable tempResult;
638 #ifdef RBNF_DEBUG
639         fprintf(stderr, "  <nfrs before negative> %x ub: %g\n", negativeNumberRule, upperBound);
640 #endif
641         UBool success = negativeNumberRule->doParse(text, workingPos, 0, upperBound, tempResult);
642 #ifdef RBNF_DEBUG
643         fprintf(stderr, "  <nfrs after negative> success: %d wpi: %d\n", success, workingPos.getIndex());
644 #endif
645         if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
646             result = tempResult;
647             highWaterMark = workingPos;
648         }
649         workingPos = pos;
650     }
651 #ifdef RBNF_DEBUG
652     fprintf(stderr, "<nfrs> continue fractional with text '");
653     dumpUS(stderr, text);
654     fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
655 #endif
656     // then try each of the fraction rules
657     {
658         for (int i = 0; i < 3; i++) {
659             if (fractionRules[i]) {
660                 Formattable tempResult;
661                 UBool success = fractionRules[i]->doParse(text, workingPos, 0, upperBound, tempResult);
662                 if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
663                     result = tempResult;
664                     highWaterMark = workingPos;
665                 }
666                 workingPos = pos;
667             }
668         }
669     }
670 #ifdef RBNF_DEBUG
671     fprintf(stderr, "<nfrs> continue other with text '");
672     dumpUS(stderr, text);
673     fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
674 #endif
675 
676     // finally, go through the regular rules one at a time.  We start
677     // at the end of the list because we want to try matching the most
678     // sigificant rule first (this helps ensure that we parse
679     // "five thousand three hundred six" as
680     // "(five thousand) (three hundred) (six)" rather than
681     // "((five thousand three) hundred) (six)").  Skip rules whose
682     // base values are higher than the upper bound (again, this helps
683     // limit ambiguity by making sure the rules that match a rule's
684     // are less significant than the rule containing the substitutions)/
685     {
686         int64_t ub = util64_fromDouble(upperBound);
687 #ifdef RBNF_DEBUG
688         {
689             char ubstr[64];
690             util64_toa(ub, ubstr, 64);
691             char ubstrhex[64];
692             util64_toa(ub, ubstrhex, 64, 16);
693             fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex);
694         }
695 #endif
696         for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
697             if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
698                 continue;
699             }
700             Formattable tempResult;
701             UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult);
702             if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
703                 result = tempResult;
704                 highWaterMark = workingPos;
705             }
706             workingPos = pos;
707         }
708     }
709 #ifdef RBNF_DEBUG
710     fprintf(stderr, "<nfrs> exit\n");
711 #endif
712     // finally, update the parse postion we were passed to point to the
713     // first character we didn't use, and return the result that
714     // corresponds to that string of characters
715     pos = highWaterMark;
716 
717     return 1;
718 }
719 
720 void
appendRules(UnicodeString & result) const721 NFRuleSet::appendRules(UnicodeString& result) const
722 {
723     // the rule set name goes first...
724     result.append(name);
725     result.append(gColon);
726     result.append(gLineFeed);
727 
728     // followed by the regular rules...
729     for (uint32_t i = 0; i < rules.size(); i++) {
730         result.append(gFourSpaces, 4);
731         rules[i]->_appendRuleText(result);
732         result.append(gLineFeed);
733     }
734 
735     // followed by the special rules (if they exist)
736     if (negativeNumberRule) {
737         result.append(gFourSpaces, 4);
738         negativeNumberRule->_appendRuleText(result);
739         result.append(gLineFeed);
740     }
741 
742     {
743         for (uint32_t i = 0; i < 3; ++i) {
744             if (fractionRules[i]) {
745                 result.append(gFourSpaces, 4);
746                 fractionRules[i]->_appendRuleText(result);
747                 result.append(gLineFeed);
748             }
749         }
750     }
751 }
752 
753 // utility functions
754 
util64_fromDouble(double d)755 int64_t util64_fromDouble(double d) {
756     int64_t result = 0;
757     if (!uprv_isNaN(d)) {
758         double mant = uprv_maxMantissa();
759         if (d < -mant) {
760             d = -mant;
761         } else if (d > mant) {
762             d = mant;
763         }
764         UBool neg = d < 0;
765         if (neg) {
766             d = -d;
767         }
768         result = (int64_t)uprv_floor(d);
769         if (neg) {
770             result = -result;
771         }
772     }
773     return result;
774 }
775 
util64_pow(int32_t r,uint32_t e)776 int64_t util64_pow(int32_t r, uint32_t e)  {
777     if (r == 0) {
778         return 0;
779     } else if (e == 0) {
780         return 1;
781     } else {
782         int64_t n = r;
783         while (--e > 0) {
784             n *= r;
785         }
786         return n;
787     }
788 }
789 
790 static const uint8_t asciiDigits[] = {
791     0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u,
792     0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u,
793     0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu,
794     0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u,
795     0x77u, 0x78u, 0x79u, 0x7au,
796 };
797 
798 static const UChar kUMinus = (UChar)0x002d;
799 
800 #ifdef RBNF_DEBUG
801 static const char kMinus = '-';
802 
803 static const uint8_t digitInfo[] = {
804         0,     0,     0,     0,     0,     0,     0,     0,
805         0,     0,     0,     0,     0,     0,     0,     0,
806         0,     0,     0,     0,     0,     0,     0,     0,
807         0,     0,     0,     0,     0,     0,     0,     0,
808         0,     0,     0,     0,     0,     0,     0,     0,
809         0,     0,     0,     0,     0,     0,     0,     0,
810     0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u,
811     0x88u, 0x89u,     0,     0,     0,     0,     0,     0,
812         0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
813     0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
814     0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
815     0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
816         0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
817     0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
818     0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
819     0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
820 };
821 
util64_atoi(const char * str,uint32_t radix)822 int64_t util64_atoi(const char* str, uint32_t radix)
823 {
824     if (radix > 36) {
825         radix = 36;
826     } else if (radix < 2) {
827         radix = 2;
828     }
829     int64_t lradix = radix;
830 
831     int neg = 0;
832     if (*str == kMinus) {
833         ++str;
834         neg = 1;
835     }
836     int64_t result = 0;
837     uint8_t b;
838     while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) {
839         result *= lradix;
840         result += (int32_t)b;
841     }
842     if (neg) {
843         result = -result;
844     }
845     return result;
846 }
847 
util64_utoi(const UChar * str,uint32_t radix)848 int64_t util64_utoi(const UChar* str, uint32_t radix)
849 {
850     if (radix > 36) {
851         radix = 36;
852     } else if (radix < 2) {
853         radix = 2;
854     }
855     int64_t lradix = radix;
856 
857     int neg = 0;
858     if (*str == kUMinus) {
859         ++str;
860         neg = 1;
861     }
862     int64_t result = 0;
863     UChar c;
864     uint8_t b;
865     while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) {
866         result *= lradix;
867         result += (int32_t)b;
868     }
869     if (neg) {
870         result = -result;
871     }
872     return result;
873 }
874 
util64_toa(int64_t w,char * buf,uint32_t len,uint32_t radix,UBool raw)875 uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw)
876 {
877     if (radix > 36) {
878         radix = 36;
879     } else if (radix < 2) {
880         radix = 2;
881     }
882     int64_t base = radix;
883 
884     char* p = buf;
885     if (len && (w < 0) && (radix == 10) && !raw) {
886         w = -w;
887         *p++ = kMinus;
888         --len;
889     } else if (len && (w == 0)) {
890         *p++ = (char)raw ? 0 : asciiDigits[0];
891         --len;
892     }
893 
894     while (len && w != 0) {
895         int64_t n = w / base;
896         int64_t m = n * base;
897         int32_t d = (int32_t)(w-m);
898         *p++ = raw ? (char)d : asciiDigits[d];
899         w = n;
900         --len;
901     }
902     if (len) {
903         *p = 0; // null terminate if room for caller convenience
904     }
905 
906     len = p - buf;
907     if (*buf == kMinus) {
908         ++buf;
909     }
910     while (--p > buf) {
911         char c = *p;
912         *p = *buf;
913         *buf = c;
914         ++buf;
915     }
916 
917     return len;
918 }
919 #endif
920 
util64_tou(int64_t w,UChar * buf,uint32_t len,uint32_t radix,UBool raw)921 uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool raw)
922 {
923     if (radix > 36) {
924         radix = 36;
925     } else if (radix < 2) {
926         radix = 2;
927     }
928     int64_t base = radix;
929 
930     UChar* p = buf;
931     if (len && (w < 0) && (radix == 10) && !raw) {
932         w = -w;
933         *p++ = kUMinus;
934         --len;
935     } else if (len && (w == 0)) {
936         *p++ = (UChar)raw ? 0 : asciiDigits[0];
937         --len;
938     }
939 
940     while (len && (w != 0)) {
941         int64_t n = w / base;
942         int64_t m = n * base;
943         int32_t d = (int32_t)(w-m);
944         *p++ = (UChar)(raw ? d : asciiDigits[d]);
945         w = n;
946         --len;
947     }
948     if (len) {
949         *p = 0; // null terminate if room for caller convenience
950     }
951 
952     len = (uint32_t)(p - buf);
953     if (*buf == kUMinus) {
954         ++buf;
955     }
956     while (--p > buf) {
957         UChar c = *p;
958         *p = *buf;
959         *buf = c;
960         ++buf;
961     }
962 
963     return len;
964 }
965 
966 
967 U_NAMESPACE_END
968 
969 /* U_HAVE_RBNF */
970 #endif
971 
972