1 /*
2 ******************************************************************************
3 * Copyright (C) 1997-2012, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 ******************************************************************************
6 * file name: nfrs.cpp
7 * encoding: US-ASCII
8 * tab size: 8 (not used)
9 * indentation:4
10 *
11 * Modification history
12 * Date Name Comments
13 * 10/11/2001 Doug Ported from ICU4J
14 */
15
16 #include "nfrs.h"
17
18 #if U_HAVE_RBNF
19
20 #include "unicode/uchar.h"
21 #include "nfrule.h"
22 #include "nfrlist.h"
23 #include "patternprops.h"
24
25 #ifdef RBNF_DEBUG
26 #include "cmemory.h"
27 #endif
28
29 U_NAMESPACE_BEGIN
30
/**
 * Calculates the least common multiple of x and y.
 *
 * The greatest common divisor is found with the binary gcd algorithm
 * from Knuth, "The Art of Computer Programming," vol. 2, 1st ed.,
 * pp. 298-299, and the result follows from
 * x * y == gcd(x, y) * lcm(x, y).
 *
 * @param x first operand
 * @param y second operand
 * @return the least common multiple of |x| and |y|, or 0 if either
 *         operand is 0.  The result is not checked for overflow.
 */
static int64_t
util_lcm(int64_t x, int64_t y)
{
    // The gcd loop below never terminates for (0, 0) and produces a
    // zero divisor when only one operand is 0, so settle that first.
    if (x == 0 || y == 0) {
        return 0;
    }

    // the algorithm is only defined for positive operands
    if (x < 0) {
        x = -x;
    }
    if (y < 0) {
        y = -y;
    }

    int64_t x1 = x;
    int64_t y1 = y;

    // strip (and count) the factors of two shared by both operands
    int p2 = 0;
    while ((x1 & 1) == 0 && (y1 & 1) == 0) {
        ++p2;
        x1 >>= 1;
        y1 >>= 1;
    }

    int64_t t;
    if ((x1 & 1) == 1) {
        t = -y1;
    } else {
        t = x1;
    }

    while (t != 0) {
        // t is even here, so halving by division is exact and avoids
        // right-shifting a negative value
        while ((t & 1) == 0) {
            t /= 2;
        }
        if (t > 0) {
            x1 = t;
        } else {
            y1 = -t;
        }
        t = x1 - y1;
    }

    int64_t gcd = x1 << p2;

    // x * y == gcd(x, y) * lcm(x, y)
    return x / gcd * y;
}
101
102 static const UChar gPercent = 0x0025;
103 static const UChar gColon = 0x003a;
104 static const UChar gSemicolon = 0x003b;
105 static const UChar gLineFeed = 0x000a;
106
107 static const UChar gFourSpaces[] =
108 {
109 0x20, 0x20, 0x20, 0x20, 0
110 }; /* " " */
111 static const UChar gPercentPercent[] =
112 {
113 0x25, 0x25, 0
114 }; /* "%%" */
115
116 static const UChar gNoparse[] =
117 {
118 0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0
119 }; /* "@noparse" */
120
NFRuleSet(UnicodeString * descriptions,int32_t index,UErrorCode & status)121 NFRuleSet::NFRuleSet(UnicodeString* descriptions, int32_t index, UErrorCode& status)
122 : name()
123 , rules(0)
124 , negativeNumberRule(NULL)
125 , fIsFractionRuleSet(FALSE)
126 , fIsPublic(FALSE)
127 , fIsParseable(TRUE)
128 , fRecursionCount(0)
129 {
130 for (int i = 0; i < 3; ++i) {
131 fractionRules[i] = NULL;
132 }
133
134 if (U_FAILURE(status)) {
135 return;
136 }
137
138 UnicodeString& description = descriptions[index]; // !!! make sure index is valid
139
140 if (description.length() == 0) {
141 // throw new IllegalArgumentException("Empty rule set description");
142 status = U_PARSE_ERROR;
143 return;
144 }
145
146 // if the description begins with a rule set name (the rule set
147 // name can be omitted in formatter descriptions that consist
148 // of only one rule set), copy it out into our "name" member
149 // and delete it from the description
150 if (description.charAt(0) == gPercent) {
151 int32_t pos = description.indexOf(gColon);
152 if (pos == -1) {
153 // throw new IllegalArgumentException("Rule set name doesn't end in colon");
154 status = U_PARSE_ERROR;
155 } else {
156 name.setTo(description, 0, pos);
157 while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) {
158 }
159 description.remove(0, pos);
160 }
161 } else {
162 name.setTo(UNICODE_STRING_SIMPLE("%default"));
163 }
164
165 if (description.length() == 0) {
166 // throw new IllegalArgumentException("Empty rule set description");
167 status = U_PARSE_ERROR;
168 }
169
170 fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0;
171
172 if ( name.endsWith(gNoparse,8) ) {
173 fIsParseable = FALSE;
174 name.truncate(name.length()-8); // remove the @noparse from the name
175 }
176
177 // all of the other members of NFRuleSet are initialized
178 // by parseRules()
179 }
180
181 void
parseRules(UnicodeString & description,const RuleBasedNumberFormat * owner,UErrorCode & status)182 NFRuleSet::parseRules(UnicodeString& description, const RuleBasedNumberFormat* owner, UErrorCode& status)
183 {
184 // start by creating a Vector whose elements are Strings containing
185 // the descriptions of the rules (one rule per element). The rules
186 // are separated by semicolons (there's no escape facility: ALL
187 // semicolons are rule delimiters)
188
189 if (U_FAILURE(status)) {
190 return;
191 }
192
193 // ensure we are starting with an empty rule list
194 rules.deleteAll();
195
196 // dlf - the original code kept a separate description array for no reason,
197 // so I got rid of it. The loop was too complex so I simplified it.
198
199 UnicodeString currentDescription;
200 int32_t oldP = 0;
201 while (oldP < description.length()) {
202 int32_t p = description.indexOf(gSemicolon, oldP);
203 if (p == -1) {
204 p = description.length();
205 }
206 currentDescription.setTo(description, oldP, p - oldP);
207 NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
208 oldP = p + 1;
209 }
210
211 // for rules that didn't specify a base value, their base values
212 // were initialized to 0. Make another pass through the list and
213 // set all those rules' base values. We also remove any special
214 // rules from the list and put them into their own member variables
215 int64_t defaultBaseValue = 0;
216
217 // (this isn't a for loop because we might be deleting items from
218 // the vector-- we want to make sure we only increment i when
219 // we _didn't_ delete aything from the vector)
220 uint32_t i = 0;
221 while (i < rules.size()) {
222 NFRule* rule = rules[i];
223
224 switch (rule->getType()) {
225 // if the rule's base value is 0, fill in a default
226 // base value (this will be 1 plus the preceding
227 // rule's base value for regular rule sets, and the
228 // same as the preceding rule's base value in fraction
229 // rule sets)
230 case NFRule::kNoBase:
231 rule->setBaseValue(defaultBaseValue, status);
232 if (!isFractionRuleSet()) {
233 ++defaultBaseValue;
234 }
235 ++i;
236 break;
237
238 // if it's the negative-number rule, copy it into its own
239 // data member and delete it from the list
240 case NFRule::kNegativeNumberRule:
241 if (negativeNumberRule) {
242 delete negativeNumberRule;
243 }
244 negativeNumberRule = rules.remove(i);
245 break;
246
247 // if it's the improper fraction rule, copy it into the
248 // correct element of fractionRules
249 case NFRule::kImproperFractionRule:
250 if (fractionRules[0]) {
251 delete fractionRules[0];
252 }
253 fractionRules[0] = rules.remove(i);
254 break;
255
256 // if it's the proper fraction rule, copy it into the
257 // correct element of fractionRules
258 case NFRule::kProperFractionRule:
259 if (fractionRules[1]) {
260 delete fractionRules[1];
261 }
262 fractionRules[1] = rules.remove(i);
263 break;
264
265 // if it's the master rule, copy it into the
266 // correct element of fractionRules
267 case NFRule::kMasterRule:
268 if (fractionRules[2]) {
269 delete fractionRules[2];
270 }
271 fractionRules[2] = rules.remove(i);
272 break;
273
274 // if it's a regular rule that already knows its base value,
275 // check to make sure the rules are in order, and update
276 // the default base value for the next rule
277 default:
278 if (rule->getBaseValue() < defaultBaseValue) {
279 // throw new IllegalArgumentException("Rules are not in order");
280 status = U_PARSE_ERROR;
281 return;
282 }
283 defaultBaseValue = rule->getBaseValue();
284 if (!isFractionRuleSet()) {
285 ++defaultBaseValue;
286 }
287 ++i;
288 break;
289 }
290 }
291 }
292
~NFRuleSet()293 NFRuleSet::~NFRuleSet()
294 {
295 delete negativeNumberRule;
296 delete fractionRules[0];
297 delete fractionRules[1];
298 delete fractionRules[2];
299 }
300
301 static UBool
util_equalRules(const NFRule * rule1,const NFRule * rule2)302 util_equalRules(const NFRule* rule1, const NFRule* rule2)
303 {
304 if (rule1) {
305 if (rule2) {
306 return *rule1 == *rule2;
307 }
308 } else if (!rule2) {
309 return TRUE;
310 }
311 return FALSE;
312 }
313
314 UBool
operator ==(const NFRuleSet & rhs) const315 NFRuleSet::operator==(const NFRuleSet& rhs) const
316 {
317 if (rules.size() == rhs.rules.size() &&
318 fIsFractionRuleSet == rhs.fIsFractionRuleSet &&
319 name == rhs.name &&
320 util_equalRules(negativeNumberRule, rhs.negativeNumberRule) &&
321 util_equalRules(fractionRules[0], rhs.fractionRules[0]) &&
322 util_equalRules(fractionRules[1], rhs.fractionRules[1]) &&
323 util_equalRules(fractionRules[2], rhs.fractionRules[2])) {
324
325 for (uint32_t i = 0; i < rules.size(); ++i) {
326 if (*rules[i] != *rhs.rules[i]) {
327 return FALSE;
328 }
329 }
330 return TRUE;
331 }
332 return FALSE;
333 }
334
335 #define RECURSION_LIMIT 50
336
337 void
format(int64_t number,UnicodeString & toAppendTo,int32_t pos) const338 NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos) const
339 {
340 NFRule *rule = findNormalRule(number);
341 if (rule) { // else error, but can't report it
342 NFRuleSet* ncThis = (NFRuleSet*)this;
343 if (ncThis->fRecursionCount++ >= RECURSION_LIMIT) {
344 // stop recursion
345 ncThis->fRecursionCount = 0;
346 } else {
347 rule->doFormat(number, toAppendTo, pos);
348 ncThis->fRecursionCount--;
349 }
350 }
351 }
352
353 void
format(double number,UnicodeString & toAppendTo,int32_t pos) const354 NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos) const
355 {
356 NFRule *rule = findDoubleRule(number);
357 if (rule) { // else error, but can't report it
358 NFRuleSet* ncThis = (NFRuleSet*)this;
359 if (ncThis->fRecursionCount++ >= RECURSION_LIMIT) {
360 // stop recursion
361 ncThis->fRecursionCount = 0;
362 } else {
363 rule->doFormat(number, toAppendTo, pos);
364 ncThis->fRecursionCount--;
365 }
366 }
367 }
368
369 NFRule*
findDoubleRule(double number) const370 NFRuleSet::findDoubleRule(double number) const
371 {
372 // if this is a fraction rule set, use findFractionRuleSetRule()
373 if (isFractionRuleSet()) {
374 return findFractionRuleSetRule(number);
375 }
376
377 // if the number is negative, return the negative number rule
378 // (if there isn't a negative-number rule, we pretend it's a
379 // positive number)
380 if (number < 0) {
381 if (negativeNumberRule) {
382 return negativeNumberRule;
383 } else {
384 number = -number;
385 }
386 }
387
388 // if the number isn't an integer, we use one of the fraction rules...
389 if (number != uprv_floor(number)) {
390 // if the number is between 0 and 1, return the proper
391 // fraction rule
392 if (number < 1 && fractionRules[1]) {
393 return fractionRules[1];
394 }
395 // otherwise, return the improper fraction rule
396 else if (fractionRules[0]) {
397 return fractionRules[0];
398 }
399 }
400
401 // if there's a master rule, use it to format the number
402 if (fractionRules[2]) {
403 return fractionRules[2];
404 }
405
406 // and if we haven't yet returned a rule, use findNormalRule()
407 // to find the applicable rule
408 int64_t r = util64_fromDouble(number + 0.5);
409 return findNormalRule(r);
410 }
411
412 NFRule *
findNormalRule(int64_t number) const413 NFRuleSet::findNormalRule(int64_t number) const
414 {
415 // if this is a fraction rule set, use findFractionRuleSetRule()
416 // to find the rule (we should only go into this clause if the
417 // value is 0)
418 if (fIsFractionRuleSet) {
419 return findFractionRuleSetRule((double)number);
420 }
421
422 // if the number is negative, return the negative-number rule
423 // (if there isn't one, pretend the number is positive)
424 if (number < 0) {
425 if (negativeNumberRule) {
426 return negativeNumberRule;
427 } else {
428 number = -number;
429 }
430 }
431
432 // we have to repeat the preceding two checks, even though we
433 // do them in findRule(), because the version of format() that
434 // takes a long bypasses findRule() and goes straight to this
435 // function. This function does skip the fraction rules since
436 // we know the value is an integer (it also skips the master
437 // rule, since it's considered a fraction rule. Skipping the
438 // master rule in this function is also how we avoid infinite
439 // recursion)
440
441 // {dlf} unfortunately this fails if there are no rules except
442 // special rules. If there are no rules, use the master rule.
443
444 // binary-search the rule list for the applicable rule
445 // (a rule is used for all values from its base value to
446 // the next rule's base value)
447 int32_t hi = rules.size();
448 if (hi > 0) {
449 int32_t lo = 0;
450
451 while (lo < hi) {
452 int32_t mid = (lo + hi) / 2;
453 if (rules[mid]->getBaseValue() == number) {
454 return rules[mid];
455 }
456 else if (rules[mid]->getBaseValue() > number) {
457 hi = mid;
458 }
459 else {
460 lo = mid + 1;
461 }
462 }
463 if (hi == 0) { // bad rule set, minimum base > 0
464 return NULL; // want to throw exception here
465 }
466
467 NFRule *result = rules[hi - 1];
468
469 // use shouldRollBack() to see whether we need to invoke the
470 // rollback rule (see shouldRollBack()'s documentation for
471 // an explanation of the rollback rule). If we do, roll back
472 // one rule and return that one instead of the one we'd normally
473 // return
474 if (result->shouldRollBack((double)number)) {
475 if (hi == 1) { // bad rule set, no prior rule to rollback to from this base
476 return NULL;
477 }
478 result = rules[hi - 2];
479 }
480 return result;
481 }
482 // else use the master rule
483 return fractionRules[2];
484 }
485
486 /**
487 * If this rule is a fraction rule set, this function is used by
488 * findRule() to select the most appropriate rule for formatting
489 * the number. Basically, the base value of each rule in the rule
490 * set is treated as the denominator of a fraction. Whichever
491 * denominator can produce the fraction closest in value to the
492 * number passed in is the result. If there's a tie, the earlier
493 * one in the list wins. (If there are two rules in a row with the
494 * same base value, the first one is used when the numerator of the
495 * fraction would be 1, and the second rule is used the rest of the
496 * time.
497 * @param number The number being formatted (which will always be
498 * a number between 0 and 1)
499 * @return The rule to use to format this number
500 */
501 NFRule*
findFractionRuleSetRule(double number) const502 NFRuleSet::findFractionRuleSetRule(double number) const
503 {
504 // the obvious way to do this (multiply the value being formatted
505 // by each rule's base value until you get an integral result)
506 // doesn't work because of rounding error. This method is more
507 // accurate
508
509 // find the least common multiple of the rules' base values
510 // and multiply this by the number being formatted. This is
511 // all the precision we need, and we can do all of the rest
512 // of the math using integer arithmetic
513 int64_t leastCommonMultiple = rules[0]->getBaseValue();
514 int64_t numerator;
515 {
516 for (uint32_t i = 1; i < rules.size(); ++i) {
517 leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue());
518 }
519 numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5);
520 }
521 // for each rule, do the following...
522 int64_t tempDifference;
523 int64_t difference = util64_fromDouble(uprv_maxMantissa());
524 int32_t winner = 0;
525 for (uint32_t i = 0; i < rules.size(); ++i) {
526 // "numerator" is the numerator of the fraction if the
527 // denominator is the LCD. The numerator if the rule's
528 // base value is the denominator is "numerator" times the
529 // base value divided bythe LCD. Here we check to see if
530 // that's an integer, and if not, how close it is to being
531 // an integer.
532 tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple;
533
534
535 // normalize the result of the above calculation: we want
536 // the numerator's distance from the CLOSEST multiple
537 // of the LCD
538 if (leastCommonMultiple - tempDifference < tempDifference) {
539 tempDifference = leastCommonMultiple - tempDifference;
540 }
541
542 // if this is as close as we've come, keep track of how close
543 // that is, and the line number of the rule that did it. If
544 // we've scored a direct hit, we don't have to look at any more
545 // rules
546 if (tempDifference < difference) {
547 difference = tempDifference;
548 winner = i;
549 if (difference == 0) {
550 break;
551 }
552 }
553 }
554
555 // if we have two successive rules that both have the winning base
556 // value, then the first one (the one we found above) is used if
557 // the numerator of the fraction is 1 and the second one is used if
558 // the numerator of the fraction is anything else (this lets us
559 // do things like "one third"/"two thirds" without haveing to define
560 // a whole bunch of extra rule sets)
561 if ((unsigned)(winner + 1) < rules.size() &&
562 rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) {
563 double n = ((double)rules[winner]->getBaseValue()) * number;
564 if (n < 0.5 || n >= 2) {
565 ++winner;
566 }
567 }
568
569 // finally, return the winning rule
570 return rules[winner];
571 }
572
/**
 * Parses a string.  Matches the string to be parsed against each
 * of its rules (with a base value less than upperBound) and returns
 * the value produced by the rule that matched the most characters
 * in the source string.
 * @param text The string to parse
 * @param pos The initial position is ignored and assumed
 * to be 0.  On exit, this object has been updated to point to the
 * first character position this rule set didn't consume.
 * @param upperBound Limits the rules that can be allowed to match.
 * Only rules whose base values are strictly less than upperBound
 * are considered.
 * @param result Receives the numerical result of parsing this string.
 * This will be the matching rule's base value, composed appropriately
 * with the results of matching any of its substitutions.  The value
 * will be a long if it's an integral value; otherwise, it will be a
 * double.  A value is always stored: if nothing matched the input
 * string at all, result is set to a long value of 0, and the parse
 * position is left unchanged.
 * @return FALSE if text is empty; TRUE otherwise, even when no rule
 * matched.
 */
594 #ifdef RBNF_DEBUG
595 #include <stdio.h>
596
dumpUS(FILE * f,const UnicodeString & us)597 static void dumpUS(FILE* f, const UnicodeString& us) {
598 int len = us.length();
599 char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1];
600 if (buf != NULL) {
601 us.extract(0, len, buf);
602 buf[len] = 0;
603 fprintf(f, "%s", buf);
604 uprv_free(buf); //delete[] buf;
605 }
606 }
607 #endif
608
609 UBool
parse(const UnicodeString & text,ParsePosition & pos,double upperBound,Formattable & result) const610 NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const
611 {
612 // try matching each rule in the rule set against the text being
613 // parsed. Whichever one matches the most characters is the one
614 // that determines the value we return.
615
616 result.setLong(0);
617
618 // dump out if there's no text to parse
619 if (text.length() == 0) {
620 return 0;
621 }
622
623 ParsePosition highWaterMark;
624 ParsePosition workingPos = pos;
625
626 #ifdef RBNF_DEBUG
627 fprintf(stderr, "<nfrs> %x '", this);
628 dumpUS(stderr, name);
629 fprintf(stderr, "' text '");
630 dumpUS(stderr, text);
631 fprintf(stderr, "'\n");
632 fprintf(stderr, " parse negative: %d\n", this, negativeNumberRule != 0);
633 #endif
634
635 // start by trying the negative number rule (if there is one)
636 if (negativeNumberRule) {
637 Formattable tempResult;
638 #ifdef RBNF_DEBUG
639 fprintf(stderr, " <nfrs before negative> %x ub: %g\n", negativeNumberRule, upperBound);
640 #endif
641 UBool success = negativeNumberRule->doParse(text, workingPos, 0, upperBound, tempResult);
642 #ifdef RBNF_DEBUG
643 fprintf(stderr, " <nfrs after negative> success: %d wpi: %d\n", success, workingPos.getIndex());
644 #endif
645 if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
646 result = tempResult;
647 highWaterMark = workingPos;
648 }
649 workingPos = pos;
650 }
651 #ifdef RBNF_DEBUG
652 fprintf(stderr, "<nfrs> continue fractional with text '");
653 dumpUS(stderr, text);
654 fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
655 #endif
656 // then try each of the fraction rules
657 {
658 for (int i = 0; i < 3; i++) {
659 if (fractionRules[i]) {
660 Formattable tempResult;
661 UBool success = fractionRules[i]->doParse(text, workingPos, 0, upperBound, tempResult);
662 if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
663 result = tempResult;
664 highWaterMark = workingPos;
665 }
666 workingPos = pos;
667 }
668 }
669 }
670 #ifdef RBNF_DEBUG
671 fprintf(stderr, "<nfrs> continue other with text '");
672 dumpUS(stderr, text);
673 fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
674 #endif
675
676 // finally, go through the regular rules one at a time. We start
677 // at the end of the list because we want to try matching the most
678 // sigificant rule first (this helps ensure that we parse
679 // "five thousand three hundred six" as
680 // "(five thousand) (three hundred) (six)" rather than
681 // "((five thousand three) hundred) (six)"). Skip rules whose
682 // base values are higher than the upper bound (again, this helps
683 // limit ambiguity by making sure the rules that match a rule's
684 // are less significant than the rule containing the substitutions)/
685 {
686 int64_t ub = util64_fromDouble(upperBound);
687 #ifdef RBNF_DEBUG
688 {
689 char ubstr[64];
690 util64_toa(ub, ubstr, 64);
691 char ubstrhex[64];
692 util64_toa(ub, ubstrhex, 64, 16);
693 fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex);
694 }
695 #endif
696 for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
697 if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
698 continue;
699 }
700 Formattable tempResult;
701 UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult);
702 if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
703 result = tempResult;
704 highWaterMark = workingPos;
705 }
706 workingPos = pos;
707 }
708 }
709 #ifdef RBNF_DEBUG
710 fprintf(stderr, "<nfrs> exit\n");
711 #endif
712 // finally, update the parse postion we were passed to point to the
713 // first character we didn't use, and return the result that
714 // corresponds to that string of characters
715 pos = highWaterMark;
716
717 return 1;
718 }
719
720 void
appendRules(UnicodeString & result) const721 NFRuleSet::appendRules(UnicodeString& result) const
722 {
723 // the rule set name goes first...
724 result.append(name);
725 result.append(gColon);
726 result.append(gLineFeed);
727
728 // followed by the regular rules...
729 for (uint32_t i = 0; i < rules.size(); i++) {
730 result.append(gFourSpaces, 4);
731 rules[i]->_appendRuleText(result);
732 result.append(gLineFeed);
733 }
734
735 // followed by the special rules (if they exist)
736 if (negativeNumberRule) {
737 result.append(gFourSpaces, 4);
738 negativeNumberRule->_appendRuleText(result);
739 result.append(gLineFeed);
740 }
741
742 {
743 for (uint32_t i = 0; i < 3; ++i) {
744 if (fractionRules[i]) {
745 result.append(gFourSpaces, 4);
746 fractionRules[i]->_appendRuleText(result);
747 result.append(gLineFeed);
748 }
749 }
750 }
751 }
752
753 // utility functions
754
util64_fromDouble(double d)755 int64_t util64_fromDouble(double d) {
756 int64_t result = 0;
757 if (!uprv_isNaN(d)) {
758 double mant = uprv_maxMantissa();
759 if (d < -mant) {
760 d = -mant;
761 } else if (d > mant) {
762 d = mant;
763 }
764 UBool neg = d < 0;
765 if (neg) {
766 d = -d;
767 }
768 result = (int64_t)uprv_floor(d);
769 if (neg) {
770 result = -result;
771 }
772 }
773 return result;
774 }
775
/**
 * Raises r to the power e by repeated multiplication.
 *
 * @param r the base
 * @param e the exponent
 * @return r to the e-th power; 0 whenever r is 0 (including e == 0),
 *         and 1 when e is 0 and r is not.  Overflow is not detected.
 */
int64_t util64_pow(int32_t r, uint32_t e) {
    // a zero base wins over a zero exponent
    if (r == 0) {
        return 0;
    }

    int64_t product = 1;
    for (uint32_t k = 0; k < e; ++k) {
        product *= r;
    }
    return product;
}
789
// ASCII codes of the 36 digit characters, indexed by digit value.
// Numeric codes are used rather than character literals.
static const uint8_t asciiDigits[] = {
    // '0' - '9'
    0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u, 0x38u, 0x39u,
    // 'a' - 'm'
    0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u, 0x67u, 0x68u, 0x69u, 0x6au,
    0x6bu, 0x6cu, 0x6du,
    // 'n' - 'z'
    0x6eu, 0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u, 0x77u,
    0x78u, 0x79u, 0x7au
};
797
798 static const UChar kUMinus = (UChar)0x002d;
799
800 #ifdef RBNF_DEBUG
// minus sign recognized and produced by the char-based helpers
static const char kMinus = '-';

// Digit lookup for the 128 ASCII codes.  An entry of 0 means "not a
// digit"; otherwise bit 0x80 is set and the low seven bits hold the
// digit value (0-9 for '0'-'9', 10-35 for letters of either case).
// The flag bit lets the digit '0' be told apart from a non-digit.
static const uint8_t digitInfo[] = {
        0,     0,     0,     0,     0,     0,     0,     0,
        0,     0,     0,     0,     0,     0,     0,     0,
        0,     0,     0,     0,     0,     0,     0,     0,
        0,     0,     0,     0,     0,     0,     0,     0,
        0,     0,     0,     0,     0,     0,     0,     0,
        0,     0,     0,     0,     0,     0,     0,     0,
    0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u,
    0x88u, 0x89u,     0,     0,     0,     0,     0,     0,
        0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
    0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
    0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
    0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
        0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
    0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
    0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
    0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
};

/**
 * Parses an integer from a NUL-terminated char string.
 *
 * An optional leading '-' is accepted.  Parsing stops at the first
 * character that is not a digit of the given radix, including any
 * byte outside the ASCII range.  Overflow is not detected.
 *
 * @param str   the text to parse
 * @param radix the number base; values outside 2..36 are clamped
 * @return the parsed value, or 0 if the text has no leading digits
 */
int64_t util64_atoi(const char* str, uint32_t radix)
{
    if (radix > 36) {
        radix = 36;
    } else if (radix < 2) {
        radix = 2;
    }
    int64_t lradix = radix;

    int neg = 0;
    if (*str == kMinus) {
        ++str;
        neg = 1;
    }

    int64_t result = 0;
    for (;;) {
        // read the byte as unsigned and reject anything the table does
        // not cover; a plain (possibly signed) char must not be used as
        // an index
        const uint8_t c = (uint8_t)*str++;
        if (c >= sizeof(digitInfo)) {
            break;
        }
        uint8_t b = digitInfo[c];
        if (b == 0) {
            break; // not a digit (this also catches the terminating NUL)
        }
        b &= 0x7f;
        if (b >= radix) {
            break; // a digit, but not of this radix
        }
        result *= lradix;
        result += (int32_t)b;
    }

    if (neg) {
        result = -result;
    }
    return result;
}
847
util64_utoi(const UChar * str,uint32_t radix)848 int64_t util64_utoi(const UChar* str, uint32_t radix)
849 {
850 if (radix > 36) {
851 radix = 36;
852 } else if (radix < 2) {
853 radix = 2;
854 }
855 int64_t lradix = radix;
856
857 int neg = 0;
858 if (*str == kUMinus) {
859 ++str;
860 neg = 1;
861 }
862 int64_t result = 0;
863 UChar c;
864 uint8_t b;
865 while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) {
866 result *= lradix;
867 result += (int32_t)b;
868 }
869 if (neg) {
870 result = -result;
871 }
872 return result;
873 }
874
util64_toa(int64_t w,char * buf,uint32_t len,uint32_t radix,UBool raw)875 uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw)
876 {
877 if (radix > 36) {
878 radix = 36;
879 } else if (radix < 2) {
880 radix = 2;
881 }
882 int64_t base = radix;
883
884 char* p = buf;
885 if (len && (w < 0) && (radix == 10) && !raw) {
886 w = -w;
887 *p++ = kMinus;
888 --len;
889 } else if (len && (w == 0)) {
890 *p++ = (char)raw ? 0 : asciiDigits[0];
891 --len;
892 }
893
894 while (len && w != 0) {
895 int64_t n = w / base;
896 int64_t m = n * base;
897 int32_t d = (int32_t)(w-m);
898 *p++ = raw ? (char)d : asciiDigits[d];
899 w = n;
900 --len;
901 }
902 if (len) {
903 *p = 0; // null terminate if room for caller convenience
904 }
905
906 len = p - buf;
907 if (*buf == kMinus) {
908 ++buf;
909 }
910 while (--p > buf) {
911 char c = *p;
912 *p = *buf;
913 *buf = c;
914 ++buf;
915 }
916
917 return len;
918 }
919 #endif
920
util64_tou(int64_t w,UChar * buf,uint32_t len,uint32_t radix,UBool raw)921 uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool raw)
922 {
923 if (radix > 36) {
924 radix = 36;
925 } else if (radix < 2) {
926 radix = 2;
927 }
928 int64_t base = radix;
929
930 UChar* p = buf;
931 if (len && (w < 0) && (radix == 10) && !raw) {
932 w = -w;
933 *p++ = kUMinus;
934 --len;
935 } else if (len && (w == 0)) {
936 *p++ = (UChar)raw ? 0 : asciiDigits[0];
937 --len;
938 }
939
940 while (len && (w != 0)) {
941 int64_t n = w / base;
942 int64_t m = n * base;
943 int32_t d = (int32_t)(w-m);
944 *p++ = (UChar)(raw ? d : asciiDigits[d]);
945 w = n;
946 --len;
947 }
948 if (len) {
949 *p = 0; // null terminate if room for caller convenience
950 }
951
952 len = (uint32_t)(p - buf);
953 if (*buf == kUMinus) {
954 ++buf;
955 }
956 while (--p > buf) {
957 UChar c = *p;
958 *p = *buf;
959 *buf = c;
960 ++buf;
961 }
962
963 return len;
964 }
965
966
967 U_NAMESPACE_END
968
969 /* U_HAVE_RBNF */
970 #endif
971
972