1 /*
2 ******************************************************************************
3 * Copyright (C) 1997-2011, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 ******************************************************************************
6 * file name: nfrs.cpp
7 * encoding: US-ASCII
8 * tab size: 8 (not used)
9 * indentation:4
10 *
11 * Modification history
12 * Date Name Comments
13 * 10/11/2001 Doug Ported from ICU4J
14 */
15
16 #include "nfrs.h"
17
18 #if U_HAVE_RBNF
19
20 #include "unicode/uchar.h"
21 #include "nfrule.h"
22 #include "nfrlist.h"
23 #include "patternprops.h"
24
25 #ifdef RBNF_DEBUG
26 #include "cmemory.h"
27 #endif
28
29 U_NAMESPACE_BEGIN
30
31 #if 0
32 // euclid's algorithm works with doubles
33 // note, doubles only get us up to one quadrillion or so, which
34 // isn't as much range as we get with longs. We probably still
35 // want either 64-bit math, or BigInteger.
36
/**
 * Calculates the least common multiple of x and y, using Euclid's
 * algorithm to find the greatest common divisor first.
 * The signs of the arguments are ignored.
 * @param x first value
 * @param y second value
 * @return the least common multiple, or 0 if either argument is 0
 */
static int64_t
util_lcm(int64_t x, int64_t y)
{
    // int64_t is a plain integer type, so take the magnitudes by hand
    if (x < 0) {
        x = -x;
    }
    if (y < 0) {
        y = -y;
    }

    if (x == 0 || y == 0) {
        return 0;
    }

    // Euclid: repeatedly replace (a, b) with (b, a mod b) until b is 0;
    // a is then the greatest common divisor
    int64_t a = x;
    int64_t b = y;
    while (b != 0) {
        int64_t r = a % b;
        a = b;
        b = r;
    }

    // x * y == gcd(x, y) * lcm(x, y); divide first to limit overflow
    return x / a * y;
}
56
57 #else
/**
 * Calculates the least common multiple of x and y.
 * The signs of the arguments are ignored.
 * @param x first value
 * @param y second value
 * @return the least common multiple, or 0 if either argument is 0
 */
static int64_t
util_lcm(int64_t x, int64_t y)
{
    // a zero argument would make the power-of-two loop below spin forever
    // (0 is always even) or lead to a division by a zero gcd
    if (x == 0 || y == 0) {
        return 0;
    }

    // work on unsigned magnitudes: the subtraction loop only terminates
    // for positive values, and negating in unsigned arithmetic is well
    // defined even for the most negative int64_t
    const uint64_t magX = x < 0 ? (uint64_t)0 - (uint64_t)x : (uint64_t)x;
    const uint64_t magY = y < 0 ? (uint64_t)0 - (uint64_t)y : (uint64_t)y;

    // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
    // vol. 2, 1st ed., pp. 298-299
    uint64_t x1 = magX;
    uint64_t y1 = magY;

    // factor out the power of two common to both values
    int p2 = 0;
    while (((x1 | y1) & 1) == 0) {
        ++p2;
        x1 >>= 1;
        y1 >>= 1;
    }

    // any remaining factors of two are not common, so drop them
    while ((x1 & 1) == 0) {
        x1 >>= 1;
    }

    // invariant: x1 is odd and x1 <= y1 after the swap
    while (y1 != 0) {
        while ((y1 & 1) == 0) {
            y1 >>= 1;
        }
        if (x1 > y1) {
            uint64_t t = x1;
            x1 = y1;
            y1 = t;
        }
        y1 -= x1;
    }

    const uint64_t gcd = x1 << p2;

    // x * y == gcd(x, y) * lcm(x, y); divide first to limit overflow
    return (int64_t)(magX / gcd * magY);
}
100 #endif
101
102 static const UChar gPercent = 0x0025;
103 static const UChar gColon = 0x003a;
104 static const UChar gSemicolon = 0x003b;
105 static const UChar gLineFeed = 0x000a;
106
107 static const UChar gFourSpaces[] =
108 {
109 0x20, 0x20, 0x20, 0x20, 0
110 }; /* " " */
111 static const UChar gPercentPercent[] =
112 {
113 0x25, 0x25, 0
114 }; /* "%%" */
115
NFRuleSet(UnicodeString * descriptions,int32_t index,UErrorCode & status)116 NFRuleSet::NFRuleSet(UnicodeString* descriptions, int32_t index, UErrorCode& status)
117 : name()
118 , rules(0)
119 , negativeNumberRule(NULL)
120 , fIsFractionRuleSet(FALSE)
121 , fIsPublic(FALSE)
122 , fRecursionCount(0)
123 {
124 for (int i = 0; i < 3; ++i) {
125 fractionRules[i] = NULL;
126 }
127
128 if (U_FAILURE(status)) {
129 return;
130 }
131
132 UnicodeString& description = descriptions[index]; // !!! make sure index is valid
133
134 if (description.length() == 0) {
135 // throw new IllegalArgumentException("Empty rule set description");
136 status = U_PARSE_ERROR;
137 return;
138 }
139
140 // if the description begins with a rule set name (the rule set
141 // name can be omitted in formatter descriptions that consist
142 // of only one rule set), copy it out into our "name" member
143 // and delete it from the description
144 if (description.charAt(0) == gPercent) {
145 int32_t pos = description.indexOf(gColon);
146 if (pos == -1) {
147 // throw new IllegalArgumentException("Rule set name doesn't end in colon");
148 status = U_PARSE_ERROR;
149 } else {
150 name.setTo(description, 0, pos);
151 while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) {
152 }
153 description.remove(0, pos);
154 }
155 } else {
156 name.setTo(UNICODE_STRING_SIMPLE("%default"));
157 }
158
159 if (description.length() == 0) {
160 // throw new IllegalArgumentException("Empty rule set description");
161 status = U_PARSE_ERROR;
162 }
163
164 fIsPublic = name.indexOf(gPercentPercent) != 0;
165
166 // all of the other members of NFRuleSet are initialized
167 // by parseRules()
168 }
169
170 void
parseRules(UnicodeString & description,const RuleBasedNumberFormat * owner,UErrorCode & status)171 NFRuleSet::parseRules(UnicodeString& description, const RuleBasedNumberFormat* owner, UErrorCode& status)
172 {
173 // start by creating a Vector whose elements are Strings containing
174 // the descriptions of the rules (one rule per element). The rules
175 // are separated by semicolons (there's no escape facility: ALL
176 // semicolons are rule delimiters)
177
178 if (U_FAILURE(status)) {
179 return;
180 }
181
182 // dlf - the original code kept a separate description array for no reason,
183 // so I got rid of it. The loop was too complex so I simplified it.
184
185 UnicodeString currentDescription;
186 int32_t oldP = 0;
187 while (oldP < description.length()) {
188 int32_t p = description.indexOf(gSemicolon, oldP);
189 if (p == -1) {
190 p = description.length();
191 }
192 currentDescription.setTo(description, oldP, p - oldP);
193 NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
194 oldP = p + 1;
195 }
196
197 // for rules that didn't specify a base value, their base values
198 // were initialized to 0. Make another pass through the list and
199 // set all those rules' base values. We also remove any special
200 // rules from the list and put them into their own member variables
201 int64_t defaultBaseValue = 0;
202
203 // (this isn't a for loop because we might be deleting items from
204 // the vector-- we want to make sure we only increment i when
205 // we _didn't_ delete aything from the vector)
206 uint32_t i = 0;
207 while (i < rules.size()) {
208 NFRule* rule = rules[i];
209
210 switch (rule->getType()) {
211 // if the rule's base value is 0, fill in a default
212 // base value (this will be 1 plus the preceding
213 // rule's base value for regular rule sets, and the
214 // same as the preceding rule's base value in fraction
215 // rule sets)
216 case NFRule::kNoBase:
217 rule->setBaseValue(defaultBaseValue, status);
218 if (!isFractionRuleSet()) {
219 ++defaultBaseValue;
220 }
221 ++i;
222 break;
223
224 // if it's the negative-number rule, copy it into its own
225 // data member and delete it from the list
226 case NFRule::kNegativeNumberRule:
227 negativeNumberRule = rules.remove(i);
228 break;
229
230 // if it's the improper fraction rule, copy it into the
231 // correct element of fractionRules
232 case NFRule::kImproperFractionRule:
233 fractionRules[0] = rules.remove(i);
234 break;
235
236 // if it's the proper fraction rule, copy it into the
237 // correct element of fractionRules
238 case NFRule::kProperFractionRule:
239 fractionRules[1] = rules.remove(i);
240 break;
241
242 // if it's the master rule, copy it into the
243 // correct element of fractionRules
244 case NFRule::kMasterRule:
245 fractionRules[2] = rules.remove(i);
246 break;
247
248 // if it's a regular rule that already knows its base value,
249 // check to make sure the rules are in order, and update
250 // the default base value for the next rule
251 default:
252 if (rule->getBaseValue() < defaultBaseValue) {
253 // throw new IllegalArgumentException("Rules are not in order");
254 status = U_PARSE_ERROR;
255 return;
256 }
257 defaultBaseValue = rule->getBaseValue();
258 if (!isFractionRuleSet()) {
259 ++defaultBaseValue;
260 }
261 ++i;
262 break;
263 }
264 }
265 }
266
~NFRuleSet()267 NFRuleSet::~NFRuleSet()
268 {
269 delete negativeNumberRule;
270 delete fractionRules[0];
271 delete fractionRules[1];
272 delete fractionRules[2];
273 }
274
275 static UBool
util_equalRules(const NFRule * rule1,const NFRule * rule2)276 util_equalRules(const NFRule* rule1, const NFRule* rule2)
277 {
278 if (rule1) {
279 if (rule2) {
280 return *rule1 == *rule2;
281 }
282 } else if (!rule2) {
283 return TRUE;
284 }
285 return FALSE;
286 }
287
288 UBool
operator ==(const NFRuleSet & rhs) const289 NFRuleSet::operator==(const NFRuleSet& rhs) const
290 {
291 if (rules.size() == rhs.rules.size() &&
292 fIsFractionRuleSet == rhs.fIsFractionRuleSet &&
293 name == rhs.name &&
294 util_equalRules(negativeNumberRule, rhs.negativeNumberRule) &&
295 util_equalRules(fractionRules[0], rhs.fractionRules[0]) &&
296 util_equalRules(fractionRules[1], rhs.fractionRules[1]) &&
297 util_equalRules(fractionRules[2], rhs.fractionRules[2])) {
298
299 for (uint32_t i = 0; i < rules.size(); ++i) {
300 if (*rules[i] != *rhs.rules[i]) {
301 return FALSE;
302 }
303 }
304 return TRUE;
305 }
306 return FALSE;
307 }
308
309 #define RECURSION_LIMIT 50
310
311 void
format(int64_t number,UnicodeString & toAppendTo,int32_t pos) const312 NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos) const
313 {
314 NFRule *rule = findNormalRule(number);
315 if (rule) { // else error, but can't report it
316 NFRuleSet* ncThis = (NFRuleSet*)this;
317 if (ncThis->fRecursionCount++ >= RECURSION_LIMIT) {
318 // stop recursion
319 ncThis->fRecursionCount = 0;
320 } else {
321 rule->doFormat(number, toAppendTo, pos);
322 ncThis->fRecursionCount--;
323 }
324 }
325 }
326
327 void
format(double number,UnicodeString & toAppendTo,int32_t pos) const328 NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos) const
329 {
330 NFRule *rule = findDoubleRule(number);
331 if (rule) { // else error, but can't report it
332 NFRuleSet* ncThis = (NFRuleSet*)this;
333 if (ncThis->fRecursionCount++ >= RECURSION_LIMIT) {
334 // stop recursion
335 ncThis->fRecursionCount = 0;
336 } else {
337 rule->doFormat(number, toAppendTo, pos);
338 ncThis->fRecursionCount--;
339 }
340 }
341 }
342
343 NFRule*
findDoubleRule(double number) const344 NFRuleSet::findDoubleRule(double number) const
345 {
346 // if this is a fraction rule set, use findFractionRuleSetRule()
347 if (isFractionRuleSet()) {
348 return findFractionRuleSetRule(number);
349 }
350
351 // if the number is negative, return the negative number rule
352 // (if there isn't a negative-number rule, we pretend it's a
353 // positive number)
354 if (number < 0) {
355 if (negativeNumberRule) {
356 return negativeNumberRule;
357 } else {
358 number = -number;
359 }
360 }
361
362 // if the number isn't an integer, we use one of the fraction rules...
363 if (number != uprv_floor(number)) {
364 // if the number is between 0 and 1, return the proper
365 // fraction rule
366 if (number < 1 && fractionRules[1]) {
367 return fractionRules[1];
368 }
369 // otherwise, return the improper fraction rule
370 else if (fractionRules[0]) {
371 return fractionRules[0];
372 }
373 }
374
375 // if there's a master rule, use it to format the number
376 if (fractionRules[2]) {
377 return fractionRules[2];
378 }
379
380 // and if we haven't yet returned a rule, use findNormalRule()
381 // to find the applicable rule
382 int64_t r = util64_fromDouble(number + 0.5);
383 return findNormalRule(r);
384 }
385
386 NFRule *
findNormalRule(int64_t number) const387 NFRuleSet::findNormalRule(int64_t number) const
388 {
389 // if this is a fraction rule set, use findFractionRuleSetRule()
390 // to find the rule (we should only go into this clause if the
391 // value is 0)
392 if (fIsFractionRuleSet) {
393 return findFractionRuleSetRule((double)number);
394 }
395
396 // if the number is negative, return the negative-number rule
397 // (if there isn't one, pretend the number is positive)
398 if (number < 0) {
399 if (negativeNumberRule) {
400 return negativeNumberRule;
401 } else {
402 number = -number;
403 }
404 }
405
406 // we have to repeat the preceding two checks, even though we
407 // do them in findRule(), because the version of format() that
408 // takes a long bypasses findRule() and goes straight to this
409 // function. This function does skip the fraction rules since
410 // we know the value is an integer (it also skips the master
411 // rule, since it's considered a fraction rule. Skipping the
412 // master rule in this function is also how we avoid infinite
413 // recursion)
414
415 // {dlf} unfortunately this fails if there are no rules except
416 // special rules. If there are no rules, use the master rule.
417
418 // binary-search the rule list for the applicable rule
419 // (a rule is used for all values from its base value to
420 // the next rule's base value)
421 int32_t hi = rules.size();
422 if (hi > 0) {
423 int32_t lo = 0;
424
425 while (lo < hi) {
426 int32_t mid = (lo + hi) / 2;
427 if (rules[mid]->getBaseValue() == number) {
428 return rules[mid];
429 }
430 else if (rules[mid]->getBaseValue() > number) {
431 hi = mid;
432 }
433 else {
434 lo = mid + 1;
435 }
436 }
437 if (hi == 0) { // bad rule set, minimum base > 0
438 return NULL; // want to throw exception here
439 }
440
441 NFRule *result = rules[hi - 1];
442
443 // use shouldRollBack() to see whether we need to invoke the
444 // rollback rule (see shouldRollBack()'s documentation for
445 // an explanation of the rollback rule). If we do, roll back
446 // one rule and return that one instead of the one we'd normally
447 // return
448 if (result->shouldRollBack((double)number)) {
449 if (hi == 1) { // bad rule set, no prior rule to rollback to from this base
450 return NULL;
451 }
452 result = rules[hi - 2];
453 }
454 return result;
455 }
456 // else use the master rule
457 return fractionRules[2];
458 }
459
460 /**
461 * If this rule is a fraction rule set, this function is used by
462 * findRule() to select the most appropriate rule for formatting
463 * the number. Basically, the base value of each rule in the rule
464 * set is treated as the denominator of a fraction. Whichever
465 * denominator can produce the fraction closest in value to the
466 * number passed in is the result. If there's a tie, the earlier
467 * one in the list wins. (If there are two rules in a row with the
468 * same base value, the first one is used when the numerator of the
469 * fraction would be 1, and the second rule is used the rest of the
470 * time.
471 * @param number The number being formatted (which will always be
472 * a number between 0 and 1)
473 * @return The rule to use to format this number
474 */
475 NFRule*
findFractionRuleSetRule(double number) const476 NFRuleSet::findFractionRuleSetRule(double number) const
477 {
478 // the obvious way to do this (multiply the value being formatted
479 // by each rule's base value until you get an integral result)
480 // doesn't work because of rounding error. This method is more
481 // accurate
482
483 // find the least common multiple of the rules' base values
484 // and multiply this by the number being formatted. This is
485 // all the precision we need, and we can do all of the rest
486 // of the math using integer arithmetic
487 int64_t leastCommonMultiple = rules[0]->getBaseValue();
488 int64_t numerator;
489 {
490 for (uint32_t i = 1; i < rules.size(); ++i) {
491 leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue());
492 }
493 numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5);
494 }
495 // for each rule, do the following...
496 int64_t tempDifference;
497 int64_t difference = util64_fromDouble(uprv_maxMantissa());
498 int32_t winner = 0;
499 for (uint32_t i = 0; i < rules.size(); ++i) {
500 // "numerator" is the numerator of the fraction if the
501 // denominator is the LCD. The numerator if the rule's
502 // base value is the denominator is "numerator" times the
503 // base value divided bythe LCD. Here we check to see if
504 // that's an integer, and if not, how close it is to being
505 // an integer.
506 tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple;
507
508
509 // normalize the result of the above calculation: we want
510 // the numerator's distance from the CLOSEST multiple
511 // of the LCD
512 if (leastCommonMultiple - tempDifference < tempDifference) {
513 tempDifference = leastCommonMultiple - tempDifference;
514 }
515
516 // if this is as close as we've come, keep track of how close
517 // that is, and the line number of the rule that did it. If
518 // we've scored a direct hit, we don't have to look at any more
519 // rules
520 if (tempDifference < difference) {
521 difference = tempDifference;
522 winner = i;
523 if (difference == 0) {
524 break;
525 }
526 }
527 }
528
529 // if we have two successive rules that both have the winning base
530 // value, then the first one (the one we found above) is used if
531 // the numerator of the fraction is 1 and the second one is used if
532 // the numerator of the fraction is anything else (this lets us
533 // do things like "one third"/"two thirds" without haveing to define
534 // a whole bunch of extra rule sets)
535 if ((unsigned)(winner + 1) < rules.size() &&
536 rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) {
537 double n = ((double)rules[winner]->getBaseValue()) * number;
538 if (n < 0.5 || n >= 2) {
539 ++winner;
540 }
541 }
542
543 // finally, return the winning rule
544 return rules[winner];
545 }
546
547 /**
548 * Parses a string. Matches the string to be parsed against each
549 * of its rules (with a base value less than upperBound) and returns
* the value produced by the rule that matched the most characters
551 * in the source string.
552 * @param text The string to parse
553 * @param parsePosition The initial position is ignored and assumed
554 * to be 0. On exit, this object has been updated to point to the
555 * first character position this rule set didn't consume.
556 * @param upperBound Limits the rules that can be allowed to match.
557 * Only rules whose base values are strictly less than upperBound
558 * are considered.
559 * @return The numerical result of parsing this string. This will
560 * be the matching rule's base value, composed appropriately with
561 * the results of matching any of its substitutions. The object
562 * will be an instance of Long if it's an integral value; otherwise,
563 * it will be an instance of Double. This function always returns
564 * a valid object: If nothing matched the input string at all,
565 * this function returns new Long(0), and the parse position is
566 * left unchanged.
567 */
568 #ifdef RBNF_DEBUG
569 #include <stdio.h>
570
dumpUS(FILE * f,const UnicodeString & us)571 static void dumpUS(FILE* f, const UnicodeString& us) {
572 int len = us.length();
573 char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1];
574 if (buf != NULL) {
575 us.extract(0, len, buf);
576 buf[len] = 0;
577 fprintf(f, "%s", buf);
578 uprv_free(buf); //delete[] buf;
579 }
580 }
581 #endif
582
583 UBool
parse(const UnicodeString & text,ParsePosition & pos,double upperBound,Formattable & result) const584 NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const
585 {
586 // try matching each rule in the rule set against the text being
587 // parsed. Whichever one matches the most characters is the one
588 // that determines the value we return.
589
590 result.setLong(0);
591
592 // dump out if there's no text to parse
593 if (text.length() == 0) {
594 return 0;
595 }
596
597 ParsePosition highWaterMark;
598 ParsePosition workingPos = pos;
599
600 #ifdef RBNF_DEBUG
601 fprintf(stderr, "<nfrs> %x '", this);
602 dumpUS(stderr, name);
603 fprintf(stderr, "' text '");
604 dumpUS(stderr, text);
605 fprintf(stderr, "'\n");
606 fprintf(stderr, " parse negative: %d\n", this, negativeNumberRule != 0);
607 #endif
608
609 // start by trying the negative number rule (if there is one)
610 if (negativeNumberRule) {
611 Formattable tempResult;
612 #ifdef RBNF_DEBUG
613 fprintf(stderr, " <nfrs before negative> %x ub: %g\n", negativeNumberRule, upperBound);
614 #endif
615 UBool success = negativeNumberRule->doParse(text, workingPos, 0, upperBound, tempResult);
616 #ifdef RBNF_DEBUG
617 fprintf(stderr, " <nfrs after negative> success: %d wpi: %d\n", success, workingPos.getIndex());
618 #endif
619 if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
620 result = tempResult;
621 highWaterMark = workingPos;
622 }
623 workingPos = pos;
624 }
625 #ifdef RBNF_DEBUG
626 fprintf(stderr, "<nfrs> continue fractional with text '");
627 dumpUS(stderr, text);
628 fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
629 #endif
630 // then try each of the fraction rules
631 {
632 for (int i = 0; i < 3; i++) {
633 if (fractionRules[i]) {
634 Formattable tempResult;
635 UBool success = fractionRules[i]->doParse(text, workingPos, 0, upperBound, tempResult);
636 if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
637 result = tempResult;
638 highWaterMark = workingPos;
639 }
640 workingPos = pos;
641 }
642 }
643 }
644 #ifdef RBNF_DEBUG
645 fprintf(stderr, "<nfrs> continue other with text '");
646 dumpUS(stderr, text);
647 fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
648 #endif
649
650 // finally, go through the regular rules one at a time. We start
651 // at the end of the list because we want to try matching the most
652 // sigificant rule first (this helps ensure that we parse
653 // "five thousand three hundred six" as
654 // "(five thousand) (three hundred) (six)" rather than
655 // "((five thousand three) hundred) (six)"). Skip rules whose
656 // base values are higher than the upper bound (again, this helps
657 // limit ambiguity by making sure the rules that match a rule's
658 // are less significant than the rule containing the substitutions)/
659 {
660 int64_t ub = util64_fromDouble(upperBound);
661 #ifdef RBNF_DEBUG
662 {
663 char ubstr[64];
664 util64_toa(ub, ubstr, 64);
665 char ubstrhex[64];
666 util64_toa(ub, ubstrhex, 64, 16);
667 fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex);
668 }
669 #endif
670 for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
671 if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
672 continue;
673 }
674 Formattable tempResult;
675 UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult);
676 if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
677 result = tempResult;
678 highWaterMark = workingPos;
679 }
680 workingPos = pos;
681 }
682 }
683 #ifdef RBNF_DEBUG
684 fprintf(stderr, "<nfrs> exit\n");
685 #endif
686 // finally, update the parse postion we were passed to point to the
687 // first character we didn't use, and return the result that
688 // corresponds to that string of characters
689 pos = highWaterMark;
690
691 return 1;
692 }
693
694 void
appendRules(UnicodeString & result) const695 NFRuleSet::appendRules(UnicodeString& result) const
696 {
697 // the rule set name goes first...
698 result.append(name);
699 result.append(gColon);
700 result.append(gLineFeed);
701
702 // followed by the regular rules...
703 for (uint32_t i = 0; i < rules.size(); i++) {
704 result.append(gFourSpaces);
705 rules[i]->_appendRuleText(result);
706 result.append(gLineFeed);
707 }
708
709 // followed by the special rules (if they exist)
710 if (negativeNumberRule) {
711 result.append(gFourSpaces);
712 negativeNumberRule->_appendRuleText(result);
713 result.append(gLineFeed);
714 }
715
716 {
717 for (uint32_t i = 0; i < 3; ++i) {
718 if (fractionRules[i]) {
719 result.append(gFourSpaces);
720 fractionRules[i]->_appendRuleText(result);
721 result.append(gLineFeed);
722 }
723 }
724 }
725 }
726
727 // utility functions
728
util64_fromDouble(double d)729 int64_t util64_fromDouble(double d) {
730 int64_t result = 0;
731 if (!uprv_isNaN(d)) {
732 double mant = uprv_maxMantissa();
733 if (d < -mant) {
734 d = -mant;
735 } else if (d > mant) {
736 d = mant;
737 }
738 UBool neg = d < 0;
739 if (neg) {
740 d = -d;
741 }
742 result = (int64_t)uprv_floor(d);
743 if (neg) {
744 result = -result;
745 }
746 }
747 return result;
748 }
749
/**
 * Raises r to the power e by repeated multiplication.
 * @param r the base
 * @param e the exponent
 * @return 0 if r is 0 (even when e is 0), 1 if e is 0, otherwise r^e
 */
int64_t util64_pow(int32_t r, uint32_t e) {
    if (r == 0) {
        return 0;
    }
    int64_t product = 1;
    for (uint32_t k = 0; k < e; ++k) {
        product *= r;
    }
    return product;
}
763
// Digit characters for radixes up to 36, indexed by digit value and
// given as numeric character codes: '0'-'9' followed by 'a'-'z'.
static const uint8_t asciiDigits[] = {
    /* 0-9 */
    0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u, 0x38u, 0x39u,
    /* a-m */
    0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u, 0x67u, 0x68u, 0x69u, 0x6au,
    0x6bu, 0x6cu, 0x6du,
    /* n-z */
    0x6eu, 0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u, 0x77u,
    0x78u, 0x79u, 0x7au,
};
771
772 static const UChar kUMinus = (UChar)0x002d;
773
774 #ifdef RBNF_DEBUG
static const char kMinus = '-';

// Digit lookup table indexed by character code (0x00-0x7f).  An entry of
// 0 marks a character that is not a digit; otherwise the high bit is set
// and the low seven bits hold the digit value ('0'-'9' -> 0-9,
// 'A'-'Z' and 'a'-'z' -> 10-35).
static const uint8_t digitInfo[] = {
        0,     0,     0,     0,     0,     0,     0,     0,
        0,     0,     0,     0,     0,     0,     0,     0,
        0,     0,     0,     0,     0,     0,     0,     0,
        0,     0,     0,     0,     0,     0,     0,     0,
        0,     0,     0,     0,     0,     0,     0,     0,
        0,     0,     0,     0,     0,     0,     0,     0,
    0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u,
    0x88u, 0x89u,     0,     0,     0,     0,     0,     0,
        0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
    0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
    0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
    0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
        0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
    0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
    0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
    0xa1u, 0xa2u, 0xa3u,     0,     0,     0,     0,     0,
};

/**
 * Parses a NUL-terminated char string as a 64-bit integer.  An optional
 * leading minus sign is accepted; parsing stops at the first character
 * that is not a digit of the given radix.
 * @param str the text to parse
 * @param radix the radix, pinned to the range 2..36
 * @return the parsed value, or 0 if the text does not start with a digit
 */
int64_t util64_atoi(const char* str, uint32_t radix)
{
    if (radix > 36) {
        radix = 36;
    } else if (radix < 2) {
        radix = 2;
    }
    int64_t lradix = radix;

    int neg = 0;
    if (*str == kMinus) {
        ++str;
        neg = 1;
    }
    int64_t result = 0;
    for (;;) {
        // read the character as an unsigned value: a plain char may be
        // negative, and anything past the end of the table is not a digit
        const uint8_t c = (uint8_t)*str++;
        if (c >= sizeof(digitInfo)) {
            break;
        }
        uint8_t b = digitInfo[c];
        if (b == 0) {
            break;          // not a digit (this includes the terminating NUL)
        }
        b &= 0x7f;
        if (b >= radix) {
            break;          // a digit, but not one of this radix
        }
        result *= lradix;
        result += (int32_t)b;
    }
    if (neg) {
        result = -result;
    }
    return result;
}
821
util64_utoi(const UChar * str,uint32_t radix)822 int64_t util64_utoi(const UChar* str, uint32_t radix)
823 {
824 if (radix > 36) {
825 radix = 36;
826 } else if (radix < 2) {
827 radix = 2;
828 }
829 int64_t lradix = radix;
830
831 int neg = 0;
832 if (*str == kUMinus) {
833 ++str;
834 neg = 1;
835 }
836 int64_t result = 0;
837 UChar c;
838 uint8_t b;
839 while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) {
840 result *= lradix;
841 result += (int32_t)b;
842 }
843 if (neg) {
844 result = -result;
845 }
846 return result;
847 }
848
util64_toa(int64_t w,char * buf,uint32_t len,uint32_t radix,UBool raw)849 uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw)
850 {
851 if (radix > 36) {
852 radix = 36;
853 } else if (radix < 2) {
854 radix = 2;
855 }
856 int64_t base = radix;
857
858 char* p = buf;
859 if (len && (w < 0) && (radix == 10) && !raw) {
860 w = -w;
861 *p++ = kMinus;
862 --len;
863 } else if (len && (w == 0)) {
864 *p++ = (char)raw ? 0 : asciiDigits[0];
865 --len;
866 }
867
868 while (len && w != 0) {
869 int64_t n = w / base;
870 int64_t m = n * base;
871 int32_t d = (int32_t)(w-m);
872 *p++ = raw ? (char)d : asciiDigits[d];
873 w = n;
874 --len;
875 }
876 if (len) {
877 *p = 0; // null terminate if room for caller convenience
878 }
879
880 len = p - buf;
881 if (*buf == kMinus) {
882 ++buf;
883 }
884 while (--p > buf) {
885 char c = *p;
886 *p = *buf;
887 *buf = c;
888 ++buf;
889 }
890
891 return len;
892 }
893 #endif
894
util64_tou(int64_t w,UChar * buf,uint32_t len,uint32_t radix,UBool raw)895 uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool raw)
896 {
897 if (radix > 36) {
898 radix = 36;
899 } else if (radix < 2) {
900 radix = 2;
901 }
902 int64_t base = radix;
903
904 UChar* p = buf;
905 if (len && (w < 0) && (radix == 10) && !raw) {
906 w = -w;
907 *p++ = kUMinus;
908 --len;
909 } else if (len && (w == 0)) {
910 *p++ = (UChar)raw ? 0 : asciiDigits[0];
911 --len;
912 }
913
914 while (len && (w != 0)) {
915 int64_t n = w / base;
916 int64_t m = n * base;
917 int32_t d = (int32_t)(w-m);
918 *p++ = (UChar)(raw ? d : asciiDigits[d]);
919 w = n;
920 --len;
921 }
922 if (len) {
923 *p = 0; // null terminate if room for caller convenience
924 }
925
926 len = (uint32_t)(p - buf);
927 if (*buf == kUMinus) {
928 ++buf;
929 }
930 while (--p > buf) {
931 UChar c = *p;
932 *p = *buf;
933 *buf = c;
934 ++buf;
935 }
936
937 return len;
938 }
939
940
941 U_NAMESPACE_END
942
943 /* U_HAVE_RBNF */
944 #endif
945
946