1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 * Copyright (C) 1997-2015, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 ******************************************************************************
8 * file name: nfrs.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * Modification history
14 * Date Name Comments
15 * 10/11/2001 Doug Ported from ICU4J
16 */
17
18 #include "nfrs.h"
19
20 #if U_HAVE_RBNF
21
22 #include "unicode/uchar.h"
23 #include "nfrule.h"
24 #include "nfrlist.h"
25 #include "patternprops.h"
26
27 #ifdef RBNF_DEBUG
28 #include "cmemory.h"
29 #endif
30
31 enum {
32 /** -x */
33 NEGATIVE_RULE_INDEX = 0,
34 /** x.x */
35 IMPROPER_FRACTION_RULE_INDEX = 1,
36 /** 0.x */
37 PROPER_FRACTION_RULE_INDEX = 2,
38 /** x.0 */
39 MASTER_RULE_INDEX = 3,
40 /** Inf */
41 INFINITY_RULE_INDEX = 4,
42 /** NaN */
43 NAN_RULE_INDEX = 5,
44 NON_NUMERICAL_RULE_LENGTH = 6
45 };
46
47 U_NAMESPACE_BEGIN
48
49 #if 0
50 // euclid's algorithm works with doubles
51 // note, doubles only get us up to one quadrillion or so, which
52 // isn't as much range as we get with longs. We probably still
53 // want either 64-bit math, or BigInteger.
54
55 static int64_t
56 util_lcm(int64_t x, int64_t y)
57 {
58 x.abs();
59 y.abs();
60
61 if (x == 0 || y == 0) {
62 return 0;
63 } else {
64 do {
65 if (x < y) {
66 int64_t t = x; x = y; y = t;
67 }
68 x -= y * (x/y);
69 } while (x != 0);
70
71 return y;
72 }
73 }
74
75 #else
76 /**
77 * Calculates the least common multiple of x and y.
78 */
79 static int64_t
util_lcm(int64_t x,int64_t y)80 util_lcm(int64_t x, int64_t y)
81 {
82 // binary gcd algorithm from Knuth, "The Art of Computer Programming,"
83 // vol. 2, 1st ed., pp. 298-299
84 int64_t x1 = x;
85 int64_t y1 = y;
86
87 int p2 = 0;
88 while ((x1 & 1) == 0 && (y1 & 1) == 0) {
89 ++p2;
90 x1 >>= 1;
91 y1 >>= 1;
92 }
93
94 int64_t t;
95 if ((x1 & 1) == 1) {
96 t = -y1;
97 } else {
98 t = x1;
99 }
100
101 while (t != 0) {
102 while ((t & 1) == 0) {
103 t = t >> 1;
104 }
105 if (t > 0) {
106 x1 = t;
107 } else {
108 y1 = -t;
109 }
110 t = x1 - y1;
111 }
112
113 int64_t gcd = x1 << p2;
114
115 // x * y == gcd(x, y) * lcm(x, y)
116 return x / gcd * y;
117 }
118 #endif
119
120 static const UChar gPercent = 0x0025;
121 static const UChar gColon = 0x003a;
122 static const UChar gSemicolon = 0x003b;
123 static const UChar gLineFeed = 0x000a;
124
125 static const UChar gPercentPercent[] =
126 {
127 0x25, 0x25, 0
128 }; /* "%%" */
129
130 static const UChar gNoparse[] =
131 {
132 0x40, 0x6E, 0x6F, 0x70, 0x61, 0x72, 0x73, 0x65, 0
133 }; /* "@noparse" */
134
NFRuleSet(RuleBasedNumberFormat * _owner,UnicodeString * descriptions,int32_t index,UErrorCode & status)135 NFRuleSet::NFRuleSet(RuleBasedNumberFormat *_owner, UnicodeString* descriptions, int32_t index, UErrorCode& status)
136 : name()
137 , rules(0)
138 , owner(_owner)
139 , fractionRules()
140 , fIsFractionRuleSet(FALSE)
141 , fIsPublic(FALSE)
142 , fIsParseable(TRUE)
143 {
144 for (int32_t i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
145 nonNumericalRules[i] = NULL;
146 }
147
148 if (U_FAILURE(status)) {
149 return;
150 }
151
152 UnicodeString& description = descriptions[index]; // !!! make sure index is valid
153
154 if (description.length() == 0) {
155 // throw new IllegalArgumentException("Empty rule set description");
156 status = U_PARSE_ERROR;
157 return;
158 }
159
160 // if the description begins with a rule set name (the rule set
161 // name can be omitted in formatter descriptions that consist
162 // of only one rule set), copy it out into our "name" member
163 // and delete it from the description
164 if (description.charAt(0) == gPercent) {
165 int32_t pos = description.indexOf(gColon);
166 if (pos == -1) {
167 // throw new IllegalArgumentException("Rule set name doesn't end in colon");
168 status = U_PARSE_ERROR;
169 } else {
170 name.setTo(description, 0, pos);
171 while (pos < description.length() && PatternProps::isWhiteSpace(description.charAt(++pos))) {
172 }
173 description.remove(0, pos);
174 }
175 } else {
176 name.setTo(UNICODE_STRING_SIMPLE("%default"));
177 }
178
179 if (description.length() == 0) {
180 // throw new IllegalArgumentException("Empty rule set description");
181 status = U_PARSE_ERROR;
182 }
183
184 fIsPublic = name.indexOf(gPercentPercent, 2, 0) != 0;
185
186 if ( name.endsWith(gNoparse,8) ) {
187 fIsParseable = FALSE;
188 name.truncate(name.length()-8); // remove the @noparse from the name
189 }
190
191 // all of the other members of NFRuleSet are initialized
192 // by parseRules()
193 }
194
195 void
parseRules(UnicodeString & description,UErrorCode & status)196 NFRuleSet::parseRules(UnicodeString& description, UErrorCode& status)
197 {
198 // start by creating a Vector whose elements are Strings containing
199 // the descriptions of the rules (one rule per element). The rules
200 // are separated by semicolons (there's no escape facility: ALL
201 // semicolons are rule delimiters)
202
203 if (U_FAILURE(status)) {
204 return;
205 }
206
207 // ensure we are starting with an empty rule list
208 rules.deleteAll();
209
210 // dlf - the original code kept a separate description array for no reason,
211 // so I got rid of it. The loop was too complex so I simplified it.
212
213 UnicodeString currentDescription;
214 int32_t oldP = 0;
215 while (oldP < description.length()) {
216 int32_t p = description.indexOf(gSemicolon, oldP);
217 if (p == -1) {
218 p = description.length();
219 }
220 currentDescription.setTo(description, oldP, p - oldP);
221 NFRule::makeRules(currentDescription, this, rules.last(), owner, rules, status);
222 oldP = p + 1;
223 }
224
225 // for rules that didn't specify a base value, their base values
226 // were initialized to 0. Make another pass through the list and
227 // set all those rules' base values. We also remove any special
228 // rules from the list and put them into their own member variables
229 int64_t defaultBaseValue = 0;
230
231 // (this isn't a for loop because we might be deleting items from
232 // the vector-- we want to make sure we only increment i when
233 // we _didn't_ delete aything from the vector)
234 int32_t rulesSize = rules.size();
235 for (int32_t i = 0; i < rulesSize; i++) {
236 NFRule* rule = rules[i];
237 int64_t baseValue = rule->getBaseValue();
238
239 if (baseValue == 0) {
240 // if the rule's base value is 0, fill in a default
241 // base value (this will be 1 plus the preceding
242 // rule's base value for regular rule sets, and the
243 // same as the preceding rule's base value in fraction
244 // rule sets)
245 rule->setBaseValue(defaultBaseValue, status);
246 }
247 else {
248 // if it's a regular rule that already knows its base value,
249 // check to make sure the rules are in order, and update
250 // the default base value for the next rule
251 if (baseValue < defaultBaseValue) {
252 // throw new IllegalArgumentException("Rules are not in order");
253 status = U_PARSE_ERROR;
254 return;
255 }
256 defaultBaseValue = baseValue;
257 }
258 if (!fIsFractionRuleSet) {
259 ++defaultBaseValue;
260 }
261 }
262 }
263
264 /**
265 * Set one of the non-numerical rules.
266 * @param rule The rule to set.
267 */
setNonNumericalRule(NFRule * rule)268 void NFRuleSet::setNonNumericalRule(NFRule *rule) {
269 int64_t baseValue = rule->getBaseValue();
270 if (baseValue == NFRule::kNegativeNumberRule) {
271 delete nonNumericalRules[NEGATIVE_RULE_INDEX];
272 nonNumericalRules[NEGATIVE_RULE_INDEX] = rule;
273 }
274 else if (baseValue == NFRule::kImproperFractionRule) {
275 setBestFractionRule(IMPROPER_FRACTION_RULE_INDEX, rule, TRUE);
276 }
277 else if (baseValue == NFRule::kProperFractionRule) {
278 setBestFractionRule(PROPER_FRACTION_RULE_INDEX, rule, TRUE);
279 }
280 else if (baseValue == NFRule::kMasterRule) {
281 setBestFractionRule(MASTER_RULE_INDEX, rule, TRUE);
282 }
283 else if (baseValue == NFRule::kInfinityRule) {
284 delete nonNumericalRules[INFINITY_RULE_INDEX];
285 nonNumericalRules[INFINITY_RULE_INDEX] = rule;
286 }
287 else if (baseValue == NFRule::kNaNRule) {
288 delete nonNumericalRules[NAN_RULE_INDEX];
289 nonNumericalRules[NAN_RULE_INDEX] = rule;
290 }
291 }
292
293 /**
294 * Determine the best fraction rule to use. Rules matching the decimal point from
295 * DecimalFormatSymbols become the main set of rules to use.
296 * @param originalIndex The index into nonNumericalRules
297 * @param newRule The new rule to consider
298 * @param rememberRule Should the new rule be added to fractionRules.
299 */
setBestFractionRule(int32_t originalIndex,NFRule * newRule,UBool rememberRule)300 void NFRuleSet::setBestFractionRule(int32_t originalIndex, NFRule *newRule, UBool rememberRule) {
301 if (rememberRule) {
302 fractionRules.add(newRule);
303 }
304 NFRule *bestResult = nonNumericalRules[originalIndex];
305 if (bestResult == NULL) {
306 nonNumericalRules[originalIndex] = newRule;
307 }
308 else {
309 // We have more than one. Which one is better?
310 const DecimalFormatSymbols *decimalFormatSymbols = owner->getDecimalFormatSymbols();
311 if (decimalFormatSymbols->getSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol).charAt(0)
312 == newRule->getDecimalPoint())
313 {
314 nonNumericalRules[originalIndex] = newRule;
315 }
316 // else leave it alone
317 }
318 }
319
~NFRuleSet()320 NFRuleSet::~NFRuleSet()
321 {
322 for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
323 if (i != IMPROPER_FRACTION_RULE_INDEX
324 && i != PROPER_FRACTION_RULE_INDEX
325 && i != MASTER_RULE_INDEX)
326 {
327 delete nonNumericalRules[i];
328 }
329 // else it will be deleted via NFRuleList fractionRules
330 }
331 }
332
333 static UBool
util_equalRules(const NFRule * rule1,const NFRule * rule2)334 util_equalRules(const NFRule* rule1, const NFRule* rule2)
335 {
336 if (rule1) {
337 if (rule2) {
338 return *rule1 == *rule2;
339 }
340 } else if (!rule2) {
341 return TRUE;
342 }
343 return FALSE;
344 }
345
346 UBool
operator ==(const NFRuleSet & rhs) const347 NFRuleSet::operator==(const NFRuleSet& rhs) const
348 {
349 if (rules.size() == rhs.rules.size() &&
350 fIsFractionRuleSet == rhs.fIsFractionRuleSet &&
351 name == rhs.name) {
352
353 // ...then compare the non-numerical rule lists...
354 for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
355 if (!util_equalRules(nonNumericalRules[i], rhs.nonNumericalRules[i])) {
356 return FALSE;
357 }
358 }
359
360 // ...then compare the rule lists...
361 for (uint32_t i = 0; i < rules.size(); ++i) {
362 if (*rules[i] != *rhs.rules[i]) {
363 return FALSE;
364 }
365 }
366 return TRUE;
367 }
368 return FALSE;
369 }
370
371 void
setDecimalFormatSymbols(const DecimalFormatSymbols & newSymbols,UErrorCode & status)372 NFRuleSet::setDecimalFormatSymbols(const DecimalFormatSymbols &newSymbols, UErrorCode& status) {
373 for (uint32_t i = 0; i < rules.size(); ++i) {
374 rules[i]->setDecimalFormatSymbols(newSymbols, status);
375 }
376 // Switch the fraction rules to mirror the DecimalFormatSymbols.
377 for (int32_t nonNumericalIdx = IMPROPER_FRACTION_RULE_INDEX; nonNumericalIdx <= MASTER_RULE_INDEX; nonNumericalIdx++) {
378 if (nonNumericalRules[nonNumericalIdx]) {
379 for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
380 NFRule *fractionRule = fractionRules[fIdx];
381 if (nonNumericalRules[nonNumericalIdx]->getBaseValue() == fractionRule->getBaseValue()) {
382 setBestFractionRule(nonNumericalIdx, fractionRule, FALSE);
383 }
384 }
385 }
386 }
387
388 for (uint32_t nnrIdx = 0; nnrIdx < NON_NUMERICAL_RULE_LENGTH; nnrIdx++) {
389 NFRule *rule = nonNumericalRules[nnrIdx];
390 if (rule) {
391 rule->setDecimalFormatSymbols(newSymbols, status);
392 }
393 }
394 }
395
396 #define RECURSION_LIMIT 64
397
398 void
format(int64_t number,UnicodeString & toAppendTo,int32_t pos,int32_t recursionCount,UErrorCode & status) const399 NFRuleSet::format(int64_t number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
400 {
401 if (recursionCount >= RECURSION_LIMIT) {
402 // stop recursion
403 status = U_INVALID_STATE_ERROR;
404 return;
405 }
406 const NFRule *rule = findNormalRule(number);
407 if (rule) { // else error, but can't report it
408 rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
409 }
410 }
411
412 void
format(double number,UnicodeString & toAppendTo,int32_t pos,int32_t recursionCount,UErrorCode & status) const413 NFRuleSet::format(double number, UnicodeString& toAppendTo, int32_t pos, int32_t recursionCount, UErrorCode& status) const
414 {
415 if (recursionCount >= RECURSION_LIMIT) {
416 // stop recursion
417 status = U_INVALID_STATE_ERROR;
418 return;
419 }
420 const NFRule *rule = findDoubleRule(number);
421 if (rule) { // else error, but can't report it
422 rule->doFormat(number, toAppendTo, pos, ++recursionCount, status);
423 }
424 }
425
426 const NFRule*
findDoubleRule(double number) const427 NFRuleSet::findDoubleRule(double number) const
428 {
429 // if this is a fraction rule set, use findFractionRuleSetRule()
430 if (isFractionRuleSet()) {
431 return findFractionRuleSetRule(number);
432 }
433
434 if (uprv_isNaN(number)) {
435 const NFRule *rule = nonNumericalRules[NAN_RULE_INDEX];
436 if (!rule) {
437 rule = owner->getDefaultNaNRule();
438 }
439 return rule;
440 }
441
442 // if the number is negative, return the negative number rule
443 // (if there isn't a negative-number rule, we pretend it's a
444 // positive number)
445 if (number < 0) {
446 if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
447 return nonNumericalRules[NEGATIVE_RULE_INDEX];
448 } else {
449 number = -number;
450 }
451 }
452
453 if (uprv_isInfinite(number)) {
454 const NFRule *rule = nonNumericalRules[INFINITY_RULE_INDEX];
455 if (!rule) {
456 rule = owner->getDefaultInfinityRule();
457 }
458 return rule;
459 }
460
461 // if the number isn't an integer, we use one of the fraction rules...
462 if (number != uprv_floor(number)) {
463 // if the number is between 0 and 1, return the proper
464 // fraction rule
465 if (number < 1 && nonNumericalRules[PROPER_FRACTION_RULE_INDEX]) {
466 return nonNumericalRules[PROPER_FRACTION_RULE_INDEX];
467 }
468 // otherwise, return the improper fraction rule
469 else if (nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX]) {
470 return nonNumericalRules[IMPROPER_FRACTION_RULE_INDEX];
471 }
472 }
473
474 // if there's a master rule, use it to format the number
475 if (nonNumericalRules[MASTER_RULE_INDEX]) {
476 return nonNumericalRules[MASTER_RULE_INDEX];
477 }
478
479 // and if we haven't yet returned a rule, use findNormalRule()
480 // to find the applicable rule
481 int64_t r = util64_fromDouble(number + 0.5);
482 return findNormalRule(r);
483 }
484
485 const NFRule *
findNormalRule(int64_t number) const486 NFRuleSet::findNormalRule(int64_t number) const
487 {
488 // if this is a fraction rule set, use findFractionRuleSetRule()
489 // to find the rule (we should only go into this clause if the
490 // value is 0)
491 if (fIsFractionRuleSet) {
492 return findFractionRuleSetRule((double)number);
493 }
494
495 // if the number is negative, return the negative-number rule
496 // (if there isn't one, pretend the number is positive)
497 if (number < 0) {
498 if (nonNumericalRules[NEGATIVE_RULE_INDEX]) {
499 return nonNumericalRules[NEGATIVE_RULE_INDEX];
500 } else {
501 number = -number;
502 }
503 }
504
505 // we have to repeat the preceding two checks, even though we
506 // do them in findRule(), because the version of format() that
507 // takes a long bypasses findRule() and goes straight to this
508 // function. This function does skip the fraction rules since
509 // we know the value is an integer (it also skips the master
510 // rule, since it's considered a fraction rule. Skipping the
511 // master rule in this function is also how we avoid infinite
512 // recursion)
513
514 // {dlf} unfortunately this fails if there are no rules except
515 // special rules. If there are no rules, use the master rule.
516
517 // binary-search the rule list for the applicable rule
518 // (a rule is used for all values from its base value to
519 // the next rule's base value)
520 int32_t hi = rules.size();
521 if (hi > 0) {
522 int32_t lo = 0;
523
524 while (lo < hi) {
525 int32_t mid = (lo + hi) / 2;
526 if (rules[mid]->getBaseValue() == number) {
527 return rules[mid];
528 }
529 else if (rules[mid]->getBaseValue() > number) {
530 hi = mid;
531 }
532 else {
533 lo = mid + 1;
534 }
535 }
536 if (hi == 0) { // bad rule set, minimum base > 0
537 return NULL; // want to throw exception here
538 }
539
540 NFRule *result = rules[hi - 1];
541
542 // use shouldRollBack() to see whether we need to invoke the
543 // rollback rule (see shouldRollBack()'s documentation for
544 // an explanation of the rollback rule). If we do, roll back
545 // one rule and return that one instead of the one we'd normally
546 // return
547 if (result->shouldRollBack((double)number)) {
548 if (hi == 1) { // bad rule set, no prior rule to rollback to from this base
549 return NULL;
550 }
551 result = rules[hi - 2];
552 }
553 return result;
554 }
555 // else use the master rule
556 return nonNumericalRules[MASTER_RULE_INDEX];
557 }
558
559 /**
560 * If this rule is a fraction rule set, this function is used by
561 * findRule() to select the most appropriate rule for formatting
562 * the number. Basically, the base value of each rule in the rule
563 * set is treated as the denominator of a fraction. Whichever
564 * denominator can produce the fraction closest in value to the
565 * number passed in is the result. If there's a tie, the earlier
566 * one in the list wins. (If there are two rules in a row with the
567 * same base value, the first one is used when the numerator of the
568 * fraction would be 1, and the second rule is used the rest of the
569 * time.
570 * @param number The number being formatted (which will always be
571 * a number between 0 and 1)
572 * @return The rule to use to format this number
573 */
574 const NFRule*
findFractionRuleSetRule(double number) const575 NFRuleSet::findFractionRuleSetRule(double number) const
576 {
577 // the obvious way to do this (multiply the value being formatted
578 // by each rule's base value until you get an integral result)
579 // doesn't work because of rounding error. This method is more
580 // accurate
581
582 // find the least common multiple of the rules' base values
583 // and multiply this by the number being formatted. This is
584 // all the precision we need, and we can do all of the rest
585 // of the math using integer arithmetic
586 int64_t leastCommonMultiple = rules[0]->getBaseValue();
587 int64_t numerator;
588 {
589 for (uint32_t i = 1; i < rules.size(); ++i) {
590 leastCommonMultiple = util_lcm(leastCommonMultiple, rules[i]->getBaseValue());
591 }
592 numerator = util64_fromDouble(number * (double)leastCommonMultiple + 0.5);
593 }
594 // for each rule, do the following...
595 int64_t tempDifference;
596 int64_t difference = util64_fromDouble(uprv_maxMantissa());
597 int32_t winner = 0;
598 for (uint32_t i = 0; i < rules.size(); ++i) {
599 // "numerator" is the numerator of the fraction if the
600 // denominator is the LCD. The numerator if the rule's
601 // base value is the denominator is "numerator" times the
602 // base value divided bythe LCD. Here we check to see if
603 // that's an integer, and if not, how close it is to being
604 // an integer.
605 tempDifference = numerator * rules[i]->getBaseValue() % leastCommonMultiple;
606
607
608 // normalize the result of the above calculation: we want
609 // the numerator's distance from the CLOSEST multiple
610 // of the LCD
611 if (leastCommonMultiple - tempDifference < tempDifference) {
612 tempDifference = leastCommonMultiple - tempDifference;
613 }
614
615 // if this is as close as we've come, keep track of how close
616 // that is, and the line number of the rule that did it. If
617 // we've scored a direct hit, we don't have to look at any more
618 // rules
619 if (tempDifference < difference) {
620 difference = tempDifference;
621 winner = i;
622 if (difference == 0) {
623 break;
624 }
625 }
626 }
627
628 // if we have two successive rules that both have the winning base
629 // value, then the first one (the one we found above) is used if
630 // the numerator of the fraction is 1 and the second one is used if
631 // the numerator of the fraction is anything else (this lets us
632 // do things like "one third"/"two thirds" without haveing to define
633 // a whole bunch of extra rule sets)
634 if ((unsigned)(winner + 1) < rules.size() &&
635 rules[winner + 1]->getBaseValue() == rules[winner]->getBaseValue()) {
636 double n = ((double)rules[winner]->getBaseValue()) * number;
637 if (n < 0.5 || n >= 2) {
638 ++winner;
639 }
640 }
641
642 // finally, return the winning rule
643 return rules[winner];
644 }
645
646 /**
647 * Parses a string. Matches the string to be parsed against each
648 * of its rules (with a base value less than upperBound) and returns
649 * the value produced by the rule that matched the most charcters
650 * in the source string.
651 * @param text The string to parse
652 * @param parsePosition The initial position is ignored and assumed
653 * to be 0. On exit, this object has been updated to point to the
654 * first character position this rule set didn't consume.
655 * @param upperBound Limits the rules that can be allowed to match.
656 * Only rules whose base values are strictly less than upperBound
657 * are considered.
658 * @return The numerical result of parsing this string. This will
659 * be the matching rule's base value, composed appropriately with
660 * the results of matching any of its substitutions. The object
661 * will be an instance of Long if it's an integral value; otherwise,
662 * it will be an instance of Double. This function always returns
663 * a valid object: If nothing matched the input string at all,
664 * this function returns new Long(0), and the parse position is
665 * left unchanged.
666 */
667 #ifdef RBNF_DEBUG
668 #include <stdio.h>
669
dumpUS(FILE * f,const UnicodeString & us)670 static void dumpUS(FILE* f, const UnicodeString& us) {
671 int len = us.length();
672 char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1];
673 if (buf != NULL) {
674 us.extract(0, len, buf);
675 buf[len] = 0;
676 fprintf(f, "%s", buf);
677 uprv_free(buf); //delete[] buf;
678 }
679 }
680 #endif
681
682 UBool
parse(const UnicodeString & text,ParsePosition & pos,double upperBound,Formattable & result) const683 NFRuleSet::parse(const UnicodeString& text, ParsePosition& pos, double upperBound, Formattable& result) const
684 {
685 // try matching each rule in the rule set against the text being
686 // parsed. Whichever one matches the most characters is the one
687 // that determines the value we return.
688
689 result.setLong(0);
690
691 // dump out if there's no text to parse
692 if (text.length() == 0) {
693 return 0;
694 }
695
696 ParsePosition highWaterMark;
697 ParsePosition workingPos = pos;
698
699 #ifdef RBNF_DEBUG
700 fprintf(stderr, "<nfrs> %x '", this);
701 dumpUS(stderr, name);
702 fprintf(stderr, "' text '");
703 dumpUS(stderr, text);
704 fprintf(stderr, "'\n");
705 fprintf(stderr, " parse negative: %d\n", this, negativeNumberRule != 0);
706 #endif
707 // Try each of the negative rules, fraction rules, infinity rules and NaN rules
708 for (int i = 0; i < NON_NUMERICAL_RULE_LENGTH; i++) {
709 if (nonNumericalRules[i]) {
710 Formattable tempResult;
711 UBool success = nonNumericalRules[i]->doParse(text, workingPos, 0, upperBound, tempResult);
712 if (success && (workingPos.getIndex() > highWaterMark.getIndex())) {
713 result = tempResult;
714 highWaterMark = workingPos;
715 }
716 workingPos = pos;
717 }
718 }
719 #ifdef RBNF_DEBUG
720 fprintf(stderr, "<nfrs> continue other with text '");
721 dumpUS(stderr, text);
722 fprintf(stderr, "' hwm: %d\n", highWaterMark.getIndex());
723 #endif
724
725 // finally, go through the regular rules one at a time. We start
726 // at the end of the list because we want to try matching the most
727 // sigificant rule first (this helps ensure that we parse
728 // "five thousand three hundred six" as
729 // "(five thousand) (three hundred) (six)" rather than
730 // "((five thousand three) hundred) (six)"). Skip rules whose
731 // base values are higher than the upper bound (again, this helps
732 // limit ambiguity by making sure the rules that match a rule's
733 // are less significant than the rule containing the substitutions)/
734 {
735 int64_t ub = util64_fromDouble(upperBound);
736 #ifdef RBNF_DEBUG
737 {
738 char ubstr[64];
739 util64_toa(ub, ubstr, 64);
740 char ubstrhex[64];
741 util64_toa(ub, ubstrhex, 64, 16);
742 fprintf(stderr, "ub: %g, i64: %s (%s)\n", upperBound, ubstr, ubstrhex);
743 }
744 #endif
745 for (int32_t i = rules.size(); --i >= 0 && highWaterMark.getIndex() < text.length();) {
746 if ((!fIsFractionRuleSet) && (rules[i]->getBaseValue() >= ub)) {
747 continue;
748 }
749 Formattable tempResult;
750 UBool success = rules[i]->doParse(text, workingPos, fIsFractionRuleSet, upperBound, tempResult);
751 if (success && workingPos.getIndex() > highWaterMark.getIndex()) {
752 result = tempResult;
753 highWaterMark = workingPos;
754 }
755 workingPos = pos;
756 }
757 }
758 #ifdef RBNF_DEBUG
759 fprintf(stderr, "<nfrs> exit\n");
760 #endif
761 // finally, update the parse postion we were passed to point to the
762 // first character we didn't use, and return the result that
763 // corresponds to that string of characters
764 pos = highWaterMark;
765
766 return 1;
767 }
768
769 void
appendRules(UnicodeString & result) const770 NFRuleSet::appendRules(UnicodeString& result) const
771 {
772 uint32_t i;
773
774 // the rule set name goes first...
775 result.append(name);
776 result.append(gColon);
777 result.append(gLineFeed);
778
779 // followed by the regular rules...
780 for (i = 0; i < rules.size(); i++) {
781 rules[i]->_appendRuleText(result);
782 result.append(gLineFeed);
783 }
784
785 // followed by the special rules (if they exist)
786 for (i = 0; i < NON_NUMERICAL_RULE_LENGTH; ++i) {
787 NFRule *rule = nonNumericalRules[i];
788 if (nonNumericalRules[i]) {
789 if (rule->getBaseValue() == NFRule::kImproperFractionRule
790 || rule->getBaseValue() == NFRule::kProperFractionRule
791 || rule->getBaseValue() == NFRule::kMasterRule)
792 {
793 for (uint32_t fIdx = 0; fIdx < fractionRules.size(); fIdx++) {
794 NFRule *fractionRule = fractionRules[fIdx];
795 if (fractionRule->getBaseValue() == rule->getBaseValue()) {
796 fractionRule->_appendRuleText(result);
797 result.append(gLineFeed);
798 }
799 }
800 }
801 else {
802 rule->_appendRuleText(result);
803 result.append(gLineFeed);
804 }
805 }
806 }
807 }
808
809 // utility functions
810
util64_fromDouble(double d)811 int64_t util64_fromDouble(double d) {
812 int64_t result = 0;
813 if (!uprv_isNaN(d)) {
814 double mant = uprv_maxMantissa();
815 if (d < -mant) {
816 d = -mant;
817 } else if (d > mant) {
818 d = mant;
819 }
820 UBool neg = d < 0;
821 if (neg) {
822 d = -d;
823 }
824 result = (int64_t)uprv_floor(d);
825 if (neg) {
826 result = -result;
827 }
828 }
829 return result;
830 }
831
util64_pow(int32_t r,uint32_t e)832 int64_t util64_pow(int32_t r, uint32_t e) {
833 if (r == 0) {
834 return 0;
835 } else if (e == 0) {
836 return 1;
837 } else {
838 int64_t n = r;
839 while (--e > 0) {
840 n *= r;
841 }
842 return n;
843 }
844 }
845
846 static const uint8_t asciiDigits[] = {
847 0x30u, 0x31u, 0x32u, 0x33u, 0x34u, 0x35u, 0x36u, 0x37u,
848 0x38u, 0x39u, 0x61u, 0x62u, 0x63u, 0x64u, 0x65u, 0x66u,
849 0x67u, 0x68u, 0x69u, 0x6au, 0x6bu, 0x6cu, 0x6du, 0x6eu,
850 0x6fu, 0x70u, 0x71u, 0x72u, 0x73u, 0x74u, 0x75u, 0x76u,
851 0x77u, 0x78u, 0x79u, 0x7au,
852 };
853
854 static const UChar kUMinus = (UChar)0x002d;
855
856 #ifdef RBNF_DEBUG
857 static const char kMinus = '-';
858
859 static const uint8_t digitInfo[] = {
860 0, 0, 0, 0, 0, 0, 0, 0,
861 0, 0, 0, 0, 0, 0, 0, 0,
862 0, 0, 0, 0, 0, 0, 0, 0,
863 0, 0, 0, 0, 0, 0, 0, 0,
864 0, 0, 0, 0, 0, 0, 0, 0,
865 0, 0, 0, 0, 0, 0, 0, 0,
866 0x80u, 0x81u, 0x82u, 0x83u, 0x84u, 0x85u, 0x86u, 0x87u,
867 0x88u, 0x89u, 0, 0, 0, 0, 0, 0,
868 0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
869 0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
870 0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
871 0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0,
872 0, 0x8au, 0x8bu, 0x8cu, 0x8du, 0x8eu, 0x8fu, 0x90u,
873 0x91u, 0x92u, 0x93u, 0x94u, 0x95u, 0x96u, 0x97u, 0x98u,
874 0x99u, 0x9au, 0x9bu, 0x9cu, 0x9du, 0x9eu, 0x9fu, 0xa0u,
875 0xa1u, 0xa2u, 0xa3u, 0, 0, 0, 0, 0,
876 };
877
util64_atoi(const char * str,uint32_t radix)878 int64_t util64_atoi(const char* str, uint32_t radix)
879 {
880 if (radix > 36) {
881 radix = 36;
882 } else if (radix < 2) {
883 radix = 2;
884 }
885 int64_t lradix = radix;
886
887 int neg = 0;
888 if (*str == kMinus) {
889 ++str;
890 neg = 1;
891 }
892 int64_t result = 0;
893 uint8_t b;
894 while ((b = digitInfo[*str++]) && ((b &= 0x7f) < radix)) {
895 result *= lradix;
896 result += (int32_t)b;
897 }
898 if (neg) {
899 result = -result;
900 }
901 return result;
902 }
903
util64_utoi(const UChar * str,uint32_t radix)904 int64_t util64_utoi(const UChar* str, uint32_t radix)
905 {
906 if (radix > 36) {
907 radix = 36;
908 } else if (radix < 2) {
909 radix = 2;
910 }
911 int64_t lradix = radix;
912
913 int neg = 0;
914 if (*str == kUMinus) {
915 ++str;
916 neg = 1;
917 }
918 int64_t result = 0;
919 UChar c;
920 uint8_t b;
921 while (((c = *str++) < 0x0080) && (b = digitInfo[c]) && ((b &= 0x7f) < radix)) {
922 result *= lradix;
923 result += (int32_t)b;
924 }
925 if (neg) {
926 result = -result;
927 }
928 return result;
929 }
930
util64_toa(int64_t w,char * buf,uint32_t len,uint32_t radix,UBool raw)931 uint32_t util64_toa(int64_t w, char* buf, uint32_t len, uint32_t radix, UBool raw)
932 {
933 if (radix > 36) {
934 radix = 36;
935 } else if (radix < 2) {
936 radix = 2;
937 }
938 int64_t base = radix;
939
940 char* p = buf;
941 if (len && (w < 0) && (radix == 10) && !raw) {
942 w = -w;
943 *p++ = kMinus;
944 --len;
945 } else if (len && (w == 0)) {
946 *p++ = (char)raw ? 0 : asciiDigits[0];
947 --len;
948 }
949
950 while (len && w != 0) {
951 int64_t n = w / base;
952 int64_t m = n * base;
953 int32_t d = (int32_t)(w-m);
954 *p++ = raw ? (char)d : asciiDigits[d];
955 w = n;
956 --len;
957 }
958 if (len) {
959 *p = 0; // null terminate if room for caller convenience
960 }
961
962 len = p - buf;
963 if (*buf == kMinus) {
964 ++buf;
965 }
966 while (--p > buf) {
967 char c = *p;
968 *p = *buf;
969 *buf = c;
970 ++buf;
971 }
972
973 return len;
974 }
975 #endif
976
util64_tou(int64_t w,UChar * buf,uint32_t len,uint32_t radix,UBool raw)977 uint32_t util64_tou(int64_t w, UChar* buf, uint32_t len, uint32_t radix, UBool raw)
978 {
979 if (radix > 36) {
980 radix = 36;
981 } else if (radix < 2) {
982 radix = 2;
983 }
984 int64_t base = radix;
985
986 UChar* p = buf;
987 if (len && (w < 0) && (radix == 10) && !raw) {
988 w = -w;
989 *p++ = kUMinus;
990 --len;
991 } else if (len && (w == 0)) {
992 *p++ = (UChar)raw ? 0 : asciiDigits[0];
993 --len;
994 }
995
996 while (len && (w != 0)) {
997 int64_t n = w / base;
998 int64_t m = n * base;
999 int32_t d = (int32_t)(w-m);
1000 *p++ = (UChar)(raw ? d : asciiDigits[d]);
1001 w = n;
1002 --len;
1003 }
1004 if (len) {
1005 *p = 0; // null terminate if room for caller convenience
1006 }
1007
1008 len = (uint32_t)(p - buf);
1009 if (*buf == kUMinus) {
1010 ++buf;
1011 }
1012 while (--p > buf) {
1013 UChar c = *p;
1014 *p = *buf;
1015 *buf = c;
1016 ++buf;
1017 }
1018
1019 return len;
1020 }
1021
1022
1023 U_NAMESPACE_END
1024
1025 /* U_HAVE_RBNF */
1026 #endif
1027
1028