1 /*
2 *******************************************************************************
3 * Copyright (C) 1997-2013, International Business Machines Corporation
4 * and others. All Rights Reserved.
5 *******************************************************************************
6 */
7
8 #include "utypeinfo.h" // for 'typeid' to work
9
10 #include "unicode/rbnf.h"
11
12 #if U_HAVE_RBNF
13
14 #include "unicode/normlzr.h"
15 #include "unicode/tblcoll.h"
16 #include "unicode/uchar.h"
17 #include "unicode/ucol.h"
18 #include "unicode/uloc.h"
19 #include "unicode/unum.h"
20 #include "unicode/ures.h"
21 #include "unicode/ustring.h"
22 #include "unicode/utf16.h"
23 #include "unicode/udata.h"
24 #include "nfrs.h"
25
26 #include "cmemory.h"
27 #include "cstring.h"
28 #include "patternprops.h"
29 #include "uresimp.h"
30
31 // debugging
32 // #define DEBUG
33
34 #ifdef DEBUG
35 #include "stdio.h"
36 #endif
37
38 #define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
39
40 static const UChar gPercentPercent[] =
41 {
42 0x25, 0x25, 0
43 }; /* "%%" */
44
45 // All urbnf objects are created through openRules, so we init all of the
46 // Unicode string constants required by rbnf, nfrs, or nfr here.
47 static const UChar gLenientParse[] =
48 {
49 0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
50 }; /* "%%lenient-parse:" */
51 static const UChar gSemiColon = 0x003B;
52 static const UChar gSemiPercent[] =
53 {
54 0x3B, 0x25, 0
55 }; /* ";%" */
56
// kMaxDouble is 2^(2 * kSomeNumberOfBitsDiv2), i.e. 2^44. It is built as the
// product of two 2^22 halves so that the shift itself stays within a 32-bit int.
// It is the upper bound passed to NFRuleSet::parse() by RuleBasedNumberFormat::parse().
#define kSomeNumberOfBitsDiv2 22
#define kHalfMaxDouble ((double)(1 << kSomeNumberOfBitsDiv2))
#define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
60
61 U_NAMESPACE_BEGIN
62
63 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
64
65 /*
66 This is a utility class. It does not use ICU's RTTI.
67 If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
68 Please make sure that intltest passes on Windows in Release mode,
69 since the string pooling per compilation unit will mess up how RTTI works.
70 The RTTI code was also removed due to lack of code coverage.
71 */
72 class LocalizationInfo : public UMemory {
73 protected:
74 virtual ~LocalizationInfo();
75 uint32_t refcount;
76
77 public:
LocalizationInfo()78 LocalizationInfo() : refcount(0) {}
79
ref(void)80 LocalizationInfo* ref(void) {
81 ++refcount;
82 return this;
83 }
84
unref(void)85 LocalizationInfo* unref(void) {
86 if (refcount && --refcount == 0) {
87 delete this;
88 }
89 return NULL;
90 }
91
92 virtual UBool operator==(const LocalizationInfo* rhs) const;
operator !=(const LocalizationInfo * rhs) const93 inline UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); }
94
95 virtual int32_t getNumberOfRuleSets(void) const = 0;
96 virtual const UChar* getRuleSetName(int32_t index) const = 0;
97 virtual int32_t getNumberOfDisplayLocales(void) const = 0;
98 virtual const UChar* getLocaleName(int32_t index) const = 0;
99 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0;
100
101 virtual int32_t indexForLocale(const UChar* locale) const;
102 virtual int32_t indexForRuleSet(const UChar* ruleset) const;
103
104 // virtual UClassID getDynamicClassID() const = 0;
105 // static UClassID getStaticClassID(void);
106 };
107
~LocalizationInfo()108 LocalizationInfo::~LocalizationInfo() {}
109
110 //UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
111
112 // if both strings are NULL, this returns TRUE
113 static UBool
streq(const UChar * lhs,const UChar * rhs)114 streq(const UChar* lhs, const UChar* rhs) {
115 if (rhs == lhs) {
116 return TRUE;
117 }
118 if (lhs && rhs) {
119 return u_strcmp(lhs, rhs) == 0;
120 }
121 return FALSE;
122 }
123
124 UBool
operator ==(const LocalizationInfo * rhs) const125 LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
126 if (rhs) {
127 if (this == rhs) {
128 return TRUE;
129 }
130
131 int32_t rsc = getNumberOfRuleSets();
132 if (rsc == rhs->getNumberOfRuleSets()) {
133 for (int i = 0; i < rsc; ++i) {
134 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) {
135 return FALSE;
136 }
137 }
138 int32_t dlc = getNumberOfDisplayLocales();
139 if (dlc == rhs->getNumberOfDisplayLocales()) {
140 for (int i = 0; i < dlc; ++i) {
141 const UChar* locale = getLocaleName(i);
142 int32_t ix = rhs->indexForLocale(locale);
143 // if no locale, ix is -1, getLocaleName returns null, so streq returns false
144 if (!streq(locale, rhs->getLocaleName(ix))) {
145 return FALSE;
146 }
147 for (int j = 0; j < rsc; ++j) {
148 if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) {
149 return FALSE;
150 }
151 }
152 }
153 return TRUE;
154 }
155 }
156 }
157 return FALSE;
158 }
159
160 int32_t
indexForLocale(const UChar * locale) const161 LocalizationInfo::indexForLocale(const UChar* locale) const {
162 for (int i = 0; i < getNumberOfDisplayLocales(); ++i) {
163 if (streq(locale, getLocaleName(i))) {
164 return i;
165 }
166 }
167 return -1;
168 }
169
170 int32_t
indexForRuleSet(const UChar * ruleset) const171 LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
172 if (ruleset) {
173 for (int i = 0; i < getNumberOfRuleSets(); ++i) {
174 if (streq(ruleset, getRuleSetName(i))) {
175 return i;
176 }
177 }
178 }
179 return -1;
180 }
181
182
183 typedef void (*Fn_Deleter)(void*);
184
185 class VArray {
186 void** buf;
187 int32_t cap;
188 int32_t size;
189 Fn_Deleter deleter;
190 public:
VArray()191 VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {}
192
VArray(Fn_Deleter del)193 VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {}
194
~VArray()195 ~VArray() {
196 if (deleter) {
197 for (int i = 0; i < size; ++i) {
198 (*deleter)(buf[i]);
199 }
200 }
201 uprv_free(buf);
202 }
203
length()204 int32_t length() {
205 return size;
206 }
207
add(void * elem,UErrorCode & status)208 void add(void* elem, UErrorCode& status) {
209 if (U_SUCCESS(status)) {
210 if (size == cap) {
211 if (cap == 0) {
212 cap = 1;
213 } else if (cap < 256) {
214 cap *= 2;
215 } else {
216 cap += 256;
217 }
218 if (buf == NULL) {
219 buf = (void**)uprv_malloc(cap * sizeof(void*));
220 } else {
221 buf = (void**)uprv_realloc(buf, cap * sizeof(void*));
222 }
223 if (buf == NULL) {
224 // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
225 status = U_MEMORY_ALLOCATION_ERROR;
226 return;
227 }
228 void* start = &buf[size];
229 size_t count = (cap - size) * sizeof(void*);
230 uprv_memset(start, 0, count); // fill with nulls, just because
231 }
232 buf[size++] = elem;
233 }
234 }
235
release(void)236 void** release(void) {
237 void** result = buf;
238 buf = NULL;
239 cap = 0;
240 size = 0;
241 return result;
242 }
243 };
244
245 class LocDataParser;
246
247 class StringLocalizationInfo : public LocalizationInfo {
248 UChar* info;
249 UChar*** data;
250 int32_t numRuleSets;
251 int32_t numLocales;
252
253 friend class LocDataParser;
254
StringLocalizationInfo(UChar * i,UChar *** d,int32_t numRS,int32_t numLocs)255 StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs)
256 : info(i), data(d), numRuleSets(numRS), numLocales(numLocs)
257 {
258 }
259
260 public:
261 static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);
262
263 virtual ~StringLocalizationInfo();
getNumberOfRuleSets(void) const264 virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; }
265 virtual const UChar* getRuleSetName(int32_t index) const;
getNumberOfDisplayLocales(void) const266 virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; }
267 virtual const UChar* getLocaleName(int32_t index) const;
268 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;
269
270 // virtual UClassID getDynamicClassID() const;
271 // static UClassID getStaticClassID(void);
272
273 private:
274 void init(UErrorCode& status) const;
275 };
276
277
// Characters with syntactic meaning in a localization data string.
enum {
    SPACE       = 0x0020, /* ' '  */
    QUOTE       = 0x0022, /* '"'  */
    TICK        = 0x0027, /* '\'' */
    COMMA       = 0x002c, /* ','  */
    OPEN_ANGLE  = 0x003c, /* '<'  */
    CLOSE_ANGLE = 0x003e  /* '>'  */
};
286
287 /**
288 * Utility for parsing a localization string and returning a StringLocalizationInfo*.
289 */
290 class LocDataParser {
291 UChar* data;
292 const UChar* e;
293 UChar* p;
294 UChar ch;
295 UParseError& pe;
296 UErrorCode& ec;
297
298 public:
LocDataParser(UParseError & parseError,UErrorCode & status)299 LocDataParser(UParseError& parseError, UErrorCode& status)
300 : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {}
~LocDataParser()301 ~LocDataParser() {}
302
303 /*
304 * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
305 * and return NULL. The StringLocalizationInfo will adopt locData if it is created.
306 */
307 StringLocalizationInfo* parse(UChar* data, int32_t len);
308
309 private:
310
inc(void)311 void inc(void) { ++p; ch = 0xffff; }
checkInc(UChar c)312 UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; }
check(UChar c)313 UBool check(UChar c) { return p < e && (ch == c || *p == c); }
skipWhitespace(void)314 void skipWhitespace(void) { while (p < e && PatternProps::isWhiteSpace(ch != 0xffff ? ch : *p)) inc();}
inList(UChar c,const UChar * list) const315 UBool inList(UChar c, const UChar* list) const {
316 if (*list == SPACE && PatternProps::isWhiteSpace(c)) return TRUE;
317 while (*list && *list != c) ++list; return *list == c;
318 }
319 void parseError(const char* msg);
320
321 StringLocalizationInfo* doParse(void);
322
323 UChar** nextArray(int32_t& requiredLength);
324 UChar* nextString(void);
325 };
326
// ERROR(msg) records a parse error and returns NULL from the enclosing
// function. It expands to two statements, so it must only be used inside
// braces. The message text reaches parseError() in DEBUG builds only.
#ifdef DEBUG
#define LOCDATA_ERROR_MSG(msg) msg
#else
#define LOCDATA_ERROR_MSG(msg) NULL
#endif
#define ERROR(msg) parseError(LOCDATA_ERROR_MSG(msg)); return NULL;
332
333
334 static const UChar DQUOTE_STOPLIST[] = {
335 QUOTE, 0
336 };
337
338 static const UChar SQUOTE_STOPLIST[] = {
339 TICK, 0
340 };
341
342 static const UChar NOQUOTE_STOPLIST[] = {
343 SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0
344 };
345
346 static void
DeleteFn(void * p)347 DeleteFn(void* p) {
348 uprv_free(p);
349 }
350
351 StringLocalizationInfo*
parse(UChar * _data,int32_t len)352 LocDataParser::parse(UChar* _data, int32_t len) {
353 if (U_FAILURE(ec)) {
354 if (_data) uprv_free(_data);
355 return NULL;
356 }
357
358 pe.line = 0;
359 pe.offset = -1;
360 pe.postContext[0] = 0;
361 pe.preContext[0] = 0;
362
363 if (_data == NULL) {
364 ec = U_ILLEGAL_ARGUMENT_ERROR;
365 return NULL;
366 }
367
368 if (len <= 0) {
369 ec = U_ILLEGAL_ARGUMENT_ERROR;
370 uprv_free(_data);
371 return NULL;
372 }
373
374 data = _data;
375 e = data + len;
376 p = _data;
377 ch = 0xffff;
378
379 return doParse();
380 }
381
382
383 StringLocalizationInfo*
doParse(void)384 LocDataParser::doParse(void) {
385 skipWhitespace();
386 if (!checkInc(OPEN_ANGLE)) {
387 ERROR("Missing open angle");
388 } else {
389 VArray array(DeleteFn);
390 UBool mightHaveNext = TRUE;
391 int32_t requiredLength = -1;
392 while (mightHaveNext) {
393 mightHaveNext = FALSE;
394 UChar** elem = nextArray(requiredLength);
395 skipWhitespace();
396 UBool haveComma = check(COMMA);
397 if (elem) {
398 array.add(elem, ec);
399 if (haveComma) {
400 inc();
401 mightHaveNext = TRUE;
402 }
403 } else if (haveComma) {
404 ERROR("Unexpected character");
405 }
406 }
407
408 skipWhitespace();
409 if (!checkInc(CLOSE_ANGLE)) {
410 if (check(OPEN_ANGLE)) {
411 ERROR("Missing comma in outer array");
412 } else {
413 ERROR("Missing close angle bracket in outer array");
414 }
415 }
416
417 skipWhitespace();
418 if (p != e) {
419 ERROR("Extra text after close of localization data");
420 }
421
422 array.add(NULL, ec);
423 if (U_SUCCESS(ec)) {
424 int32_t numLocs = array.length() - 2; // subtract first, NULL
425 UChar*** result = (UChar***)array.release();
426
427 return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL
428 }
429 }
430
431 ERROR("Unknown error");
432 }
433
434 UChar**
nextArray(int32_t & requiredLength)435 LocDataParser::nextArray(int32_t& requiredLength) {
436 if (U_FAILURE(ec)) {
437 return NULL;
438 }
439
440 skipWhitespace();
441 if (!checkInc(OPEN_ANGLE)) {
442 ERROR("Missing open angle");
443 }
444
445 VArray array;
446 UBool mightHaveNext = TRUE;
447 while (mightHaveNext) {
448 mightHaveNext = FALSE;
449 UChar* elem = nextString();
450 skipWhitespace();
451 UBool haveComma = check(COMMA);
452 if (elem) {
453 array.add(elem, ec);
454 if (haveComma) {
455 inc();
456 mightHaveNext = TRUE;
457 }
458 } else if (haveComma) {
459 ERROR("Unexpected comma");
460 }
461 }
462 skipWhitespace();
463 if (!checkInc(CLOSE_ANGLE)) {
464 if (check(OPEN_ANGLE)) {
465 ERROR("Missing close angle bracket in inner array");
466 } else {
467 ERROR("Missing comma in inner array");
468 }
469 }
470
471 array.add(NULL, ec);
472 if (U_SUCCESS(ec)) {
473 if (requiredLength == -1) {
474 requiredLength = array.length() + 1;
475 } else if (array.length() != requiredLength) {
476 ec = U_ILLEGAL_ARGUMENT_ERROR;
477 ERROR("Array not of required length");
478 }
479
480 return (UChar**)array.release();
481 }
482 ERROR("Unknown Error");
483 }
484
485 UChar*
nextString()486 LocDataParser::nextString() {
487 UChar* result = NULL;
488
489 skipWhitespace();
490 if (p < e) {
491 const UChar* terminators;
492 UChar c = *p;
493 UBool haveQuote = c == QUOTE || c == TICK;
494 if (haveQuote) {
495 inc();
496 terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
497 } else {
498 terminators = NOQUOTE_STOPLIST;
499 }
500 UChar* start = p;
501 while (p < e && !inList(*p, terminators)) ++p;
502 if (p == e) {
503 ERROR("Unexpected end of data");
504 }
505
506 UChar x = *p;
507 if (p > start) {
508 ch = x;
509 *p = 0x0; // terminate by writing to data
510 result = start; // just point into data
511 }
512 if (haveQuote) {
513 if (x != c) {
514 ERROR("Missing matching quote");
515 } else if (p == start) {
516 ERROR("Empty string");
517 }
518 inc();
519 } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) {
520 ERROR("Unexpected character in string");
521 }
522 }
523
524 // ok for there to be no next string
525 return result;
526 }
527
528 void
parseError(const char *)529 LocDataParser::parseError(const char* /*str*/) {
530 if (!data) {
531 return;
532 }
533
534 const UChar* start = p - U_PARSE_CONTEXT_LEN - 1;
535 if (start < data) {
536 start = data;
537 }
538 for (UChar* x = p; --x >= start;) {
539 if (!*x) {
540 start = x+1;
541 break;
542 }
543 }
544 const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1;
545 if (limit > e) {
546 limit = e;
547 }
548 u_strncpy(pe.preContext, start, (int32_t)(p-start));
549 pe.preContext[p-start] = 0;
550 u_strncpy(pe.postContext, p, (int32_t)(limit-p));
551 pe.postContext[limit-p] = 0;
552 pe.offset = (int32_t)(p - data);
553
554 #ifdef DEBUG
555 fprintf(stderr, "%s at or near character %d: ", str, p-data);
556
557 UnicodeString msg;
558 msg.append(start, p - start);
559 msg.append((UChar)0x002f); /* SOLIDUS/SLASH */
560 msg.append(p, limit-p);
561 msg.append("'");
562
563 char buf[128];
564 int32_t len = msg.extract(0, msg.length(), buf, 128);
565 if (len >= 128) {
566 buf[127] = 0;
567 } else {
568 buf[len] = 0;
569 }
570 fprintf(stderr, "%s\n", buf);
571 fflush(stderr);
572 #endif
573
574 uprv_free(data);
575 data = NULL;
576 p = NULL;
577 e = NULL;
578
579 if (U_SUCCESS(ec)) {
580 ec = U_PARSE_ERROR;
581 }
582 }
583
584 //UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
585
586 StringLocalizationInfo*
create(const UnicodeString & info,UParseError & perror,UErrorCode & status)587 StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
588 if (U_FAILURE(status)) {
589 return NULL;
590 }
591
592 int32_t len = info.length();
593 if (len == 0) {
594 return NULL; // no error;
595 }
596
597 UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar));
598 if (!p) {
599 status = U_MEMORY_ALLOCATION_ERROR;
600 return NULL;
601 }
602 info.extract(p, len, status);
603 if (!U_FAILURE(status)) {
604 status = U_ZERO_ERROR; // clear warning about non-termination
605 }
606
607 LocDataParser parser(perror, status);
608 return parser.parse(p, len);
609 }
610
~StringLocalizationInfo()611 StringLocalizationInfo::~StringLocalizationInfo() {
612 for (UChar*** p = (UChar***)data; *p; ++p) {
613 // remaining data is simply pointer into our unicode string data.
614 if (*p) uprv_free(*p);
615 }
616 if (data) uprv_free(data);
617 if (info) uprv_free(info);
618 }
619
620
621 const UChar*
getRuleSetName(int32_t index) const622 StringLocalizationInfo::getRuleSetName(int32_t index) const {
623 if (index >= 0 && index < getNumberOfRuleSets()) {
624 return data[0][index];
625 }
626 return NULL;
627 }
628
629 const UChar*
getLocaleName(int32_t index) const630 StringLocalizationInfo::getLocaleName(int32_t index) const {
631 if (index >= 0 && index < getNumberOfDisplayLocales()) {
632 return data[index+1][0];
633 }
634 return NULL;
635 }
636
637 const UChar*
getDisplayName(int32_t localeIndex,int32_t ruleIndex) const638 StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
639 if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() &&
640 ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) {
641 return data[localeIndex+1][ruleIndex+1];
642 }
643 return NULL;
644 }
645
646 // ----------
647
RuleBasedNumberFormat(const UnicodeString & description,const UnicodeString & locs,const Locale & alocale,UParseError & perror,UErrorCode & status)648 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
649 const UnicodeString& locs,
650 const Locale& alocale, UParseError& perror, UErrorCode& status)
651 : ruleSets(NULL)
652 , ruleSetDescriptions(NULL)
653 , numRuleSets(0)
654 , defaultRuleSet(NULL)
655 , locale(alocale)
656 , collator(NULL)
657 , decimalFormatSymbols(NULL)
658 , lenient(FALSE)
659 , lenientParseRules(NULL)
660 , localizations(NULL)
661 {
662 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
663 init(description, locinfo, perror, status);
664 }
665
RuleBasedNumberFormat(const UnicodeString & description,const UnicodeString & locs,UParseError & perror,UErrorCode & status)666 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
667 const UnicodeString& locs,
668 UParseError& perror, UErrorCode& status)
669 : ruleSets(NULL)
670 , ruleSetDescriptions(NULL)
671 , numRuleSets(0)
672 , defaultRuleSet(NULL)
673 , locale(Locale::getDefault())
674 , collator(NULL)
675 , decimalFormatSymbols(NULL)
676 , lenient(FALSE)
677 , lenientParseRules(NULL)
678 , localizations(NULL)
679 {
680 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
681 init(description, locinfo, perror, status);
682 }
683
RuleBasedNumberFormat(const UnicodeString & description,LocalizationInfo * info,const Locale & alocale,UParseError & perror,UErrorCode & status)684 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
685 LocalizationInfo* info,
686 const Locale& alocale, UParseError& perror, UErrorCode& status)
687 : ruleSets(NULL)
688 , ruleSetDescriptions(NULL)
689 , numRuleSets(0)
690 , defaultRuleSet(NULL)
691 , locale(alocale)
692 , collator(NULL)
693 , decimalFormatSymbols(NULL)
694 , lenient(FALSE)
695 , lenientParseRules(NULL)
696 , localizations(NULL)
697 {
698 init(description, info, perror, status);
699 }
700
RuleBasedNumberFormat(const UnicodeString & description,UParseError & perror,UErrorCode & status)701 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
702 UParseError& perror,
703 UErrorCode& status)
704 : ruleSets(NULL)
705 , ruleSetDescriptions(NULL)
706 , numRuleSets(0)
707 , defaultRuleSet(NULL)
708 , locale(Locale::getDefault())
709 , collator(NULL)
710 , decimalFormatSymbols(NULL)
711 , lenient(FALSE)
712 , lenientParseRules(NULL)
713 , localizations(NULL)
714 {
715 init(description, NULL, perror, status);
716 }
717
RuleBasedNumberFormat(const UnicodeString & description,const Locale & aLocale,UParseError & perror,UErrorCode & status)718 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
719 const Locale& aLocale,
720 UParseError& perror,
721 UErrorCode& status)
722 : ruleSets(NULL)
723 , ruleSetDescriptions(NULL)
724 , numRuleSets(0)
725 , defaultRuleSet(NULL)
726 , locale(aLocale)
727 , collator(NULL)
728 , decimalFormatSymbols(NULL)
729 , lenient(FALSE)
730 , lenientParseRules(NULL)
731 , localizations(NULL)
732 {
733 init(description, NULL, perror, status);
734 }
735
RuleBasedNumberFormat(URBNFRuleSetTag tag,const Locale & alocale,UErrorCode & status)736 RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
737 : ruleSets(NULL)
738 , ruleSetDescriptions(NULL)
739 , numRuleSets(0)
740 , defaultRuleSet(NULL)
741 , locale(alocale)
742 , collator(NULL)
743 , decimalFormatSymbols(NULL)
744 , lenient(FALSE)
745 , lenientParseRules(NULL)
746 , localizations(NULL)
747 {
748 if (U_FAILURE(status)) {
749 return;
750 }
751
752 const char* rules_tag = "RBNFRules";
753 const char* fmt_tag = "";
754 switch (tag) {
755 case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
756 case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
757 case URBNF_DURATION: fmt_tag = "DurationRules"; break;
758 case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break;
759 default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
760 }
761
762 // TODO: read localization info from resource
763 LocalizationInfo* locinfo = NULL;
764
765 UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
766 if (U_SUCCESS(status)) {
767 setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
768 ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));
769
770 UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status);
771 if (U_FAILURE(status)) {
772 ures_close(nfrb);
773 }
774 UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status);
775 if (U_FAILURE(status)) {
776 ures_close(rbnfRules);
777 ures_close(nfrb);
778 return;
779 }
780
781 UnicodeString desc;
782 while (ures_hasNext(ruleSets)) {
783 desc.append(ures_getNextUnicodeString(ruleSets,NULL,&status));
784 }
785 UParseError perror;
786
787 init (desc, locinfo, perror, status);
788
789 ures_close(ruleSets);
790 ures_close(rbnfRules);
791 }
792 ures_close(nfrb);
793 }
794
// Copy constructor: starts from an empty formatter and lets the assignment
// operator rebuild the state from rhs.
RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
    : NumberFormat(rhs),
      ruleSets(NULL),
      ruleSetDescriptions(NULL),
      numRuleSets(0),
      defaultRuleSet(NULL),
      locale(rhs.locale),
      collator(NULL),
      decimalFormatSymbols(NULL),
      lenient(FALSE),
      lenientParseRules(NULL),
      localizations(NULL)
{
    *this = rhs;
}
810
811 // --------
812
813 RuleBasedNumberFormat&
operator =(const RuleBasedNumberFormat & rhs)814 RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
815 {
816 UErrorCode status = U_ZERO_ERROR;
817 dispose();
818 locale = rhs.locale;
819 lenient = rhs.lenient;
820
821 UnicodeString rules = rhs.getRules();
822 UParseError perror;
823 init(rules, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
824
825 return *this;
826 }
827
~RuleBasedNumberFormat()828 RuleBasedNumberFormat::~RuleBasedNumberFormat()
829 {
830 dispose();
831 }
832
833 Format*
clone(void) const834 RuleBasedNumberFormat::clone(void) const
835 {
836 RuleBasedNumberFormat * result = NULL;
837 UnicodeString rules = getRules();
838 UErrorCode status = U_ZERO_ERROR;
839 UParseError perror;
840 result = new RuleBasedNumberFormat(rules, localizations, locale, perror, status);
841 /* test for NULL */
842 if (result == 0) {
843 status = U_MEMORY_ALLOCATION_ERROR;
844 return 0;
845 }
846 if (U_FAILURE(status)) {
847 delete result;
848 result = 0;
849 } else {
850 result->lenient = lenient;
851 }
852 return result;
853 }
854
855 UBool
operator ==(const Format & other) const856 RuleBasedNumberFormat::operator==(const Format& other) const
857 {
858 if (this == &other) {
859 return TRUE;
860 }
861
862 if (typeid(*this) == typeid(other)) {
863 const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
864 if (locale == rhs.locale &&
865 lenient == rhs.lenient &&
866 (localizations == NULL
867 ? rhs.localizations == NULL
868 : (rhs.localizations == NULL
869 ? FALSE
870 : *localizations == rhs.localizations))) {
871
872 NFRuleSet** p = ruleSets;
873 NFRuleSet** q = rhs.ruleSets;
874 if (p == NULL) {
875 return q == NULL;
876 } else if (q == NULL) {
877 return FALSE;
878 }
879 while (*p && *q && (**p == **q)) {
880 ++p;
881 ++q;
882 }
883 return *q == NULL && *p == NULL;
884 }
885 }
886
887 return FALSE;
888 }
889
890 UnicodeString
getRules() const891 RuleBasedNumberFormat::getRules() const
892 {
893 UnicodeString result;
894 if (ruleSets != NULL) {
895 for (NFRuleSet** p = ruleSets; *p; ++p) {
896 (*p)->appendRules(result);
897 }
898 }
899 return result;
900 }
901
902 UnicodeString
getRuleSetName(int32_t index) const903 RuleBasedNumberFormat::getRuleSetName(int32_t index) const
904 {
905 if (localizations) {
906 UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1);
907 return string;
908 } else if (ruleSets) {
909 UnicodeString result;
910 for (NFRuleSet** p = ruleSets; *p; ++p) {
911 NFRuleSet* rs = *p;
912 if (rs->isPublic()) {
913 if (--index == -1) {
914 rs->getName(result);
915 return result;
916 }
917 }
918 }
919 }
920 UnicodeString empty;
921 return empty;
922 }
923
924 int32_t
getNumberOfRuleSetNames() const925 RuleBasedNumberFormat::getNumberOfRuleSetNames() const
926 {
927 int32_t result = 0;
928 if (localizations) {
929 result = localizations->getNumberOfRuleSets();
930 } else if (ruleSets) {
931 for (NFRuleSet** p = ruleSets; *p; ++p) {
932 if ((**p).isPublic()) {
933 ++result;
934 }
935 }
936 }
937 return result;
938 }
939
940 int32_t
getNumberOfRuleSetDisplayNameLocales(void) const941 RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
942 if (localizations) {
943 return localizations->getNumberOfDisplayLocales();
944 }
945 return 0;
946 }
947
948 Locale
getRuleSetDisplayNameLocale(int32_t index,UErrorCode & status) const949 RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
950 if (U_FAILURE(status)) {
951 return Locale("");
952 }
953 if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
954 UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
955 char buffer[64];
956 int32_t cap = name.length() + 1;
957 char* bp = buffer;
958 if (cap > 64) {
959 bp = (char *)uprv_malloc(cap);
960 if (bp == NULL) {
961 status = U_MEMORY_ALLOCATION_ERROR;
962 return Locale("");
963 }
964 }
965 name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
966 Locale retLocale(bp);
967 if (bp != buffer) {
968 uprv_free(bp);
969 }
970 return retLocale;
971 }
972 status = U_ILLEGAL_ARGUMENT_ERROR;
973 Locale retLocale;
974 return retLocale;
975 }
976
977 UnicodeString
getRuleSetDisplayName(int32_t index,const Locale & localeParam)978 RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
979 if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) {
980 UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
981 int32_t len = localeName.length();
982 UChar* localeStr = localeName.getBuffer(len + 1);
983 while (len >= 0) {
984 localeStr[len] = 0;
985 int32_t ix = localizations->indexForLocale(localeStr);
986 if (ix >= 0) {
987 UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
988 return name;
989 }
990
991 // trim trailing portion, skipping over ommitted sections
992 do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore
993 while (len > 0 && localeStr[len-1] == 0x005F) --len;
994 }
995 UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
996 return name;
997 }
998 UnicodeString bogus;
999 bogus.setToBogus();
1000 return bogus;
1001 }
1002
1003 UnicodeString
getRuleSetDisplayName(const UnicodeString & ruleSetName,const Locale & localeParam)1004 RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
1005 if (localizations) {
1006 UnicodeString rsn(ruleSetName);
1007 int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
1008 return getRuleSetDisplayName(ix, localeParam);
1009 }
1010 UnicodeString bogus;
1011 bogus.setToBogus();
1012 return bogus;
1013 }
1014
1015 NFRuleSet*
findRuleSet(const UnicodeString & name,UErrorCode & status) const1016 RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
1017 {
1018 if (U_SUCCESS(status) && ruleSets) {
1019 for (NFRuleSet** p = ruleSets; *p; ++p) {
1020 NFRuleSet* rs = *p;
1021 if (rs->isNamed(name)) {
1022 return rs;
1023 }
1024 }
1025 status = U_ILLEGAL_ARGUMENT_ERROR;
1026 }
1027 return NULL;
1028 }
1029
1030 UnicodeString&
format(int32_t number,UnicodeString & toAppendTo,FieldPosition &) const1031 RuleBasedNumberFormat::format(int32_t number,
1032 UnicodeString& toAppendTo,
1033 FieldPosition& /* pos */) const
1034 {
1035 if (defaultRuleSet) defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length());
1036 return toAppendTo;
1037 }
1038
1039
1040 UnicodeString&
format(int64_t number,UnicodeString & toAppendTo,FieldPosition &) const1041 RuleBasedNumberFormat::format(int64_t number,
1042 UnicodeString& toAppendTo,
1043 FieldPosition& /* pos */) const
1044 {
1045 if (defaultRuleSet) defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
1046 return toAppendTo;
1047 }
1048
1049
1050 UnicodeString&
format(double number,UnicodeString & toAppendTo,FieldPosition &) const1051 RuleBasedNumberFormat::format(double number,
1052 UnicodeString& toAppendTo,
1053 FieldPosition& /* pos */) const
1054 {
1055 // Special case for NaN; adapted from what DecimalFormat::_format( double number,...) does.
1056 if (uprv_isNaN(number)) {
1057 DecimalFormatSymbols* decFmtSyms = getDecimalFormatSymbols(); // RuleBasedNumberFormat internal
1058 if (decFmtSyms) {
1059 toAppendTo += decFmtSyms->getConstSymbol(DecimalFormatSymbols::kNaNSymbol);
1060 }
1061 } else if (defaultRuleSet) {
1062 defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
1063 }
1064 return toAppendTo;
1065 }
1066
1067
1068 UnicodeString&
format(int32_t number,const UnicodeString & ruleSetName,UnicodeString & toAppendTo,FieldPosition &,UErrorCode & status) const1069 RuleBasedNumberFormat::format(int32_t number,
1070 const UnicodeString& ruleSetName,
1071 UnicodeString& toAppendTo,
1072 FieldPosition& /* pos */,
1073 UErrorCode& status) const
1074 {
1075 // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
1076 if (U_SUCCESS(status)) {
1077 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1078 // throw new IllegalArgumentException("Can't use internal rule set");
1079 status = U_ILLEGAL_ARGUMENT_ERROR;
1080 } else {
1081 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1082 if (rs) {
1083 rs->format((int64_t)number, toAppendTo, toAppendTo.length());
1084 }
1085 }
1086 }
1087 return toAppendTo;
1088 }
1089
1090
1091 UnicodeString&
format(int64_t number,const UnicodeString & ruleSetName,UnicodeString & toAppendTo,FieldPosition &,UErrorCode & status) const1092 RuleBasedNumberFormat::format(int64_t number,
1093 const UnicodeString& ruleSetName,
1094 UnicodeString& toAppendTo,
1095 FieldPosition& /* pos */,
1096 UErrorCode& status) const
1097 {
1098 if (U_SUCCESS(status)) {
1099 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1100 // throw new IllegalArgumentException("Can't use internal rule set");
1101 status = U_ILLEGAL_ARGUMENT_ERROR;
1102 } else {
1103 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1104 if (rs) {
1105 rs->format(number, toAppendTo, toAppendTo.length());
1106 }
1107 }
1108 }
1109 return toAppendTo;
1110 }
1111
1112
1113 UnicodeString&
format(double number,const UnicodeString & ruleSetName,UnicodeString & toAppendTo,FieldPosition &,UErrorCode & status) const1114 RuleBasedNumberFormat::format(double number,
1115 const UnicodeString& ruleSetName,
1116 UnicodeString& toAppendTo,
1117 FieldPosition& /* pos */,
1118 UErrorCode& status) const
1119 {
1120 if (U_SUCCESS(status)) {
1121 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1122 // throw new IllegalArgumentException("Can't use internal rule set");
1123 status = U_ILLEGAL_ARGUMENT_ERROR;
1124 } else {
1125 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1126 if (rs) {
1127 rs->format(number, toAppendTo, toAppendTo.length());
1128 }
1129 }
1130 }
1131 return toAppendTo;
1132 }
1133
1134 void
parse(const UnicodeString & text,Formattable & result,ParsePosition & parsePosition) const1135 RuleBasedNumberFormat::parse(const UnicodeString& text,
1136 Formattable& result,
1137 ParsePosition& parsePosition) const
1138 {
1139 if (!ruleSets) {
1140 parsePosition.setErrorIndex(0);
1141 return;
1142 }
1143
1144 UnicodeString workingText(text, parsePosition.getIndex());
1145 ParsePosition workingPos(0);
1146
1147 ParsePosition high_pp(0);
1148 Formattable high_result;
1149
1150 for (NFRuleSet** p = ruleSets; *p; ++p) {
1151 NFRuleSet *rp = *p;
1152 if (rp->isPublic() && rp->isParseable()) {
1153 ParsePosition working_pp(0);
1154 Formattable working_result;
1155
1156 rp->parse(workingText, working_pp, kMaxDouble, working_result);
1157 if (working_pp.getIndex() > high_pp.getIndex()) {
1158 high_pp = working_pp;
1159 high_result = working_result;
1160
1161 if (high_pp.getIndex() == workingText.length()) {
1162 break;
1163 }
1164 }
1165 }
1166 }
1167
1168 int32_t startIndex = parsePosition.getIndex();
1169 parsePosition.setIndex(startIndex + high_pp.getIndex());
1170 if (high_pp.getIndex() > 0) {
1171 parsePosition.setErrorIndex(-1);
1172 } else {
1173 int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0;
1174 parsePosition.setErrorIndex(startIndex + errorIndex);
1175 }
1176 result = high_result;
1177 if (result.getType() == Formattable::kDouble) {
1178 int32_t r = (int32_t)result.getDouble();
1179 if ((double)r == result.getDouble()) {
1180 result.setLong(r);
1181 }
1182 }
1183 }
1184
1185 #if !UCONFIG_NO_COLLATION
1186
1187 void
setLenient(UBool enabled)1188 RuleBasedNumberFormat::setLenient(UBool enabled)
1189 {
1190 lenient = enabled;
1191 if (!enabled && collator) {
1192 delete collator;
1193 collator = NULL;
1194 }
1195 }
1196
1197 #endif
1198
1199 void
setDefaultRuleSet(const UnicodeString & ruleSetName,UErrorCode & status)1200 RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
1201 if (U_SUCCESS(status)) {
1202 if (ruleSetName.isEmpty()) {
1203 if (localizations) {
1204 UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
1205 defaultRuleSet = findRuleSet(name, status);
1206 } else {
1207 initDefaultRuleSet();
1208 }
1209 } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
1210 status = U_ILLEGAL_ARGUMENT_ERROR;
1211 } else {
1212 NFRuleSet* result = findRuleSet(ruleSetName, status);
1213 if (result != NULL) {
1214 defaultRuleSet = result;
1215 }
1216 }
1217 }
1218 }
1219
1220 UnicodeString
getDefaultRuleSetName() const1221 RuleBasedNumberFormat::getDefaultRuleSetName() const {
1222 UnicodeString result;
1223 if (defaultRuleSet && defaultRuleSet->isPublic()) {
1224 defaultRuleSet->getName(result);
1225 } else {
1226 result.setToBogus();
1227 }
1228 return result;
1229 }
1230
1231 void
initDefaultRuleSet()1232 RuleBasedNumberFormat::initDefaultRuleSet()
1233 {
1234 defaultRuleSet = NULL;
1235 if (!ruleSets) {
1236 return;
1237 }
1238
1239 const UnicodeString spellout = UNICODE_STRING_SIMPLE("%spellout-numbering");
1240 const UnicodeString ordinal = UNICODE_STRING_SIMPLE("%digits-ordinal");
1241 const UnicodeString duration = UNICODE_STRING_SIMPLE("%duration");
1242
1243 NFRuleSet**p = &ruleSets[0];
1244 while (*p) {
1245 if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) {
1246 defaultRuleSet = *p;
1247 return;
1248 } else {
1249 ++p;
1250 }
1251 }
1252
1253 defaultRuleSet = *--p;
1254 if (!defaultRuleSet->isPublic()) {
1255 while (p != ruleSets) {
1256 if ((*--p)->isPublic()) {
1257 defaultRuleSet = *p;
1258 break;
1259 }
1260 }
1261 }
1262 }
1263
1264
1265 void
init(const UnicodeString & rules,LocalizationInfo * localizationInfos,UParseError & pErr,UErrorCode & status)1266 RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
1267 UParseError& pErr, UErrorCode& status)
1268 {
1269 // TODO: implement UParseError
1270 uprv_memset(&pErr, 0, sizeof(UParseError));
1271 // Note: this can leave ruleSets == NULL, so remaining code should check
1272 if (U_FAILURE(status)) {
1273 return;
1274 }
1275
1276 this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();
1277
1278 UnicodeString description(rules);
1279 if (!description.length()) {
1280 status = U_MEMORY_ALLOCATION_ERROR;
1281 return;
1282 }
1283
1284 // start by stripping the trailing whitespace from all the rules
1285 // (this is all the whitespace follwing each semicolon in the
1286 // description). This allows us to look for rule-set boundaries
1287 // by searching for ";%" without having to worry about whitespace
1288 // between the ; and the %
1289 stripWhitespace(description);
1290
1291 // check to see if there's a set of lenient-parse rules. If there
1292 // is, pull them out into our temporary holding place for them,
1293 // and delete them from the description before the real desciption-
1294 // parsing code sees them
1295 int32_t lp = description.indexOf(gLenientParse, -1, 0);
1296 if (lp != -1) {
1297 // we've got to make sure we're not in the middle of a rule
1298 // (where "%%lenient-parse" would actually get treated as
1299 // rule text)
1300 if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
1301 // locate the beginning and end of the actual collation
1302 // rules (there may be whitespace between the name and
1303 // the first token in the description)
1304 int lpEnd = description.indexOf(gSemiPercent, 2, lp);
1305
1306 if (lpEnd == -1) {
1307 lpEnd = description.length() - 1;
1308 }
1309 int lpStart = lp + u_strlen(gLenientParse);
1310 while (PatternProps::isWhiteSpace(description.charAt(lpStart))) {
1311 ++lpStart;
1312 }
1313
1314 // copy out the lenient-parse rules and delete them
1315 // from the description
1316 lenientParseRules = new UnicodeString();
1317 /* test for NULL */
1318 if (lenientParseRules == 0) {
1319 status = U_MEMORY_ALLOCATION_ERROR;
1320 return;
1321 }
1322 lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);
1323
1324 description.remove(lp, lpEnd + 1 - lp);
1325 }
1326 }
1327
1328 // pre-flight parsing the description and count the number of
1329 // rule sets (";%" marks the end of one rule set and the beginning
1330 // of the next)
1331 numRuleSets = 0;
1332 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, p)) {
1333 ++numRuleSets;
1334 ++p;
1335 }
1336 ++numRuleSets;
1337
1338 // our rule list is an array of the appropriate size
1339 ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *));
1340 /* test for NULL */
1341 if (ruleSets == 0) {
1342 status = U_MEMORY_ALLOCATION_ERROR;
1343 return;
1344 }
1345
1346 for (int i = 0; i <= numRuleSets; ++i) {
1347 ruleSets[i] = NULL;
1348 }
1349
1350 // divide up the descriptions into individual rule-set descriptions
1351 // and store them in a temporary array. At each step, we also
1352 // new up a rule set, but all this does is initialize its name
1353 // and remove it from its description. We can't actually parse
1354 // the rest of the descriptions and finish initializing everything
1355 // because we have to know the names and locations of all the rule
1356 // sets before we can actually set everything up
1357 if(!numRuleSets) {
1358 status = U_ILLEGAL_ARGUMENT_ERROR;
1359 return;
1360 }
1361
1362 ruleSetDescriptions = new UnicodeString[numRuleSets];
1363 if (ruleSetDescriptions == 0) {
1364 status = U_MEMORY_ALLOCATION_ERROR;
1365 return;
1366 }
1367
1368 {
1369 int curRuleSet = 0;
1370 int32_t start = 0;
1371 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, start)) {
1372 ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
1373 ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
1374 if (ruleSets[curRuleSet] == 0) {
1375 status = U_MEMORY_ALLOCATION_ERROR;
1376 return;
1377 }
1378 ++curRuleSet;
1379 start = p + 1;
1380 }
1381 ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
1382 ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
1383 if (ruleSets[curRuleSet] == 0) {
1384 status = U_MEMORY_ALLOCATION_ERROR;
1385 return;
1386 }
1387 }
1388
1389 // now we can take note of the formatter's default rule set, which
1390 // is the last public rule set in the description (it's the last
1391 // rather than the first so that a user can create a new formatter
1392 // from an existing formatter and change its default behavior just
1393 // by appending more rule sets to the end)
1394
1395 // {dlf} Initialization of a fraction rule set requires the default rule
1396 // set to be known. For purposes of initialization, this is always the
1397 // last public rule set, no matter what the localization data says.
1398 initDefaultRuleSet();
1399
1400 // finally, we can go back through the temporary descriptions
1401 // list and finish seting up the substructure (and we throw
1402 // away the temporary descriptions as we go)
1403 {
1404 for (int i = 0; i < numRuleSets; i++) {
1405 ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
1406 }
1407 }
1408
1409 // Now that the rules are initialized, the 'real' default rule
1410 // set can be adjusted by the localization data.
1411
1412 // The C code keeps the localization array as is, rather than building
1413 // a separate array of the public rule set names, so we have less work
1414 // to do here-- but we still need to check the names.
1415
1416 if (localizationInfos) {
1417 // confirm the names, if any aren't in the rules, that's an error
1418 // it is ok if the rules contain public rule sets that are not in this list
1419 for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) {
1420 UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1);
1421 NFRuleSet* rs = findRuleSet(name, status);
1422 if (rs == NULL) {
1423 break; // error
1424 }
1425 if (i == 0) {
1426 defaultRuleSet = rs;
1427 }
1428 }
1429 } else {
1430 defaultRuleSet = getDefaultRuleSet();
1431 }
1432 }
1433
1434 void
stripWhitespace(UnicodeString & description)1435 RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
1436 {
1437 // iterate through the characters...
1438 UnicodeString result;
1439
1440 int start = 0;
1441 while (start != -1 && start < description.length()) {
1442 // seek to the first non-whitespace character...
1443 while (start < description.length()
1444 && PatternProps::isWhiteSpace(description.charAt(start))) {
1445 ++start;
1446 }
1447
1448 // locate the next semicolon in the text and copy the text from
1449 // our current position up to that semicolon into the result
1450 int32_t p = description.indexOf(gSemiColon, start);
1451 if (p == -1) {
1452 // or if we don't find a semicolon, just copy the rest of
1453 // the string into the result
1454 result.append(description, start, description.length() - start);
1455 start = -1;
1456 }
1457 else if (p < description.length()) {
1458 result.append(description, start, p + 1 - start);
1459 start = p + 1;
1460 }
1461
1462 // when we get here, we've seeked off the end of the sring, and
1463 // we terminate the loop (we continue until *start* is -1 rather
1464 // than until *p* is -1, because otherwise we'd miss the last
1465 // rule in the description)
1466 else {
1467 start = -1;
1468 }
1469 }
1470
1471 description.setTo(result);
1472 }
1473
1474
1475 void
dispose()1476 RuleBasedNumberFormat::dispose()
1477 {
1478 if (ruleSets) {
1479 for (NFRuleSet** p = ruleSets; *p; ++p) {
1480 delete *p;
1481 }
1482 uprv_free(ruleSets);
1483 ruleSets = NULL;
1484 }
1485
1486 if (ruleSetDescriptions) {
1487 delete [] ruleSetDescriptions;
1488 }
1489
1490 #if !UCONFIG_NO_COLLATION
1491 delete collator;
1492 #endif
1493 collator = NULL;
1494
1495 delete decimalFormatSymbols;
1496 decimalFormatSymbols = NULL;
1497
1498 delete lenientParseRules;
1499 lenientParseRules = NULL;
1500
1501 if (localizations) localizations = localizations->unref();
1502 }
1503
1504
1505 //-----------------------------------------------------------------------
1506 // package-internal API
1507 //-----------------------------------------------------------------------
1508
1509 /**
1510 * Returns the collator to use for lenient parsing. The collator is lazily created:
1511 * this function creates it the first time it's called.
1512 * @return The collator to use for lenient parsing, or null if lenient parsing
1513 * is turned off.
1514 */
1515 Collator*
getCollator() const1516 RuleBasedNumberFormat::getCollator() const
1517 {
1518 #if !UCONFIG_NO_COLLATION
1519 if (!ruleSets) {
1520 return NULL;
1521 }
1522
1523 // lazy-evaulate the collator
1524 if (collator == NULL && lenient) {
1525 // create a default collator based on the formatter's locale,
1526 // then pull out that collator's rules, append any additional
1527 // rules specified in the description, and create a _new_
1528 // collator based on the combinaiton of those rules
1529
1530 UErrorCode status = U_ZERO_ERROR;
1531
1532 Collator* temp = Collator::createInstance(locale, status);
1533 RuleBasedCollator* newCollator;
1534 if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) {
1535 if (lenientParseRules) {
1536 UnicodeString rules(newCollator->getRules());
1537 rules.append(*lenientParseRules);
1538
1539 newCollator = new RuleBasedCollator(rules, status);
1540 // Exit if newCollator could not be created.
1541 if (newCollator == NULL) {
1542 return NULL;
1543 }
1544 } else {
1545 temp = NULL;
1546 }
1547 if (U_SUCCESS(status)) {
1548 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
1549 // cast away const
1550 ((RuleBasedNumberFormat*)this)->collator = newCollator;
1551 } else {
1552 delete newCollator;
1553 }
1554 }
1555 delete temp;
1556 }
1557 #endif
1558
1559 // if lenient-parse mode is off, this will be null
1560 // (see setLenientParseMode())
1561 return collator;
1562 }
1563
1564
1565 /**
1566 * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
1567 * instances owned by this formatter. This object is lazily created: this function
1568 * creates it the first time it's called.
1569 * @return The DecimalFormatSymbols object that should be used by all DecimalFormat
1570 * instances owned by this formatter.
1571 */
1572 DecimalFormatSymbols*
getDecimalFormatSymbols() const1573 RuleBasedNumberFormat::getDecimalFormatSymbols() const
1574 {
1575 // lazy-evaluate the DecimalFormatSymbols object. This object
1576 // is shared by all DecimalFormat instances belonging to this
1577 // formatter
1578 if (decimalFormatSymbols == NULL) {
1579 UErrorCode status = U_ZERO_ERROR;
1580 DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status);
1581 if (U_SUCCESS(status)) {
1582 ((RuleBasedNumberFormat*)this)->decimalFormatSymbols = temp;
1583 } else {
1584 delete temp;
1585 }
1586 }
1587 return decimalFormatSymbols;
1588 }
1589
1590 // De-owning the current localized symbols and adopt the new symbols.
1591 void
adoptDecimalFormatSymbols(DecimalFormatSymbols * symbolsToAdopt)1592 RuleBasedNumberFormat::adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt)
1593 {
1594 if (symbolsToAdopt == NULL) {
1595 return; // do not allow caller to set decimalFormatSymbols to NULL
1596 }
1597
1598 if (decimalFormatSymbols != NULL) {
1599 delete decimalFormatSymbols;
1600 }
1601
1602 decimalFormatSymbols = symbolsToAdopt;
1603
1604 {
1605 // Apply the new decimalFormatSymbols by reparsing the rulesets
1606 UErrorCode status = U_ZERO_ERROR;
1607
1608 for (int32_t i = 0; i < numRuleSets; i++) {
1609 ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
1610 }
1611 }
1612 }
1613
1614 // Setting the symbols is equlivalent to adopting a newly created localized symbols.
1615 void
setDecimalFormatSymbols(const DecimalFormatSymbols & symbols)1616 RuleBasedNumberFormat::setDecimalFormatSymbols(const DecimalFormatSymbols& symbols)
1617 {
1618 adoptDecimalFormatSymbols(new DecimalFormatSymbols(symbols));
1619 }
1620
1621 U_NAMESPACE_END
1622
1623 /* U_HAVE_RBNF */
1624 #endif
1625