1 /*
2 *******************************************************************************
3 * Copyright (C) 1997-2012, International Business Machines Corporation
4 * and others. All Rights Reserved.
5 *******************************************************************************
6 */
7
8 #include "utypeinfo.h" // for 'typeid' to work
9
10 #include "unicode/rbnf.h"
11
12 #if U_HAVE_RBNF
13
14 #include "unicode/normlzr.h"
15 #include "unicode/tblcoll.h"
16 #include "unicode/uchar.h"
17 #include "unicode/ucol.h"
18 #include "unicode/uloc.h"
19 #include "unicode/unum.h"
20 #include "unicode/ures.h"
21 #include "unicode/ustring.h"
22 #include "unicode/utf16.h"
23 #include "unicode/udata.h"
24 #include "nfrs.h"
25
26 #include "cmemory.h"
27 #include "cstring.h"
28 #include "patternprops.h"
29 #include "uresimp.h"
30
31 // debugging
32 // #define DEBUG
33
34 #ifdef DEBUG
35 #include "stdio.h"
36 #endif
37
38 #define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
39
40 static const UChar gPercentPercent[] =
41 {
42 0x25, 0x25, 0
43 }; /* "%%" */
44
45 // All urbnf objects are created through openRules, so we init all of the
46 // Unicode string constants required by rbnf, nfrs, or nfr here.
47 static const UChar gLenientParse[] =
48 {
49 0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
50 }; /* "%%lenient-parse:" */
51 static const UChar gSemiColon = 0x003B;
52 static const UChar gSemiPercent[] =
53 {
54 0x3B, 0x25, 0
55 }; /* ";%" */
56
// kHalfMaxDouble is 2^kSomeNumberOfBitsDiv2 as a double; kMaxDouble is its square.
#define kSomeNumberOfBitsDiv2 22
#define kHalfMaxDouble ((double)(1 << kSomeNumberOfBitsDiv2))
#define kMaxDouble ((kHalfMaxDouble) * (kHalfMaxDouble))
60
61 U_NAMESPACE_BEGIN
62
63 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
64
65 /*
66 This is a utility class. It does not use ICU's RTTI.
67 If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
68 Please make sure that intltest passes on Windows in Release mode,
69 since the string pooling per compilation unit will mess up how RTTI works.
70 The RTTI code was also removed due to lack of code coverage.
71 */
72 class LocalizationInfo : public UMemory {
73 protected:
74 virtual ~LocalizationInfo();
75 uint32_t refcount;
76
77 public:
LocalizationInfo()78 LocalizationInfo() : refcount(0) {}
79
ref(void)80 LocalizationInfo* ref(void) {
81 ++refcount;
82 return this;
83 }
84
unref(void)85 LocalizationInfo* unref(void) {
86 if (refcount && --refcount == 0) {
87 delete this;
88 }
89 return NULL;
90 }
91
92 virtual UBool operator==(const LocalizationInfo* rhs) const;
operator !=(const LocalizationInfo * rhs) const93 inline UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); }
94
95 virtual int32_t getNumberOfRuleSets(void) const = 0;
96 virtual const UChar* getRuleSetName(int32_t index) const = 0;
97 virtual int32_t getNumberOfDisplayLocales(void) const = 0;
98 virtual const UChar* getLocaleName(int32_t index) const = 0;
99 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0;
100
101 virtual int32_t indexForLocale(const UChar* locale) const;
102 virtual int32_t indexForRuleSet(const UChar* ruleset) const;
103
104 // virtual UClassID getDynamicClassID() const = 0;
105 // static UClassID getStaticClassID(void);
106 };
107
~LocalizationInfo()108 LocalizationInfo::~LocalizationInfo() {}
109
110 //UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
111
112 // if both strings are NULL, this returns TRUE
113 static UBool
streq(const UChar * lhs,const UChar * rhs)114 streq(const UChar* lhs, const UChar* rhs) {
115 if (rhs == lhs) {
116 return TRUE;
117 }
118 if (lhs && rhs) {
119 return u_strcmp(lhs, rhs) == 0;
120 }
121 return FALSE;
122 }
123
124 UBool
operator ==(const LocalizationInfo * rhs) const125 LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
126 if (rhs) {
127 if (this == rhs) {
128 return TRUE;
129 }
130
131 int32_t rsc = getNumberOfRuleSets();
132 if (rsc == rhs->getNumberOfRuleSets()) {
133 for (int i = 0; i < rsc; ++i) {
134 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) {
135 return FALSE;
136 }
137 }
138 int32_t dlc = getNumberOfDisplayLocales();
139 if (dlc == rhs->getNumberOfDisplayLocales()) {
140 for (int i = 0; i < dlc; ++i) {
141 const UChar* locale = getLocaleName(i);
142 int32_t ix = rhs->indexForLocale(locale);
143 // if no locale, ix is -1, getLocaleName returns null, so streq returns false
144 if (!streq(locale, rhs->getLocaleName(ix))) {
145 return FALSE;
146 }
147 for (int j = 0; j < rsc; ++j) {
148 if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) {
149 return FALSE;
150 }
151 }
152 }
153 return TRUE;
154 }
155 }
156 }
157 return FALSE;
158 }
159
160 int32_t
indexForLocale(const UChar * locale) const161 LocalizationInfo::indexForLocale(const UChar* locale) const {
162 for (int i = 0; i < getNumberOfDisplayLocales(); ++i) {
163 if (streq(locale, getLocaleName(i))) {
164 return i;
165 }
166 }
167 return -1;
168 }
169
170 int32_t
indexForRuleSet(const UChar * ruleset) const171 LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
172 if (ruleset) {
173 for (int i = 0; i < getNumberOfRuleSets(); ++i) {
174 if (streq(ruleset, getRuleSetName(i))) {
175 return i;
176 }
177 }
178 }
179 return -1;
180 }
181
182
183 typedef void (*Fn_Deleter)(void*);
184
185 class VArray {
186 void** buf;
187 int32_t cap;
188 int32_t size;
189 Fn_Deleter deleter;
190 public:
VArray()191 VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {}
192
VArray(Fn_Deleter del)193 VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {}
194
~VArray()195 ~VArray() {
196 if (deleter) {
197 for (int i = 0; i < size; ++i) {
198 (*deleter)(buf[i]);
199 }
200 }
201 uprv_free(buf);
202 }
203
length()204 int32_t length() {
205 return size;
206 }
207
add(void * elem,UErrorCode & status)208 void add(void* elem, UErrorCode& status) {
209 if (U_SUCCESS(status)) {
210 if (size == cap) {
211 if (cap == 0) {
212 cap = 1;
213 } else if (cap < 256) {
214 cap *= 2;
215 } else {
216 cap += 256;
217 }
218 if (buf == NULL) {
219 buf = (void**)uprv_malloc(cap * sizeof(void*));
220 } else {
221 buf = (void**)uprv_realloc(buf, cap * sizeof(void*));
222 }
223 if (buf == NULL) {
224 // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
225 status = U_MEMORY_ALLOCATION_ERROR;
226 return;
227 }
228 void* start = &buf[size];
229 size_t count = (cap - size) * sizeof(void*);
230 uprv_memset(start, 0, count); // fill with nulls, just because
231 }
232 buf[size++] = elem;
233 }
234 }
235
release(void)236 void** release(void) {
237 void** result = buf;
238 buf = NULL;
239 cap = 0;
240 size = 0;
241 return result;
242 }
243 };
244
245 class LocDataParser;
246
247 class StringLocalizationInfo : public LocalizationInfo {
248 UChar* info;
249 UChar*** data;
250 int32_t numRuleSets;
251 int32_t numLocales;
252
253 friend class LocDataParser;
254
StringLocalizationInfo(UChar * i,UChar *** d,int32_t numRS,int32_t numLocs)255 StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs)
256 : info(i), data(d), numRuleSets(numRS), numLocales(numLocs)
257 {
258 }
259
260 public:
261 static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);
262
263 virtual ~StringLocalizationInfo();
getNumberOfRuleSets(void) const264 virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; }
265 virtual const UChar* getRuleSetName(int32_t index) const;
getNumberOfDisplayLocales(void) const266 virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; }
267 virtual const UChar* getLocaleName(int32_t index) const;
268 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;
269
270 // virtual UClassID getDynamicClassID() const;
271 // static UClassID getStaticClassID(void);
272
273 private:
274 void init(UErrorCode& status) const;
275 };
276
277
// Punctuation recognized by LocDataParser, as UTF-16 code unit values.
enum {
    OPEN_ANGLE  = 0x003C, /* '<' */
    CLOSE_ANGLE = 0x003E, /* '>' */
    COMMA       = 0x002C, /* ',' */
    TICK        = 0x0027, /* '\'' */
    QUOTE       = 0x0022, /* '"' */
    SPACE       = 0x0020  /* ' ' */
};
286
287 /**
288 * Utility for parsing a localization string and returning a StringLocalizationInfo*.
289 */
290 class LocDataParser {
291 UChar* data;
292 const UChar* e;
293 UChar* p;
294 UChar ch;
295 UParseError& pe;
296 UErrorCode& ec;
297
298 public:
LocDataParser(UParseError & parseError,UErrorCode & status)299 LocDataParser(UParseError& parseError, UErrorCode& status)
300 : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {}
~LocDataParser()301 ~LocDataParser() {}
302
303 /*
304 * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
305 * and return NULL. The StringLocalizationInfo will adopt locData if it is created.
306 */
307 StringLocalizationInfo* parse(UChar* data, int32_t len);
308
309 private:
310
inc(void)311 void inc(void) { ++p; ch = 0xffff; }
checkInc(UChar c)312 UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; }
check(UChar c)313 UBool check(UChar c) { return p < e && (ch == c || *p == c); }
skipWhitespace(void)314 void skipWhitespace(void) { while (p < e && PatternProps::isWhiteSpace(ch != 0xffff ? ch : *p)) inc();}
inList(UChar c,const UChar * list) const315 UBool inList(UChar c, const UChar* list) const {
316 if (*list == SPACE && PatternProps::isWhiteSpace(c)) return TRUE;
317 while (*list && *list != c) ++list; return *list == c;
318 }
319 void parseError(const char* msg);
320
321 StringLocalizationInfo* doParse(void);
322
323 UChar** nextArray(int32_t& requiredLength);
324 UChar* nextString(void);
325 };
326
// Reports a parse error through parseError() and returns NULL from the
// enclosing function. The message is only forwarded in DEBUG builds.
// Wrapped in do { } while (0) so the macro behaves as a single statement,
// including after an unbraced if or before an else.
#ifdef DEBUG
#define ERROR(msg) do { parseError(msg); return NULL; } while (0)
#else
#define ERROR(msg) do { parseError(NULL); return NULL; } while (0)
#endif
332
333
334 static const UChar DQUOTE_STOPLIST[] = {
335 QUOTE, 0
336 };
337
338 static const UChar SQUOTE_STOPLIST[] = {
339 TICK, 0
340 };
341
342 static const UChar NOQUOTE_STOPLIST[] = {
343 SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0
344 };
345
346 static void
DeleteFn(void * p)347 DeleteFn(void* p) {
348 uprv_free(p);
349 }
350
351 StringLocalizationInfo*
parse(UChar * _data,int32_t len)352 LocDataParser::parse(UChar* _data, int32_t len) {
353 if (U_FAILURE(ec)) {
354 if (_data) uprv_free(_data);
355 return NULL;
356 }
357
358 pe.line = 0;
359 pe.offset = -1;
360 pe.postContext[0] = 0;
361 pe.preContext[0] = 0;
362
363 if (_data == NULL) {
364 ec = U_ILLEGAL_ARGUMENT_ERROR;
365 return NULL;
366 }
367
368 if (len <= 0) {
369 ec = U_ILLEGAL_ARGUMENT_ERROR;
370 uprv_free(_data);
371 return NULL;
372 }
373
374 data = _data;
375 e = data + len;
376 p = _data;
377 ch = 0xffff;
378
379 return doParse();
380 }
381
382
383 StringLocalizationInfo*
doParse(void)384 LocDataParser::doParse(void) {
385 skipWhitespace();
386 if (!checkInc(OPEN_ANGLE)) {
387 ERROR("Missing open angle");
388 } else {
389 VArray array(DeleteFn);
390 UBool mightHaveNext = TRUE;
391 int32_t requiredLength = -1;
392 while (mightHaveNext) {
393 mightHaveNext = FALSE;
394 UChar** elem = nextArray(requiredLength);
395 skipWhitespace();
396 UBool haveComma = check(COMMA);
397 if (elem) {
398 array.add(elem, ec);
399 if (haveComma) {
400 inc();
401 mightHaveNext = TRUE;
402 }
403 } else if (haveComma) {
404 ERROR("Unexpected character");
405 }
406 }
407
408 skipWhitespace();
409 if (!checkInc(CLOSE_ANGLE)) {
410 if (check(OPEN_ANGLE)) {
411 ERROR("Missing comma in outer array");
412 } else {
413 ERROR("Missing close angle bracket in outer array");
414 }
415 }
416
417 skipWhitespace();
418 if (p != e) {
419 ERROR("Extra text after close of localization data");
420 }
421
422 array.add(NULL, ec);
423 if (U_SUCCESS(ec)) {
424 int32_t numLocs = array.length() - 2; // subtract first, NULL
425 UChar*** result = (UChar***)array.release();
426
427 return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL
428 }
429 }
430
431 ERROR("Unknown error");
432 }
433
434 UChar**
nextArray(int32_t & requiredLength)435 LocDataParser::nextArray(int32_t& requiredLength) {
436 if (U_FAILURE(ec)) {
437 return NULL;
438 }
439
440 skipWhitespace();
441 if (!checkInc(OPEN_ANGLE)) {
442 ERROR("Missing open angle");
443 }
444
445 VArray array;
446 UBool mightHaveNext = TRUE;
447 while (mightHaveNext) {
448 mightHaveNext = FALSE;
449 UChar* elem = nextString();
450 skipWhitespace();
451 UBool haveComma = check(COMMA);
452 if (elem) {
453 array.add(elem, ec);
454 if (haveComma) {
455 inc();
456 mightHaveNext = TRUE;
457 }
458 } else if (haveComma) {
459 ERROR("Unexpected comma");
460 }
461 }
462 skipWhitespace();
463 if (!checkInc(CLOSE_ANGLE)) {
464 if (check(OPEN_ANGLE)) {
465 ERROR("Missing close angle bracket in inner array");
466 } else {
467 ERROR("Missing comma in inner array");
468 }
469 }
470
471 array.add(NULL, ec);
472 if (U_SUCCESS(ec)) {
473 if (requiredLength == -1) {
474 requiredLength = array.length() + 1;
475 } else if (array.length() != requiredLength) {
476 ec = U_ILLEGAL_ARGUMENT_ERROR;
477 ERROR("Array not of required length");
478 }
479
480 return (UChar**)array.release();
481 }
482 ERROR("Unknown Error");
483 }
484
485 UChar*
nextString()486 LocDataParser::nextString() {
487 UChar* result = NULL;
488
489 skipWhitespace();
490 if (p < e) {
491 const UChar* terminators;
492 UChar c = *p;
493 UBool haveQuote = c == QUOTE || c == TICK;
494 if (haveQuote) {
495 inc();
496 terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
497 } else {
498 terminators = NOQUOTE_STOPLIST;
499 }
500 UChar* start = p;
501 while (p < e && !inList(*p, terminators)) ++p;
502 if (p == e) {
503 ERROR("Unexpected end of data");
504 }
505
506 UChar x = *p;
507 if (p > start) {
508 ch = x;
509 *p = 0x0; // terminate by writing to data
510 result = start; // just point into data
511 }
512 if (haveQuote) {
513 if (x != c) {
514 ERROR("Missing matching quote");
515 } else if (p == start) {
516 ERROR("Empty string");
517 }
518 inc();
519 } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) {
520 ERROR("Unexpected character in string");
521 }
522 }
523
524 // ok for there to be no next string
525 return result;
526 }
527
528 void
parseError(const char *)529 LocDataParser::parseError(const char* /*str*/) {
530 if (!data) {
531 return;
532 }
533
534 const UChar* start = p - U_PARSE_CONTEXT_LEN - 1;
535 if (start < data) {
536 start = data;
537 }
538 for (UChar* x = p; --x >= start;) {
539 if (!*x) {
540 start = x+1;
541 break;
542 }
543 }
544 const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1;
545 if (limit > e) {
546 limit = e;
547 }
548 u_strncpy(pe.preContext, start, (int32_t)(p-start));
549 pe.preContext[p-start] = 0;
550 u_strncpy(pe.postContext, p, (int32_t)(limit-p));
551 pe.postContext[limit-p] = 0;
552 pe.offset = (int32_t)(p - data);
553
554 #ifdef DEBUG
555 fprintf(stderr, "%s at or near character %d: ", str, p-data);
556
557 UnicodeString msg;
558 msg.append(start, p - start);
559 msg.append((UChar)0x002f); /* SOLIDUS/SLASH */
560 msg.append(p, limit-p);
561 msg.append("'");
562
563 char buf[128];
564 int32_t len = msg.extract(0, msg.length(), buf, 128);
565 if (len >= 128) {
566 buf[127] = 0;
567 } else {
568 buf[len] = 0;
569 }
570 fprintf(stderr, "%s\n", buf);
571 fflush(stderr);
572 #endif
573
574 uprv_free(data);
575 data = NULL;
576 p = NULL;
577 e = NULL;
578
579 if (U_SUCCESS(ec)) {
580 ec = U_PARSE_ERROR;
581 }
582 }
583
584 //UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
585
586 StringLocalizationInfo*
create(const UnicodeString & info,UParseError & perror,UErrorCode & status)587 StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
588 if (U_FAILURE(status)) {
589 return NULL;
590 }
591
592 int32_t len = info.length();
593 if (len == 0) {
594 return NULL; // no error;
595 }
596
597 UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar));
598 if (!p) {
599 status = U_MEMORY_ALLOCATION_ERROR;
600 return NULL;
601 }
602 info.extract(p, len, status);
603 if (!U_FAILURE(status)) {
604 status = U_ZERO_ERROR; // clear warning about non-termination
605 }
606
607 LocDataParser parser(perror, status);
608 return parser.parse(p, len);
609 }
610
~StringLocalizationInfo()611 StringLocalizationInfo::~StringLocalizationInfo() {
612 for (UChar*** p = (UChar***)data; *p; ++p) {
613 // remaining data is simply pointer into our unicode string data.
614 if (*p) uprv_free(*p);
615 }
616 if (data) uprv_free(data);
617 if (info) uprv_free(info);
618 }
619
620
621 const UChar*
getRuleSetName(int32_t index) const622 StringLocalizationInfo::getRuleSetName(int32_t index) const {
623 if (index >= 0 && index < getNumberOfRuleSets()) {
624 return data[0][index];
625 }
626 return NULL;
627 }
628
629 const UChar*
getLocaleName(int32_t index) const630 StringLocalizationInfo::getLocaleName(int32_t index) const {
631 if (index >= 0 && index < getNumberOfDisplayLocales()) {
632 return data[index+1][0];
633 }
634 return NULL;
635 }
636
637 const UChar*
getDisplayName(int32_t localeIndex,int32_t ruleIndex) const638 StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
639 if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() &&
640 ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) {
641 return data[localeIndex+1][ruleIndex+1];
642 }
643 return NULL;
644 }
645
646 // ----------
647
RuleBasedNumberFormat(const UnicodeString & description,const UnicodeString & locs,const Locale & alocale,UParseError & perror,UErrorCode & status)648 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
649 const UnicodeString& locs,
650 const Locale& alocale, UParseError& perror, UErrorCode& status)
651 : ruleSets(NULL)
652 , ruleSetDescriptions(NULL)
653 , numRuleSets(0)
654 , defaultRuleSet(NULL)
655 , locale(alocale)
656 , collator(NULL)
657 , decimalFormatSymbols(NULL)
658 , lenient(FALSE)
659 , lenientParseRules(NULL)
660 , localizations(NULL)
661 {
662 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
663 init(description, locinfo, perror, status);
664 }
665
RuleBasedNumberFormat(const UnicodeString & description,const UnicodeString & locs,UParseError & perror,UErrorCode & status)666 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
667 const UnicodeString& locs,
668 UParseError& perror, UErrorCode& status)
669 : ruleSets(NULL)
670 , ruleSetDescriptions(NULL)
671 , numRuleSets(0)
672 , defaultRuleSet(NULL)
673 , locale(Locale::getDefault())
674 , collator(NULL)
675 , decimalFormatSymbols(NULL)
676 , lenient(FALSE)
677 , lenientParseRules(NULL)
678 , localizations(NULL)
679 {
680 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
681 init(description, locinfo, perror, status);
682 }
683
RuleBasedNumberFormat(const UnicodeString & description,LocalizationInfo * info,const Locale & alocale,UParseError & perror,UErrorCode & status)684 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
685 LocalizationInfo* info,
686 const Locale& alocale, UParseError& perror, UErrorCode& status)
687 : ruleSets(NULL)
688 , ruleSetDescriptions(NULL)
689 , numRuleSets(0)
690 , defaultRuleSet(NULL)
691 , locale(alocale)
692 , collator(NULL)
693 , decimalFormatSymbols(NULL)
694 , lenient(FALSE)
695 , lenientParseRules(NULL)
696 , localizations(NULL)
697 {
698 init(description, info, perror, status);
699 }
700
RuleBasedNumberFormat(const UnicodeString & description,UParseError & perror,UErrorCode & status)701 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
702 UParseError& perror,
703 UErrorCode& status)
704 : ruleSets(NULL)
705 , ruleSetDescriptions(NULL)
706 , numRuleSets(0)
707 , defaultRuleSet(NULL)
708 , locale(Locale::getDefault())
709 , collator(NULL)
710 , decimalFormatSymbols(NULL)
711 , lenient(FALSE)
712 , lenientParseRules(NULL)
713 , localizations(NULL)
714 {
715 init(description, NULL, perror, status);
716 }
717
RuleBasedNumberFormat(const UnicodeString & description,const Locale & aLocale,UParseError & perror,UErrorCode & status)718 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
719 const Locale& aLocale,
720 UParseError& perror,
721 UErrorCode& status)
722 : ruleSets(NULL)
723 , ruleSetDescriptions(NULL)
724 , numRuleSets(0)
725 , defaultRuleSet(NULL)
726 , locale(aLocale)
727 , collator(NULL)
728 , decimalFormatSymbols(NULL)
729 , lenient(FALSE)
730 , lenientParseRules(NULL)
731 , localizations(NULL)
732 {
733 init(description, NULL, perror, status);
734 }
735
RuleBasedNumberFormat(URBNFRuleSetTag tag,const Locale & alocale,UErrorCode & status)736 RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
737 : ruleSets(NULL)
738 , ruleSetDescriptions(NULL)
739 , numRuleSets(0)
740 , defaultRuleSet(NULL)
741 , locale(alocale)
742 , collator(NULL)
743 , decimalFormatSymbols(NULL)
744 , lenient(FALSE)
745 , lenientParseRules(NULL)
746 , localizations(NULL)
747 {
748 if (U_FAILURE(status)) {
749 return;
750 }
751
752 const char* rules_tag = "RBNFRules";
753 const char* fmt_tag = "";
754 switch (tag) {
755 case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
756 case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
757 case URBNF_DURATION: fmt_tag = "DurationRules"; break;
758 case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break;
759 default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
760 }
761
762 // TODO: read localization info from resource
763 LocalizationInfo* locinfo = NULL;
764
765 UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
766 if (U_SUCCESS(status)) {
767 setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
768 ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));
769
770 UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status);
771 if (U_FAILURE(status)) {
772 ures_close(nfrb);
773 }
774 UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status);
775 if (U_FAILURE(status)) {
776 ures_close(rbnfRules);
777 ures_close(nfrb);
778 return;
779 }
780
781 UnicodeString desc;
782 while (ures_hasNext(ruleSets)) {
783 desc.append(ures_getNextUnicodeString(ruleSets,NULL,&status));
784 }
785 UParseError perror;
786
787 init (desc, locinfo, perror, status);
788
789 ures_close(ruleSets);
790 ures_close(rbnfRules);
791 }
792 ures_close(nfrb);
793 }
794
// Copy constructor: starts from an empty state and lets the assignment
// operator do the actual copying.
RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
    : NumberFormat(rhs),
      ruleSets(NULL),
      ruleSetDescriptions(NULL),
      numRuleSets(0),
      defaultRuleSet(NULL),
      locale(rhs.locale),
      collator(NULL),
      decimalFormatSymbols(NULL),
      lenient(FALSE),
      lenientParseRules(NULL),
      localizations(NULL)
{
    *this = rhs;
}
810
811 // --------
812
813 RuleBasedNumberFormat&
operator =(const RuleBasedNumberFormat & rhs)814 RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
815 {
816 UErrorCode status = U_ZERO_ERROR;
817 dispose();
818 locale = rhs.locale;
819 lenient = rhs.lenient;
820
821 UnicodeString rules = rhs.getRules();
822 UParseError perror;
823 init(rules, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
824
825 return *this;
826 }
827
~RuleBasedNumberFormat()828 RuleBasedNumberFormat::~RuleBasedNumberFormat()
829 {
830 dispose();
831 }
832
833 Format*
clone(void) const834 RuleBasedNumberFormat::clone(void) const
835 {
836 RuleBasedNumberFormat * result = NULL;
837 UnicodeString rules = getRules();
838 UErrorCode status = U_ZERO_ERROR;
839 UParseError perror;
840 result = new RuleBasedNumberFormat(rules, localizations, locale, perror, status);
841 /* test for NULL */
842 if (result == 0) {
843 status = U_MEMORY_ALLOCATION_ERROR;
844 return 0;
845 }
846 if (U_FAILURE(status)) {
847 delete result;
848 result = 0;
849 } else {
850 result->lenient = lenient;
851 }
852 return result;
853 }
854
855 UBool
operator ==(const Format & other) const856 RuleBasedNumberFormat::operator==(const Format& other) const
857 {
858 if (this == &other) {
859 return TRUE;
860 }
861
862 if (typeid(*this) == typeid(other)) {
863 const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
864 if (locale == rhs.locale &&
865 lenient == rhs.lenient &&
866 (localizations == NULL
867 ? rhs.localizations == NULL
868 : (rhs.localizations == NULL
869 ? FALSE
870 : *localizations == rhs.localizations))) {
871
872 NFRuleSet** p = ruleSets;
873 NFRuleSet** q = rhs.ruleSets;
874 if (p == NULL) {
875 return q == NULL;
876 } else if (q == NULL) {
877 return FALSE;
878 }
879 while (*p && *q && (**p == **q)) {
880 ++p;
881 ++q;
882 }
883 return *q == NULL && *p == NULL;
884 }
885 }
886
887 return FALSE;
888 }
889
890 UnicodeString
getRules() const891 RuleBasedNumberFormat::getRules() const
892 {
893 UnicodeString result;
894 if (ruleSets != NULL) {
895 for (NFRuleSet** p = ruleSets; *p; ++p) {
896 (*p)->appendRules(result);
897 }
898 }
899 return result;
900 }
901
902 UnicodeString
getRuleSetName(int32_t index) const903 RuleBasedNumberFormat::getRuleSetName(int32_t index) const
904 {
905 if (localizations) {
906 UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1);
907 return string;
908 } else if (ruleSets) {
909 UnicodeString result;
910 for (NFRuleSet** p = ruleSets; *p; ++p) {
911 NFRuleSet* rs = *p;
912 if (rs->isPublic()) {
913 if (--index == -1) {
914 rs->getName(result);
915 return result;
916 }
917 }
918 }
919 }
920 UnicodeString empty;
921 return empty;
922 }
923
924 int32_t
getNumberOfRuleSetNames() const925 RuleBasedNumberFormat::getNumberOfRuleSetNames() const
926 {
927 int32_t result = 0;
928 if (localizations) {
929 result = localizations->getNumberOfRuleSets();
930 } else if (ruleSets) {
931 for (NFRuleSet** p = ruleSets; *p; ++p) {
932 if ((**p).isPublic()) {
933 ++result;
934 }
935 }
936 }
937 return result;
938 }
939
940 int32_t
getNumberOfRuleSetDisplayNameLocales(void) const941 RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
942 if (localizations) {
943 return localizations->getNumberOfDisplayLocales();
944 }
945 return 0;
946 }
947
948 Locale
getRuleSetDisplayNameLocale(int32_t index,UErrorCode & status) const949 RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
950 if (U_FAILURE(status)) {
951 return Locale("");
952 }
953 if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
954 UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
955 char buffer[64];
956 int32_t cap = name.length() + 1;
957 char* bp = buffer;
958 if (cap > 64) {
959 bp = (char *)uprv_malloc(cap);
960 if (bp == NULL) {
961 status = U_MEMORY_ALLOCATION_ERROR;
962 return Locale("");
963 }
964 }
965 name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
966 Locale retLocale(bp);
967 if (bp != buffer) {
968 uprv_free(bp);
969 }
970 return retLocale;
971 }
972 status = U_ILLEGAL_ARGUMENT_ERROR;
973 Locale retLocale;
974 return retLocale;
975 }
976
977 UnicodeString
getRuleSetDisplayName(int32_t index,const Locale & localeParam)978 RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
979 if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) {
980 UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
981 int32_t len = localeName.length();
982 UChar* localeStr = localeName.getBuffer(len + 1);
983 while (len >= 0) {
984 localeStr[len] = 0;
985 int32_t ix = localizations->indexForLocale(localeStr);
986 if (ix >= 0) {
987 UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
988 return name;
989 }
990
991 // trim trailing portion, skipping over ommitted sections
992 do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore
993 while (len > 0 && localeStr[len-1] == 0x005F) --len;
994 }
995 UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
996 return name;
997 }
998 UnicodeString bogus;
999 bogus.setToBogus();
1000 return bogus;
1001 }
1002
1003 UnicodeString
getRuleSetDisplayName(const UnicodeString & ruleSetName,const Locale & localeParam)1004 RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
1005 if (localizations) {
1006 UnicodeString rsn(ruleSetName);
1007 int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
1008 return getRuleSetDisplayName(ix, localeParam);
1009 }
1010 UnicodeString bogus;
1011 bogus.setToBogus();
1012 return bogus;
1013 }
1014
1015 NFRuleSet*
findRuleSet(const UnicodeString & name,UErrorCode & status) const1016 RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
1017 {
1018 if (U_SUCCESS(status) && ruleSets) {
1019 for (NFRuleSet** p = ruleSets; *p; ++p) {
1020 NFRuleSet* rs = *p;
1021 if (rs->isNamed(name)) {
1022 return rs;
1023 }
1024 }
1025 status = U_ILLEGAL_ARGUMENT_ERROR;
1026 }
1027 return NULL;
1028 }
1029
1030 UnicodeString&
format(int32_t number,UnicodeString & toAppendTo,FieldPosition &) const1031 RuleBasedNumberFormat::format(int32_t number,
1032 UnicodeString& toAppendTo,
1033 FieldPosition& /* pos */) const
1034 {
1035 if (defaultRuleSet) defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length());
1036 return toAppendTo;
1037 }
1038
1039
1040 UnicodeString&
format(int64_t number,UnicodeString & toAppendTo,FieldPosition &) const1041 RuleBasedNumberFormat::format(int64_t number,
1042 UnicodeString& toAppendTo,
1043 FieldPosition& /* pos */) const
1044 {
1045 if (defaultRuleSet) defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
1046 return toAppendTo;
1047 }
1048
1049
1050 UnicodeString&
format(double number,UnicodeString & toAppendTo,FieldPosition &) const1051 RuleBasedNumberFormat::format(double number,
1052 UnicodeString& toAppendTo,
1053 FieldPosition& /* pos */) const
1054 {
1055 // Special case for NaN; adapted from what DecimalFormat::_format( double number,...) does.
1056 if (uprv_isNaN(number)) {
1057 DecimalFormatSymbols* decFmtSyms = getDecimalFormatSymbols(); // RuleBasedNumberFormat internal
1058 if (decFmtSyms) {
1059 toAppendTo += decFmtSyms->getConstSymbol(DecimalFormatSymbols::kNaNSymbol);
1060 }
1061 } else if (defaultRuleSet) {
1062 defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
1063 }
1064 return toAppendTo;
1065 }
1066
1067
1068 UnicodeString&
format(int32_t number,const UnicodeString & ruleSetName,UnicodeString & toAppendTo,FieldPosition &,UErrorCode & status) const1069 RuleBasedNumberFormat::format(int32_t number,
1070 const UnicodeString& ruleSetName,
1071 UnicodeString& toAppendTo,
1072 FieldPosition& /* pos */,
1073 UErrorCode& status) const
1074 {
1075 // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
1076 if (U_SUCCESS(status)) {
1077 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1078 // throw new IllegalArgumentException("Can't use internal rule set");
1079 status = U_ILLEGAL_ARGUMENT_ERROR;
1080 } else {
1081 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1082 if (rs) {
1083 rs->format((int64_t)number, toAppendTo, toAppendTo.length());
1084 }
1085 }
1086 }
1087 return toAppendTo;
1088 }
1089
1090
1091 UnicodeString&
format(int64_t number,const UnicodeString & ruleSetName,UnicodeString & toAppendTo,FieldPosition &,UErrorCode & status) const1092 RuleBasedNumberFormat::format(int64_t number,
1093 const UnicodeString& ruleSetName,
1094 UnicodeString& toAppendTo,
1095 FieldPosition& /* pos */,
1096 UErrorCode& status) const
1097 {
1098 if (U_SUCCESS(status)) {
1099 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1100 // throw new IllegalArgumentException("Can't use internal rule set");
1101 status = U_ILLEGAL_ARGUMENT_ERROR;
1102 } else {
1103 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1104 if (rs) {
1105 rs->format(number, toAppendTo, toAppendTo.length());
1106 }
1107 }
1108 }
1109 return toAppendTo;
1110 }
1111
1112
1113 // make linker happy
1114 UnicodeString&
format(const Formattable & obj,UnicodeString & toAppendTo,FieldPosition & pos,UErrorCode & status) const1115 RuleBasedNumberFormat::format(const Formattable& obj,
1116 UnicodeString& toAppendTo,
1117 FieldPosition& pos,
1118 UErrorCode& status) const
1119 {
1120 return NumberFormat::format(obj, toAppendTo, pos, status);
1121 }
1122
1123 UnicodeString&
format(double number,const UnicodeString & ruleSetName,UnicodeString & toAppendTo,FieldPosition &,UErrorCode & status) const1124 RuleBasedNumberFormat::format(double number,
1125 const UnicodeString& ruleSetName,
1126 UnicodeString& toAppendTo,
1127 FieldPosition& /* pos */,
1128 UErrorCode& status) const
1129 {
1130 if (U_SUCCESS(status)) {
1131 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1132 // throw new IllegalArgumentException("Can't use internal rule set");
1133 status = U_ILLEGAL_ARGUMENT_ERROR;
1134 } else {
1135 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1136 if (rs) {
1137 rs->format(number, toAppendTo, toAppendTo.length());
1138 }
1139 }
1140 }
1141 return toAppendTo;
1142 }
1143
1144 void
parse(const UnicodeString & text,Formattable & result,ParsePosition & parsePosition) const1145 RuleBasedNumberFormat::parse(const UnicodeString& text,
1146 Formattable& result,
1147 ParsePosition& parsePosition) const
1148 {
1149 if (!ruleSets) {
1150 parsePosition.setErrorIndex(0);
1151 return;
1152 }
1153
1154 UnicodeString workingText(text, parsePosition.getIndex());
1155 ParsePosition workingPos(0);
1156
1157 ParsePosition high_pp(0);
1158 Formattable high_result;
1159
1160 for (NFRuleSet** p = ruleSets; *p; ++p) {
1161 NFRuleSet *rp = *p;
1162 if (rp->isPublic() && rp->isParseable()) {
1163 ParsePosition working_pp(0);
1164 Formattable working_result;
1165
1166 rp->parse(workingText, working_pp, kMaxDouble, working_result);
1167 if (working_pp.getIndex() > high_pp.getIndex()) {
1168 high_pp = working_pp;
1169 high_result = working_result;
1170
1171 if (high_pp.getIndex() == workingText.length()) {
1172 break;
1173 }
1174 }
1175 }
1176 }
1177
1178 int32_t startIndex = parsePosition.getIndex();
1179 parsePosition.setIndex(startIndex + high_pp.getIndex());
1180 if (high_pp.getIndex() > 0) {
1181 parsePosition.setErrorIndex(-1);
1182 } else {
1183 int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0;
1184 parsePosition.setErrorIndex(startIndex + errorIndex);
1185 }
1186 result = high_result;
1187 if (result.getType() == Formattable::kDouble) {
1188 int32_t r = (int32_t)result.getDouble();
1189 if ((double)r == result.getDouble()) {
1190 result.setLong(r);
1191 }
1192 }
1193 }
1194
1195 #if !UCONFIG_NO_COLLATION
1196
1197 void
setLenient(UBool enabled)1198 RuleBasedNumberFormat::setLenient(UBool enabled)
1199 {
1200 lenient = enabled;
1201 if (!enabled && collator) {
1202 delete collator;
1203 collator = NULL;
1204 }
1205 }
1206
1207 #endif
1208
1209 void
setDefaultRuleSet(const UnicodeString & ruleSetName,UErrorCode & status)1210 RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
1211 if (U_SUCCESS(status)) {
1212 if (ruleSetName.isEmpty()) {
1213 if (localizations) {
1214 UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
1215 defaultRuleSet = findRuleSet(name, status);
1216 } else {
1217 initDefaultRuleSet();
1218 }
1219 } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
1220 status = U_ILLEGAL_ARGUMENT_ERROR;
1221 } else {
1222 NFRuleSet* result = findRuleSet(ruleSetName, status);
1223 if (result != NULL) {
1224 defaultRuleSet = result;
1225 }
1226 }
1227 }
1228 }
1229
1230 UnicodeString
getDefaultRuleSetName() const1231 RuleBasedNumberFormat::getDefaultRuleSetName() const {
1232 UnicodeString result;
1233 if (defaultRuleSet && defaultRuleSet->isPublic()) {
1234 defaultRuleSet->getName(result);
1235 } else {
1236 result.setToBogus();
1237 }
1238 return result;
1239 }
1240
1241 void
initDefaultRuleSet()1242 RuleBasedNumberFormat::initDefaultRuleSet()
1243 {
1244 defaultRuleSet = NULL;
1245 if (!ruleSets) {
1246 return;
1247 }
1248
1249 const UnicodeString spellout = UNICODE_STRING_SIMPLE("%spellout-numbering");
1250 const UnicodeString ordinal = UNICODE_STRING_SIMPLE("%digits-ordinal");
1251 const UnicodeString duration = UNICODE_STRING_SIMPLE("%duration");
1252
1253 NFRuleSet**p = &ruleSets[0];
1254 while (*p) {
1255 if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) {
1256 defaultRuleSet = *p;
1257 return;
1258 } else {
1259 ++p;
1260 }
1261 }
1262
1263 defaultRuleSet = *--p;
1264 if (!defaultRuleSet->isPublic()) {
1265 while (p != ruleSets) {
1266 if ((*--p)->isPublic()) {
1267 defaultRuleSet = *p;
1268 break;
1269 }
1270 }
1271 }
1272 }
1273
1274
1275 void
init(const UnicodeString & rules,LocalizationInfo * localizationInfos,UParseError & pErr,UErrorCode & status)1276 RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
1277 UParseError& pErr, UErrorCode& status)
1278 {
1279 // TODO: implement UParseError
1280 uprv_memset(&pErr, 0, sizeof(UParseError));
1281 // Note: this can leave ruleSets == NULL, so remaining code should check
1282 if (U_FAILURE(status)) {
1283 return;
1284 }
1285
1286 this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();
1287
1288 UnicodeString description(rules);
1289 if (!description.length()) {
1290 status = U_MEMORY_ALLOCATION_ERROR;
1291 return;
1292 }
1293
1294 // start by stripping the trailing whitespace from all the rules
1295 // (this is all the whitespace follwing each semicolon in the
1296 // description). This allows us to look for rule-set boundaries
1297 // by searching for ";%" without having to worry about whitespace
1298 // between the ; and the %
1299 stripWhitespace(description);
1300
1301 // check to see if there's a set of lenient-parse rules. If there
1302 // is, pull them out into our temporary holding place for them,
1303 // and delete them from the description before the real desciption-
1304 // parsing code sees them
1305 int32_t lp = description.indexOf(gLenientParse, -1, 0);
1306 if (lp != -1) {
1307 // we've got to make sure we're not in the middle of a rule
1308 // (where "%%lenient-parse" would actually get treated as
1309 // rule text)
1310 if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
1311 // locate the beginning and end of the actual collation
1312 // rules (there may be whitespace between the name and
1313 // the first token in the description)
1314 int lpEnd = description.indexOf(gSemiPercent, 2, lp);
1315
1316 if (lpEnd == -1) {
1317 lpEnd = description.length() - 1;
1318 }
1319 int lpStart = lp + u_strlen(gLenientParse);
1320 while (PatternProps::isWhiteSpace(description.charAt(lpStart))) {
1321 ++lpStart;
1322 }
1323
1324 // copy out the lenient-parse rules and delete them
1325 // from the description
1326 lenientParseRules = new UnicodeString();
1327 /* test for NULL */
1328 if (lenientParseRules == 0) {
1329 status = U_MEMORY_ALLOCATION_ERROR;
1330 return;
1331 }
1332 lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);
1333
1334 description.remove(lp, lpEnd + 1 - lp);
1335 }
1336 }
1337
1338 // pre-flight parsing the description and count the number of
1339 // rule sets (";%" marks the end of one rule set and the beginning
1340 // of the next)
1341 numRuleSets = 0;
1342 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, p)) {
1343 ++numRuleSets;
1344 ++p;
1345 }
1346 ++numRuleSets;
1347
1348 // our rule list is an array of the appropriate size
1349 ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *));
1350 /* test for NULL */
1351 if (ruleSets == 0) {
1352 status = U_MEMORY_ALLOCATION_ERROR;
1353 return;
1354 }
1355
1356 for (int i = 0; i <= numRuleSets; ++i) {
1357 ruleSets[i] = NULL;
1358 }
1359
1360 // divide up the descriptions into individual rule-set descriptions
1361 // and store them in a temporary array. At each step, we also
1362 // new up a rule set, but all this does is initialize its name
1363 // and remove it from its description. We can't actually parse
1364 // the rest of the descriptions and finish initializing everything
1365 // because we have to know the names and locations of all the rule
1366 // sets before we can actually set everything up
1367 if(!numRuleSets) {
1368 status = U_ILLEGAL_ARGUMENT_ERROR;
1369 return;
1370 }
1371
1372 ruleSetDescriptions = new UnicodeString[numRuleSets];
1373 if (ruleSetDescriptions == 0) {
1374 status = U_MEMORY_ALLOCATION_ERROR;
1375 return;
1376 }
1377
1378 {
1379 int curRuleSet = 0;
1380 int32_t start = 0;
1381 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, start)) {
1382 ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
1383 ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
1384 if (ruleSets[curRuleSet] == 0) {
1385 status = U_MEMORY_ALLOCATION_ERROR;
1386 return;
1387 }
1388 ++curRuleSet;
1389 start = p + 1;
1390 }
1391 ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
1392 ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
1393 if (ruleSets[curRuleSet] == 0) {
1394 status = U_MEMORY_ALLOCATION_ERROR;
1395 return;
1396 }
1397 }
1398
1399 // now we can take note of the formatter's default rule set, which
1400 // is the last public rule set in the description (it's the last
1401 // rather than the first so that a user can create a new formatter
1402 // from an existing formatter and change its default behavior just
1403 // by appending more rule sets to the end)
1404
1405 // {dlf} Initialization of a fraction rule set requires the default rule
1406 // set to be known. For purposes of initialization, this is always the
1407 // last public rule set, no matter what the localization data says.
1408 initDefaultRuleSet();
1409
1410 // finally, we can go back through the temporary descriptions
1411 // list and finish seting up the substructure (and we throw
1412 // away the temporary descriptions as we go)
1413 {
1414 for (int i = 0; i < numRuleSets; i++) {
1415 ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
1416 }
1417 }
1418
1419 // Now that the rules are initialized, the 'real' default rule
1420 // set can be adjusted by the localization data.
1421
1422 // The C code keeps the localization array as is, rather than building
1423 // a separate array of the public rule set names, so we have less work
1424 // to do here-- but we still need to check the names.
1425
1426 if (localizationInfos) {
1427 // confirm the names, if any aren't in the rules, that's an error
1428 // it is ok if the rules contain public rule sets that are not in this list
1429 for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) {
1430 UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1);
1431 NFRuleSet* rs = findRuleSet(name, status);
1432 if (rs == NULL) {
1433 break; // error
1434 }
1435 if (i == 0) {
1436 defaultRuleSet = rs;
1437 }
1438 }
1439 } else {
1440 defaultRuleSet = getDefaultRuleSet();
1441 }
1442 }
1443
1444 void
stripWhitespace(UnicodeString & description)1445 RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
1446 {
1447 // iterate through the characters...
1448 UnicodeString result;
1449
1450 int start = 0;
1451 while (start != -1 && start < description.length()) {
1452 // seek to the first non-whitespace character...
1453 while (start < description.length()
1454 && PatternProps::isWhiteSpace(description.charAt(start))) {
1455 ++start;
1456 }
1457
1458 // locate the next semicolon in the text and copy the text from
1459 // our current position up to that semicolon into the result
1460 int32_t p = description.indexOf(gSemiColon, start);
1461 if (p == -1) {
1462 // or if we don't find a semicolon, just copy the rest of
1463 // the string into the result
1464 result.append(description, start, description.length() - start);
1465 start = -1;
1466 }
1467 else if (p < description.length()) {
1468 result.append(description, start, p + 1 - start);
1469 start = p + 1;
1470 }
1471
1472 // when we get here, we've seeked off the end of the sring, and
1473 // we terminate the loop (we continue until *start* is -1 rather
1474 // than until *p* is -1, because otherwise we'd miss the last
1475 // rule in the description)
1476 else {
1477 start = -1;
1478 }
1479 }
1480
1481 description.setTo(result);
1482 }
1483
1484
1485 void
dispose()1486 RuleBasedNumberFormat::dispose()
1487 {
1488 if (ruleSets) {
1489 for (NFRuleSet** p = ruleSets; *p; ++p) {
1490 delete *p;
1491 }
1492 uprv_free(ruleSets);
1493 ruleSets = NULL;
1494 }
1495
1496 if (ruleSetDescriptions) {
1497 delete [] ruleSetDescriptions;
1498 }
1499
1500 #if !UCONFIG_NO_COLLATION
1501 delete collator;
1502 #endif
1503 collator = NULL;
1504
1505 delete decimalFormatSymbols;
1506 decimalFormatSymbols = NULL;
1507
1508 delete lenientParseRules;
1509 lenientParseRules = NULL;
1510
1511 if (localizations) localizations = localizations->unref();
1512 }
1513
1514
1515 //-----------------------------------------------------------------------
1516 // package-internal API
1517 //-----------------------------------------------------------------------
1518
1519 /**
1520 * Returns the collator to use for lenient parsing. The collator is lazily created:
1521 * this function creates it the first time it's called.
1522 * @return The collator to use for lenient parsing, or null if lenient parsing
1523 * is turned off.
1524 */
1525 Collator*
getCollator() const1526 RuleBasedNumberFormat::getCollator() const
1527 {
1528 #if !UCONFIG_NO_COLLATION
1529 if (!ruleSets) {
1530 return NULL;
1531 }
1532
1533 // lazy-evaulate the collator
1534 if (collator == NULL && lenient) {
1535 // create a default collator based on the formatter's locale,
1536 // then pull out that collator's rules, append any additional
1537 // rules specified in the description, and create a _new_
1538 // collator based on the combinaiton of those rules
1539
1540 UErrorCode status = U_ZERO_ERROR;
1541
1542 Collator* temp = Collator::createInstance(locale, status);
1543 RuleBasedCollator* newCollator;
1544 if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) {
1545 if (lenientParseRules) {
1546 UnicodeString rules(newCollator->getRules());
1547 rules.append(*lenientParseRules);
1548
1549 newCollator = new RuleBasedCollator(rules, status);
1550 // Exit if newCollator could not be created.
1551 if (newCollator == NULL) {
1552 return NULL;
1553 }
1554 } else {
1555 temp = NULL;
1556 }
1557 if (U_SUCCESS(status)) {
1558 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
1559 // cast away const
1560 ((RuleBasedNumberFormat*)this)->collator = newCollator;
1561 } else {
1562 delete newCollator;
1563 }
1564 }
1565 delete temp;
1566 }
1567 #endif
1568
1569 // if lenient-parse mode is off, this will be null
1570 // (see setLenientParseMode())
1571 return collator;
1572 }
1573
1574
1575 /**
1576 * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
1577 * instances owned by this formatter. This object is lazily created: this function
1578 * creates it the first time it's called.
1579 * @return The DecimalFormatSymbols object that should be used by all DecimalFormat
1580 * instances owned by this formatter.
1581 */
1582 DecimalFormatSymbols*
getDecimalFormatSymbols() const1583 RuleBasedNumberFormat::getDecimalFormatSymbols() const
1584 {
1585 // lazy-evaluate the DecimalFormatSymbols object. This object
1586 // is shared by all DecimalFormat instances belonging to this
1587 // formatter
1588 if (decimalFormatSymbols == NULL) {
1589 UErrorCode status = U_ZERO_ERROR;
1590 DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status);
1591 if (U_SUCCESS(status)) {
1592 ((RuleBasedNumberFormat*)this)->decimalFormatSymbols = temp;
1593 } else {
1594 delete temp;
1595 }
1596 }
1597 return decimalFormatSymbols;
1598 }
1599
1600 // De-owning the current localized symbols and adopt the new symbols.
1601 void
adoptDecimalFormatSymbols(DecimalFormatSymbols * symbolsToAdopt)1602 RuleBasedNumberFormat::adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt)
1603 {
1604 if (symbolsToAdopt == NULL) {
1605 return; // do not allow caller to set decimalFormatSymbols to NULL
1606 }
1607
1608 if (decimalFormatSymbols != NULL) {
1609 delete decimalFormatSymbols;
1610 }
1611
1612 decimalFormatSymbols = symbolsToAdopt;
1613
1614 {
1615 // Apply the new decimalFormatSymbols by reparsing the rulesets
1616 UErrorCode status = U_ZERO_ERROR;
1617
1618 for (int32_t i = 0; i < numRuleSets; i++) {
1619 ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
1620 }
1621 }
1622 }
1623
1624 // Setting the symbols is equlivalent to adopting a newly created localized symbols.
1625 void
setDecimalFormatSymbols(const DecimalFormatSymbols & symbols)1626 RuleBasedNumberFormat::setDecimalFormatSymbols(const DecimalFormatSymbols& symbols)
1627 {
1628 adoptDecimalFormatSymbols(new DecimalFormatSymbols(symbols));
1629 }
1630
1631 U_NAMESPACE_END
1632
1633 /* U_HAVE_RBNF */
1634 #endif
1635