1 /*
2 *******************************************************************************
3 * Copyright (C) 1997-2006, International Business Machines Corporation
4 * and others. All Rights Reserved.
5 *******************************************************************************
6 */
7
8 #include "unicode/rbnf.h"
9
10 #if U_HAVE_RBNF
11
12 #include "unicode/normlzr.h"
13 #include "unicode/tblcoll.h"
14 #include "unicode/uchar.h"
15 #include "unicode/ucol.h"
16 #include "unicode/uloc.h"
17 #include "unicode/unum.h"
18 #include "unicode/ures.h"
19 #include "unicode/ustring.h"
20 #include "unicode/utf16.h"
21 #include "unicode/udata.h"
22 #include "nfrs.h"
23
24 #include "cmemory.h"
25 #include "cstring.h"
26 #include "../common/util.h"
27
28 // debugging
29 // #define DEBUG
30
31 #ifdef DEBUG
32 #include "stdio.h"
33 #endif
34
35 #define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
36
37 static const UChar gPercentPercent[] =
38 {
39 0x25, 0x25, 0
40 }; /* "%%" */
41
42 // All urbnf objects are created through openRules, so we init all of the
43 // Unicode string constants required by rbnf, nfrs, or nfr here.
44 static const UChar gLenientParse[] =
45 {
46 0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
47 }; /* "%%lenient-parse:" */
48 static const UChar gSemiColon = 0x003B;
49 static const UChar gSemiPercent[] =
50 {
51 0x3B, 0x25, 0
52 }; /* ";%" */
53
// kMaxDouble is the upper bound passed to NFRuleSet::parse() by
// RuleBasedNumberFormat::parse().  It equals 2^44, computed as the square of
// 2^22 so that the integer shift stays small.
#define kSomeNumberOfBitsDiv2 22
#define kHalfMaxDouble ((double)(1 << kSomeNumberOfBitsDiv2))
#define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
57
58 U_NAMESPACE_BEGIN
59
60 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
61
62 /*
63 This is a utility class. It does not use ICU's RTTI.
64 If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
65 Please make sure that intltest passes on Windows in Release mode,
66 since the string pooling per compilation unit will mess up how RTTI works.
67 The RTTI code was also removed due to lack of code coverage.
68 */
69 class LocalizationInfo : public UMemory {
70 protected:
~LocalizationInfo()71 virtual ~LocalizationInfo() {};
72 uint32_t refcount;
73
74 public:
LocalizationInfo()75 LocalizationInfo() : refcount(0) {}
76
ref(void)77 LocalizationInfo* ref(void) {
78 ++refcount;
79 return this;
80 }
81
unref(void)82 LocalizationInfo* unref(void) {
83 if (refcount && --refcount == 0) {
84 delete this;
85 }
86 return NULL;
87 }
88
89 virtual UBool operator==(const LocalizationInfo* rhs) const;
operator !=(const LocalizationInfo * rhs) const90 inline UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); }
91
92 virtual int32_t getNumberOfRuleSets(void) const = 0;
93 virtual const UChar* getRuleSetName(int32_t index) const = 0;
94 virtual int32_t getNumberOfDisplayLocales(void) const = 0;
95 virtual const UChar* getLocaleName(int32_t index) const = 0;
96 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0;
97
98 virtual int32_t indexForLocale(const UChar* locale) const;
99 virtual int32_t indexForRuleSet(const UChar* ruleset) const;
100
101 // virtual UClassID getDynamicClassID() const = 0;
102 // static UClassID getStaticClassID(void);
103 };
104
105 //UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
106
107 // if both strings are NULL, this returns TRUE
108 static UBool
streq(const UChar * lhs,const UChar * rhs)109 streq(const UChar* lhs, const UChar* rhs) {
110 if (rhs == lhs) {
111 return TRUE;
112 }
113 if (lhs && rhs) {
114 return u_strcmp(lhs, rhs) == 0;
115 }
116 return FALSE;
117 }
118
119 UBool
operator ==(const LocalizationInfo * rhs) const120 LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
121 if (rhs) {
122 if (this == rhs) {
123 return TRUE;
124 }
125
126 int32_t rsc = getNumberOfRuleSets();
127 if (rsc == rhs->getNumberOfRuleSets()) {
128 for (int i = 0; i < rsc; ++i) {
129 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) {
130 return FALSE;
131 }
132 }
133 int32_t dlc = getNumberOfDisplayLocales();
134 if (dlc == rhs->getNumberOfDisplayLocales()) {
135 for (int i = 0; i < dlc; ++i) {
136 const UChar* locale = getLocaleName(i);
137 int32_t ix = rhs->indexForLocale(locale);
138 // if no locale, ix is -1, getLocaleName returns null, so streq returns false
139 if (!streq(locale, rhs->getLocaleName(ix))) {
140 return FALSE;
141 }
142 for (int j = 0; j < rsc; ++j) {
143 if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) {
144 return FALSE;
145 }
146 }
147 }
148 return TRUE;
149 }
150 }
151 }
152 return FALSE;
153 }
154
155 int32_t
indexForLocale(const UChar * locale) const156 LocalizationInfo::indexForLocale(const UChar* locale) const {
157 for (int i = 0; i < getNumberOfDisplayLocales(); ++i) {
158 if (streq(locale, getLocaleName(i))) {
159 return i;
160 }
161 }
162 return -1;
163 }
164
165 int32_t
indexForRuleSet(const UChar * ruleset) const166 LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
167 if (ruleset) {
168 for (int i = 0; i < getNumberOfRuleSets(); ++i) {
169 if (streq(ruleset, getRuleSetName(i))) {
170 return i;
171 }
172 }
173 }
174 return -1;
175 }
176
177
178 typedef void (*Fn_Deleter)(void*);
179
180 class VArray {
181 void** buf;
182 int32_t cap;
183 int32_t size;
184 Fn_Deleter deleter;
185 public:
VArray()186 VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {}
187
VArray(Fn_Deleter del)188 VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {}
189
~VArray()190 ~VArray() {
191 if (deleter) {
192 for (int i = 0; i < size; ++i) {
193 (*deleter)(buf[i]);
194 }
195 }
196 uprv_free(buf);
197 }
198
length()199 int32_t length() {
200 return size;
201 }
202
add(void * elem,UErrorCode & status)203 void add(void* elem, UErrorCode& status) {
204 if (U_SUCCESS(status)) {
205 if (size == cap) {
206 if (cap == 0) {
207 cap = 1;
208 } else if (cap < 256) {
209 cap *= 2;
210 } else {
211 cap += 256;
212 }
213 if (buf == NULL) {
214 buf = (void**)uprv_malloc(cap * sizeof(void*));
215 } else {
216 buf = (void**)uprv_realloc(buf, cap * sizeof(void*));
217 }
218 if (buf == NULL) {
219 // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
220 status = U_MEMORY_ALLOCATION_ERROR;
221 return;
222 }
223 void* start = &buf[size];
224 size_t count = (cap - size) * sizeof(void*);
225 uprv_memset(start, 0, count); // fill with nulls, just because
226 }
227 buf[size++] = elem;
228 }
229 }
230
release(void)231 void** release(void) {
232 void** result = buf;
233 buf = NULL;
234 cap = 0;
235 size = 0;
236 return result;
237 }
238 };
239
240 class LocDataParser;
241
242 class StringLocalizationInfo : public LocalizationInfo {
243 UChar* info;
244 UChar*** data;
245 int32_t numRuleSets;
246 int32_t numLocales;
247
248 friend class LocDataParser;
249
StringLocalizationInfo(UChar * i,UChar *** d,int32_t numRS,int32_t numLocs)250 StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs)
251 : info(i), data(d), numRuleSets(numRS), numLocales(numLocs)
252 {
253 }
254
255 public:
256 static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);
257
258 virtual ~StringLocalizationInfo();
getNumberOfRuleSets(void) const259 virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; }
260 virtual const UChar* getRuleSetName(int32_t index) const;
getNumberOfDisplayLocales(void) const261 virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; }
262 virtual const UChar* getLocaleName(int32_t index) const;
263 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;
264
265 // virtual UClassID getDynamicClassID() const;
266 // static UClassID getStaticClassID(void);
267
268 private:
269 void init(UErrorCode& status) const;
270 };
271
272
// Code points that the localization-data parser treats as punctuation.
enum {
    OPEN_ANGLE  = 0x003c, /* '<'  */
    CLOSE_ANGLE = 0x003e, /* '>'  */
    COMMA       = 0x002c, /* ','  */
    TICK        = 0x0027, /* '\'' */
    QUOTE       = 0x0022, /* '"'  */
    SPACE       = 0x0020  /* ' '  */
};
281
282 /**
283 * Utility for parsing a localization string and returning a StringLocalizationInfo*.
284 */
285 class LocDataParser {
286 UChar* data;
287 const UChar* e;
288 UChar* p;
289 UChar ch;
290 UParseError& pe;
291 UErrorCode& ec;
292
293 public:
LocDataParser(UParseError & parseError,UErrorCode & status)294 LocDataParser(UParseError& parseError, UErrorCode& status)
295 : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {}
~LocDataParser()296 ~LocDataParser() {}
297
298 /*
299 * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
300 * and return NULL. The StringLocalizationInfo will adopt locData if it is created.
301 */
302 StringLocalizationInfo* parse(UChar* data, int32_t len);
303
304 private:
305
inc(void)306 void inc(void) { ++p; ch = 0xffff; }
checkInc(UChar c)307 UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; }
check(UChar c)308 UBool check(UChar c) { return p < e && (ch == c || *p == c); }
skipWhitespace(void)309 void skipWhitespace(void) { while (p < e && uprv_isRuleWhiteSpace(ch != 0xffff ? ch : *p)) inc();}
inList(UChar c,const UChar * list) const310 UBool inList(UChar c, const UChar* list) const {
311 if (*list == SPACE && uprv_isRuleWhiteSpace(c)) return TRUE;
312 while (*list && *list != c) ++list; return *list == c;
313 }
314 void parseError(const char* msg);
315
316 StringLocalizationInfo* doParse(void);
317
318 UChar** nextArray(int32_t& requiredLength);
319 UChar* nextString(void);
320 };
321
// ERROR(msg): report a parse error through parseError() and return NULL from
// the enclosing function.  The message is only forwarded in DEBUG builds.
// The body is wrapped in do { } while (0) so the macro behaves as a single
// statement; as two bare statements, "if (x) ERROR(..);" without braces would
// return unconditionally.
#ifdef DEBUG
#define ERROR(msg) do { parseError(msg); return NULL; } while (0)
#else
#define ERROR(msg) do { parseError(NULL); return NULL; } while (0)
#endif
327
328
329 static const UChar DQUOTE_STOPLIST[] = {
330 QUOTE, 0
331 };
332
333 static const UChar SQUOTE_STOPLIST[] = {
334 TICK, 0
335 };
336
337 static const UChar NOQUOTE_STOPLIST[] = {
338 SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0
339 };
340
341 static void
DeleteFn(void * p)342 DeleteFn(void* p) {
343 uprv_free(p);
344 }
345
346 StringLocalizationInfo*
parse(UChar * _data,int32_t len)347 LocDataParser::parse(UChar* _data, int32_t len) {
348 if (U_FAILURE(ec)) {
349 if (_data) uprv_free(_data);
350 return NULL;
351 }
352
353 pe.line = 0;
354 pe.offset = -1;
355 pe.postContext[0] = 0;
356 pe.preContext[0] = 0;
357
358 if (_data == NULL) {
359 ec = U_ILLEGAL_ARGUMENT_ERROR;
360 return NULL;
361 }
362
363 if (len <= 0) {
364 ec = U_ILLEGAL_ARGUMENT_ERROR;
365 uprv_free(_data);
366 return NULL;
367 }
368
369 data = _data;
370 e = data + len;
371 p = _data;
372 ch = 0xffff;
373
374 return doParse();
375 }
376
377
378 StringLocalizationInfo*
doParse(void)379 LocDataParser::doParse(void) {
380 skipWhitespace();
381 if (!checkInc(OPEN_ANGLE)) {
382 ERROR("Missing open angle");
383 } else {
384 VArray array(DeleteFn);
385 UBool mightHaveNext = TRUE;
386 int32_t requiredLength = -1;
387 while (mightHaveNext) {
388 mightHaveNext = FALSE;
389 UChar** elem = nextArray(requiredLength);
390 skipWhitespace();
391 UBool haveComma = check(COMMA);
392 if (elem) {
393 array.add(elem, ec);
394 if (haveComma) {
395 inc();
396 mightHaveNext = TRUE;
397 }
398 } else if (haveComma) {
399 ERROR("Unexpected character");
400 }
401 }
402
403 skipWhitespace();
404 if (!checkInc(CLOSE_ANGLE)) {
405 if (check(OPEN_ANGLE)) {
406 ERROR("Missing comma in outer array");
407 } else {
408 ERROR("Missing close angle bracket in outer array");
409 }
410 }
411
412 skipWhitespace();
413 if (p != e) {
414 ERROR("Extra text after close of localization data");
415 }
416
417 array.add(NULL, ec);
418 if (U_SUCCESS(ec)) {
419 int32_t numLocs = array.length() - 2; // subtract first, NULL
420 UChar*** result = (UChar***)array.release();
421
422 return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL
423 }
424 }
425
426 ERROR("Unknown error");
427 }
428
429 UChar**
nextArray(int32_t & requiredLength)430 LocDataParser::nextArray(int32_t& requiredLength) {
431 if (U_FAILURE(ec)) {
432 return NULL;
433 }
434
435 skipWhitespace();
436 if (!checkInc(OPEN_ANGLE)) {
437 ERROR("Missing open angle");
438 }
439
440 VArray array;
441 UBool mightHaveNext = TRUE;
442 while (mightHaveNext) {
443 mightHaveNext = FALSE;
444 UChar* elem = nextString();
445 skipWhitespace();
446 UBool haveComma = check(COMMA);
447 if (elem) {
448 array.add(elem, ec);
449 if (haveComma) {
450 inc();
451 mightHaveNext = TRUE;
452 }
453 } else if (haveComma) {
454 ERROR("Unexpected comma");
455 }
456 }
457 skipWhitespace();
458 if (!checkInc(CLOSE_ANGLE)) {
459 if (check(OPEN_ANGLE)) {
460 ERROR("Missing close angle bracket in inner array");
461 } else {
462 ERROR("Missing comma in inner array");
463 }
464 }
465
466 array.add(NULL, ec);
467 if (U_SUCCESS(ec)) {
468 if (requiredLength == -1) {
469 requiredLength = array.length() + 1;
470 } else if (array.length() != requiredLength) {
471 ec = U_ILLEGAL_ARGUMENT_ERROR;
472 ERROR("Array not of required length");
473 }
474
475 return (UChar**)array.release();
476 }
477 ERROR("Unknown Error");
478 }
479
480 UChar*
nextString()481 LocDataParser::nextString() {
482 UChar* result = NULL;
483
484 skipWhitespace();
485 if (p < e) {
486 const UChar* terminators;
487 UChar c = *p;
488 UBool haveQuote = c == QUOTE || c == TICK;
489 if (haveQuote) {
490 inc();
491 terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
492 } else {
493 terminators = NOQUOTE_STOPLIST;
494 }
495 UChar* start = p;
496 while (p < e && !inList(*p, terminators)) ++p;
497 if (p == e) {
498 ERROR("Unexpected end of data");
499 }
500
501 UChar x = *p;
502 if (p > start) {
503 ch = x;
504 *p = 0x0; // terminate by writing to data
505 result = start; // just point into data
506 }
507 if (haveQuote) {
508 if (x != c) {
509 ERROR("Missing matching quote");
510 } else if (p == start) {
511 ERROR("Empty string");
512 }
513 inc();
514 } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) {
515 ERROR("Unexpected character in string");
516 }
517 }
518
519 // ok for there to be no next string
520 return result;
521 }
522
523 void
parseError(const char *)524 LocDataParser::parseError(const char* /*str*/) {
525 if (!data) {
526 return;
527 }
528
529 const UChar* start = p - U_PARSE_CONTEXT_LEN - 1;
530 if (start < data) {
531 start = data;
532 }
533 for (UChar* x = p; --x >= start;) {
534 if (!*x) {
535 start = x+1;
536 break;
537 }
538 }
539 const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1;
540 if (limit > e) {
541 limit = e;
542 }
543 u_strncpy(pe.preContext, start, (int32_t)(p-start));
544 pe.preContext[p-start] = 0;
545 u_strncpy(pe.postContext, p, (int32_t)(limit-p));
546 pe.postContext[limit-p] = 0;
547 pe.offset = (int32_t)(p - data);
548
549 #ifdef DEBUG
550 fprintf(stderr, "%s at or near character %d: ", str, p-data);
551
552 UnicodeString msg;
553 msg.append(start, p - start);
554 msg.append((UChar)0x002f); /* SOLIDUS/SLASH */
555 msg.append(p, limit-p);
556 msg.append("'");
557
558 char buf[128];
559 int32_t len = msg.extract(0, msg.length(), buf, 128);
560 if (len >= 128) {
561 buf[127] = 0;
562 } else {
563 buf[len] = 0;
564 }
565 fprintf(stderr, "%s\n", buf);
566 fflush(stderr);
567 #endif
568
569 uprv_free(data);
570 data = NULL;
571 p = NULL;
572 e = NULL;
573
574 if (U_SUCCESS(ec)) {
575 ec = U_PARSE_ERROR;
576 }
577 }
578
579 //UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
580
581 StringLocalizationInfo*
create(const UnicodeString & info,UParseError & perror,UErrorCode & status)582 StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
583 if (U_FAILURE(status)) {
584 return NULL;
585 }
586
587 int32_t len = info.length();
588 if (len == 0) {
589 return NULL; // no error;
590 }
591
592 UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar));
593 if (!p) {
594 status = U_MEMORY_ALLOCATION_ERROR;
595 return NULL;
596 }
597 info.extract(p, len, status);
598 if (!U_FAILURE(status)) {
599 status = U_ZERO_ERROR; // clear warning about non-termination
600 }
601
602 LocDataParser parser(perror, status);
603 return parser.parse(p, len);
604 }
605
~StringLocalizationInfo()606 StringLocalizationInfo::~StringLocalizationInfo() {
607 for (UChar*** p = (UChar***)data; *p; ++p) {
608 // remaining data is simply pointer into our unicode string data.
609 if (*p) uprv_free(*p);
610 }
611 if (data) uprv_free(data);
612 if (info) uprv_free(info);
613 }
614
615
616 const UChar*
getRuleSetName(int32_t index) const617 StringLocalizationInfo::getRuleSetName(int32_t index) const {
618 if (index >= 0 && index < getNumberOfRuleSets()) {
619 return data[0][index];
620 }
621 return NULL;
622 }
623
624 const UChar*
getLocaleName(int32_t index) const625 StringLocalizationInfo::getLocaleName(int32_t index) const {
626 if (index >= 0 && index < getNumberOfDisplayLocales()) {
627 return data[index+1][0];
628 }
629 return NULL;
630 }
631
632 const UChar*
getDisplayName(int32_t localeIndex,int32_t ruleIndex) const633 StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
634 if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() &&
635 ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) {
636 return data[localeIndex+1][ruleIndex+1];
637 }
638 return NULL;
639 }
640
641 // ----------
642
RuleBasedNumberFormat(const UnicodeString & description,const UnicodeString & locs,const Locale & alocale,UParseError & perror,UErrorCode & status)643 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
644 const UnicodeString& locs,
645 const Locale& alocale, UParseError& perror, UErrorCode& status)
646 : ruleSets(NULL)
647 , defaultRuleSet(NULL)
648 , locale(alocale)
649 , collator(NULL)
650 , decimalFormatSymbols(NULL)
651 , lenient(FALSE)
652 , lenientParseRules(NULL)
653 , localizations(NULL)
654 {
655 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
656 init(description, locinfo, perror, status);
657 }
658
RuleBasedNumberFormat(const UnicodeString & description,const UnicodeString & locs,UParseError & perror,UErrorCode & status)659 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
660 const UnicodeString& locs,
661 UParseError& perror, UErrorCode& status)
662 : ruleSets(NULL)
663 , defaultRuleSet(NULL)
664 , locale(Locale::getDefault())
665 , collator(NULL)
666 , decimalFormatSymbols(NULL)
667 , lenient(FALSE)
668 , lenientParseRules(NULL)
669 , localizations(NULL)
670 {
671 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
672 init(description, locinfo, perror, status);
673 }
674
RuleBasedNumberFormat(const UnicodeString & description,LocalizationInfo * info,const Locale & alocale,UParseError & perror,UErrorCode & status)675 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
676 LocalizationInfo* info,
677 const Locale& alocale, UParseError& perror, UErrorCode& status)
678 : ruleSets(NULL)
679 , defaultRuleSet(NULL)
680 , locale(alocale)
681 , collator(NULL)
682 , decimalFormatSymbols(NULL)
683 , lenient(FALSE)
684 , lenientParseRules(NULL)
685 , localizations(NULL)
686 {
687 init(description, info, perror, status);
688 }
689
RuleBasedNumberFormat(const UnicodeString & description,UParseError & perror,UErrorCode & status)690 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
691 UParseError& perror,
692 UErrorCode& status)
693 : ruleSets(NULL)
694 , defaultRuleSet(NULL)
695 , locale(Locale::getDefault())
696 , collator(NULL)
697 , decimalFormatSymbols(NULL)
698 , lenient(FALSE)
699 , lenientParseRules(NULL)
700 , localizations(NULL)
701 {
702 init(description, NULL, perror, status);
703 }
704
RuleBasedNumberFormat(const UnicodeString & description,const Locale & aLocale,UParseError & perror,UErrorCode & status)705 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
706 const Locale& aLocale,
707 UParseError& perror,
708 UErrorCode& status)
709 : ruleSets(NULL)
710 , defaultRuleSet(NULL)
711 , locale(aLocale)
712 , collator(NULL)
713 , decimalFormatSymbols(NULL)
714 , lenient(FALSE)
715 , lenientParseRules(NULL)
716 , localizations(NULL)
717 {
718 init(description, NULL, perror, status);
719 }
720
RuleBasedNumberFormat(URBNFRuleSetTag tag,const Locale & alocale,UErrorCode & status)721 RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
722 : ruleSets(NULL)
723 , defaultRuleSet(NULL)
724 , locale(alocale)
725 , collator(NULL)
726 , decimalFormatSymbols(NULL)
727 , lenient(FALSE)
728 , lenientParseRules(NULL)
729 , localizations(NULL)
730 {
731 if (U_FAILURE(status)) {
732 return;
733 }
734
735 const char* fmt_tag = "";
736 switch (tag) {
737 case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
738 case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
739 case URBNF_DURATION: fmt_tag = "DurationRules"; break;
740 default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
741 }
742
743 // TODO: read localization info from resource
744 LocalizationInfo* locinfo = NULL;
745
746 int32_t len = 0;
747 UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
748 if (U_SUCCESS(status)) {
749 setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
750 ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));
751 const UChar* description = ures_getStringByKey(nfrb, fmt_tag, &len, &status);
752 UnicodeString desc(description, len);
753 UParseError perror;
754 init (desc, locinfo, perror, status);
755 }
756 ures_close(nfrb);
757 }
758
// Copy constructor: starts from an empty formatter and lets the assignment
// operator rebuild the rule sets from rhs.
RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
    : NumberFormat(rhs),
      ruleSets(NULL),
      defaultRuleSet(NULL),
      locale(rhs.locale),
      collator(NULL),
      decimalFormatSymbols(NULL),
      lenient(FALSE),
      lenientParseRules(NULL),
      localizations(NULL)
{
    *this = rhs;
}
772
773 // --------
774
775 RuleBasedNumberFormat&
operator =(const RuleBasedNumberFormat & rhs)776 RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
777 {
778 UErrorCode status = U_ZERO_ERROR;
779 dispose();
780 locale = rhs.locale;
781 lenient = rhs.lenient;
782
783 UnicodeString rules = rhs.getRules();
784 UParseError perror;
785 init(rules, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
786
787 return *this;
788 }
789
~RuleBasedNumberFormat()790 RuleBasedNumberFormat::~RuleBasedNumberFormat()
791 {
792 dispose();
793 }
794
795 Format*
clone(void) const796 RuleBasedNumberFormat::clone(void) const
797 {
798 RuleBasedNumberFormat * result = NULL;
799 UnicodeString rules = getRules();
800 UErrorCode status = U_ZERO_ERROR;
801 UParseError perror;
802 result = new RuleBasedNumberFormat(rules, localizations, locale, perror, status);
803 /* test for NULL */
804 if (result == 0) {
805 status = U_MEMORY_ALLOCATION_ERROR;
806 return 0;
807 }
808 if (U_FAILURE(status)) {
809 delete result;
810 result = 0;
811 } else {
812 result->lenient = lenient;
813 }
814 return result;
815 }
816
817 UBool
operator ==(const Format & other) const818 RuleBasedNumberFormat::operator==(const Format& other) const
819 {
820 if (this == &other) {
821 return TRUE;
822 }
823
824 if (other.getDynamicClassID() == getStaticClassID()) {
825 const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
826 if (locale == rhs.locale &&
827 lenient == rhs.lenient &&
828 (localizations == NULL
829 ? rhs.localizations == NULL
830 : (rhs.localizations == NULL
831 ? FALSE
832 : *localizations == rhs.localizations))) {
833
834 NFRuleSet** p = ruleSets;
835 NFRuleSet** q = rhs.ruleSets;
836 if (p == NULL) {
837 return q == NULL;
838 } else if (q == NULL) {
839 return FALSE;
840 }
841 while (*p && *q && (**p == **q)) {
842 ++p;
843 ++q;
844 }
845 return *q == NULL && *p == NULL;
846 }
847 }
848
849 return FALSE;
850 }
851
852 UnicodeString
getRules() const853 RuleBasedNumberFormat::getRules() const
854 {
855 UnicodeString result;
856 if (ruleSets != NULL) {
857 for (NFRuleSet** p = ruleSets; *p; ++p) {
858 (*p)->appendRules(result);
859 }
860 }
861 return result;
862 }
863
864 UnicodeString
getRuleSetName(int32_t index) const865 RuleBasedNumberFormat::getRuleSetName(int32_t index) const
866 {
867 if (localizations) {
868 UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1);
869 return string;
870 } else if (ruleSets) {
871 UnicodeString result;
872 for (NFRuleSet** p = ruleSets; *p; ++p) {
873 NFRuleSet* rs = *p;
874 if (rs->isPublic()) {
875 if (--index == -1) {
876 rs->getName(result);
877 return result;
878 }
879 }
880 }
881 }
882 UnicodeString empty;
883 return empty;
884 }
885
886 int32_t
getNumberOfRuleSetNames() const887 RuleBasedNumberFormat::getNumberOfRuleSetNames() const
888 {
889 int32_t result = 0;
890 if (localizations) {
891 result = localizations->getNumberOfRuleSets();
892 } else if (ruleSets) {
893 for (NFRuleSet** p = ruleSets; *p; ++p) {
894 if ((**p).isPublic()) {
895 ++result;
896 }
897 }
898 }
899 return result;
900 }
901
902 int32_t
getNumberOfRuleSetDisplayNameLocales(void) const903 RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
904 if (localizations) {
905 return localizations->getNumberOfDisplayLocales();
906 }
907 return 0;
908 }
909
910 Locale
getRuleSetDisplayNameLocale(int32_t index,UErrorCode & status) const911 RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
912 if (U_FAILURE(status)) {
913 return Locale("");
914 }
915 if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
916 UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
917 char buffer[64];
918 int32_t cap = name.length() + 1;
919 char* bp = buffer;
920 if (cap > 64) {
921 bp = (char *)uprv_malloc(cap);
922 if (bp == NULL) {
923 status = U_MEMORY_ALLOCATION_ERROR;
924 return Locale("");
925 }
926 }
927 name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
928 Locale retLocale(bp);
929 if (bp != buffer) {
930 uprv_free(bp);
931 }
932 return retLocale;
933 }
934 status = U_ILLEGAL_ARGUMENT_ERROR;
935 Locale retLocale;
936 return retLocale;
937 }
938
939 UnicodeString
getRuleSetDisplayName(int32_t index,const Locale & localeParam)940 RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
941 if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) {
942 UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
943 int32_t len = localeName.length();
944 UChar* localeStr = localeName.getBuffer(len + 1);
945 while (len >= 0) {
946 localeStr[len] = 0;
947 int32_t ix = localizations->indexForLocale(localeStr);
948 if (ix >= 0) {
949 UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
950 return name;
951 }
952
953 // trim trailing portion, skipping over ommitted sections
954 do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore
955 while (len > 0 && localeStr[len-1] == 0x005F) --len;
956 }
957 UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
958 return name;
959 }
960 UnicodeString bogus;
961 bogus.setToBogus();
962 return bogus;
963 }
964
965 UnicodeString
getRuleSetDisplayName(const UnicodeString & ruleSetName,const Locale & localeParam)966 RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
967 if (localizations) {
968 UnicodeString rsn(ruleSetName);
969 int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
970 return getRuleSetDisplayName(ix, localeParam);
971 }
972 UnicodeString bogus;
973 bogus.setToBogus();
974 return bogus;
975 }
976
977 NFRuleSet*
findRuleSet(const UnicodeString & name,UErrorCode & status) const978 RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
979 {
980 if (U_SUCCESS(status) && ruleSets) {
981 for (NFRuleSet** p = ruleSets; *p; ++p) {
982 NFRuleSet* rs = *p;
983 if (rs->isNamed(name)) {
984 return rs;
985 }
986 }
987 status = U_ILLEGAL_ARGUMENT_ERROR;
988 }
989 return NULL;
990 }
991
992 UnicodeString&
format(int32_t number,UnicodeString & toAppendTo,FieldPosition &) const993 RuleBasedNumberFormat::format(int32_t number,
994 UnicodeString& toAppendTo,
995 FieldPosition& /* pos */) const
996 {
997 if (defaultRuleSet) defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length());
998 return toAppendTo;
999 }
1000
1001
1002 UnicodeString&
format(int64_t number,UnicodeString & toAppendTo,FieldPosition &) const1003 RuleBasedNumberFormat::format(int64_t number,
1004 UnicodeString& toAppendTo,
1005 FieldPosition& /* pos */) const
1006 {
1007 if (defaultRuleSet) defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
1008 return toAppendTo;
1009 }
1010
1011
1012 UnicodeString&
format(double number,UnicodeString & toAppendTo,FieldPosition &) const1013 RuleBasedNumberFormat::format(double number,
1014 UnicodeString& toAppendTo,
1015 FieldPosition& /* pos */) const
1016 {
1017 if (defaultRuleSet) defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
1018 return toAppendTo;
1019 }
1020
1021
1022 UnicodeString&
format(int32_t number,const UnicodeString & ruleSetName,UnicodeString & toAppendTo,FieldPosition &,UErrorCode & status) const1023 RuleBasedNumberFormat::format(int32_t number,
1024 const UnicodeString& ruleSetName,
1025 UnicodeString& toAppendTo,
1026 FieldPosition& /* pos */,
1027 UErrorCode& status) const
1028 {
1029 // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
1030 if (U_SUCCESS(status)) {
1031 if (ruleSetName.indexOf(gPercentPercent) == 0) {
1032 // throw new IllegalArgumentException("Can't use internal rule set");
1033 status = U_ILLEGAL_ARGUMENT_ERROR;
1034 } else {
1035 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1036 if (rs) {
1037 rs->format((int64_t)number, toAppendTo, toAppendTo.length());
1038 }
1039 }
1040 }
1041 return toAppendTo;
1042 }
1043
1044
1045 UnicodeString&
format(int64_t number,const UnicodeString & ruleSetName,UnicodeString & toAppendTo,FieldPosition &,UErrorCode & status) const1046 RuleBasedNumberFormat::format(int64_t number,
1047 const UnicodeString& ruleSetName,
1048 UnicodeString& toAppendTo,
1049 FieldPosition& /* pos */,
1050 UErrorCode& status) const
1051 {
1052 if (U_SUCCESS(status)) {
1053 if (ruleSetName.indexOf(gPercentPercent) == 0) {
1054 // throw new IllegalArgumentException("Can't use internal rule set");
1055 status = U_ILLEGAL_ARGUMENT_ERROR;
1056 } else {
1057 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1058 if (rs) {
1059 rs->format(number, toAppendTo, toAppendTo.length());
1060 }
1061 }
1062 }
1063 return toAppendTo;
1064 }
1065
1066
1067 // make linker happy
1068 UnicodeString&
format(const Formattable & obj,UnicodeString & toAppendTo,FieldPosition & pos,UErrorCode & status) const1069 RuleBasedNumberFormat::format(const Formattable& obj,
1070 UnicodeString& toAppendTo,
1071 FieldPosition& pos,
1072 UErrorCode& status) const
1073 {
1074 return NumberFormat::format(obj, toAppendTo, pos, status);
1075 }
1076
1077 UnicodeString&
format(double number,const UnicodeString & ruleSetName,UnicodeString & toAppendTo,FieldPosition &,UErrorCode & status) const1078 RuleBasedNumberFormat::format(double number,
1079 const UnicodeString& ruleSetName,
1080 UnicodeString& toAppendTo,
1081 FieldPosition& /* pos */,
1082 UErrorCode& status) const
1083 {
1084 if (U_SUCCESS(status)) {
1085 if (ruleSetName.indexOf(gPercentPercent) == 0) {
1086 // throw new IllegalArgumentException("Can't use internal rule set");
1087 status = U_ILLEGAL_ARGUMENT_ERROR;
1088 } else {
1089 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1090 if (rs) {
1091 rs->format(number, toAppendTo, toAppendTo.length());
1092 }
1093 }
1094 }
1095 return toAppendTo;
1096 }
1097
1098 void
parse(const UnicodeString & text,Formattable & result,ParsePosition & parsePosition) const1099 RuleBasedNumberFormat::parse(const UnicodeString& text,
1100 Formattable& result,
1101 ParsePosition& parsePosition) const
1102 {
1103 if (!ruleSets) {
1104 parsePosition.setErrorIndex(0);
1105 return;
1106 }
1107
1108 UnicodeString workingText(text, parsePosition.getIndex());
1109 ParsePosition workingPos(0);
1110
1111 ParsePosition high_pp(0);
1112 Formattable high_result;
1113
1114 for (NFRuleSet** p = ruleSets; *p; ++p) {
1115 NFRuleSet *rp = *p;
1116 if (rp->isPublic()) {
1117 ParsePosition working_pp(0);
1118 Formattable working_result;
1119
1120 rp->parse(workingText, working_pp, kMaxDouble, working_result);
1121 if (working_pp.getIndex() > high_pp.getIndex()) {
1122 high_pp = working_pp;
1123 high_result = working_result;
1124
1125 if (high_pp.getIndex() == workingText.length()) {
1126 break;
1127 }
1128 }
1129 }
1130 }
1131
1132 parsePosition.setIndex(parsePosition.getIndex() + high_pp.getIndex());
1133 if (high_pp.getIndex() > 0) {
1134 parsePosition.setErrorIndex(-1);
1135 }
1136 result = high_result;
1137 if (result.getType() == Formattable::kDouble) {
1138 int32_t r = (int32_t)result.getDouble();
1139 if ((double)r == result.getDouble()) {
1140 result.setLong(r);
1141 }
1142 }
1143 }
1144
1145 #if !UCONFIG_NO_COLLATION
1146
1147 void
setLenient(UBool enabled)1148 RuleBasedNumberFormat::setLenient(UBool enabled)
1149 {
1150 lenient = enabled;
1151 if (!enabled && collator) {
1152 delete collator;
1153 collator = NULL;
1154 }
1155 }
1156
1157 #endif
1158
1159 void
setDefaultRuleSet(const UnicodeString & ruleSetName,UErrorCode & status)1160 RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
1161 if (U_SUCCESS(status)) {
1162 if (ruleSetName.isEmpty()) {
1163 if (localizations) {
1164 UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
1165 defaultRuleSet = findRuleSet(name, status);
1166 } else {
1167 initDefaultRuleSet();
1168 }
1169 } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
1170 status = U_ILLEGAL_ARGUMENT_ERROR;
1171 } else {
1172 NFRuleSet* result = findRuleSet(ruleSetName, status);
1173 if (result != NULL) {
1174 defaultRuleSet = result;
1175 }
1176 }
1177 }
1178 }
1179
1180 UnicodeString
getDefaultRuleSetName() const1181 RuleBasedNumberFormat::getDefaultRuleSetName() const {
1182 UnicodeString result;
1183 if (defaultRuleSet && defaultRuleSet->isPublic()) {
1184 defaultRuleSet->getName(result);
1185 } else {
1186 result.setToBogus();
1187 }
1188 return result;
1189 }
1190
1191 void
initDefaultRuleSet()1192 RuleBasedNumberFormat::initDefaultRuleSet()
1193 {
1194 defaultRuleSet = NULL;
1195 if (!ruleSets) {
1196 return;
1197 }
1198
1199 NFRuleSet**p = &ruleSets[0];
1200 while (*p) {
1201 ++p;
1202 }
1203
1204 defaultRuleSet = *--p;
1205 if (!defaultRuleSet->isPublic()) {
1206 while (p != ruleSets) {
1207 if ((*--p)->isPublic()) {
1208 defaultRuleSet = *p;
1209 break;
1210 }
1211 }
1212 }
1213 }
1214
1215
1216 void
init(const UnicodeString & rules,LocalizationInfo * localizationInfos,UParseError & pErr,UErrorCode & status)1217 RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
1218 UParseError& pErr, UErrorCode& status)
1219 {
1220 // TODO: implement UParseError
1221 uprv_memset(&pErr, 0, sizeof(UParseError));
1222 // Note: this can leave ruleSets == NULL, so remaining code should check
1223 if (U_FAILURE(status)) {
1224 return;
1225 }
1226
1227 this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();
1228
1229 UnicodeString description(rules);
1230 if (!description.length()) {
1231 status = U_MEMORY_ALLOCATION_ERROR;
1232 return;
1233 }
1234
1235 // start by stripping the trailing whitespace from all the rules
1236 // (this is all the whitespace follwing each semicolon in the
1237 // description). This allows us to look for rule-set boundaries
1238 // by searching for ";%" without having to worry about whitespace
1239 // between the ; and the %
1240 stripWhitespace(description);
1241
1242 // check to see if there's a set of lenient-parse rules. If there
1243 // is, pull them out into our temporary holding place for them,
1244 // and delete them from the description before the real desciption-
1245 // parsing code sees them
1246 int32_t lp = description.indexOf(gLenientParse);
1247 if (lp != -1) {
1248 // we've got to make sure we're not in the middle of a rule
1249 // (where "%%lenient-parse" would actually get treated as
1250 // rule text)
1251 if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
1252 // locate the beginning and end of the actual collation
1253 // rules (there may be whitespace between the name and
1254 // the first token in the description)
1255 int lpEnd = description.indexOf(gSemiPercent, lp);
1256
1257 if (lpEnd == -1) {
1258 lpEnd = description.length() - 1;
1259 }
1260 int lpStart = lp + u_strlen(gLenientParse);
1261 while (uprv_isRuleWhiteSpace(description.charAt(lpStart))) {
1262 ++lpStart;
1263 }
1264
1265 // copy out the lenient-parse rules and delete them
1266 // from the description
1267 lenientParseRules = new UnicodeString();
1268 /* test for NULL */
1269 if (lenientParseRules == 0) {
1270 status = U_MEMORY_ALLOCATION_ERROR;
1271 return;
1272 }
1273 lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);
1274
1275 description.remove(lp, lpEnd + 1 - lp);
1276 }
1277 }
1278
1279 // pre-flight parsing the description and count the number of
1280 // rule sets (";%" marks the end of one rule set and the beginning
1281 // of the next)
1282 int numRuleSets = 0;
1283 for (int32_t p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, p)) {
1284 ++numRuleSets;
1285 ++p;
1286 }
1287 ++numRuleSets;
1288
1289 // our rule list is an array of the appropriate size
1290 ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *));
1291 /* test for NULL */
1292 if (ruleSets == 0) {
1293 status = U_MEMORY_ALLOCATION_ERROR;
1294 return;
1295 }
1296
1297 for (int i = 0; i <= numRuleSets; ++i) {
1298 ruleSets[i] = NULL;
1299 }
1300
1301 // divide up the descriptions into individual rule-set descriptions
1302 // and store them in a temporary array. At each step, we also
1303 // new up a rule set, but all this does is initialize its name
1304 // and remove it from its description. We can't actually parse
1305 // the rest of the descriptions and finish initializing everything
1306 // because we have to know the names and locations of all the rule
1307 // sets before we can actually set everything up
1308 if(!numRuleSets) {
1309 status = U_ILLEGAL_ARGUMENT_ERROR;
1310 return;
1311 }
1312 UnicodeString* ruleSetDescriptions = new UnicodeString[numRuleSets];
1313 /* test for NULL */
1314 if (ruleSetDescriptions == 0) {
1315 status = U_MEMORY_ALLOCATION_ERROR;
1316 return;
1317 }
1318
1319 {
1320 int curRuleSet = 0;
1321 int32_t start = 0;
1322 for (int32_t p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, start)) {
1323 ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
1324 ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
1325 /* test for NULL */
1326 if (ruleSets[curRuleSet] == 0) {
1327 status = U_MEMORY_ALLOCATION_ERROR;
1328 return;
1329 }
1330 ++curRuleSet;
1331 start = p + 1;
1332 }
1333 ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
1334 ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
1335 /* test for NULL */
1336 if (ruleSets[curRuleSet] == 0) {
1337 status = U_MEMORY_ALLOCATION_ERROR;
1338 return;
1339 }
1340 }
1341
1342 // now we can take note of the formatter's default rule set, which
1343 // is the last public rule set in the description (it's the last
1344 // rather than the first so that a user can create a new formatter
1345 // from an existing formatter and change its default behavior just
1346 // by appending more rule sets to the end)
1347
1348 // {dlf} Initialization of a fraction rule set requires the default rule
1349 // set to be known. For purposes of initialization, this is always the
1350 // last public rule set, no matter what the localization data says.
1351 initDefaultRuleSet();
1352
1353 // finally, we can go back through the temporary descriptions
1354 // list and finish seting up the substructure (and we throw
1355 // away the temporary descriptions as we go)
1356 {
1357 for (int i = 0; i < numRuleSets; i++) {
1358 ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
1359 }
1360 }
1361
1362 delete[] ruleSetDescriptions;
1363
1364 // Now that the rules are initialized, the 'real' default rule
1365 // set can be adjusted by the localization data.
1366
1367 // The C code keeps the localization array as is, rather than building
1368 // a separate array of the public rule set names, so we have less work
1369 // to do here-- but we still need to check the names.
1370
1371 if (localizationInfos) {
1372 // confirm the names, if any aren't in the rules, that's an error
1373 // it is ok if the rules contain public rule sets that are not in this list
1374 for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) {
1375 UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1);
1376 NFRuleSet* rs = findRuleSet(name, status);
1377 if (rs == NULL) {
1378 break; // error
1379 }
1380 if (i == 0) {
1381 defaultRuleSet = rs;
1382 }
1383 }
1384 } else {
1385 defaultRuleSet = getDefaultRuleSet();
1386 }
1387 }
1388
1389 void
stripWhitespace(UnicodeString & description)1390 RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
1391 {
1392 // iterate through the characters...
1393 UnicodeString result;
1394
1395 int start = 0;
1396 while (start != -1 && start < description.length()) {
1397 // seek to the first non-whitespace character...
1398 while (start < description.length()
1399 && uprv_isRuleWhiteSpace(description.charAt(start))) {
1400 ++start;
1401 }
1402
1403 // locate the next semicolon in the text and copy the text from
1404 // our current position up to that semicolon into the result
1405 int32_t p = description.indexOf(gSemiColon, start);
1406 if (p == -1) {
1407 // or if we don't find a semicolon, just copy the rest of
1408 // the string into the result
1409 result.append(description, start, description.length() - start);
1410 start = -1;
1411 }
1412 else if (p < description.length()) {
1413 result.append(description, start, p + 1 - start);
1414 start = p + 1;
1415 }
1416
1417 // when we get here, we've seeked off the end of the sring, and
1418 // we terminate the loop (we continue until *start* is -1 rather
1419 // than until *p* is -1, because otherwise we'd miss the last
1420 // rule in the description)
1421 else {
1422 start = -1;
1423 }
1424 }
1425
1426 description.setTo(result);
1427 }
1428
1429
1430 void
dispose()1431 RuleBasedNumberFormat::dispose()
1432 {
1433 if (ruleSets) {
1434 for (NFRuleSet** p = ruleSets; *p; ++p) {
1435 delete *p;
1436 }
1437 uprv_free(ruleSets);
1438 ruleSets = NULL;
1439 }
1440
1441 #if !UCONFIG_NO_COLLATION
1442 delete collator;
1443 #endif
1444 collator = NULL;
1445
1446 delete decimalFormatSymbols;
1447 decimalFormatSymbols = NULL;
1448
1449 delete lenientParseRules;
1450 lenientParseRules = NULL;
1451
1452 if (localizations) localizations = localizations->unref();
1453 }
1454
1455
1456 //-----------------------------------------------------------------------
1457 // package-internal API
1458 //-----------------------------------------------------------------------
1459
1460 /**
1461 * Returns the collator to use for lenient parsing. The collator is lazily created:
1462 * this function creates it the first time it's called.
1463 * @return The collator to use for lenient parsing, or null if lenient parsing
1464 * is turned off.
1465 */
1466 Collator*
getCollator() const1467 RuleBasedNumberFormat::getCollator() const
1468 {
1469 #if !UCONFIG_NO_COLLATION
1470 if (!ruleSets) {
1471 return NULL;
1472 }
1473
1474 // lazy-evaulate the collator
1475 if (collator == NULL && lenient) {
1476 // create a default collator based on the formatter's locale,
1477 // then pull out that collator's rules, append any additional
1478 // rules specified in the description, and create a _new_
1479 // collator based on the combinaiton of those rules
1480
1481 UErrorCode status = U_ZERO_ERROR;
1482
1483 Collator* temp = Collator::createInstance(locale, status);
1484 if (U_SUCCESS(status) &&
1485 temp->getDynamicClassID() == RuleBasedCollator::getStaticClassID()) {
1486
1487 RuleBasedCollator* newCollator = (RuleBasedCollator*)temp;
1488 if (lenientParseRules) {
1489 UnicodeString rules(newCollator->getRules());
1490 rules.append(*lenientParseRules);
1491
1492 newCollator = new RuleBasedCollator(rules, status);
1493 } else {
1494 temp = NULL;
1495 }
1496 if (U_SUCCESS(status)) {
1497 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
1498 // cast away const
1499 ((RuleBasedNumberFormat*)this)->collator = newCollator;
1500 } else {
1501 delete newCollator;
1502 }
1503 }
1504 delete temp;
1505 }
1506 #endif
1507
1508 // if lenient-parse mode is off, this will be null
1509 // (see setLenientParseMode())
1510 return collator;
1511 }
1512
1513
1514 /**
1515 * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
1516 * instances owned by this formatter. This object is lazily created: this function
1517 * creates it the first time it's called.
1518 * @return The DecimalFormatSymbols object that should be used by all DecimalFormat
1519 * instances owned by this formatter.
1520 */
1521 DecimalFormatSymbols*
getDecimalFormatSymbols() const1522 RuleBasedNumberFormat::getDecimalFormatSymbols() const
1523 {
1524 // lazy-evaluate the DecimalFormatSymbols object. This object
1525 // is shared by all DecimalFormat instances belonging to this
1526 // formatter
1527 if (decimalFormatSymbols == NULL) {
1528 UErrorCode status = U_ZERO_ERROR;
1529 DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status);
1530 if (U_SUCCESS(status)) {
1531 ((RuleBasedNumberFormat*)this)->decimalFormatSymbols = temp;
1532 } else {
1533 delete temp;
1534 }
1535 }
1536 return decimalFormatSymbols;
1537 }
1538
1539 U_NAMESPACE_END
1540
1541 /* U_HAVE_RBNF */
1542 #endif
1543