• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 * Copyright (C) 1997-2006, International Business Machines Corporation
4 * and others. All Rights Reserved.
5 *******************************************************************************
6 */
7 
8 #include "unicode/rbnf.h"
9 
10 #if U_HAVE_RBNF
11 
12 #include "unicode/normlzr.h"
13 #include "unicode/tblcoll.h"
14 #include "unicode/uchar.h"
15 #include "unicode/ucol.h"
16 #include "unicode/uloc.h"
17 #include "unicode/unum.h"
18 #include "unicode/ures.h"
19 #include "unicode/ustring.h"
20 #include "unicode/utf16.h"
21 #include "unicode/udata.h"
22 #include "nfrs.h"
23 
24 #include "cmemory.h"
25 #include "cstring.h"
26 #include "../common/util.h"
27 
28 // debugging
29 // #define DEBUG
30 
31 #ifdef DEBUG
32 #include "stdio.h"
33 #endif
34 
35 #define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
36 
37 static const UChar gPercentPercent[] =
38 {
39     0x25, 0x25, 0
40 }; /* "%%" */
41 
42 // All urbnf objects are created through openRules, so we init all of the
43 // Unicode string constants required by rbnf, nfrs, or nfr here.
44 static const UChar gLenientParse[] =
45 {
46     0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
47 }; /* "%%lenient-parse:" */
48 static const UChar gSemiColon = 0x003B;
49 static const UChar gSemiPercent[] =
50 {
51     0x3B, 0x25, 0
52 }; /* ";%" */
53 
// kHalfMaxDouble is 2^kSomeNumberOfBitsDiv2 as a double; kMaxDouble is its
// square, i.e. 2^(2 * kSomeNumberOfBitsDiv2).
#define kSomeNumberOfBitsDiv2 22
#define kHalfMaxDouble ((double)(1 << kSomeNumberOfBitsDiv2))
#define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
57 
58 U_NAMESPACE_BEGIN
59 
60 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
61 
62 /*
63 This is a utility class. It does not use ICU's RTTI.
64 If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
65 Please make sure that intltest passes on Windows in Release mode,
66 since the string pooling per compilation unit will mess up how RTTI works.
67 The RTTI code was also removed due to lack of code coverage.
68 */
69 class LocalizationInfo : public UMemory {
70 protected:
~LocalizationInfo()71     virtual ~LocalizationInfo() {};
72     uint32_t refcount;
73 
74 public:
LocalizationInfo()75     LocalizationInfo() : refcount(0) {}
76 
ref(void)77     LocalizationInfo* ref(void) {
78         ++refcount;
79         return this;
80     }
81 
unref(void)82     LocalizationInfo* unref(void) {
83         if (refcount && --refcount == 0) {
84             delete this;
85         }
86         return NULL;
87     }
88 
89     virtual UBool operator==(const LocalizationInfo* rhs) const;
operator !=(const LocalizationInfo * rhs) const90     inline  UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); }
91 
92     virtual int32_t getNumberOfRuleSets(void) const = 0;
93     virtual const UChar* getRuleSetName(int32_t index) const = 0;
94     virtual int32_t getNumberOfDisplayLocales(void) const = 0;
95     virtual const UChar* getLocaleName(int32_t index) const = 0;
96     virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0;
97 
98     virtual int32_t indexForLocale(const UChar* locale) const;
99     virtual int32_t indexForRuleSet(const UChar* ruleset) const;
100 
101 //    virtual UClassID getDynamicClassID() const = 0;
102 //    static UClassID getStaticClassID(void);
103 };
104 
105 //UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
106 
107 // if both strings are NULL, this returns TRUE
108 static UBool
streq(const UChar * lhs,const UChar * rhs)109 streq(const UChar* lhs, const UChar* rhs) {
110     if (rhs == lhs) {
111         return TRUE;
112     }
113     if (lhs && rhs) {
114         return u_strcmp(lhs, rhs) == 0;
115     }
116     return FALSE;
117 }
118 
119 UBool
operator ==(const LocalizationInfo * rhs) const120 LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
121     if (rhs) {
122         if (this == rhs) {
123             return TRUE;
124         }
125 
126         int32_t rsc = getNumberOfRuleSets();
127         if (rsc == rhs->getNumberOfRuleSets()) {
128             for (int i = 0; i < rsc; ++i) {
129                 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) {
130                     return FALSE;
131                 }
132             }
133             int32_t dlc = getNumberOfDisplayLocales();
134             if (dlc == rhs->getNumberOfDisplayLocales()) {
135                 for (int i = 0; i < dlc; ++i) {
136                     const UChar* locale = getLocaleName(i);
137                     int32_t ix = rhs->indexForLocale(locale);
138                     // if no locale, ix is -1, getLocaleName returns null, so streq returns false
139                     if (!streq(locale, rhs->getLocaleName(ix))) {
140                         return FALSE;
141                     }
142                     for (int j = 0; j < rsc; ++j) {
143                         if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) {
144                             return FALSE;
145                         }
146                     }
147                 }
148                 return TRUE;
149             }
150         }
151     }
152     return FALSE;
153 }
154 
155 int32_t
indexForLocale(const UChar * locale) const156 LocalizationInfo::indexForLocale(const UChar* locale) const {
157     for (int i = 0; i < getNumberOfDisplayLocales(); ++i) {
158         if (streq(locale, getLocaleName(i))) {
159             return i;
160         }
161     }
162     return -1;
163 }
164 
165 int32_t
indexForRuleSet(const UChar * ruleset) const166 LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
167     if (ruleset) {
168         for (int i = 0; i < getNumberOfRuleSets(); ++i) {
169             if (streq(ruleset, getRuleSetName(i))) {
170                 return i;
171             }
172         }
173     }
174     return -1;
175 }
176 
177 
178 typedef void (*Fn_Deleter)(void*);
179 
180 class VArray {
181     void** buf;
182     int32_t cap;
183     int32_t size;
184     Fn_Deleter deleter;
185 public:
VArray()186     VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {}
187 
VArray(Fn_Deleter del)188     VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {}
189 
~VArray()190     ~VArray() {
191         if (deleter) {
192             for (int i = 0; i < size; ++i) {
193                 (*deleter)(buf[i]);
194             }
195         }
196         uprv_free(buf);
197     }
198 
length()199     int32_t length() {
200         return size;
201     }
202 
add(void * elem,UErrorCode & status)203     void add(void* elem, UErrorCode& status) {
204         if (U_SUCCESS(status)) {
205             if (size == cap) {
206                 if (cap == 0) {
207                     cap = 1;
208                 } else if (cap < 256) {
209                     cap *= 2;
210                 } else {
211                     cap += 256;
212                 }
213                 if (buf == NULL) {
214                     buf = (void**)uprv_malloc(cap * sizeof(void*));
215                 } else {
216                     buf = (void**)uprv_realloc(buf, cap * sizeof(void*));
217                 }
218                 if (buf == NULL) {
219                     // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
220                     status = U_MEMORY_ALLOCATION_ERROR;
221                     return;
222                 }
223                 void* start = &buf[size];
224                 size_t count = (cap - size) * sizeof(void*);
225                 uprv_memset(start, 0, count); // fill with nulls, just because
226             }
227             buf[size++] = elem;
228         }
229     }
230 
release(void)231     void** release(void) {
232         void** result = buf;
233         buf = NULL;
234         cap = 0;
235         size = 0;
236         return result;
237     }
238 };
239 
240 class LocDataParser;
241 
242 class StringLocalizationInfo : public LocalizationInfo {
243     UChar* info;
244     UChar*** data;
245     int32_t numRuleSets;
246     int32_t numLocales;
247 
248 friend class LocDataParser;
249 
StringLocalizationInfo(UChar * i,UChar *** d,int32_t numRS,int32_t numLocs)250     StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs)
251         : info(i), data(d), numRuleSets(numRS), numLocales(numLocs)
252     {
253     }
254 
255 public:
256     static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);
257 
258     virtual ~StringLocalizationInfo();
getNumberOfRuleSets(void) const259     virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; }
260     virtual const UChar* getRuleSetName(int32_t index) const;
getNumberOfDisplayLocales(void) const261     virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; }
262     virtual const UChar* getLocaleName(int32_t index) const;
263     virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;
264 
265 //    virtual UClassID getDynamicClassID() const;
266 //    static UClassID getStaticClassID(void);
267 
268 private:
269     void init(UErrorCode& status) const;
270 };
271 
272 
// Code points of the punctuation recognized by the localization-data parser.
enum {
    OPEN_ANGLE  = 0x003C, // '<'
    CLOSE_ANGLE = 0x003E, // '>'
    COMMA       = 0x002C, // ','
    TICK        = 0x0027, // '\''
    QUOTE       = 0x0022, // '"'
    SPACE       = 0x0020  // ' '
};
281 
282 /**
283  * Utility for parsing a localization string and returning a StringLocalizationInfo*.
284  */
285 class LocDataParser {
286     UChar* data;
287     const UChar* e;
288     UChar* p;
289     UChar ch;
290     UParseError& pe;
291     UErrorCode& ec;
292 
293 public:
LocDataParser(UParseError & parseError,UErrorCode & status)294     LocDataParser(UParseError& parseError, UErrorCode& status)
295         : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {}
~LocDataParser()296     ~LocDataParser() {}
297 
298     /*
299     * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
300     * and return NULL.  The StringLocalizationInfo will adopt locData if it is created.
301     */
302     StringLocalizationInfo* parse(UChar* data, int32_t len);
303 
304 private:
305 
inc(void)306     void inc(void) { ++p; ch = 0xffff; }
checkInc(UChar c)307     UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; }
check(UChar c)308     UBool check(UChar c) { return p < e && (ch == c || *p == c); }
skipWhitespace(void)309     void skipWhitespace(void) { while (p < e && uprv_isRuleWhiteSpace(ch != 0xffff ? ch : *p)) inc();}
inList(UChar c,const UChar * list) const310     UBool inList(UChar c, const UChar* list) const {
311         if (*list == SPACE && uprv_isRuleWhiteSpace(c)) return TRUE;
312         while (*list && *list != c) ++list; return *list == c;
313     }
314     void parseError(const char* msg);
315 
316     StringLocalizationInfo* doParse(void);
317 
318     UChar** nextArray(int32_t& requiredLength);
319     UChar*  nextString(void);
320 };
321 
// Records a parse error and returns NULL from the enclosing function.
// The explanation text is only forwarded in DEBUG builds.
// Wrapped in do { } while (0) so that `ERROR(...);` is a single statement
// and stays correct inside an unbraced if/else.
#ifdef DEBUG
#define ERROR(msg) do { parseError(msg); return NULL; } while (0)
#else
#define ERROR(msg) do { parseError(NULL); return NULL; } while (0)
#endif
327 
328 
329 static const UChar DQUOTE_STOPLIST[] = {
330     QUOTE, 0
331 };
332 
333 static const UChar SQUOTE_STOPLIST[] = {
334     TICK, 0
335 };
336 
337 static const UChar NOQUOTE_STOPLIST[] = {
338     SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0
339 };
340 
341 static void
DeleteFn(void * p)342 DeleteFn(void* p) {
343   uprv_free(p);
344 }
345 
346 StringLocalizationInfo*
parse(UChar * _data,int32_t len)347 LocDataParser::parse(UChar* _data, int32_t len) {
348     if (U_FAILURE(ec)) {
349         if (_data) uprv_free(_data);
350         return NULL;
351     }
352 
353     pe.line = 0;
354     pe.offset = -1;
355     pe.postContext[0] = 0;
356     pe.preContext[0] = 0;
357 
358     if (_data == NULL) {
359         ec = U_ILLEGAL_ARGUMENT_ERROR;
360         return NULL;
361     }
362 
363     if (len <= 0) {
364         ec = U_ILLEGAL_ARGUMENT_ERROR;
365         uprv_free(_data);
366         return NULL;
367     }
368 
369     data = _data;
370     e = data + len;
371     p = _data;
372     ch = 0xffff;
373 
374     return doParse();
375 }
376 
377 
378 StringLocalizationInfo*
doParse(void)379 LocDataParser::doParse(void) {
380     skipWhitespace();
381     if (!checkInc(OPEN_ANGLE)) {
382         ERROR("Missing open angle");
383     } else {
384         VArray array(DeleteFn);
385         UBool mightHaveNext = TRUE;
386         int32_t requiredLength = -1;
387         while (mightHaveNext) {
388             mightHaveNext = FALSE;
389             UChar** elem = nextArray(requiredLength);
390             skipWhitespace();
391             UBool haveComma = check(COMMA);
392             if (elem) {
393                 array.add(elem, ec);
394                 if (haveComma) {
395                     inc();
396                     mightHaveNext = TRUE;
397                 }
398             } else if (haveComma) {
399                 ERROR("Unexpected character");
400             }
401         }
402 
403         skipWhitespace();
404         if (!checkInc(CLOSE_ANGLE)) {
405             if (check(OPEN_ANGLE)) {
406                 ERROR("Missing comma in outer array");
407             } else {
408                 ERROR("Missing close angle bracket in outer array");
409             }
410         }
411 
412         skipWhitespace();
413         if (p != e) {
414             ERROR("Extra text after close of localization data");
415         }
416 
417         array.add(NULL, ec);
418         if (U_SUCCESS(ec)) {
419             int32_t numLocs = array.length() - 2; // subtract first, NULL
420             UChar*** result = (UChar***)array.release();
421 
422             return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL
423         }
424     }
425 
426     ERROR("Unknown error");
427 }
428 
429 UChar**
nextArray(int32_t & requiredLength)430 LocDataParser::nextArray(int32_t& requiredLength) {
431     if (U_FAILURE(ec)) {
432         return NULL;
433     }
434 
435     skipWhitespace();
436     if (!checkInc(OPEN_ANGLE)) {
437         ERROR("Missing open angle");
438     }
439 
440     VArray array;
441     UBool mightHaveNext = TRUE;
442     while (mightHaveNext) {
443         mightHaveNext = FALSE;
444         UChar* elem = nextString();
445         skipWhitespace();
446         UBool haveComma = check(COMMA);
447         if (elem) {
448             array.add(elem, ec);
449             if (haveComma) {
450                 inc();
451                 mightHaveNext = TRUE;
452             }
453         } else if (haveComma) {
454             ERROR("Unexpected comma");
455         }
456     }
457     skipWhitespace();
458     if (!checkInc(CLOSE_ANGLE)) {
459         if (check(OPEN_ANGLE)) {
460             ERROR("Missing close angle bracket in inner array");
461         } else {
462             ERROR("Missing comma in inner array");
463         }
464     }
465 
466     array.add(NULL, ec);
467     if (U_SUCCESS(ec)) {
468         if (requiredLength == -1) {
469             requiredLength = array.length() + 1;
470         } else if (array.length() != requiredLength) {
471             ec = U_ILLEGAL_ARGUMENT_ERROR;
472             ERROR("Array not of required length");
473         }
474 
475         return (UChar**)array.release();
476     }
477     ERROR("Unknown Error");
478 }
479 
480 UChar*
nextString()481 LocDataParser::nextString() {
482     UChar* result = NULL;
483 
484     skipWhitespace();
485     if (p < e) {
486         const UChar* terminators;
487         UChar c = *p;
488         UBool haveQuote = c == QUOTE || c == TICK;
489         if (haveQuote) {
490             inc();
491             terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
492         } else {
493             terminators = NOQUOTE_STOPLIST;
494         }
495         UChar* start = p;
496         while (p < e && !inList(*p, terminators)) ++p;
497         if (p == e) {
498             ERROR("Unexpected end of data");
499         }
500 
501         UChar x = *p;
502         if (p > start) {
503             ch = x;
504             *p = 0x0; // terminate by writing to data
505             result = start; // just point into data
506         }
507         if (haveQuote) {
508             if (x != c) {
509                 ERROR("Missing matching quote");
510             } else if (p == start) {
511                 ERROR("Empty string");
512             }
513             inc();
514         } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) {
515             ERROR("Unexpected character in string");
516         }
517     }
518 
519     // ok for there to be no next string
520     return result;
521 }
522 
523 void
parseError(const char *)524 LocDataParser::parseError(const char* /*str*/) {
525     if (!data) {
526         return;
527     }
528 
529     const UChar* start = p - U_PARSE_CONTEXT_LEN - 1;
530     if (start < data) {
531         start = data;
532     }
533     for (UChar* x = p; --x >= start;) {
534         if (!*x) {
535             start = x+1;
536             break;
537         }
538     }
539     const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1;
540     if (limit > e) {
541         limit = e;
542     }
543     u_strncpy(pe.preContext, start, (int32_t)(p-start));
544     pe.preContext[p-start] = 0;
545     u_strncpy(pe.postContext, p, (int32_t)(limit-p));
546     pe.postContext[limit-p] = 0;
547     pe.offset = (int32_t)(p - data);
548 
549 #ifdef DEBUG
550     fprintf(stderr, "%s at or near character %d: ", str, p-data);
551 
552     UnicodeString msg;
553     msg.append(start, p - start);
554     msg.append((UChar)0x002f); /* SOLIDUS/SLASH */
555     msg.append(p, limit-p);
556     msg.append("'");
557 
558     char buf[128];
559     int32_t len = msg.extract(0, msg.length(), buf, 128);
560     if (len >= 128) {
561         buf[127] = 0;
562     } else {
563         buf[len] = 0;
564     }
565     fprintf(stderr, "%s\n", buf);
566     fflush(stderr);
567 #endif
568 
569     uprv_free(data);
570     data = NULL;
571     p = NULL;
572     e = NULL;
573 
574     if (U_SUCCESS(ec)) {
575         ec = U_PARSE_ERROR;
576     }
577 }
578 
579 //UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
580 
581 StringLocalizationInfo*
create(const UnicodeString & info,UParseError & perror,UErrorCode & status)582 StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
583     if (U_FAILURE(status)) {
584         return NULL;
585     }
586 
587     int32_t len = info.length();
588     if (len == 0) {
589         return NULL; // no error;
590     }
591 
592     UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar));
593     if (!p) {
594         status = U_MEMORY_ALLOCATION_ERROR;
595         return NULL;
596     }
597     info.extract(p, len, status);
598     if (!U_FAILURE(status)) {
599         status = U_ZERO_ERROR; // clear warning about non-termination
600     }
601 
602     LocDataParser parser(perror, status);
603     return parser.parse(p, len);
604 }
605 
~StringLocalizationInfo()606 StringLocalizationInfo::~StringLocalizationInfo() {
607     for (UChar*** p = (UChar***)data; *p; ++p) {
608         // remaining data is simply pointer into our unicode string data.
609         if (*p) uprv_free(*p);
610     }
611     if (data) uprv_free(data);
612     if (info) uprv_free(info);
613 }
614 
615 
616 const UChar*
getRuleSetName(int32_t index) const617 StringLocalizationInfo::getRuleSetName(int32_t index) const {
618     if (index >= 0 && index < getNumberOfRuleSets()) {
619         return data[0][index];
620     }
621     return NULL;
622 }
623 
624 const UChar*
getLocaleName(int32_t index) const625 StringLocalizationInfo::getLocaleName(int32_t index) const {
626     if (index >= 0 && index < getNumberOfDisplayLocales()) {
627         return data[index+1][0];
628     }
629     return NULL;
630 }
631 
632 const UChar*
getDisplayName(int32_t localeIndex,int32_t ruleIndex) const633 StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
634     if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() &&
635         ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) {
636         return data[localeIndex+1][ruleIndex+1];
637     }
638     return NULL;
639 }
640 
641 // ----------
642 
RuleBasedNumberFormat(const UnicodeString & description,const UnicodeString & locs,const Locale & alocale,UParseError & perror,UErrorCode & status)643 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
644                                              const UnicodeString& locs,
645                                              const Locale& alocale, UParseError& perror, UErrorCode& status)
646   : ruleSets(NULL)
647   , defaultRuleSet(NULL)
648   , locale(alocale)
649   , collator(NULL)
650   , decimalFormatSymbols(NULL)
651   , lenient(FALSE)
652   , lenientParseRules(NULL)
653   , localizations(NULL)
654 {
655   LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
656   init(description, locinfo, perror, status);
657 }
658 
RuleBasedNumberFormat(const UnicodeString & description,const UnicodeString & locs,UParseError & perror,UErrorCode & status)659 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
660                                              const UnicodeString& locs,
661                                              UParseError& perror, UErrorCode& status)
662   : ruleSets(NULL)
663   , defaultRuleSet(NULL)
664   , locale(Locale::getDefault())
665   , collator(NULL)
666   , decimalFormatSymbols(NULL)
667   , lenient(FALSE)
668   , lenientParseRules(NULL)
669   , localizations(NULL)
670 {
671   LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
672   init(description, locinfo, perror, status);
673 }
674 
RuleBasedNumberFormat(const UnicodeString & description,LocalizationInfo * info,const Locale & alocale,UParseError & perror,UErrorCode & status)675 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
676                                              LocalizationInfo* info,
677                                              const Locale& alocale, UParseError& perror, UErrorCode& status)
678   : ruleSets(NULL)
679   , defaultRuleSet(NULL)
680   , locale(alocale)
681   , collator(NULL)
682   , decimalFormatSymbols(NULL)
683   , lenient(FALSE)
684   , lenientParseRules(NULL)
685   , localizations(NULL)
686 {
687   init(description, info, perror, status);
688 }
689 
RuleBasedNumberFormat(const UnicodeString & description,UParseError & perror,UErrorCode & status)690 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
691                          UParseError& perror,
692                          UErrorCode& status)
693   : ruleSets(NULL)
694   , defaultRuleSet(NULL)
695   , locale(Locale::getDefault())
696   , collator(NULL)
697   , decimalFormatSymbols(NULL)
698   , lenient(FALSE)
699   , lenientParseRules(NULL)
700   , localizations(NULL)
701 {
702     init(description, NULL, perror, status);
703 }
704 
RuleBasedNumberFormat(const UnicodeString & description,const Locale & aLocale,UParseError & perror,UErrorCode & status)705 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
706                          const Locale& aLocale,
707                          UParseError& perror,
708                          UErrorCode& status)
709   : ruleSets(NULL)
710   , defaultRuleSet(NULL)
711   , locale(aLocale)
712   , collator(NULL)
713   , decimalFormatSymbols(NULL)
714   , lenient(FALSE)
715   , lenientParseRules(NULL)
716   , localizations(NULL)
717 {
718     init(description, NULL, perror, status);
719 }
720 
RuleBasedNumberFormat(URBNFRuleSetTag tag,const Locale & alocale,UErrorCode & status)721 RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
722   : ruleSets(NULL)
723   , defaultRuleSet(NULL)
724   , locale(alocale)
725   , collator(NULL)
726   , decimalFormatSymbols(NULL)
727   , lenient(FALSE)
728   , lenientParseRules(NULL)
729   , localizations(NULL)
730 {
731     if (U_FAILURE(status)) {
732         return;
733     }
734 
735     const char* fmt_tag = "";
736     switch (tag) {
737     case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
738     case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
739     case URBNF_DURATION: fmt_tag = "DurationRules"; break;
740     default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
741     }
742 
743     // TODO: read localization info from resource
744     LocalizationInfo* locinfo = NULL;
745 
746     int32_t len = 0;
747     UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
748     if (U_SUCCESS(status)) {
749         setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
750                      ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));
751         const UChar* description = ures_getStringByKey(nfrb, fmt_tag, &len, &status);
752         UnicodeString desc(description, len);
753         UParseError perror;
754         init (desc, locinfo, perror, status);
755     }
756     ures_close(nfrb);
757 }
758 
// Copy constructor: copies the NumberFormat base, starts from an empty
// state, then delegates the rest to operator=.
RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
  : NumberFormat(rhs),
    ruleSets(NULL),
    defaultRuleSet(NULL),
    locale(rhs.locale),
    collator(NULL),
    decimalFormatSymbols(NULL),
    lenient(FALSE),
    lenientParseRules(NULL),
    localizations(NULL)
{
    *this = rhs;
}
772 
773 // --------
774 
775 RuleBasedNumberFormat&
operator =(const RuleBasedNumberFormat & rhs)776 RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
777 {
778     UErrorCode status = U_ZERO_ERROR;
779     dispose();
780     locale = rhs.locale;
781     lenient = rhs.lenient;
782 
783     UnicodeString rules = rhs.getRules();
784     UParseError perror;
785     init(rules, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
786 
787     return *this;
788 }
789 
~RuleBasedNumberFormat()790 RuleBasedNumberFormat::~RuleBasedNumberFormat()
791 {
792     dispose();
793 }
794 
795 Format*
clone(void) const796 RuleBasedNumberFormat::clone(void) const
797 {
798     RuleBasedNumberFormat * result = NULL;
799     UnicodeString rules = getRules();
800     UErrorCode status = U_ZERO_ERROR;
801     UParseError perror;
802     result = new RuleBasedNumberFormat(rules, localizations, locale, perror, status);
803     /* test for NULL */
804     if (result == 0) {
805         status = U_MEMORY_ALLOCATION_ERROR;
806         return 0;
807     }
808     if (U_FAILURE(status)) {
809         delete result;
810         result = 0;
811     } else {
812         result->lenient = lenient;
813     }
814     return result;
815 }
816 
817 UBool
operator ==(const Format & other) const818 RuleBasedNumberFormat::operator==(const Format& other) const
819 {
820     if (this == &other) {
821         return TRUE;
822     }
823 
824     if (other.getDynamicClassID() == getStaticClassID()) {
825         const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
826         if (locale == rhs.locale &&
827             lenient == rhs.lenient &&
828             (localizations == NULL
829                 ? rhs.localizations == NULL
830                 : (rhs.localizations == NULL
831                     ? FALSE
832                     : *localizations == rhs.localizations))) {
833 
834             NFRuleSet** p = ruleSets;
835             NFRuleSet** q = rhs.ruleSets;
836             if (p == NULL) {
837                 return q == NULL;
838             } else if (q == NULL) {
839                 return FALSE;
840             }
841             while (*p && *q && (**p == **q)) {
842                 ++p;
843                 ++q;
844             }
845             return *q == NULL && *p == NULL;
846         }
847     }
848 
849     return FALSE;
850 }
851 
852 UnicodeString
getRules() const853 RuleBasedNumberFormat::getRules() const
854 {
855     UnicodeString result;
856     if (ruleSets != NULL) {
857         for (NFRuleSet** p = ruleSets; *p; ++p) {
858             (*p)->appendRules(result);
859         }
860     }
861     return result;
862 }
863 
864 UnicodeString
getRuleSetName(int32_t index) const865 RuleBasedNumberFormat::getRuleSetName(int32_t index) const
866 {
867     if (localizations) {
868       UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1);
869       return string;
870     } else if (ruleSets) {
871         UnicodeString result;
872         for (NFRuleSet** p = ruleSets; *p; ++p) {
873             NFRuleSet* rs = *p;
874             if (rs->isPublic()) {
875                 if (--index == -1) {
876                     rs->getName(result);
877                     return result;
878                 }
879             }
880         }
881     }
882     UnicodeString empty;
883     return empty;
884 }
885 
886 int32_t
getNumberOfRuleSetNames() const887 RuleBasedNumberFormat::getNumberOfRuleSetNames() const
888 {
889     int32_t result = 0;
890     if (localizations) {
891       result = localizations->getNumberOfRuleSets();
892     } else if (ruleSets) {
893         for (NFRuleSet** p = ruleSets; *p; ++p) {
894             if ((**p).isPublic()) {
895                 ++result;
896             }
897         }
898     }
899     return result;
900 }
901 
902 int32_t
getNumberOfRuleSetDisplayNameLocales(void) const903 RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
904     if (localizations) {
905         return localizations->getNumberOfDisplayLocales();
906     }
907     return 0;
908 }
909 
910 Locale
getRuleSetDisplayNameLocale(int32_t index,UErrorCode & status) const911 RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
912     if (U_FAILURE(status)) {
913         return Locale("");
914     }
915     if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
916         UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
917         char buffer[64];
918         int32_t cap = name.length() + 1;
919         char* bp = buffer;
920         if (cap > 64) {
921             bp = (char *)uprv_malloc(cap);
922             if (bp == NULL) {
923                 status = U_MEMORY_ALLOCATION_ERROR;
924                 return Locale("");
925             }
926         }
927         name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
928         Locale retLocale(bp);
929         if (bp != buffer) {
930             uprv_free(bp);
931         }
932         return retLocale;
933     }
934     status = U_ILLEGAL_ARGUMENT_ERROR;
935     Locale retLocale;
936     return retLocale;
937 }
938 
939 UnicodeString
getRuleSetDisplayName(int32_t index,const Locale & localeParam)940 RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
941     if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) {
942         UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
943         int32_t len = localeName.length();
944         UChar* localeStr = localeName.getBuffer(len + 1);
945         while (len >= 0) {
946             localeStr[len] = 0;
947             int32_t ix = localizations->indexForLocale(localeStr);
948             if (ix >= 0) {
949                 UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
950                 return name;
951             }
952 
953             // trim trailing portion, skipping over ommitted sections
954             do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore
955             while (len > 0 && localeStr[len-1] == 0x005F) --len;
956         }
957         UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
958         return name;
959     }
960     UnicodeString bogus;
961     bogus.setToBogus();
962     return bogus;
963 }
964 
965 UnicodeString
getRuleSetDisplayName(const UnicodeString & ruleSetName,const Locale & localeParam)966 RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
967     if (localizations) {
968         UnicodeString rsn(ruleSetName);
969         int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
970         return getRuleSetDisplayName(ix, localeParam);
971     }
972     UnicodeString bogus;
973     bogus.setToBogus();
974     return bogus;
975 }
976 
977 NFRuleSet*
findRuleSet(const UnicodeString & name,UErrorCode & status) const978 RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
979 {
980     if (U_SUCCESS(status) && ruleSets) {
981         for (NFRuleSet** p = ruleSets; *p; ++p) {
982             NFRuleSet* rs = *p;
983             if (rs->isNamed(name)) {
984                 return rs;
985             }
986         }
987         status = U_ILLEGAL_ARGUMENT_ERROR;
988     }
989     return NULL;
990 }
991 
992 UnicodeString&
format(int32_t number,UnicodeString & toAppendTo,FieldPosition &) const993 RuleBasedNumberFormat::format(int32_t number,
994                               UnicodeString& toAppendTo,
995                               FieldPosition& /* pos */) const
996 {
997     if (defaultRuleSet) defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length());
998     return toAppendTo;
999 }
1000 
1001 
1002 UnicodeString&
format(int64_t number,UnicodeString & toAppendTo,FieldPosition &) const1003 RuleBasedNumberFormat::format(int64_t number,
1004                               UnicodeString& toAppendTo,
1005                               FieldPosition& /* pos */) const
1006 {
1007     if (defaultRuleSet) defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
1008     return toAppendTo;
1009 }
1010 
1011 
1012 UnicodeString&
format(double number,UnicodeString & toAppendTo,FieldPosition &) const1013 RuleBasedNumberFormat::format(double number,
1014                               UnicodeString& toAppendTo,
1015                               FieldPosition& /* pos */) const
1016 {
1017     if (defaultRuleSet) defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
1018     return toAppendTo;
1019 }
1020 
1021 
1022 UnicodeString&
format(int32_t number,const UnicodeString & ruleSetName,UnicodeString & toAppendTo,FieldPosition &,UErrorCode & status) const1023 RuleBasedNumberFormat::format(int32_t number,
1024                               const UnicodeString& ruleSetName,
1025                               UnicodeString& toAppendTo,
1026                               FieldPosition& /* pos */,
1027                               UErrorCode& status) const
1028 {
1029     // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
1030     if (U_SUCCESS(status)) {
1031         if (ruleSetName.indexOf(gPercentPercent) == 0) {
1032             // throw new IllegalArgumentException("Can't use internal rule set");
1033             status = U_ILLEGAL_ARGUMENT_ERROR;
1034         } else {
1035             NFRuleSet *rs = findRuleSet(ruleSetName, status);
1036             if (rs) {
1037                 rs->format((int64_t)number, toAppendTo, toAppendTo.length());
1038             }
1039         }
1040     }
1041     return toAppendTo;
1042 }
1043 
1044 
1045 UnicodeString&
format(int64_t number,const UnicodeString & ruleSetName,UnicodeString & toAppendTo,FieldPosition &,UErrorCode & status) const1046 RuleBasedNumberFormat::format(int64_t number,
1047                               const UnicodeString& ruleSetName,
1048                               UnicodeString& toAppendTo,
1049                               FieldPosition& /* pos */,
1050                               UErrorCode& status) const
1051 {
1052     if (U_SUCCESS(status)) {
1053         if (ruleSetName.indexOf(gPercentPercent) == 0) {
1054             // throw new IllegalArgumentException("Can't use internal rule set");
1055             status = U_ILLEGAL_ARGUMENT_ERROR;
1056         } else {
1057             NFRuleSet *rs = findRuleSet(ruleSetName, status);
1058             if (rs) {
1059                 rs->format(number, toAppendTo, toAppendTo.length());
1060             }
1061         }
1062     }
1063     return toAppendTo;
1064 }
1065 
1066 
1067 // make linker happy
1068 UnicodeString&
format(const Formattable & obj,UnicodeString & toAppendTo,FieldPosition & pos,UErrorCode & status) const1069 RuleBasedNumberFormat::format(const Formattable& obj,
1070                               UnicodeString& toAppendTo,
1071                               FieldPosition& pos,
1072                               UErrorCode& status) const
1073 {
1074     return NumberFormat::format(obj, toAppendTo, pos, status);
1075 }
1076 
1077 UnicodeString&
format(double number,const UnicodeString & ruleSetName,UnicodeString & toAppendTo,FieldPosition &,UErrorCode & status) const1078 RuleBasedNumberFormat::format(double number,
1079                               const UnicodeString& ruleSetName,
1080                               UnicodeString& toAppendTo,
1081                               FieldPosition& /* pos */,
1082                               UErrorCode& status) const
1083 {
1084     if (U_SUCCESS(status)) {
1085         if (ruleSetName.indexOf(gPercentPercent) == 0) {
1086             // throw new IllegalArgumentException("Can't use internal rule set");
1087             status = U_ILLEGAL_ARGUMENT_ERROR;
1088         } else {
1089             NFRuleSet *rs = findRuleSet(ruleSetName, status);
1090             if (rs) {
1091                 rs->format(number, toAppendTo, toAppendTo.length());
1092             }
1093         }
1094     }
1095     return toAppendTo;
1096 }
1097 
1098 void
parse(const UnicodeString & text,Formattable & result,ParsePosition & parsePosition) const1099 RuleBasedNumberFormat::parse(const UnicodeString& text,
1100                              Formattable& result,
1101                              ParsePosition& parsePosition) const
1102 {
1103     if (!ruleSets) {
1104         parsePosition.setErrorIndex(0);
1105         return;
1106     }
1107 
1108     UnicodeString workingText(text, parsePosition.getIndex());
1109     ParsePosition workingPos(0);
1110 
1111     ParsePosition high_pp(0);
1112     Formattable high_result;
1113 
1114     for (NFRuleSet** p = ruleSets; *p; ++p) {
1115         NFRuleSet *rp = *p;
1116         if (rp->isPublic()) {
1117             ParsePosition working_pp(0);
1118             Formattable working_result;
1119 
1120             rp->parse(workingText, working_pp, kMaxDouble, working_result);
1121             if (working_pp.getIndex() > high_pp.getIndex()) {
1122                 high_pp = working_pp;
1123                 high_result = working_result;
1124 
1125                 if (high_pp.getIndex() == workingText.length()) {
1126                     break;
1127                 }
1128             }
1129         }
1130     }
1131 
1132     parsePosition.setIndex(parsePosition.getIndex() + high_pp.getIndex());
1133     if (high_pp.getIndex() > 0) {
1134         parsePosition.setErrorIndex(-1);
1135     }
1136     result = high_result;
1137     if (result.getType() == Formattable::kDouble) {
1138         int32_t r = (int32_t)result.getDouble();
1139         if ((double)r == result.getDouble()) {
1140             result.setLong(r);
1141         }
1142     }
1143 }
1144 
1145 #if !UCONFIG_NO_COLLATION
1146 
1147 void
setLenient(UBool enabled)1148 RuleBasedNumberFormat::setLenient(UBool enabled)
1149 {
1150     lenient = enabled;
1151     if (!enabled && collator) {
1152         delete collator;
1153         collator = NULL;
1154     }
1155 }
1156 
1157 #endif
1158 
1159 void
setDefaultRuleSet(const UnicodeString & ruleSetName,UErrorCode & status)1160 RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
1161     if (U_SUCCESS(status)) {
1162         if (ruleSetName.isEmpty()) {
1163           if (localizations) {
1164               UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
1165               defaultRuleSet = findRuleSet(name, status);
1166           } else {
1167             initDefaultRuleSet();
1168           }
1169         } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
1170             status = U_ILLEGAL_ARGUMENT_ERROR;
1171         } else {
1172             NFRuleSet* result = findRuleSet(ruleSetName, status);
1173             if (result != NULL) {
1174                 defaultRuleSet = result;
1175             }
1176         }
1177     }
1178 }
1179 
1180 UnicodeString
getDefaultRuleSetName() const1181 RuleBasedNumberFormat::getDefaultRuleSetName() const {
1182   UnicodeString result;
1183   if (defaultRuleSet && defaultRuleSet->isPublic()) {
1184     defaultRuleSet->getName(result);
1185   } else {
1186     result.setToBogus();
1187   }
1188   return result;
1189 }
1190 
1191 void
initDefaultRuleSet()1192 RuleBasedNumberFormat::initDefaultRuleSet()
1193 {
1194     defaultRuleSet = NULL;
1195     if (!ruleSets) {
1196       return;
1197     }
1198 
1199     NFRuleSet**p = &ruleSets[0];
1200     while (*p) {
1201         ++p;
1202     }
1203 
1204     defaultRuleSet = *--p;
1205     if (!defaultRuleSet->isPublic()) {
1206         while (p != ruleSets) {
1207             if ((*--p)->isPublic()) {
1208                 defaultRuleSet = *p;
1209                 break;
1210             }
1211         }
1212     }
1213 }
1214 
1215 
1216 void
init(const UnicodeString & rules,LocalizationInfo * localizationInfos,UParseError & pErr,UErrorCode & status)1217 RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
1218                             UParseError& pErr, UErrorCode& status)
1219 {
1220     // TODO: implement UParseError
1221     uprv_memset(&pErr, 0, sizeof(UParseError));
1222     // Note: this can leave ruleSets == NULL, so remaining code should check
1223     if (U_FAILURE(status)) {
1224         return;
1225     }
1226 
1227     this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();
1228 
1229     UnicodeString description(rules);
1230     if (!description.length()) {
1231         status = U_MEMORY_ALLOCATION_ERROR;
1232         return;
1233     }
1234 
1235     // start by stripping the trailing whitespace from all the rules
1236     // (this is all the whitespace follwing each semicolon in the
1237     // description).  This allows us to look for rule-set boundaries
1238     // by searching for ";%" without having to worry about whitespace
1239     // between the ; and the %
1240     stripWhitespace(description);
1241 
1242     // check to see if there's a set of lenient-parse rules.  If there
1243     // is, pull them out into our temporary holding place for them,
1244     // and delete them from the description before the real desciption-
1245     // parsing code sees them
1246     int32_t lp = description.indexOf(gLenientParse);
1247     if (lp != -1) {
1248         // we've got to make sure we're not in the middle of a rule
1249         // (where "%%lenient-parse" would actually get treated as
1250         // rule text)
1251         if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
1252             // locate the beginning and end of the actual collation
1253             // rules (there may be whitespace between the name and
1254             // the first token in the description)
1255             int lpEnd = description.indexOf(gSemiPercent, lp);
1256 
1257             if (lpEnd == -1) {
1258                 lpEnd = description.length() - 1;
1259             }
1260             int lpStart = lp + u_strlen(gLenientParse);
1261             while (uprv_isRuleWhiteSpace(description.charAt(lpStart))) {
1262                 ++lpStart;
1263             }
1264 
1265             // copy out the lenient-parse rules and delete them
1266             // from the description
1267             lenientParseRules = new UnicodeString();
1268             /* test for NULL */
1269             if (lenientParseRules == 0) {
1270                 status = U_MEMORY_ALLOCATION_ERROR;
1271                 return;
1272             }
1273             lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);
1274 
1275             description.remove(lp, lpEnd + 1 - lp);
1276         }
1277     }
1278 
1279     // pre-flight parsing the description and count the number of
1280     // rule sets (";%" marks the end of one rule set and the beginning
1281     // of the next)
1282     int numRuleSets = 0;
1283     for (int32_t p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, p)) {
1284         ++numRuleSets;
1285         ++p;
1286     }
1287     ++numRuleSets;
1288 
1289     // our rule list is an array of the appropriate size
1290     ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *));
1291     /* test for NULL */
1292     if (ruleSets == 0) {
1293         status = U_MEMORY_ALLOCATION_ERROR;
1294         return;
1295     }
1296 
1297     for (int i = 0; i <= numRuleSets; ++i) {
1298         ruleSets[i] = NULL;
1299     }
1300 
1301     // divide up the descriptions into individual rule-set descriptions
1302     // and store them in a temporary array.  At each step, we also
1303     // new up a rule set, but all this does is initialize its name
1304     // and remove it from its description.  We can't actually parse
1305     // the rest of the descriptions and finish initializing everything
1306     // because we have to know the names and locations of all the rule
1307     // sets before we can actually set everything up
1308     if(!numRuleSets) {
1309       status = U_ILLEGAL_ARGUMENT_ERROR;
1310       return;
1311     }
1312     UnicodeString* ruleSetDescriptions = new UnicodeString[numRuleSets];
1313     /* test for NULL */
1314     if (ruleSetDescriptions == 0) {
1315         status = U_MEMORY_ALLOCATION_ERROR;
1316         return;
1317     }
1318 
1319     {
1320         int curRuleSet = 0;
1321         int32_t start = 0;
1322         for (int32_t p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, start)) {
1323             ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
1324             ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
1325             /* test for NULL */
1326             if (ruleSets[curRuleSet] == 0) {
1327                 status = U_MEMORY_ALLOCATION_ERROR;
1328                 return;
1329             }
1330             ++curRuleSet;
1331             start = p + 1;
1332         }
1333         ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
1334         ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
1335         /* test for NULL */
1336         if (ruleSets[curRuleSet] == 0) {
1337             status = U_MEMORY_ALLOCATION_ERROR;
1338             return;
1339         }
1340     }
1341 
1342     // now we can take note of the formatter's default rule set, which
1343     // is the last public rule set in the description (it's the last
1344     // rather than the first so that a user can create a new formatter
1345     // from an existing formatter and change its default behavior just
1346     // by appending more rule sets to the end)
1347 
1348     // {dlf} Initialization of a fraction rule set requires the default rule
1349     // set to be known.  For purposes of initialization, this is always the
1350     // last public rule set, no matter what the localization data says.
1351     initDefaultRuleSet();
1352 
1353     // finally, we can go back through the temporary descriptions
1354     // list and finish seting up the substructure (and we throw
1355     // away the temporary descriptions as we go)
1356     {
1357         for (int i = 0; i < numRuleSets; i++) {
1358             ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
1359         }
1360     }
1361 
1362     delete[] ruleSetDescriptions;
1363 
1364     // Now that the rules are initialized, the 'real' default rule
1365     // set can be adjusted by the localization data.
1366 
1367     // The C code keeps the localization array as is, rather than building
1368     // a separate array of the public rule set names, so we have less work
1369     // to do here-- but we still need to check the names.
1370 
1371     if (localizationInfos) {
1372         // confirm the names, if any aren't in the rules, that's an error
1373         // it is ok if the rules contain public rule sets that are not in this list
1374         for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) {
1375             UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1);
1376             NFRuleSet* rs = findRuleSet(name, status);
1377             if (rs == NULL) {
1378                 break; // error
1379             }
1380             if (i == 0) {
1381                 defaultRuleSet = rs;
1382             }
1383         }
1384     } else {
1385         defaultRuleSet = getDefaultRuleSet();
1386     }
1387 }
1388 
1389 void
stripWhitespace(UnicodeString & description)1390 RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
1391 {
1392     // iterate through the characters...
1393     UnicodeString result;
1394 
1395     int start = 0;
1396     while (start != -1 && start < description.length()) {
1397         // seek to the first non-whitespace character...
1398         while (start < description.length()
1399             && uprv_isRuleWhiteSpace(description.charAt(start))) {
1400             ++start;
1401         }
1402 
1403         // locate the next semicolon in the text and copy the text from
1404         // our current position up to that semicolon into the result
1405         int32_t p = description.indexOf(gSemiColon, start);
1406         if (p == -1) {
1407             // or if we don't find a semicolon, just copy the rest of
1408             // the string into the result
1409             result.append(description, start, description.length() - start);
1410             start = -1;
1411         }
1412         else if (p < description.length()) {
1413             result.append(description, start, p + 1 - start);
1414             start = p + 1;
1415         }
1416 
1417         // when we get here, we've seeked off the end of the sring, and
1418         // we terminate the loop (we continue until *start* is -1 rather
1419         // than until *p* is -1, because otherwise we'd miss the last
1420         // rule in the description)
1421         else {
1422             start = -1;
1423         }
1424     }
1425 
1426     description.setTo(result);
1427 }
1428 
1429 
1430 void
dispose()1431 RuleBasedNumberFormat::dispose()
1432 {
1433     if (ruleSets) {
1434         for (NFRuleSet** p = ruleSets; *p; ++p) {
1435             delete *p;
1436         }
1437         uprv_free(ruleSets);
1438         ruleSets = NULL;
1439     }
1440 
1441 #if !UCONFIG_NO_COLLATION
1442     delete collator;
1443 #endif
1444     collator = NULL;
1445 
1446     delete decimalFormatSymbols;
1447     decimalFormatSymbols = NULL;
1448 
1449     delete lenientParseRules;
1450     lenientParseRules = NULL;
1451 
1452     if (localizations) localizations = localizations->unref();
1453 }
1454 
1455 
1456 //-----------------------------------------------------------------------
1457 // package-internal API
1458 //-----------------------------------------------------------------------
1459 
1460 /**
1461  * Returns the collator to use for lenient parsing.  The collator is lazily created:
1462  * this function creates it the first time it's called.
1463  * @return The collator to use for lenient parsing, or null if lenient parsing
1464  * is turned off.
1465 */
1466 Collator*
getCollator() const1467 RuleBasedNumberFormat::getCollator() const
1468 {
1469 #if !UCONFIG_NO_COLLATION
1470     if (!ruleSets) {
1471         return NULL;
1472     }
1473 
1474     // lazy-evaulate the collator
1475     if (collator == NULL && lenient) {
1476         // create a default collator based on the formatter's locale,
1477         // then pull out that collator's rules, append any additional
1478         // rules specified in the description, and create a _new_
1479         // collator based on the combinaiton of those rules
1480 
1481         UErrorCode status = U_ZERO_ERROR;
1482 
1483         Collator* temp = Collator::createInstance(locale, status);
1484         if (U_SUCCESS(status) &&
1485             temp->getDynamicClassID() == RuleBasedCollator::getStaticClassID()) {
1486 
1487             RuleBasedCollator* newCollator = (RuleBasedCollator*)temp;
1488             if (lenientParseRules) {
1489                 UnicodeString rules(newCollator->getRules());
1490                 rules.append(*lenientParseRules);
1491 
1492                 newCollator = new RuleBasedCollator(rules, status);
1493             } else {
1494                 temp = NULL;
1495             }
1496             if (U_SUCCESS(status)) {
1497                 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
1498                 // cast away const
1499                 ((RuleBasedNumberFormat*)this)->collator = newCollator;
1500             } else {
1501                 delete newCollator;
1502             }
1503         }
1504         delete temp;
1505     }
1506 #endif
1507 
1508     // if lenient-parse mode is off, this will be null
1509     // (see setLenientParseMode())
1510     return collator;
1511 }
1512 
1513 
1514 /**
1515  * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
1516  * instances owned by this formatter.  This object is lazily created: this function
1517  * creates it the first time it's called.
1518  * @return The DecimalFormatSymbols object that should be used by all DecimalFormat
1519  * instances owned by this formatter.
1520 */
1521 DecimalFormatSymbols*
getDecimalFormatSymbols() const1522 RuleBasedNumberFormat::getDecimalFormatSymbols() const
1523 {
1524     // lazy-evaluate the DecimalFormatSymbols object.  This object
1525     // is shared by all DecimalFormat instances belonging to this
1526     // formatter
1527     if (decimalFormatSymbols == NULL) {
1528         UErrorCode status = U_ZERO_ERROR;
1529         DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status);
1530         if (U_SUCCESS(status)) {
1531             ((RuleBasedNumberFormat*)this)->decimalFormatSymbols = temp;
1532         } else {
1533             delete temp;
1534         }
1535     }
1536     return decimalFormatSymbols;
1537 }
1538 
1539 U_NAMESPACE_END
1540 
1541 /* U_HAVE_RBNF */
1542 #endif
1543