• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 * Copyright (C) 1997-2010, International Business Machines Corporation
4 * and others. All Rights Reserved.
5 *******************************************************************************
6 */
7 
8 #include <typeinfo>  // for 'typeid' to work
9 
10 #include "unicode/rbnf.h"
11 
12 #if U_HAVE_RBNF
13 
14 #include "unicode/normlzr.h"
15 #include "unicode/tblcoll.h"
16 #include "unicode/uchar.h"
17 #include "unicode/ucol.h"
18 #include "unicode/uloc.h"
19 #include "unicode/unum.h"
20 #include "unicode/ures.h"
21 #include "unicode/ustring.h"
22 #include "unicode/utf16.h"
23 #include "unicode/udata.h"
24 #include "nfrs.h"
25 
26 #include "cmemory.h"
27 #include "cstring.h"
28 #include "util.h"
29 #include "uresimp.h"
30 
31 // debugging
32 // #define DEBUG
33 
34 #ifdef DEBUG
35 #include "stdio.h"
36 #endif
37 
38 #define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
39 
40 static const UChar gPercentPercent[] =
41 {
42     0x25, 0x25, 0
43 }; /* "%%" */
44 
45 // All urbnf objects are created through openRules, so we init all of the
46 // Unicode string constants required by rbnf, nfrs, or nfr here.
47 static const UChar gLenientParse[] =
48 {
49     0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
50 }; /* "%%lenient-parse:" */
51 static const UChar gSemiColon = 0x003B;
52 static const UChar gSemiPercent[] =
53 {
54     0x3B, 0x25, 0
55 }; /* ";%" */
56 
// kHalfMaxDouble is 2^kSomeNumberOfBitsDiv2 as a double; kMaxDouble is its
// square.  Both expansions are fully parenthesized so they can be used
// inside larger expressions.
#define kSomeNumberOfBitsDiv2 22
#define kHalfMaxDouble ((double)(1 << kSomeNumberOfBitsDiv2))
#define kMaxDouble ((kHalfMaxDouble) * (kHalfMaxDouble))
60 
61 // Temporary workaround - when noParse is true, do noting in parse.
62 // TODO: We need a real fix - see #6895/#6896
63 static const char *NO_SPELLOUT_PARSE_LANGUAGES[] = { "ga", NULL };
64 
65 U_NAMESPACE_BEGIN
66 
67 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
68 
69 /*
70 This is a utility class. It does not use ICU's RTTI.
71 If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
72 Please make sure that intltest passes on Windows in Release mode,
73 since the string pooling per compilation unit will mess up how RTTI works.
74 The RTTI code was also removed due to lack of code coverage.
75 */
76 class LocalizationInfo : public UMemory {
77 protected:
~LocalizationInfo()78     virtual ~LocalizationInfo() {};
79     uint32_t refcount;
80 
81 public:
LocalizationInfo()82     LocalizationInfo() : refcount(0) {}
83 
ref(void)84     LocalizationInfo* ref(void) {
85         ++refcount;
86         return this;
87     }
88 
unref(void)89     LocalizationInfo* unref(void) {
90         if (refcount && --refcount == 0) {
91             delete this;
92         }
93         return NULL;
94     }
95 
96     virtual UBool operator==(const LocalizationInfo* rhs) const;
operator !=(const LocalizationInfo * rhs) const97     inline  UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); }
98 
99     virtual int32_t getNumberOfRuleSets(void) const = 0;
100     virtual const UChar* getRuleSetName(int32_t index) const = 0;
101     virtual int32_t getNumberOfDisplayLocales(void) const = 0;
102     virtual const UChar* getLocaleName(int32_t index) const = 0;
103     virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0;
104 
105     virtual int32_t indexForLocale(const UChar* locale) const;
106     virtual int32_t indexForRuleSet(const UChar* ruleset) const;
107 
108 //    virtual UClassID getDynamicClassID() const = 0;
109 //    static UClassID getStaticClassID(void);
110 };
111 
112 //UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
113 
114 // if both strings are NULL, this returns TRUE
115 static UBool
streq(const UChar * lhs,const UChar * rhs)116 streq(const UChar* lhs, const UChar* rhs) {
117     if (rhs == lhs) {
118         return TRUE;
119     }
120     if (lhs && rhs) {
121         return u_strcmp(lhs, rhs) == 0;
122     }
123     return FALSE;
124 }
125 
126 UBool
operator ==(const LocalizationInfo * rhs) const127 LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
128     if (rhs) {
129         if (this == rhs) {
130             return TRUE;
131         }
132 
133         int32_t rsc = getNumberOfRuleSets();
134         if (rsc == rhs->getNumberOfRuleSets()) {
135             for (int i = 0; i < rsc; ++i) {
136                 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) {
137                     return FALSE;
138                 }
139             }
140             int32_t dlc = getNumberOfDisplayLocales();
141             if (dlc == rhs->getNumberOfDisplayLocales()) {
142                 for (int i = 0; i < dlc; ++i) {
143                     const UChar* locale = getLocaleName(i);
144                     int32_t ix = rhs->indexForLocale(locale);
145                     // if no locale, ix is -1, getLocaleName returns null, so streq returns false
146                     if (!streq(locale, rhs->getLocaleName(ix))) {
147                         return FALSE;
148                     }
149                     for (int j = 0; j < rsc; ++j) {
150                         if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) {
151                             return FALSE;
152                         }
153                     }
154                 }
155                 return TRUE;
156             }
157         }
158     }
159     return FALSE;
160 }
161 
162 int32_t
indexForLocale(const UChar * locale) const163 LocalizationInfo::indexForLocale(const UChar* locale) const {
164     for (int i = 0; i < getNumberOfDisplayLocales(); ++i) {
165         if (streq(locale, getLocaleName(i))) {
166             return i;
167         }
168     }
169     return -1;
170 }
171 
172 int32_t
indexForRuleSet(const UChar * ruleset) const173 LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
174     if (ruleset) {
175         for (int i = 0; i < getNumberOfRuleSets(); ++i) {
176             if (streq(ruleset, getRuleSetName(i))) {
177                 return i;
178             }
179         }
180     }
181     return -1;
182 }
183 
184 
185 typedef void (*Fn_Deleter)(void*);
186 
187 class VArray {
188     void** buf;
189     int32_t cap;
190     int32_t size;
191     Fn_Deleter deleter;
192 public:
VArray()193     VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {}
194 
VArray(Fn_Deleter del)195     VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {}
196 
~VArray()197     ~VArray() {
198         if (deleter) {
199             for (int i = 0; i < size; ++i) {
200                 (*deleter)(buf[i]);
201             }
202         }
203         uprv_free(buf);
204     }
205 
length()206     int32_t length() {
207         return size;
208     }
209 
add(void * elem,UErrorCode & status)210     void add(void* elem, UErrorCode& status) {
211         if (U_SUCCESS(status)) {
212             if (size == cap) {
213                 if (cap == 0) {
214                     cap = 1;
215                 } else if (cap < 256) {
216                     cap *= 2;
217                 } else {
218                     cap += 256;
219                 }
220                 if (buf == NULL) {
221                     buf = (void**)uprv_malloc(cap * sizeof(void*));
222                 } else {
223                     buf = (void**)uprv_realloc(buf, cap * sizeof(void*));
224                 }
225                 if (buf == NULL) {
226                     // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
227                     status = U_MEMORY_ALLOCATION_ERROR;
228                     return;
229                 }
230                 void* start = &buf[size];
231                 size_t count = (cap - size) * sizeof(void*);
232                 uprv_memset(start, 0, count); // fill with nulls, just because
233             }
234             buf[size++] = elem;
235         }
236     }
237 
release(void)238     void** release(void) {
239         void** result = buf;
240         buf = NULL;
241         cap = 0;
242         size = 0;
243         return result;
244     }
245 };
246 
247 class LocDataParser;
248 
249 class StringLocalizationInfo : public LocalizationInfo {
250     UChar* info;
251     UChar*** data;
252     int32_t numRuleSets;
253     int32_t numLocales;
254 
255 friend class LocDataParser;
256 
StringLocalizationInfo(UChar * i,UChar *** d,int32_t numRS,int32_t numLocs)257     StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs)
258         : info(i), data(d), numRuleSets(numRS), numLocales(numLocs)
259     {
260     }
261 
262 public:
263     static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);
264 
265     virtual ~StringLocalizationInfo();
getNumberOfRuleSets(void) const266     virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; }
267     virtual const UChar* getRuleSetName(int32_t index) const;
getNumberOfDisplayLocales(void) const268     virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; }
269     virtual const UChar* getLocaleName(int32_t index) const;
270     virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;
271 
272 //    virtual UClassID getDynamicClassID() const;
273 //    static UClassID getStaticClassID(void);
274 
275 private:
276     void init(UErrorCode& status) const;
277 };
278 
279 
// Syntax characters recognized in the localization data.
enum {
    OPEN_ANGLE  = 0x003c, /* '<' */
    CLOSE_ANGLE = 0x003e, /* '>' */
    COMMA       = 0x002c, /* ',' */
    TICK        = 0x0027, /* single quote */
    QUOTE       = 0x0022, /* double quote */
    SPACE       = 0x0020  /* ' ' */
};
288 
289 /**
290  * Utility for parsing a localization string and returning a StringLocalizationInfo*.
291  */
292 class LocDataParser {
293     UChar* data;
294     const UChar* e;
295     UChar* p;
296     UChar ch;
297     UParseError& pe;
298     UErrorCode& ec;
299 
300 public:
LocDataParser(UParseError & parseError,UErrorCode & status)301     LocDataParser(UParseError& parseError, UErrorCode& status)
302         : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {}
~LocDataParser()303     ~LocDataParser() {}
304 
305     /*
306     * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
307     * and return NULL.  The StringLocalizationInfo will adopt locData if it is created.
308     */
309     StringLocalizationInfo* parse(UChar* data, int32_t len);
310 
311 private:
312 
inc(void)313     void inc(void) { ++p; ch = 0xffff; }
checkInc(UChar c)314     UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; }
check(UChar c)315     UBool check(UChar c) { return p < e && (ch == c || *p == c); }
skipWhitespace(void)316     void skipWhitespace(void) { while (p < e && uprv_isRuleWhiteSpace(ch != 0xffff ? ch : *p)) inc();}
inList(UChar c,const UChar * list) const317     UBool inList(UChar c, const UChar* list) const {
318         if (*list == SPACE && uprv_isRuleWhiteSpace(c)) return TRUE;
319         while (*list && *list != c) ++list; return *list == c;
320     }
321     void parseError(const char* msg);
322 
323     StringLocalizationInfo* doParse(void);
324 
325     UChar** nextArray(int32_t& requiredLength);
326     UChar*  nextString(void);
327 };
328 
// Reports a parse error and returns NULL from the enclosing function.
// The message is only forwarded in DEBUG builds.  The do/while(0) wrapper
// makes the expansion a single statement, so it stays correct even if it is
// ever used as the body of an unbraced if/else.
#ifdef DEBUG
#define ERROR(msg) do { parseError(msg); return NULL; } while (0)
#else
#define ERROR(msg) do { parseError(NULL); return NULL; } while (0)
#endif
334 
335 
336 static const UChar DQUOTE_STOPLIST[] = {
337     QUOTE, 0
338 };
339 
340 static const UChar SQUOTE_STOPLIST[] = {
341     TICK, 0
342 };
343 
344 static const UChar NOQUOTE_STOPLIST[] = {
345     SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0
346 };
347 
348 static void
DeleteFn(void * p)349 DeleteFn(void* p) {
350   uprv_free(p);
351 }
352 
353 StringLocalizationInfo*
parse(UChar * _data,int32_t len)354 LocDataParser::parse(UChar* _data, int32_t len) {
355     if (U_FAILURE(ec)) {
356         if (_data) uprv_free(_data);
357         return NULL;
358     }
359 
360     pe.line = 0;
361     pe.offset = -1;
362     pe.postContext[0] = 0;
363     pe.preContext[0] = 0;
364 
365     if (_data == NULL) {
366         ec = U_ILLEGAL_ARGUMENT_ERROR;
367         return NULL;
368     }
369 
370     if (len <= 0) {
371         ec = U_ILLEGAL_ARGUMENT_ERROR;
372         uprv_free(_data);
373         return NULL;
374     }
375 
376     data = _data;
377     e = data + len;
378     p = _data;
379     ch = 0xffff;
380 
381     return doParse();
382 }
383 
384 
385 StringLocalizationInfo*
doParse(void)386 LocDataParser::doParse(void) {
387     skipWhitespace();
388     if (!checkInc(OPEN_ANGLE)) {
389         ERROR("Missing open angle");
390     } else {
391         VArray array(DeleteFn);
392         UBool mightHaveNext = TRUE;
393         int32_t requiredLength = -1;
394         while (mightHaveNext) {
395             mightHaveNext = FALSE;
396             UChar** elem = nextArray(requiredLength);
397             skipWhitespace();
398             UBool haveComma = check(COMMA);
399             if (elem) {
400                 array.add(elem, ec);
401                 if (haveComma) {
402                     inc();
403                     mightHaveNext = TRUE;
404                 }
405             } else if (haveComma) {
406                 ERROR("Unexpected character");
407             }
408         }
409 
410         skipWhitespace();
411         if (!checkInc(CLOSE_ANGLE)) {
412             if (check(OPEN_ANGLE)) {
413                 ERROR("Missing comma in outer array");
414             } else {
415                 ERROR("Missing close angle bracket in outer array");
416             }
417         }
418 
419         skipWhitespace();
420         if (p != e) {
421             ERROR("Extra text after close of localization data");
422         }
423 
424         array.add(NULL, ec);
425         if (U_SUCCESS(ec)) {
426             int32_t numLocs = array.length() - 2; // subtract first, NULL
427             UChar*** result = (UChar***)array.release();
428 
429             return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL
430         }
431     }
432 
433     ERROR("Unknown error");
434 }
435 
436 UChar**
nextArray(int32_t & requiredLength)437 LocDataParser::nextArray(int32_t& requiredLength) {
438     if (U_FAILURE(ec)) {
439         return NULL;
440     }
441 
442     skipWhitespace();
443     if (!checkInc(OPEN_ANGLE)) {
444         ERROR("Missing open angle");
445     }
446 
447     VArray array;
448     UBool mightHaveNext = TRUE;
449     while (mightHaveNext) {
450         mightHaveNext = FALSE;
451         UChar* elem = nextString();
452         skipWhitespace();
453         UBool haveComma = check(COMMA);
454         if (elem) {
455             array.add(elem, ec);
456             if (haveComma) {
457                 inc();
458                 mightHaveNext = TRUE;
459             }
460         } else if (haveComma) {
461             ERROR("Unexpected comma");
462         }
463     }
464     skipWhitespace();
465     if (!checkInc(CLOSE_ANGLE)) {
466         if (check(OPEN_ANGLE)) {
467             ERROR("Missing close angle bracket in inner array");
468         } else {
469             ERROR("Missing comma in inner array");
470         }
471     }
472 
473     array.add(NULL, ec);
474     if (U_SUCCESS(ec)) {
475         if (requiredLength == -1) {
476             requiredLength = array.length() + 1;
477         } else if (array.length() != requiredLength) {
478             ec = U_ILLEGAL_ARGUMENT_ERROR;
479             ERROR("Array not of required length");
480         }
481 
482         return (UChar**)array.release();
483     }
484     ERROR("Unknown Error");
485 }
486 
487 UChar*
nextString()488 LocDataParser::nextString() {
489     UChar* result = NULL;
490 
491     skipWhitespace();
492     if (p < e) {
493         const UChar* terminators;
494         UChar c = *p;
495         UBool haveQuote = c == QUOTE || c == TICK;
496         if (haveQuote) {
497             inc();
498             terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
499         } else {
500             terminators = NOQUOTE_STOPLIST;
501         }
502         UChar* start = p;
503         while (p < e && !inList(*p, terminators)) ++p;
504         if (p == e) {
505             ERROR("Unexpected end of data");
506         }
507 
508         UChar x = *p;
509         if (p > start) {
510             ch = x;
511             *p = 0x0; // terminate by writing to data
512             result = start; // just point into data
513         }
514         if (haveQuote) {
515             if (x != c) {
516                 ERROR("Missing matching quote");
517             } else if (p == start) {
518                 ERROR("Empty string");
519             }
520             inc();
521         } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) {
522             ERROR("Unexpected character in string");
523         }
524     }
525 
526     // ok for there to be no next string
527     return result;
528 }
529 
530 void
parseError(const char *)531 LocDataParser::parseError(const char* /*str*/) {
532     if (!data) {
533         return;
534     }
535 
536     const UChar* start = p - U_PARSE_CONTEXT_LEN - 1;
537     if (start < data) {
538         start = data;
539     }
540     for (UChar* x = p; --x >= start;) {
541         if (!*x) {
542             start = x+1;
543             break;
544         }
545     }
546     const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1;
547     if (limit > e) {
548         limit = e;
549     }
550     u_strncpy(pe.preContext, start, (int32_t)(p-start));
551     pe.preContext[p-start] = 0;
552     u_strncpy(pe.postContext, p, (int32_t)(limit-p));
553     pe.postContext[limit-p] = 0;
554     pe.offset = (int32_t)(p - data);
555 
556 #ifdef DEBUG
557     fprintf(stderr, "%s at or near character %d: ", str, p-data);
558 
559     UnicodeString msg;
560     msg.append(start, p - start);
561     msg.append((UChar)0x002f); /* SOLIDUS/SLASH */
562     msg.append(p, limit-p);
563     msg.append("'");
564 
565     char buf[128];
566     int32_t len = msg.extract(0, msg.length(), buf, 128);
567     if (len >= 128) {
568         buf[127] = 0;
569     } else {
570         buf[len] = 0;
571     }
572     fprintf(stderr, "%s\n", buf);
573     fflush(stderr);
574 #endif
575 
576     uprv_free(data);
577     data = NULL;
578     p = NULL;
579     e = NULL;
580 
581     if (U_SUCCESS(ec)) {
582         ec = U_PARSE_ERROR;
583     }
584 }
585 
586 //UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
587 
588 StringLocalizationInfo*
create(const UnicodeString & info,UParseError & perror,UErrorCode & status)589 StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
590     if (U_FAILURE(status)) {
591         return NULL;
592     }
593 
594     int32_t len = info.length();
595     if (len == 0) {
596         return NULL; // no error;
597     }
598 
599     UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar));
600     if (!p) {
601         status = U_MEMORY_ALLOCATION_ERROR;
602         return NULL;
603     }
604     info.extract(p, len, status);
605     if (!U_FAILURE(status)) {
606         status = U_ZERO_ERROR; // clear warning about non-termination
607     }
608 
609     LocDataParser parser(perror, status);
610     return parser.parse(p, len);
611 }
612 
~StringLocalizationInfo()613 StringLocalizationInfo::~StringLocalizationInfo() {
614     for (UChar*** p = (UChar***)data; *p; ++p) {
615         // remaining data is simply pointer into our unicode string data.
616         if (*p) uprv_free(*p);
617     }
618     if (data) uprv_free(data);
619     if (info) uprv_free(info);
620 }
621 
622 
623 const UChar*
getRuleSetName(int32_t index) const624 StringLocalizationInfo::getRuleSetName(int32_t index) const {
625     if (index >= 0 && index < getNumberOfRuleSets()) {
626         return data[0][index];
627     }
628     return NULL;
629 }
630 
631 const UChar*
getLocaleName(int32_t index) const632 StringLocalizationInfo::getLocaleName(int32_t index) const {
633     if (index >= 0 && index < getNumberOfDisplayLocales()) {
634         return data[index+1][0];
635     }
636     return NULL;
637 }
638 
639 const UChar*
getDisplayName(int32_t localeIndex,int32_t ruleIndex) const640 StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
641     if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() &&
642         ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) {
643         return data[localeIndex+1][ruleIndex+1];
644     }
645     return NULL;
646 }
647 
648 // ----------
649 
RuleBasedNumberFormat(const UnicodeString & description,const UnicodeString & locs,const Locale & alocale,UParseError & perror,UErrorCode & status)650 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
651                                              const UnicodeString& locs,
652                                              const Locale& alocale, UParseError& perror, UErrorCode& status)
653   : ruleSets(NULL)
654   , defaultRuleSet(NULL)
655   , locale(alocale)
656   , collator(NULL)
657   , decimalFormatSymbols(NULL)
658   , lenient(FALSE)
659   , lenientParseRules(NULL)
660   , localizations(NULL)
661   , noParse(FALSE) //TODO: to be removed after #6895
662 {
663   LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
664   init(description, locinfo, perror, status);
665 }
666 
RuleBasedNumberFormat(const UnicodeString & description,const UnicodeString & locs,UParseError & perror,UErrorCode & status)667 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
668                                              const UnicodeString& locs,
669                                              UParseError& perror, UErrorCode& status)
670   : ruleSets(NULL)
671   , defaultRuleSet(NULL)
672   , locale(Locale::getDefault())
673   , collator(NULL)
674   , decimalFormatSymbols(NULL)
675   , lenient(FALSE)
676   , lenientParseRules(NULL)
677   , localizations(NULL)
678   , noParse(FALSE) //TODO: to be removed after #6895
679 {
680   LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
681   init(description, locinfo, perror, status);
682 }
683 
RuleBasedNumberFormat(const UnicodeString & description,LocalizationInfo * info,const Locale & alocale,UParseError & perror,UErrorCode & status)684 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
685                                              LocalizationInfo* info,
686                                              const Locale& alocale, UParseError& perror, UErrorCode& status)
687   : ruleSets(NULL)
688   , defaultRuleSet(NULL)
689   , locale(alocale)
690   , collator(NULL)
691   , decimalFormatSymbols(NULL)
692   , lenient(FALSE)
693   , lenientParseRules(NULL)
694   , localizations(NULL)
695   , noParse(FALSE) //TODO: to be removed after #6895
696 {
697   init(description, info, perror, status);
698 }
699 
RuleBasedNumberFormat(const UnicodeString & description,UParseError & perror,UErrorCode & status)700 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
701                          UParseError& perror,
702                          UErrorCode& status)
703   : ruleSets(NULL)
704   , defaultRuleSet(NULL)
705   , locale(Locale::getDefault())
706   , collator(NULL)
707   , decimalFormatSymbols(NULL)
708   , lenient(FALSE)
709   , lenientParseRules(NULL)
710   , localizations(NULL)
711   , noParse(FALSE) //TODO: to be removed after #6895
712 {
713     init(description, NULL, perror, status);
714 }
715 
RuleBasedNumberFormat(const UnicodeString & description,const Locale & aLocale,UParseError & perror,UErrorCode & status)716 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
717                          const Locale& aLocale,
718                          UParseError& perror,
719                          UErrorCode& status)
720   : ruleSets(NULL)
721   , defaultRuleSet(NULL)
722   , locale(aLocale)
723   , collator(NULL)
724   , decimalFormatSymbols(NULL)
725   , lenient(FALSE)
726   , lenientParseRules(NULL)
727   , localizations(NULL)
728   , noParse(FALSE) //TODO: to be removed after #6895
729 {
730     init(description, NULL, perror, status);
731 }
732 
RuleBasedNumberFormat(URBNFRuleSetTag tag,const Locale & alocale,UErrorCode & status)733 RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
734   : ruleSets(NULL)
735   , defaultRuleSet(NULL)
736   , locale(alocale)
737   , collator(NULL)
738   , decimalFormatSymbols(NULL)
739   , lenient(FALSE)
740   , lenientParseRules(NULL)
741   , localizations(NULL)
742 {
743     if (U_FAILURE(status)) {
744         return;
745     }
746 
747     const char* rules_tag = "RBNFRules";
748     const char* fmt_tag = "";
749     switch (tag) {
750     case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
751     case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
752     case URBNF_DURATION: fmt_tag = "DurationRules"; break;
753     case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break;
754     default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
755     }
756 
757     // TODO: read localization info from resource
758     LocalizationInfo* locinfo = NULL;
759 
760     int32_t len = 0;
761     UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
762     if (U_SUCCESS(status)) {
763         setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
764                      ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));
765 
766         UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status);
767         if (U_FAILURE(status)) {
768             ures_close(nfrb);
769         }
770         UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status);
771         if (U_FAILURE(status)) {
772             ures_close(rbnfRules);
773             ures_close(nfrb);
774             return;
775         }
776 
777         UnicodeString desc;
778         while (ures_hasNext(ruleSets)) {
779            const UChar* currentString = ures_getNextString(ruleSets,&len,NULL,&status);
780            desc.append(currentString);
781         }
782         UParseError perror;
783 
784 
785         init (desc, locinfo, perror, status);
786 
787         //TODO: we need a real fix - see #6895 / #6896
788         noParse = FALSE;
789         if (tag == URBNF_SPELLOUT) {
790             const char *lang = alocale.getLanguage();
791             for (int32_t i = 0; NO_SPELLOUT_PARSE_LANGUAGES[i] != NULL; i++) {
792                 if (uprv_strcmp(lang, NO_SPELLOUT_PARSE_LANGUAGES[i]) == 0) {
793                     noParse = TRUE;
794                     break;
795                 }
796             }
797         }
798         //TODO: end
799 
800         ures_close(ruleSets);
801         ures_close(rbnfRules);
802     }
803     ures_close(nfrb);
804 }
805 
// Copy constructor: starts from an empty state and delegates the actual copy
// to the assignment operator.
RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
  : NumberFormat(rhs),
    ruleSets(NULL),
    defaultRuleSet(NULL),
    locale(rhs.locale),
    collator(NULL),
    decimalFormatSymbols(NULL),
    lenient(FALSE),
    lenientParseRules(NULL),
    localizations(NULL)
{
    *this = rhs;
}
819 
820 // --------
821 
822 RuleBasedNumberFormat&
operator =(const RuleBasedNumberFormat & rhs)823 RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
824 {
825     UErrorCode status = U_ZERO_ERROR;
826     dispose();
827     locale = rhs.locale;
828     lenient = rhs.lenient;
829 
830     UnicodeString rules = rhs.getRules();
831     UParseError perror;
832     init(rules, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
833 
834     //TODO: remove below when we fix the parse bug - See #6895 / #6896
835     noParse = rhs.noParse;
836 
837     return *this;
838 }
839 
~RuleBasedNumberFormat()840 RuleBasedNumberFormat::~RuleBasedNumberFormat()
841 {
842     dispose();
843 }
844 
845 Format*
clone(void) const846 RuleBasedNumberFormat::clone(void) const
847 {
848     RuleBasedNumberFormat * result = NULL;
849     UnicodeString rules = getRules();
850     UErrorCode status = U_ZERO_ERROR;
851     UParseError perror;
852     result = new RuleBasedNumberFormat(rules, localizations, locale, perror, status);
853     /* test for NULL */
854     if (result == 0) {
855         status = U_MEMORY_ALLOCATION_ERROR;
856         return 0;
857     }
858     if (U_FAILURE(status)) {
859         delete result;
860         result = 0;
861     } else {
862         result->lenient = lenient;
863 
864         //TODO: remove below when we fix the parse bug - See #6895 / #6896
865         result->noParse = noParse;
866     }
867     return result;
868 }
869 
870 UBool
operator ==(const Format & other) const871 RuleBasedNumberFormat::operator==(const Format& other) const
872 {
873     if (this == &other) {
874         return TRUE;
875     }
876 
877     if (typeid(*this) == typeid(other)) {
878         const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
879         if (locale == rhs.locale &&
880             lenient == rhs.lenient &&
881             (localizations == NULL
882                 ? rhs.localizations == NULL
883                 : (rhs.localizations == NULL
884                     ? FALSE
885                     : *localizations == rhs.localizations))) {
886 
887             NFRuleSet** p = ruleSets;
888             NFRuleSet** q = rhs.ruleSets;
889             if (p == NULL) {
890                 return q == NULL;
891             } else if (q == NULL) {
892                 return FALSE;
893             }
894             while (*p && *q && (**p == **q)) {
895                 ++p;
896                 ++q;
897             }
898             return *q == NULL && *p == NULL;
899         }
900     }
901 
902     return FALSE;
903 }
904 
905 UnicodeString
getRules() const906 RuleBasedNumberFormat::getRules() const
907 {
908     UnicodeString result;
909     if (ruleSets != NULL) {
910         for (NFRuleSet** p = ruleSets; *p; ++p) {
911             (*p)->appendRules(result);
912         }
913     }
914     return result;
915 }
916 
917 UnicodeString
getRuleSetName(int32_t index) const918 RuleBasedNumberFormat::getRuleSetName(int32_t index) const
919 {
920     if (localizations) {
921       UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1);
922       return string;
923     } else if (ruleSets) {
924         UnicodeString result;
925         for (NFRuleSet** p = ruleSets; *p; ++p) {
926             NFRuleSet* rs = *p;
927             if (rs->isPublic()) {
928                 if (--index == -1) {
929                     rs->getName(result);
930                     return result;
931                 }
932             }
933         }
934     }
935     UnicodeString empty;
936     return empty;
937 }
938 
939 int32_t
getNumberOfRuleSetNames() const940 RuleBasedNumberFormat::getNumberOfRuleSetNames() const
941 {
942     int32_t result = 0;
943     if (localizations) {
944       result = localizations->getNumberOfRuleSets();
945     } else if (ruleSets) {
946         for (NFRuleSet** p = ruleSets; *p; ++p) {
947             if ((**p).isPublic()) {
948                 ++result;
949             }
950         }
951     }
952     return result;
953 }
954 
955 int32_t
getNumberOfRuleSetDisplayNameLocales(void) const956 RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
957     if (localizations) {
958         return localizations->getNumberOfDisplayLocales();
959     }
960     return 0;
961 }
962 
963 Locale
getRuleSetDisplayNameLocale(int32_t index,UErrorCode & status) const964 RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
965     if (U_FAILURE(status)) {
966         return Locale("");
967     }
968     if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
969         UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
970         char buffer[64];
971         int32_t cap = name.length() + 1;
972         char* bp = buffer;
973         if (cap > 64) {
974             bp = (char *)uprv_malloc(cap);
975             if (bp == NULL) {
976                 status = U_MEMORY_ALLOCATION_ERROR;
977                 return Locale("");
978             }
979         }
980         name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
981         Locale retLocale(bp);
982         if (bp != buffer) {
983             uprv_free(bp);
984         }
985         return retLocale;
986     }
987     status = U_ILLEGAL_ARGUMENT_ERROR;
988     Locale retLocale;
989     return retLocale;
990 }
991 
992 UnicodeString
getRuleSetDisplayName(int32_t index,const Locale & localeParam)993 RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
994     if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) {
995         UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
996         int32_t len = localeName.length();
997         UChar* localeStr = localeName.getBuffer(len + 1);
998         while (len >= 0) {
999             localeStr[len] = 0;
1000             int32_t ix = localizations->indexForLocale(localeStr);
1001             if (ix >= 0) {
1002                 UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
1003                 return name;
1004             }
1005 
1006             // trim trailing portion, skipping over ommitted sections
1007             do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore
1008             while (len > 0 && localeStr[len-1] == 0x005F) --len;
1009         }
1010         UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
1011         return name;
1012     }
1013     UnicodeString bogus;
1014     bogus.setToBogus();
1015     return bogus;
1016 }
1017 
1018 UnicodeString
getRuleSetDisplayName(const UnicodeString & ruleSetName,const Locale & localeParam)1019 RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
1020     if (localizations) {
1021         UnicodeString rsn(ruleSetName);
1022         int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
1023         return getRuleSetDisplayName(ix, localeParam);
1024     }
1025     UnicodeString bogus;
1026     bogus.setToBogus();
1027     return bogus;
1028 }
1029 
1030 NFRuleSet*
findRuleSet(const UnicodeString & name,UErrorCode & status) const1031 RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
1032 {
1033     if (U_SUCCESS(status) && ruleSets) {
1034         for (NFRuleSet** p = ruleSets; *p; ++p) {
1035             NFRuleSet* rs = *p;
1036             if (rs->isNamed(name)) {
1037                 return rs;
1038             }
1039         }
1040         status = U_ILLEGAL_ARGUMENT_ERROR;
1041     }
1042     return NULL;
1043 }
1044 
1045 UnicodeString&
format(int32_t number,UnicodeString & toAppendTo,FieldPosition &) const1046 RuleBasedNumberFormat::format(int32_t number,
1047                               UnicodeString& toAppendTo,
1048                               FieldPosition& /* pos */) const
1049 {
1050     if (defaultRuleSet) defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length());
1051     return toAppendTo;
1052 }
1053 
1054 
1055 UnicodeString&
format(int64_t number,UnicodeString & toAppendTo,FieldPosition &) const1056 RuleBasedNumberFormat::format(int64_t number,
1057                               UnicodeString& toAppendTo,
1058                               FieldPosition& /* pos */) const
1059 {
1060     if (defaultRuleSet) defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
1061     return toAppendTo;
1062 }
1063 
1064 
1065 UnicodeString&
format(double number,UnicodeString & toAppendTo,FieldPosition &) const1066 RuleBasedNumberFormat::format(double number,
1067                               UnicodeString& toAppendTo,
1068                               FieldPosition& /* pos */) const
1069 {
1070     if (defaultRuleSet) defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
1071     return toAppendTo;
1072 }
1073 
1074 
1075 UnicodeString&
format(int32_t number,const UnicodeString & ruleSetName,UnicodeString & toAppendTo,FieldPosition &,UErrorCode & status) const1076 RuleBasedNumberFormat::format(int32_t number,
1077                               const UnicodeString& ruleSetName,
1078                               UnicodeString& toAppendTo,
1079                               FieldPosition& /* pos */,
1080                               UErrorCode& status) const
1081 {
1082     // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
1083     if (U_SUCCESS(status)) {
1084         if (ruleSetName.indexOf(gPercentPercent) == 0) {
1085             // throw new IllegalArgumentException("Can't use internal rule set");
1086             status = U_ILLEGAL_ARGUMENT_ERROR;
1087         } else {
1088             NFRuleSet *rs = findRuleSet(ruleSetName, status);
1089             if (rs) {
1090                 rs->format((int64_t)number, toAppendTo, toAppendTo.length());
1091             }
1092         }
1093     }
1094     return toAppendTo;
1095 }
1096 
1097 
1098 UnicodeString&
format(int64_t number,const UnicodeString & ruleSetName,UnicodeString & toAppendTo,FieldPosition &,UErrorCode & status) const1099 RuleBasedNumberFormat::format(int64_t number,
1100                               const UnicodeString& ruleSetName,
1101                               UnicodeString& toAppendTo,
1102                               FieldPosition& /* pos */,
1103                               UErrorCode& status) const
1104 {
1105     if (U_SUCCESS(status)) {
1106         if (ruleSetName.indexOf(gPercentPercent) == 0) {
1107             // throw new IllegalArgumentException("Can't use internal rule set");
1108             status = U_ILLEGAL_ARGUMENT_ERROR;
1109         } else {
1110             NFRuleSet *rs = findRuleSet(ruleSetName, status);
1111             if (rs) {
1112                 rs->format(number, toAppendTo, toAppendTo.length());
1113             }
1114         }
1115     }
1116     return toAppendTo;
1117 }
1118 
1119 
1120 // make linker happy
1121 UnicodeString&
format(const Formattable & obj,UnicodeString & toAppendTo,FieldPosition & pos,UErrorCode & status) const1122 RuleBasedNumberFormat::format(const Formattable& obj,
1123                               UnicodeString& toAppendTo,
1124                               FieldPosition& pos,
1125                               UErrorCode& status) const
1126 {
1127     return NumberFormat::format(obj, toAppendTo, pos, status);
1128 }
1129 
1130 UnicodeString&
format(double number,const UnicodeString & ruleSetName,UnicodeString & toAppendTo,FieldPosition &,UErrorCode & status) const1131 RuleBasedNumberFormat::format(double number,
1132                               const UnicodeString& ruleSetName,
1133                               UnicodeString& toAppendTo,
1134                               FieldPosition& /* pos */,
1135                               UErrorCode& status) const
1136 {
1137     if (U_SUCCESS(status)) {
1138         if (ruleSetName.indexOf(gPercentPercent) == 0) {
1139             // throw new IllegalArgumentException("Can't use internal rule set");
1140             status = U_ILLEGAL_ARGUMENT_ERROR;
1141         } else {
1142             NFRuleSet *rs = findRuleSet(ruleSetName, status);
1143             if (rs) {
1144                 rs->format(number, toAppendTo, toAppendTo.length());
1145             }
1146         }
1147     }
1148     return toAppendTo;
1149 }
1150 
1151 void
parse(const UnicodeString & text,Formattable & result,ParsePosition & parsePosition) const1152 RuleBasedNumberFormat::parse(const UnicodeString& text,
1153                              Formattable& result,
1154                              ParsePosition& parsePosition) const
1155 {
1156     //TODO: We need a real fix.  See #6895 / #6896
1157     if (noParse) {
1158         // skip parsing
1159         parsePosition.setErrorIndex(0);
1160         return;
1161     }
1162 
1163     if (!ruleSets) {
1164         parsePosition.setErrorIndex(0);
1165         return;
1166     }
1167 
1168     UnicodeString workingText(text, parsePosition.getIndex());
1169     ParsePosition workingPos(0);
1170 
1171     ParsePosition high_pp(0);
1172     Formattable high_result;
1173 
1174     for (NFRuleSet** p = ruleSets; *p; ++p) {
1175         NFRuleSet *rp = *p;
1176         if (rp->isPublic() && rp->isParseable()) {
1177             ParsePosition working_pp(0);
1178             Formattable working_result;
1179 
1180             rp->parse(workingText, working_pp, kMaxDouble, working_result);
1181             if (working_pp.getIndex() > high_pp.getIndex()) {
1182                 high_pp = working_pp;
1183                 high_result = working_result;
1184 
1185                 if (high_pp.getIndex() == workingText.length()) {
1186                     break;
1187                 }
1188             }
1189         }
1190     }
1191 
1192     int32_t startIndex = parsePosition.getIndex();
1193     parsePosition.setIndex(startIndex + high_pp.getIndex());
1194     if (high_pp.getIndex() > 0) {
1195         parsePosition.setErrorIndex(-1);
1196     } else {
1197         int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0;
1198         parsePosition.setErrorIndex(startIndex + errorIndex);
1199     }
1200     result = high_result;
1201     if (result.getType() == Formattable::kDouble) {
1202         int32_t r = (int32_t)result.getDouble();
1203         if ((double)r == result.getDouble()) {
1204             result.setLong(r);
1205         }
1206     }
1207 }
1208 
1209 #if !UCONFIG_NO_COLLATION
1210 
1211 void
setLenient(UBool enabled)1212 RuleBasedNumberFormat::setLenient(UBool enabled)
1213 {
1214     lenient = enabled;
1215     if (!enabled && collator) {
1216         delete collator;
1217         collator = NULL;
1218     }
1219 }
1220 
1221 #endif
1222 
1223 void
setDefaultRuleSet(const UnicodeString & ruleSetName,UErrorCode & status)1224 RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
1225     if (U_SUCCESS(status)) {
1226         if (ruleSetName.isEmpty()) {
1227           if (localizations) {
1228               UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
1229               defaultRuleSet = findRuleSet(name, status);
1230           } else {
1231             initDefaultRuleSet();
1232           }
1233         } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
1234             status = U_ILLEGAL_ARGUMENT_ERROR;
1235         } else {
1236             NFRuleSet* result = findRuleSet(ruleSetName, status);
1237             if (result != NULL) {
1238                 defaultRuleSet = result;
1239             }
1240         }
1241     }
1242 }
1243 
1244 UnicodeString
getDefaultRuleSetName() const1245 RuleBasedNumberFormat::getDefaultRuleSetName() const {
1246   UnicodeString result;
1247   if (defaultRuleSet && defaultRuleSet->isPublic()) {
1248     defaultRuleSet->getName(result);
1249   } else {
1250     result.setToBogus();
1251   }
1252   return result;
1253 }
1254 
1255 void
initDefaultRuleSet()1256 RuleBasedNumberFormat::initDefaultRuleSet()
1257 {
1258     defaultRuleSet = NULL;
1259     if (!ruleSets) {
1260       return;
1261     }
1262 
1263     const UnicodeString spellout = UNICODE_STRING_SIMPLE("%spellout-numbering");
1264     const UnicodeString ordinal = UNICODE_STRING_SIMPLE("%digits-ordinal");
1265     const UnicodeString duration = UNICODE_STRING_SIMPLE("%duration");
1266 
1267     NFRuleSet**p = &ruleSets[0];
1268     while (*p) {
1269         if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) {
1270             defaultRuleSet = *p;
1271             return;
1272         } else {
1273             ++p;
1274         }
1275     }
1276 
1277     defaultRuleSet = *--p;
1278     if (!defaultRuleSet->isPublic()) {
1279         while (p != ruleSets) {
1280             if ((*--p)->isPublic()) {
1281                 defaultRuleSet = *p;
1282                 break;
1283             }
1284         }
1285     }
1286 }
1287 
1288 
1289 void
init(const UnicodeString & rules,LocalizationInfo * localizationInfos,UParseError & pErr,UErrorCode & status)1290 RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
1291                             UParseError& pErr, UErrorCode& status)
1292 {
1293     // TODO: implement UParseError
1294     uprv_memset(&pErr, 0, sizeof(UParseError));
1295     // Note: this can leave ruleSets == NULL, so remaining code should check
1296     if (U_FAILURE(status)) {
1297         return;
1298     }
1299 
1300     this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();
1301 
1302     UnicodeString description(rules);
1303     if (!description.length()) {
1304         status = U_MEMORY_ALLOCATION_ERROR;
1305         return;
1306     }
1307 
1308     // start by stripping the trailing whitespace from all the rules
1309     // (this is all the whitespace follwing each semicolon in the
1310     // description).  This allows us to look for rule-set boundaries
1311     // by searching for ";%" without having to worry about whitespace
1312     // between the ; and the %
1313     stripWhitespace(description);
1314 
1315     // check to see if there's a set of lenient-parse rules.  If there
1316     // is, pull them out into our temporary holding place for them,
1317     // and delete them from the description before the real desciption-
1318     // parsing code sees them
1319     int32_t lp = description.indexOf(gLenientParse);
1320     if (lp != -1) {
1321         // we've got to make sure we're not in the middle of a rule
1322         // (where "%%lenient-parse" would actually get treated as
1323         // rule text)
1324         if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
1325             // locate the beginning and end of the actual collation
1326             // rules (there may be whitespace between the name and
1327             // the first token in the description)
1328             int lpEnd = description.indexOf(gSemiPercent, lp);
1329 
1330             if (lpEnd == -1) {
1331                 lpEnd = description.length() - 1;
1332             }
1333             int lpStart = lp + u_strlen(gLenientParse);
1334             while (uprv_isRuleWhiteSpace(description.charAt(lpStart))) {
1335                 ++lpStart;
1336             }
1337 
1338             // copy out the lenient-parse rules and delete them
1339             // from the description
1340             lenientParseRules = new UnicodeString();
1341             /* test for NULL */
1342             if (lenientParseRules == 0) {
1343                 status = U_MEMORY_ALLOCATION_ERROR;
1344                 return;
1345             }
1346             lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);
1347 
1348             description.remove(lp, lpEnd + 1 - lp);
1349         }
1350     }
1351 
1352     // pre-flight parsing the description and count the number of
1353     // rule sets (";%" marks the end of one rule set and the beginning
1354     // of the next)
1355     int numRuleSets = 0;
1356     for (int32_t p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, p)) {
1357         ++numRuleSets;
1358         ++p;
1359     }
1360     ++numRuleSets;
1361 
1362     // our rule list is an array of the appropriate size
1363     ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *));
1364     /* test for NULL */
1365     if (ruleSets == 0) {
1366         status = U_MEMORY_ALLOCATION_ERROR;
1367         return;
1368     }
1369 
1370     for (int i = 0; i <= numRuleSets; ++i) {
1371         ruleSets[i] = NULL;
1372     }
1373 
1374     // divide up the descriptions into individual rule-set descriptions
1375     // and store them in a temporary array.  At each step, we also
1376     // new up a rule set, but all this does is initialize its name
1377     // and remove it from its description.  We can't actually parse
1378     // the rest of the descriptions and finish initializing everything
1379     // because we have to know the names and locations of all the rule
1380     // sets before we can actually set everything up
1381     if(!numRuleSets) {
1382         status = U_ILLEGAL_ARGUMENT_ERROR;
1383         return;
1384     }
1385     UnicodeString* ruleSetDescriptions = new UnicodeString[numRuleSets];
1386     if (ruleSetDescriptions == 0) {
1387         status = U_MEMORY_ALLOCATION_ERROR;
1388         return;
1389     }
1390 
1391     {
1392         int curRuleSet = 0;
1393         int32_t start = 0;
1394         for (int32_t p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, start)) {
1395             ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
1396             ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
1397             if (ruleSets[curRuleSet] == 0) {
1398                 status = U_MEMORY_ALLOCATION_ERROR;
1399                 goto cleanup;
1400             }
1401             ++curRuleSet;
1402             start = p + 1;
1403         }
1404         ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
1405         ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
1406         if (ruleSets[curRuleSet] == 0) {
1407             status = U_MEMORY_ALLOCATION_ERROR;
1408             goto cleanup;
1409         }
1410     }
1411 
1412     // now we can take note of the formatter's default rule set, which
1413     // is the last public rule set in the description (it's the last
1414     // rather than the first so that a user can create a new formatter
1415     // from an existing formatter and change its default behavior just
1416     // by appending more rule sets to the end)
1417 
1418     // {dlf} Initialization of a fraction rule set requires the default rule
1419     // set to be known.  For purposes of initialization, this is always the
1420     // last public rule set, no matter what the localization data says.
1421     initDefaultRuleSet();
1422 
1423     // finally, we can go back through the temporary descriptions
1424     // list and finish seting up the substructure (and we throw
1425     // away the temporary descriptions as we go)
1426     {
1427         for (int i = 0; i < numRuleSets; i++) {
1428             ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
1429         }
1430     }
1431 
1432     // Now that the rules are initialized, the 'real' default rule
1433     // set can be adjusted by the localization data.
1434 
1435     // The C code keeps the localization array as is, rather than building
1436     // a separate array of the public rule set names, so we have less work
1437     // to do here-- but we still need to check the names.
1438 
1439     if (localizationInfos) {
1440         // confirm the names, if any aren't in the rules, that's an error
1441         // it is ok if the rules contain public rule sets that are not in this list
1442         for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) {
1443             UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1);
1444             NFRuleSet* rs = findRuleSet(name, status);
1445             if (rs == NULL) {
1446                 break; // error
1447             }
1448             if (i == 0) {
1449                 defaultRuleSet = rs;
1450             }
1451         }
1452     } else {
1453         defaultRuleSet = getDefaultRuleSet();
1454     }
1455 
1456 cleanup:
1457     delete[] ruleSetDescriptions;
1458 }
1459 
1460 void
stripWhitespace(UnicodeString & description)1461 RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
1462 {
1463     // iterate through the characters...
1464     UnicodeString result;
1465 
1466     int start = 0;
1467     while (start != -1 && start < description.length()) {
1468         // seek to the first non-whitespace character...
1469         while (start < description.length()
1470             && uprv_isRuleWhiteSpace(description.charAt(start))) {
1471             ++start;
1472         }
1473 
1474         // locate the next semicolon in the text and copy the text from
1475         // our current position up to that semicolon into the result
1476         int32_t p = description.indexOf(gSemiColon, start);
1477         if (p == -1) {
1478             // or if we don't find a semicolon, just copy the rest of
1479             // the string into the result
1480             result.append(description, start, description.length() - start);
1481             start = -1;
1482         }
1483         else if (p < description.length()) {
1484             result.append(description, start, p + 1 - start);
1485             start = p + 1;
1486         }
1487 
1488         // when we get here, we've seeked off the end of the sring, and
1489         // we terminate the loop (we continue until *start* is -1 rather
1490         // than until *p* is -1, because otherwise we'd miss the last
1491         // rule in the description)
1492         else {
1493             start = -1;
1494         }
1495     }
1496 
1497     description.setTo(result);
1498 }
1499 
1500 
1501 void
dispose()1502 RuleBasedNumberFormat::dispose()
1503 {
1504     if (ruleSets) {
1505         for (NFRuleSet** p = ruleSets; *p; ++p) {
1506             delete *p;
1507         }
1508         uprv_free(ruleSets);
1509         ruleSets = NULL;
1510     }
1511 
1512 #if !UCONFIG_NO_COLLATION
1513     delete collator;
1514 #endif
1515     collator = NULL;
1516 
1517     delete decimalFormatSymbols;
1518     decimalFormatSymbols = NULL;
1519 
1520     delete lenientParseRules;
1521     lenientParseRules = NULL;
1522 
1523     if (localizations) localizations = localizations->unref();
1524 }
1525 
1526 
1527 //-----------------------------------------------------------------------
1528 // package-internal API
1529 //-----------------------------------------------------------------------
1530 
1531 /**
1532  * Returns the collator to use for lenient parsing.  The collator is lazily created:
1533  * this function creates it the first time it's called.
1534  * @return The collator to use for lenient parsing, or null if lenient parsing
1535  * is turned off.
1536 */
1537 Collator*
getCollator() const1538 RuleBasedNumberFormat::getCollator() const
1539 {
1540 #if !UCONFIG_NO_COLLATION
1541     if (!ruleSets) {
1542         return NULL;
1543     }
1544 
1545     // lazy-evaulate the collator
1546     if (collator == NULL && lenient) {
1547         // create a default collator based on the formatter's locale,
1548         // then pull out that collator's rules, append any additional
1549         // rules specified in the description, and create a _new_
1550         // collator based on the combinaiton of those rules
1551 
1552         UErrorCode status = U_ZERO_ERROR;
1553 
1554         Collator* temp = Collator::createInstance(locale, status);
1555         RuleBasedCollator* newCollator;
1556         if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) {
1557             if (lenientParseRules) {
1558                 UnicodeString rules(newCollator->getRules());
1559                 rules.append(*lenientParseRules);
1560 
1561                 newCollator = new RuleBasedCollator(rules, status);
1562                 // Exit if newCollator could not be created.
1563                 if (newCollator == NULL) {
1564                 	return NULL;
1565                 }
1566             } else {
1567                 temp = NULL;
1568             }
1569             if (U_SUCCESS(status)) {
1570                 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
1571                 // cast away const
1572                 ((RuleBasedNumberFormat*)this)->collator = newCollator;
1573             } else {
1574                 delete newCollator;
1575             }
1576         }
1577         delete temp;
1578     }
1579 #endif
1580 
1581     // if lenient-parse mode is off, this will be null
1582     // (see setLenientParseMode())
1583     return collator;
1584 }
1585 
1586 
1587 /**
1588  * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
1589  * instances owned by this formatter.  This object is lazily created: this function
1590  * creates it the first time it's called.
1591  * @return The DecimalFormatSymbols object that should be used by all DecimalFormat
1592  * instances owned by this formatter.
1593 */
1594 DecimalFormatSymbols*
getDecimalFormatSymbols() const1595 RuleBasedNumberFormat::getDecimalFormatSymbols() const
1596 {
1597     // lazy-evaluate the DecimalFormatSymbols object.  This object
1598     // is shared by all DecimalFormat instances belonging to this
1599     // formatter
1600     if (decimalFormatSymbols == NULL) {
1601         UErrorCode status = U_ZERO_ERROR;
1602         DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status);
1603         if (U_SUCCESS(status)) {
1604             ((RuleBasedNumberFormat*)this)->decimalFormatSymbols = temp;
1605         } else {
1606             delete temp;
1607         }
1608     }
1609     return decimalFormatSymbols;
1610 }
1611 
1612 U_NAMESPACE_END
1613 
1614 /* U_HAVE_RBNF */
1615 #endif
1616