• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 * Copyright (C) 1997-2011, International Business Machines Corporation
4 * and others. All Rights Reserved.
5 *******************************************************************************
6 */
7 
8 #include <typeinfo>  // for 'typeid' to work
9 
10 #include "unicode/rbnf.h"
11 
12 #if U_HAVE_RBNF
13 
14 #include "unicode/normlzr.h"
15 #include "unicode/tblcoll.h"
16 #include "unicode/uchar.h"
17 #include "unicode/ucol.h"
18 #include "unicode/uloc.h"
19 #include "unicode/unum.h"
20 #include "unicode/ures.h"
21 #include "unicode/ustring.h"
22 #include "unicode/utf16.h"
23 #include "unicode/udata.h"
24 #include "nfrs.h"
25 
26 #include "cmemory.h"
27 #include "cstring.h"
28 #include "patternprops.h"
29 #include "uresimp.h"
30 
31 // debugging
32 // #define DEBUG
33 
34 #ifdef DEBUG
35 #include "stdio.h"
36 #endif
37 
38 #define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
39 
40 static const UChar gPercentPercent[] =
41 {
42     0x25, 0x25, 0
43 }; /* "%%" */
44 
45 // All urbnf objects are created through openRules, so we init all of the
46 // Unicode string constants required by rbnf, nfrs, or nfr here.
47 static const UChar gLenientParse[] =
48 {
49     0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
50 }; /* "%%lenient-parse:" */
51 static const UChar gSemiColon = 0x003B;
52 static const UChar gSemiPercent[] =
53 {
54     0x3B, 0x25, 0
55 }; /* ";%" */
56 
// kHalfMaxDouble is (1 << kSomeNumberOfBitsDiv2) converted to double, and
// kMaxDouble is that value squared.
#define kSomeNumberOfBitsDiv2 22
#define kHalfMaxDouble (double)(1 << kSomeNumberOfBitsDiv2)
#define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
60 
// Temporary workaround - when noParse is true, do nothing in parse.
// TODO: We need a real fix - see #6895/#6896
// NULL-terminated list of language codes; the URBNF_SPELLOUT constructor
// sets noParse when the locale's language matches one of these entries.
static const char *NO_SPELLOUT_PARSE_LANGUAGES[] = { "ga", NULL };
64 
65 U_NAMESPACE_BEGIN
66 
67 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
68 
69 /*
70 This is a utility class. It does not use ICU's RTTI.
71 If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
72 Please make sure that intltest passes on Windows in Release mode,
73 since the string pooling per compilation unit will mess up how RTTI works.
74 The RTTI code was also removed due to lack of code coverage.
75 */
76 class LocalizationInfo : public UMemory {
77 protected:
~LocalizationInfo()78     virtual ~LocalizationInfo() {}
79     uint32_t refcount;
80 
81 public:
LocalizationInfo()82     LocalizationInfo() : refcount(0) {}
83 
ref(void)84     LocalizationInfo* ref(void) {
85         ++refcount;
86         return this;
87     }
88 
unref(void)89     LocalizationInfo* unref(void) {
90         if (refcount && --refcount == 0) {
91             delete this;
92         }
93         return NULL;
94     }
95 
96     virtual UBool operator==(const LocalizationInfo* rhs) const;
operator !=(const LocalizationInfo * rhs) const97     inline  UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); }
98 
99     virtual int32_t getNumberOfRuleSets(void) const = 0;
100     virtual const UChar* getRuleSetName(int32_t index) const = 0;
101     virtual int32_t getNumberOfDisplayLocales(void) const = 0;
102     virtual const UChar* getLocaleName(int32_t index) const = 0;
103     virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0;
104 
105     virtual int32_t indexForLocale(const UChar* locale) const;
106     virtual int32_t indexForRuleSet(const UChar* ruleset) const;
107 
108 //    virtual UClassID getDynamicClassID() const = 0;
109 //    static UClassID getStaticClassID(void);
110 };
111 
112 //UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
113 
114 // if both strings are NULL, this returns TRUE
115 static UBool
streq(const UChar * lhs,const UChar * rhs)116 streq(const UChar* lhs, const UChar* rhs) {
117     if (rhs == lhs) {
118         return TRUE;
119     }
120     if (lhs && rhs) {
121         return u_strcmp(lhs, rhs) == 0;
122     }
123     return FALSE;
124 }
125 
126 UBool
operator ==(const LocalizationInfo * rhs) const127 LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
128     if (rhs) {
129         if (this == rhs) {
130             return TRUE;
131         }
132 
133         int32_t rsc = getNumberOfRuleSets();
134         if (rsc == rhs->getNumberOfRuleSets()) {
135             for (int i = 0; i < rsc; ++i) {
136                 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) {
137                     return FALSE;
138                 }
139             }
140             int32_t dlc = getNumberOfDisplayLocales();
141             if (dlc == rhs->getNumberOfDisplayLocales()) {
142                 for (int i = 0; i < dlc; ++i) {
143                     const UChar* locale = getLocaleName(i);
144                     int32_t ix = rhs->indexForLocale(locale);
145                     // if no locale, ix is -1, getLocaleName returns null, so streq returns false
146                     if (!streq(locale, rhs->getLocaleName(ix))) {
147                         return FALSE;
148                     }
149                     for (int j = 0; j < rsc; ++j) {
150                         if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) {
151                             return FALSE;
152                         }
153                     }
154                 }
155                 return TRUE;
156             }
157         }
158     }
159     return FALSE;
160 }
161 
162 int32_t
indexForLocale(const UChar * locale) const163 LocalizationInfo::indexForLocale(const UChar* locale) const {
164     for (int i = 0; i < getNumberOfDisplayLocales(); ++i) {
165         if (streq(locale, getLocaleName(i))) {
166             return i;
167         }
168     }
169     return -1;
170 }
171 
172 int32_t
indexForRuleSet(const UChar * ruleset) const173 LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
174     if (ruleset) {
175         for (int i = 0; i < getNumberOfRuleSets(); ++i) {
176             if (streq(ruleset, getRuleSetName(i))) {
177                 return i;
178             }
179         }
180     }
181     return -1;
182 }
183 
184 
185 typedef void (*Fn_Deleter)(void*);
186 
187 class VArray {
188     void** buf;
189     int32_t cap;
190     int32_t size;
191     Fn_Deleter deleter;
192 public:
VArray()193     VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {}
194 
VArray(Fn_Deleter del)195     VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {}
196 
~VArray()197     ~VArray() {
198         if (deleter) {
199             for (int i = 0; i < size; ++i) {
200                 (*deleter)(buf[i]);
201             }
202         }
203         uprv_free(buf);
204     }
205 
length()206     int32_t length() {
207         return size;
208     }
209 
add(void * elem,UErrorCode & status)210     void add(void* elem, UErrorCode& status) {
211         if (U_SUCCESS(status)) {
212             if (size == cap) {
213                 if (cap == 0) {
214                     cap = 1;
215                 } else if (cap < 256) {
216                     cap *= 2;
217                 } else {
218                     cap += 256;
219                 }
220                 if (buf == NULL) {
221                     buf = (void**)uprv_malloc(cap * sizeof(void*));
222                 } else {
223                     buf = (void**)uprv_realloc(buf, cap * sizeof(void*));
224                 }
225                 if (buf == NULL) {
226                     // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
227                     status = U_MEMORY_ALLOCATION_ERROR;
228                     return;
229                 }
230                 void* start = &buf[size];
231                 size_t count = (cap - size) * sizeof(void*);
232                 uprv_memset(start, 0, count); // fill with nulls, just because
233             }
234             buf[size++] = elem;
235         }
236     }
237 
release(void)238     void** release(void) {
239         void** result = buf;
240         buf = NULL;
241         cap = 0;
242         size = 0;
243         return result;
244     }
245 };
246 
247 class LocDataParser;
248 
249 class StringLocalizationInfo : public LocalizationInfo {
250     UChar* info;
251     UChar*** data;
252     int32_t numRuleSets;
253     int32_t numLocales;
254 
255 friend class LocDataParser;
256 
StringLocalizationInfo(UChar * i,UChar *** d,int32_t numRS,int32_t numLocs)257     StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs)
258         : info(i), data(d), numRuleSets(numRS), numLocales(numLocs)
259     {
260     }
261 
262 public:
263     static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);
264 
265     virtual ~StringLocalizationInfo();
getNumberOfRuleSets(void) const266     virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; }
267     virtual const UChar* getRuleSetName(int32_t index) const;
getNumberOfDisplayLocales(void) const268     virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; }
269     virtual const UChar* getLocaleName(int32_t index) const;
270     virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;
271 
272 //    virtual UClassID getDynamicClassID() const;
273 //    static UClassID getStaticClassID(void);
274 
275 private:
276     void init(UErrorCode& status) const;
277 };
278 
279 
// Code points with special meaning to LocDataParser.
enum {
    OPEN_ANGLE  = 0x003c, /* '<' */
    CLOSE_ANGLE = 0x003e, /* '>' */
    COMMA       = 0x002c, /* ',' */
    TICK        = 0x0027, /* '\'' */
    QUOTE       = 0x0022, /* '"' */
    SPACE       = 0x0020  /* ' ' */
};
288 
289 /**
290  * Utility for parsing a localization string and returning a StringLocalizationInfo*.
291  */
292 class LocDataParser {
293     UChar* data;
294     const UChar* e;
295     UChar* p;
296     UChar ch;
297     UParseError& pe;
298     UErrorCode& ec;
299 
300 public:
LocDataParser(UParseError & parseError,UErrorCode & status)301     LocDataParser(UParseError& parseError, UErrorCode& status)
302         : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {}
~LocDataParser()303     ~LocDataParser() {}
304 
305     /*
306     * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
307     * and return NULL.  The StringLocalizationInfo will adopt locData if it is created.
308     */
309     StringLocalizationInfo* parse(UChar* data, int32_t len);
310 
311 private:
312 
inc(void)313     void inc(void) { ++p; ch = 0xffff; }
checkInc(UChar c)314     UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; }
check(UChar c)315     UBool check(UChar c) { return p < e && (ch == c || *p == c); }
skipWhitespace(void)316     void skipWhitespace(void) { while (p < e && PatternProps::isWhiteSpace(ch != 0xffff ? ch : *p)) inc();}
inList(UChar c,const UChar * list) const317     UBool inList(UChar c, const UChar* list) const {
318         if (*list == SPACE && PatternProps::isWhiteSpace(c)) return TRUE;
319         while (*list && *list != c) ++list; return *list == c;
320     }
321     void parseError(const char* msg);
322 
323     StringLocalizationInfo* doParse(void);
324 
325     UChar** nextArray(int32_t& requiredLength);
326     UChar*  nextString(void);
327 };
328 
// Reports a parse error and returns NULL from the enclosing function.
// The message is only forwarded in DEBUG builds.  The body is wrapped in
// do { } while (0) so that the macro expands to a single statement and is
// safe inside an unbraced if/else.
#ifdef DEBUG
#define ERROR(msg) do { parseError(msg); return NULL; } while (0)
#else
#define ERROR(msg) do { parseError(NULL); return NULL; } while (0)
#endif
334 
335 
336 static const UChar DQUOTE_STOPLIST[] = {
337     QUOTE, 0
338 };
339 
340 static const UChar SQUOTE_STOPLIST[] = {
341     TICK, 0
342 };
343 
344 static const UChar NOQUOTE_STOPLIST[] = {
345     SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0
346 };
347 
348 static void
DeleteFn(void * p)349 DeleteFn(void* p) {
350   uprv_free(p);
351 }
352 
353 StringLocalizationInfo*
parse(UChar * _data,int32_t len)354 LocDataParser::parse(UChar* _data, int32_t len) {
355     if (U_FAILURE(ec)) {
356         if (_data) uprv_free(_data);
357         return NULL;
358     }
359 
360     pe.line = 0;
361     pe.offset = -1;
362     pe.postContext[0] = 0;
363     pe.preContext[0] = 0;
364 
365     if (_data == NULL) {
366         ec = U_ILLEGAL_ARGUMENT_ERROR;
367         return NULL;
368     }
369 
370     if (len <= 0) {
371         ec = U_ILLEGAL_ARGUMENT_ERROR;
372         uprv_free(_data);
373         return NULL;
374     }
375 
376     data = _data;
377     e = data + len;
378     p = _data;
379     ch = 0xffff;
380 
381     return doParse();
382 }
383 
384 
385 StringLocalizationInfo*
doParse(void)386 LocDataParser::doParse(void) {
387     skipWhitespace();
388     if (!checkInc(OPEN_ANGLE)) {
389         ERROR("Missing open angle");
390     } else {
391         VArray array(DeleteFn);
392         UBool mightHaveNext = TRUE;
393         int32_t requiredLength = -1;
394         while (mightHaveNext) {
395             mightHaveNext = FALSE;
396             UChar** elem = nextArray(requiredLength);
397             skipWhitespace();
398             UBool haveComma = check(COMMA);
399             if (elem) {
400                 array.add(elem, ec);
401                 if (haveComma) {
402                     inc();
403                     mightHaveNext = TRUE;
404                 }
405             } else if (haveComma) {
406                 ERROR("Unexpected character");
407             }
408         }
409 
410         skipWhitespace();
411         if (!checkInc(CLOSE_ANGLE)) {
412             if (check(OPEN_ANGLE)) {
413                 ERROR("Missing comma in outer array");
414             } else {
415                 ERROR("Missing close angle bracket in outer array");
416             }
417         }
418 
419         skipWhitespace();
420         if (p != e) {
421             ERROR("Extra text after close of localization data");
422         }
423 
424         array.add(NULL, ec);
425         if (U_SUCCESS(ec)) {
426             int32_t numLocs = array.length() - 2; // subtract first, NULL
427             UChar*** result = (UChar***)array.release();
428 
429             return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL
430         }
431     }
432 
433     ERROR("Unknown error");
434 }
435 
436 UChar**
nextArray(int32_t & requiredLength)437 LocDataParser::nextArray(int32_t& requiredLength) {
438     if (U_FAILURE(ec)) {
439         return NULL;
440     }
441 
442     skipWhitespace();
443     if (!checkInc(OPEN_ANGLE)) {
444         ERROR("Missing open angle");
445     }
446 
447     VArray array;
448     UBool mightHaveNext = TRUE;
449     while (mightHaveNext) {
450         mightHaveNext = FALSE;
451         UChar* elem = nextString();
452         skipWhitespace();
453         UBool haveComma = check(COMMA);
454         if (elem) {
455             array.add(elem, ec);
456             if (haveComma) {
457                 inc();
458                 mightHaveNext = TRUE;
459             }
460         } else if (haveComma) {
461             ERROR("Unexpected comma");
462         }
463     }
464     skipWhitespace();
465     if (!checkInc(CLOSE_ANGLE)) {
466         if (check(OPEN_ANGLE)) {
467             ERROR("Missing close angle bracket in inner array");
468         } else {
469             ERROR("Missing comma in inner array");
470         }
471     }
472 
473     array.add(NULL, ec);
474     if (U_SUCCESS(ec)) {
475         if (requiredLength == -1) {
476             requiredLength = array.length() + 1;
477         } else if (array.length() != requiredLength) {
478             ec = U_ILLEGAL_ARGUMENT_ERROR;
479             ERROR("Array not of required length");
480         }
481 
482         return (UChar**)array.release();
483     }
484     ERROR("Unknown Error");
485 }
486 
487 UChar*
nextString()488 LocDataParser::nextString() {
489     UChar* result = NULL;
490 
491     skipWhitespace();
492     if (p < e) {
493         const UChar* terminators;
494         UChar c = *p;
495         UBool haveQuote = c == QUOTE || c == TICK;
496         if (haveQuote) {
497             inc();
498             terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
499         } else {
500             terminators = NOQUOTE_STOPLIST;
501         }
502         UChar* start = p;
503         while (p < e && !inList(*p, terminators)) ++p;
504         if (p == e) {
505             ERROR("Unexpected end of data");
506         }
507 
508         UChar x = *p;
509         if (p > start) {
510             ch = x;
511             *p = 0x0; // terminate by writing to data
512             result = start; // just point into data
513         }
514         if (haveQuote) {
515             if (x != c) {
516                 ERROR("Missing matching quote");
517             } else if (p == start) {
518                 ERROR("Empty string");
519             }
520             inc();
521         } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) {
522             ERROR("Unexpected character in string");
523         }
524     }
525 
526     // ok for there to be no next string
527     return result;
528 }
529 
530 void
parseError(const char *)531 LocDataParser::parseError(const char* /*str*/) {
532     if (!data) {
533         return;
534     }
535 
536     const UChar* start = p - U_PARSE_CONTEXT_LEN - 1;
537     if (start < data) {
538         start = data;
539     }
540     for (UChar* x = p; --x >= start;) {
541         if (!*x) {
542             start = x+1;
543             break;
544         }
545     }
546     const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1;
547     if (limit > e) {
548         limit = e;
549     }
550     u_strncpy(pe.preContext, start, (int32_t)(p-start));
551     pe.preContext[p-start] = 0;
552     u_strncpy(pe.postContext, p, (int32_t)(limit-p));
553     pe.postContext[limit-p] = 0;
554     pe.offset = (int32_t)(p - data);
555 
556 #ifdef DEBUG
557     fprintf(stderr, "%s at or near character %d: ", str, p-data);
558 
559     UnicodeString msg;
560     msg.append(start, p - start);
561     msg.append((UChar)0x002f); /* SOLIDUS/SLASH */
562     msg.append(p, limit-p);
563     msg.append("'");
564 
565     char buf[128];
566     int32_t len = msg.extract(0, msg.length(), buf, 128);
567     if (len >= 128) {
568         buf[127] = 0;
569     } else {
570         buf[len] = 0;
571     }
572     fprintf(stderr, "%s\n", buf);
573     fflush(stderr);
574 #endif
575 
576     uprv_free(data);
577     data = NULL;
578     p = NULL;
579     e = NULL;
580 
581     if (U_SUCCESS(ec)) {
582         ec = U_PARSE_ERROR;
583     }
584 }
585 
586 //UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
587 
588 StringLocalizationInfo*
create(const UnicodeString & info,UParseError & perror,UErrorCode & status)589 StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
590     if (U_FAILURE(status)) {
591         return NULL;
592     }
593 
594     int32_t len = info.length();
595     if (len == 0) {
596         return NULL; // no error;
597     }
598 
599     UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar));
600     if (!p) {
601         status = U_MEMORY_ALLOCATION_ERROR;
602         return NULL;
603     }
604     info.extract(p, len, status);
605     if (!U_FAILURE(status)) {
606         status = U_ZERO_ERROR; // clear warning about non-termination
607     }
608 
609     LocDataParser parser(perror, status);
610     return parser.parse(p, len);
611 }
612 
~StringLocalizationInfo()613 StringLocalizationInfo::~StringLocalizationInfo() {
614     for (UChar*** p = (UChar***)data; *p; ++p) {
615         // remaining data is simply pointer into our unicode string data.
616         if (*p) uprv_free(*p);
617     }
618     if (data) uprv_free(data);
619     if (info) uprv_free(info);
620 }
621 
622 
623 const UChar*
getRuleSetName(int32_t index) const624 StringLocalizationInfo::getRuleSetName(int32_t index) const {
625     if (index >= 0 && index < getNumberOfRuleSets()) {
626         return data[0][index];
627     }
628     return NULL;
629 }
630 
631 const UChar*
getLocaleName(int32_t index) const632 StringLocalizationInfo::getLocaleName(int32_t index) const {
633     if (index >= 0 && index < getNumberOfDisplayLocales()) {
634         return data[index+1][0];
635     }
636     return NULL;
637 }
638 
639 const UChar*
getDisplayName(int32_t localeIndex,int32_t ruleIndex) const640 StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
641     if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() &&
642         ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) {
643         return data[localeIndex+1][ruleIndex+1];
644     }
645     return NULL;
646 }
647 
648 // ----------
649 
RuleBasedNumberFormat(const UnicodeString & description,const UnicodeString & locs,const Locale & alocale,UParseError & perror,UErrorCode & status)650 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
651                                              const UnicodeString& locs,
652                                              const Locale& alocale, UParseError& perror, UErrorCode& status)
653   : ruleSets(NULL)
654   , defaultRuleSet(NULL)
655   , locale(alocale)
656   , collator(NULL)
657   , decimalFormatSymbols(NULL)
658   , lenient(FALSE)
659   , lenientParseRules(NULL)
660   , localizations(NULL)
661   , noParse(FALSE) //TODO: to be removed after #6895
662 {
663   LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
664   init(description, locinfo, perror, status);
665 }
666 
RuleBasedNumberFormat(const UnicodeString & description,const UnicodeString & locs,UParseError & perror,UErrorCode & status)667 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
668                                              const UnicodeString& locs,
669                                              UParseError& perror, UErrorCode& status)
670   : ruleSets(NULL)
671   , defaultRuleSet(NULL)
672   , locale(Locale::getDefault())
673   , collator(NULL)
674   , decimalFormatSymbols(NULL)
675   , lenient(FALSE)
676   , lenientParseRules(NULL)
677   , localizations(NULL)
678   , noParse(FALSE) //TODO: to be removed after #6895
679 {
680   LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
681   init(description, locinfo, perror, status);
682 }
683 
RuleBasedNumberFormat(const UnicodeString & description,LocalizationInfo * info,const Locale & alocale,UParseError & perror,UErrorCode & status)684 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
685                                              LocalizationInfo* info,
686                                              const Locale& alocale, UParseError& perror, UErrorCode& status)
687   : ruleSets(NULL)
688   , defaultRuleSet(NULL)
689   , locale(alocale)
690   , collator(NULL)
691   , decimalFormatSymbols(NULL)
692   , lenient(FALSE)
693   , lenientParseRules(NULL)
694   , localizations(NULL)
695   , noParse(FALSE) //TODO: to be removed after #6895
696 {
697   init(description, info, perror, status);
698 }
699 
RuleBasedNumberFormat(const UnicodeString & description,UParseError & perror,UErrorCode & status)700 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
701                          UParseError& perror,
702                          UErrorCode& status)
703   : ruleSets(NULL)
704   , defaultRuleSet(NULL)
705   , locale(Locale::getDefault())
706   , collator(NULL)
707   , decimalFormatSymbols(NULL)
708   , lenient(FALSE)
709   , lenientParseRules(NULL)
710   , localizations(NULL)
711   , noParse(FALSE) //TODO: to be removed after #6895
712 {
713     init(description, NULL, perror, status);
714 }
715 
RuleBasedNumberFormat(const UnicodeString & description,const Locale & aLocale,UParseError & perror,UErrorCode & status)716 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
717                          const Locale& aLocale,
718                          UParseError& perror,
719                          UErrorCode& status)
720   : ruleSets(NULL)
721   , defaultRuleSet(NULL)
722   , locale(aLocale)
723   , collator(NULL)
724   , decimalFormatSymbols(NULL)
725   , lenient(FALSE)
726   , lenientParseRules(NULL)
727   , localizations(NULL)
728   , noParse(FALSE) //TODO: to be removed after #6895
729 {
730     init(description, NULL, perror, status);
731 }
732 
RuleBasedNumberFormat(URBNFRuleSetTag tag,const Locale & alocale,UErrorCode & status)733 RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
734   : ruleSets(NULL)
735   , defaultRuleSet(NULL)
736   , locale(alocale)
737   , collator(NULL)
738   , decimalFormatSymbols(NULL)
739   , lenient(FALSE)
740   , lenientParseRules(NULL)
741   , localizations(NULL)
742 {
743     if (U_FAILURE(status)) {
744         return;
745     }
746 
747     const char* rules_tag = "RBNFRules";
748     const char* fmt_tag = "";
749     switch (tag) {
750     case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
751     case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
752     case URBNF_DURATION: fmt_tag = "DurationRules"; break;
753     case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break;
754     default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
755     }
756 
757     // TODO: read localization info from resource
758     LocalizationInfo* locinfo = NULL;
759 
760     int32_t len = 0;
761     UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
762     if (U_SUCCESS(status)) {
763         setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
764                      ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));
765 
766         UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status);
767         if (U_FAILURE(status)) {
768             ures_close(nfrb);
769         }
770         UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status);
771         if (U_FAILURE(status)) {
772             ures_close(rbnfRules);
773             ures_close(nfrb);
774             return;
775         }
776 
777         UnicodeString desc;
778         while (ures_hasNext(ruleSets)) {
779            const UChar* currentString = ures_getNextString(ruleSets,&len,NULL,&status);
780            desc.append(currentString);
781         }
782         UParseError perror;
783 
784 
785         init (desc, locinfo, perror, status);
786 
787         //TODO: we need a real fix - see #6895 / #6896
788         noParse = FALSE;
789         if (tag == URBNF_SPELLOUT) {
790             const char *lang = alocale.getLanguage();
791             for (int32_t i = 0; NO_SPELLOUT_PARSE_LANGUAGES[i] != NULL; i++) {
792                 if (uprv_strcmp(lang, NO_SPELLOUT_PARSE_LANGUAGES[i]) == 0) {
793                     noParse = TRUE;
794                     break;
795                 }
796             }
797         }
798         //TODO: end
799 
800         ures_close(ruleSets);
801         ures_close(rbnfRules);
802     }
803     ures_close(nfrb);
804 }
805 
// Copy constructor.  Members start out empty, exactly as in the other
// constructors, and the actual copy is delegated to operator=.
RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
  : NumberFormat(rhs)
  , ruleSets(NULL)
  , defaultRuleSet(NULL)
  , locale(rhs.locale)
  , collator(NULL)
  , decimalFormatSymbols(NULL)
  , lenient(FALSE)
  , lenientParseRules(NULL)
  , localizations(NULL)
  , noParse(FALSE) // consistent with the other constructors; operator= copies the real value
{
    this->operator=(rhs);
}
819 
820 // --------
821 
822 RuleBasedNumberFormat&
operator =(const RuleBasedNumberFormat & rhs)823 RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
824 {
825     UErrorCode status = U_ZERO_ERROR;
826     dispose();
827     locale = rhs.locale;
828     lenient = rhs.lenient;
829 
830     UnicodeString rules = rhs.getRules();
831     UParseError perror;
832     init(rules, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
833 
834     //TODO: remove below when we fix the parse bug - See #6895 / #6896
835     noParse = rhs.noParse;
836 
837     return *this;
838 }
839 
~RuleBasedNumberFormat()840 RuleBasedNumberFormat::~RuleBasedNumberFormat()
841 {
842     dispose();
843 }
844 
845 Format*
clone(void) const846 RuleBasedNumberFormat::clone(void) const
847 {
848     RuleBasedNumberFormat * result = NULL;
849     UnicodeString rules = getRules();
850     UErrorCode status = U_ZERO_ERROR;
851     UParseError perror;
852     result = new RuleBasedNumberFormat(rules, localizations, locale, perror, status);
853     /* test for NULL */
854     if (result == 0) {
855         status = U_MEMORY_ALLOCATION_ERROR;
856         return 0;
857     }
858     if (U_FAILURE(status)) {
859         delete result;
860         result = 0;
861     } else {
862         result->lenient = lenient;
863 
864         //TODO: remove below when we fix the parse bug - See #6895 / #6896
865         result->noParse = noParse;
866     }
867     return result;
868 }
869 
870 UBool
operator ==(const Format & other) const871 RuleBasedNumberFormat::operator==(const Format& other) const
872 {
873     if (this == &other) {
874         return TRUE;
875     }
876 
877     if (typeid(*this) == typeid(other)) {
878         const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
879         if (locale == rhs.locale &&
880             lenient == rhs.lenient &&
881             (localizations == NULL
882                 ? rhs.localizations == NULL
883                 : (rhs.localizations == NULL
884                     ? FALSE
885                     : *localizations == rhs.localizations))) {
886 
887             NFRuleSet** p = ruleSets;
888             NFRuleSet** q = rhs.ruleSets;
889             if (p == NULL) {
890                 return q == NULL;
891             } else if (q == NULL) {
892                 return FALSE;
893             }
894             while (*p && *q && (**p == **q)) {
895                 ++p;
896                 ++q;
897             }
898             return *q == NULL && *p == NULL;
899         }
900     }
901 
902     return FALSE;
903 }
904 
905 UnicodeString
getRules() const906 RuleBasedNumberFormat::getRules() const
907 {
908     UnicodeString result;
909     if (ruleSets != NULL) {
910         for (NFRuleSet** p = ruleSets; *p; ++p) {
911             (*p)->appendRules(result);
912         }
913     }
914     return result;
915 }
916 
917 UnicodeString
getRuleSetName(int32_t index) const918 RuleBasedNumberFormat::getRuleSetName(int32_t index) const
919 {
920     if (localizations) {
921       UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1);
922       return string;
923     } else if (ruleSets) {
924         UnicodeString result;
925         for (NFRuleSet** p = ruleSets; *p; ++p) {
926             NFRuleSet* rs = *p;
927             if (rs->isPublic()) {
928                 if (--index == -1) {
929                     rs->getName(result);
930                     return result;
931                 }
932             }
933         }
934     }
935     UnicodeString empty;
936     return empty;
937 }
938 
939 int32_t
getNumberOfRuleSetNames() const940 RuleBasedNumberFormat::getNumberOfRuleSetNames() const
941 {
942     int32_t result = 0;
943     if (localizations) {
944       result = localizations->getNumberOfRuleSets();
945     } else if (ruleSets) {
946         for (NFRuleSet** p = ruleSets; *p; ++p) {
947             if ((**p).isPublic()) {
948                 ++result;
949             }
950         }
951     }
952     return result;
953 }
954 
955 int32_t
getNumberOfRuleSetDisplayNameLocales(void) const956 RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
957     if (localizations) {
958         return localizations->getNumberOfDisplayLocales();
959     }
960     return 0;
961 }
962 
963 Locale
getRuleSetDisplayNameLocale(int32_t index,UErrorCode & status) const964 RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
965     if (U_FAILURE(status)) {
966         return Locale("");
967     }
968     if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
969         UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
970         char buffer[64];
971         int32_t cap = name.length() + 1;
972         char* bp = buffer;
973         if (cap > 64) {
974             bp = (char *)uprv_malloc(cap);
975             if (bp == NULL) {
976                 status = U_MEMORY_ALLOCATION_ERROR;
977                 return Locale("");
978             }
979         }
980         name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
981         Locale retLocale(bp);
982         if (bp != buffer) {
983             uprv_free(bp);
984         }
985         return retLocale;
986     }
987     status = U_ILLEGAL_ARGUMENT_ERROR;
988     Locale retLocale;
989     return retLocale;
990 }
991 
992 UnicodeString
getRuleSetDisplayName(int32_t index,const Locale & localeParam)993 RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
994     if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) {
995         UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
996         int32_t len = localeName.length();
997         UChar* localeStr = localeName.getBuffer(len + 1);
998         while (len >= 0) {
999             localeStr[len] = 0;
1000             int32_t ix = localizations->indexForLocale(localeStr);
1001             if (ix >= 0) {
1002                 UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
1003                 return name;
1004             }
1005 
1006             // trim trailing portion, skipping over ommitted sections
1007             do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore
1008             while (len > 0 && localeStr[len-1] == 0x005F) --len;
1009         }
1010         UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
1011         return name;
1012     }
1013     UnicodeString bogus;
1014     bogus.setToBogus();
1015     return bogus;
1016 }
1017 
1018 UnicodeString
getRuleSetDisplayName(const UnicodeString & ruleSetName,const Locale & localeParam)1019 RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
1020     if (localizations) {
1021         UnicodeString rsn(ruleSetName);
1022         int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
1023         return getRuleSetDisplayName(ix, localeParam);
1024     }
1025     UnicodeString bogus;
1026     bogus.setToBogus();
1027     return bogus;
1028 }
1029 
1030 NFRuleSet*
findRuleSet(const UnicodeString & name,UErrorCode & status) const1031 RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
1032 {
1033     if (U_SUCCESS(status) && ruleSets) {
1034         for (NFRuleSet** p = ruleSets; *p; ++p) {
1035             NFRuleSet* rs = *p;
1036             if (rs->isNamed(name)) {
1037                 return rs;
1038             }
1039         }
1040         status = U_ILLEGAL_ARGUMENT_ERROR;
1041     }
1042     return NULL;
1043 }
1044 
1045 UnicodeString&
format(int32_t number,UnicodeString & toAppendTo,FieldPosition &) const1046 RuleBasedNumberFormat::format(int32_t number,
1047                               UnicodeString& toAppendTo,
1048                               FieldPosition& /* pos */) const
1049 {
1050     if (defaultRuleSet) defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length());
1051     return toAppendTo;
1052 }
1053 
1054 
1055 UnicodeString&
format(int64_t number,UnicodeString & toAppendTo,FieldPosition &) const1056 RuleBasedNumberFormat::format(int64_t number,
1057                               UnicodeString& toAppendTo,
1058                               FieldPosition& /* pos */) const
1059 {
1060     if (defaultRuleSet) defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
1061     return toAppendTo;
1062 }
1063 
1064 
1065 UnicodeString&
format(double number,UnicodeString & toAppendTo,FieldPosition &) const1066 RuleBasedNumberFormat::format(double number,
1067                               UnicodeString& toAppendTo,
1068                               FieldPosition& /* pos */) const
1069 {
1070     // Special case for NaN; adapted from what DecimalFormat::_format( double number,...) does.
1071     if (uprv_isNaN(number)) {
1072         DecimalFormatSymbols* decFmtSyms = getDecimalFormatSymbols(); // RuleBasedNumberFormat internal
1073         if (decFmtSyms) {
1074             toAppendTo += decFmtSyms->getConstSymbol(DecimalFormatSymbols::kNaNSymbol);
1075         }
1076     } else if (defaultRuleSet) {
1077         defaultRuleSet->format(number, toAppendTo, toAppendTo.length());
1078     }
1079     return toAppendTo;
1080 }
1081 
1082 
1083 UnicodeString&
format(int32_t number,const UnicodeString & ruleSetName,UnicodeString & toAppendTo,FieldPosition &,UErrorCode & status) const1084 RuleBasedNumberFormat::format(int32_t number,
1085                               const UnicodeString& ruleSetName,
1086                               UnicodeString& toAppendTo,
1087                               FieldPosition& /* pos */,
1088                               UErrorCode& status) const
1089 {
1090     // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
1091     if (U_SUCCESS(status)) {
1092         if (ruleSetName.indexOf(gPercentPercent) == 0) {
1093             // throw new IllegalArgumentException("Can't use internal rule set");
1094             status = U_ILLEGAL_ARGUMENT_ERROR;
1095         } else {
1096             NFRuleSet *rs = findRuleSet(ruleSetName, status);
1097             if (rs) {
1098                 rs->format((int64_t)number, toAppendTo, toAppendTo.length());
1099             }
1100         }
1101     }
1102     return toAppendTo;
1103 }
1104 
1105 
1106 UnicodeString&
format(int64_t number,const UnicodeString & ruleSetName,UnicodeString & toAppendTo,FieldPosition &,UErrorCode & status) const1107 RuleBasedNumberFormat::format(int64_t number,
1108                               const UnicodeString& ruleSetName,
1109                               UnicodeString& toAppendTo,
1110                               FieldPosition& /* pos */,
1111                               UErrorCode& status) const
1112 {
1113     if (U_SUCCESS(status)) {
1114         if (ruleSetName.indexOf(gPercentPercent) == 0) {
1115             // throw new IllegalArgumentException("Can't use internal rule set");
1116             status = U_ILLEGAL_ARGUMENT_ERROR;
1117         } else {
1118             NFRuleSet *rs = findRuleSet(ruleSetName, status);
1119             if (rs) {
1120                 rs->format(number, toAppendTo, toAppendTo.length());
1121             }
1122         }
1123     }
1124     return toAppendTo;
1125 }
1126 
1127 
1128 // make linker happy
1129 UnicodeString&
format(const Formattable & obj,UnicodeString & toAppendTo,FieldPosition & pos,UErrorCode & status) const1130 RuleBasedNumberFormat::format(const Formattable& obj,
1131                               UnicodeString& toAppendTo,
1132                               FieldPosition& pos,
1133                               UErrorCode& status) const
1134 {
1135     return NumberFormat::format(obj, toAppendTo, pos, status);
1136 }
1137 
1138 UnicodeString&
format(double number,const UnicodeString & ruleSetName,UnicodeString & toAppendTo,FieldPosition &,UErrorCode & status) const1139 RuleBasedNumberFormat::format(double number,
1140                               const UnicodeString& ruleSetName,
1141                               UnicodeString& toAppendTo,
1142                               FieldPosition& /* pos */,
1143                               UErrorCode& status) const
1144 {
1145     if (U_SUCCESS(status)) {
1146         if (ruleSetName.indexOf(gPercentPercent) == 0) {
1147             // throw new IllegalArgumentException("Can't use internal rule set");
1148             status = U_ILLEGAL_ARGUMENT_ERROR;
1149         } else {
1150             NFRuleSet *rs = findRuleSet(ruleSetName, status);
1151             if (rs) {
1152                 rs->format(number, toAppendTo, toAppendTo.length());
1153             }
1154         }
1155     }
1156     return toAppendTo;
1157 }
1158 
1159 void
parse(const UnicodeString & text,Formattable & result,ParsePosition & parsePosition) const1160 RuleBasedNumberFormat::parse(const UnicodeString& text,
1161                              Formattable& result,
1162                              ParsePosition& parsePosition) const
1163 {
1164     //TODO: We need a real fix.  See #6895 / #6896
1165     if (noParse) {
1166         // skip parsing
1167         parsePosition.setErrorIndex(0);
1168         return;
1169     }
1170 
1171     if (!ruleSets) {
1172         parsePosition.setErrorIndex(0);
1173         return;
1174     }
1175 
1176     UnicodeString workingText(text, parsePosition.getIndex());
1177     ParsePosition workingPos(0);
1178 
1179     ParsePosition high_pp(0);
1180     Formattable high_result;
1181 
1182     for (NFRuleSet** p = ruleSets; *p; ++p) {
1183         NFRuleSet *rp = *p;
1184         if (rp->isPublic() && rp->isParseable()) {
1185             ParsePosition working_pp(0);
1186             Formattable working_result;
1187 
1188             rp->parse(workingText, working_pp, kMaxDouble, working_result);
1189             if (working_pp.getIndex() > high_pp.getIndex()) {
1190                 high_pp = working_pp;
1191                 high_result = working_result;
1192 
1193                 if (high_pp.getIndex() == workingText.length()) {
1194                     break;
1195                 }
1196             }
1197         }
1198     }
1199 
1200     int32_t startIndex = parsePosition.getIndex();
1201     parsePosition.setIndex(startIndex + high_pp.getIndex());
1202     if (high_pp.getIndex() > 0) {
1203         parsePosition.setErrorIndex(-1);
1204     } else {
1205         int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0;
1206         parsePosition.setErrorIndex(startIndex + errorIndex);
1207     }
1208     result = high_result;
1209     if (result.getType() == Formattable::kDouble) {
1210         int32_t r = (int32_t)result.getDouble();
1211         if ((double)r == result.getDouble()) {
1212             result.setLong(r);
1213         }
1214     }
1215 }
1216 
1217 #if !UCONFIG_NO_COLLATION
1218 
1219 void
setLenient(UBool enabled)1220 RuleBasedNumberFormat::setLenient(UBool enabled)
1221 {
1222     lenient = enabled;
1223     if (!enabled && collator) {
1224         delete collator;
1225         collator = NULL;
1226     }
1227 }
1228 
1229 #endif
1230 
1231 void
setDefaultRuleSet(const UnicodeString & ruleSetName,UErrorCode & status)1232 RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
1233     if (U_SUCCESS(status)) {
1234         if (ruleSetName.isEmpty()) {
1235           if (localizations) {
1236               UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
1237               defaultRuleSet = findRuleSet(name, status);
1238           } else {
1239             initDefaultRuleSet();
1240           }
1241         } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
1242             status = U_ILLEGAL_ARGUMENT_ERROR;
1243         } else {
1244             NFRuleSet* result = findRuleSet(ruleSetName, status);
1245             if (result != NULL) {
1246                 defaultRuleSet = result;
1247             }
1248         }
1249     }
1250 }
1251 
1252 UnicodeString
getDefaultRuleSetName() const1253 RuleBasedNumberFormat::getDefaultRuleSetName() const {
1254   UnicodeString result;
1255   if (defaultRuleSet && defaultRuleSet->isPublic()) {
1256     defaultRuleSet->getName(result);
1257   } else {
1258     result.setToBogus();
1259   }
1260   return result;
1261 }
1262 
1263 void
initDefaultRuleSet()1264 RuleBasedNumberFormat::initDefaultRuleSet()
1265 {
1266     defaultRuleSet = NULL;
1267     if (!ruleSets) {
1268       return;
1269     }
1270 
1271     const UnicodeString spellout = UNICODE_STRING_SIMPLE("%spellout-numbering");
1272     const UnicodeString ordinal = UNICODE_STRING_SIMPLE("%digits-ordinal");
1273     const UnicodeString duration = UNICODE_STRING_SIMPLE("%duration");
1274 
1275     NFRuleSet**p = &ruleSets[0];
1276     while (*p) {
1277         if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) {
1278             defaultRuleSet = *p;
1279             return;
1280         } else {
1281             ++p;
1282         }
1283     }
1284 
1285     defaultRuleSet = *--p;
1286     if (!defaultRuleSet->isPublic()) {
1287         while (p != ruleSets) {
1288             if ((*--p)->isPublic()) {
1289                 defaultRuleSet = *p;
1290                 break;
1291             }
1292         }
1293     }
1294 }
1295 
1296 
1297 void
init(const UnicodeString & rules,LocalizationInfo * localizationInfos,UParseError & pErr,UErrorCode & status)1298 RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
1299                             UParseError& pErr, UErrorCode& status)
1300 {
1301     // TODO: implement UParseError
1302     uprv_memset(&pErr, 0, sizeof(UParseError));
1303     // Note: this can leave ruleSets == NULL, so remaining code should check
1304     if (U_FAILURE(status)) {
1305         return;
1306     }
1307 
1308     this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();
1309 
1310     UnicodeString description(rules);
1311     if (!description.length()) {
1312         status = U_MEMORY_ALLOCATION_ERROR;
1313         return;
1314     }
1315 
1316     // start by stripping the trailing whitespace from all the rules
1317     // (this is all the whitespace follwing each semicolon in the
1318     // description).  This allows us to look for rule-set boundaries
1319     // by searching for ";%" without having to worry about whitespace
1320     // between the ; and the %
1321     stripWhitespace(description);
1322 
1323     // check to see if there's a set of lenient-parse rules.  If there
1324     // is, pull them out into our temporary holding place for them,
1325     // and delete them from the description before the real desciption-
1326     // parsing code sees them
1327     int32_t lp = description.indexOf(gLenientParse);
1328     if (lp != -1) {
1329         // we've got to make sure we're not in the middle of a rule
1330         // (where "%%lenient-parse" would actually get treated as
1331         // rule text)
1332         if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
1333             // locate the beginning and end of the actual collation
1334             // rules (there may be whitespace between the name and
1335             // the first token in the description)
1336             int lpEnd = description.indexOf(gSemiPercent, lp);
1337 
1338             if (lpEnd == -1) {
1339                 lpEnd = description.length() - 1;
1340             }
1341             int lpStart = lp + u_strlen(gLenientParse);
1342             while (PatternProps::isWhiteSpace(description.charAt(lpStart))) {
1343                 ++lpStart;
1344             }
1345 
1346             // copy out the lenient-parse rules and delete them
1347             // from the description
1348             lenientParseRules = new UnicodeString();
1349             /* test for NULL */
1350             if (lenientParseRules == 0) {
1351                 status = U_MEMORY_ALLOCATION_ERROR;
1352                 return;
1353             }
1354             lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);
1355 
1356             description.remove(lp, lpEnd + 1 - lp);
1357         }
1358     }
1359 
1360     // pre-flight parsing the description and count the number of
1361     // rule sets (";%" marks the end of one rule set and the beginning
1362     // of the next)
1363     int numRuleSets = 0;
1364     for (int32_t p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, p)) {
1365         ++numRuleSets;
1366         ++p;
1367     }
1368     ++numRuleSets;
1369 
1370     // our rule list is an array of the appropriate size
1371     ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *));
1372     /* test for NULL */
1373     if (ruleSets == 0) {
1374         status = U_MEMORY_ALLOCATION_ERROR;
1375         return;
1376     }
1377 
1378     for (int i = 0; i <= numRuleSets; ++i) {
1379         ruleSets[i] = NULL;
1380     }
1381 
1382     // divide up the descriptions into individual rule-set descriptions
1383     // and store them in a temporary array.  At each step, we also
1384     // new up a rule set, but all this does is initialize its name
1385     // and remove it from its description.  We can't actually parse
1386     // the rest of the descriptions and finish initializing everything
1387     // because we have to know the names and locations of all the rule
1388     // sets before we can actually set everything up
1389     if(!numRuleSets) {
1390         status = U_ILLEGAL_ARGUMENT_ERROR;
1391         return;
1392     }
1393     UnicodeString* ruleSetDescriptions = new UnicodeString[numRuleSets];
1394     if (ruleSetDescriptions == 0) {
1395         status = U_MEMORY_ALLOCATION_ERROR;
1396         return;
1397     }
1398 
1399     {
1400         int curRuleSet = 0;
1401         int32_t start = 0;
1402         for (int32_t p = description.indexOf(gSemiPercent); p != -1; p = description.indexOf(gSemiPercent, start)) {
1403             ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
1404             ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
1405             if (ruleSets[curRuleSet] == 0) {
1406                 status = U_MEMORY_ALLOCATION_ERROR;
1407                 goto cleanup;
1408             }
1409             ++curRuleSet;
1410             start = p + 1;
1411         }
1412         ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
1413         ruleSets[curRuleSet] = new NFRuleSet(ruleSetDescriptions, curRuleSet, status);
1414         if (ruleSets[curRuleSet] == 0) {
1415             status = U_MEMORY_ALLOCATION_ERROR;
1416             goto cleanup;
1417         }
1418     }
1419 
1420     // now we can take note of the formatter's default rule set, which
1421     // is the last public rule set in the description (it's the last
1422     // rather than the first so that a user can create a new formatter
1423     // from an existing formatter and change its default behavior just
1424     // by appending more rule sets to the end)
1425 
1426     // {dlf} Initialization of a fraction rule set requires the default rule
1427     // set to be known.  For purposes of initialization, this is always the
1428     // last public rule set, no matter what the localization data says.
1429     initDefaultRuleSet();
1430 
1431     // finally, we can go back through the temporary descriptions
1432     // list and finish seting up the substructure (and we throw
1433     // away the temporary descriptions as we go)
1434     {
1435         for (int i = 0; i < numRuleSets; i++) {
1436             ruleSets[i]->parseRules(ruleSetDescriptions[i], this, status);
1437         }
1438     }
1439 
1440     // Now that the rules are initialized, the 'real' default rule
1441     // set can be adjusted by the localization data.
1442 
1443     // The C code keeps the localization array as is, rather than building
1444     // a separate array of the public rule set names, so we have less work
1445     // to do here-- but we still need to check the names.
1446 
1447     if (localizationInfos) {
1448         // confirm the names, if any aren't in the rules, that's an error
1449         // it is ok if the rules contain public rule sets that are not in this list
1450         for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) {
1451             UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1);
1452             NFRuleSet* rs = findRuleSet(name, status);
1453             if (rs == NULL) {
1454                 break; // error
1455             }
1456             if (i == 0) {
1457                 defaultRuleSet = rs;
1458             }
1459         }
1460     } else {
1461         defaultRuleSet = getDefaultRuleSet();
1462     }
1463 
1464 cleanup:
1465     delete[] ruleSetDescriptions;
1466 }
1467 
1468 void
stripWhitespace(UnicodeString & description)1469 RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
1470 {
1471     // iterate through the characters...
1472     UnicodeString result;
1473 
1474     int start = 0;
1475     while (start != -1 && start < description.length()) {
1476         // seek to the first non-whitespace character...
1477         while (start < description.length()
1478             && PatternProps::isWhiteSpace(description.charAt(start))) {
1479             ++start;
1480         }
1481 
1482         // locate the next semicolon in the text and copy the text from
1483         // our current position up to that semicolon into the result
1484         int32_t p = description.indexOf(gSemiColon, start);
1485         if (p == -1) {
1486             // or if we don't find a semicolon, just copy the rest of
1487             // the string into the result
1488             result.append(description, start, description.length() - start);
1489             start = -1;
1490         }
1491         else if (p < description.length()) {
1492             result.append(description, start, p + 1 - start);
1493             start = p + 1;
1494         }
1495 
1496         // when we get here, we've seeked off the end of the sring, and
1497         // we terminate the loop (we continue until *start* is -1 rather
1498         // than until *p* is -1, because otherwise we'd miss the last
1499         // rule in the description)
1500         else {
1501             start = -1;
1502         }
1503     }
1504 
1505     description.setTo(result);
1506 }
1507 
1508 
1509 void
dispose()1510 RuleBasedNumberFormat::dispose()
1511 {
1512     if (ruleSets) {
1513         for (NFRuleSet** p = ruleSets; *p; ++p) {
1514             delete *p;
1515         }
1516         uprv_free(ruleSets);
1517         ruleSets = NULL;
1518     }
1519 
1520 #if !UCONFIG_NO_COLLATION
1521     delete collator;
1522 #endif
1523     collator = NULL;
1524 
1525     delete decimalFormatSymbols;
1526     decimalFormatSymbols = NULL;
1527 
1528     delete lenientParseRules;
1529     lenientParseRules = NULL;
1530 
1531     if (localizations) localizations = localizations->unref();
1532 }
1533 
1534 
1535 //-----------------------------------------------------------------------
1536 // package-internal API
1537 //-----------------------------------------------------------------------
1538 
1539 /**
1540  * Returns the collator to use for lenient parsing.  The collator is lazily created:
1541  * this function creates it the first time it's called.
1542  * @return The collator to use for lenient parsing, or null if lenient parsing
1543  * is turned off.
1544 */
1545 Collator*
getCollator() const1546 RuleBasedNumberFormat::getCollator() const
1547 {
1548 #if !UCONFIG_NO_COLLATION
1549     if (!ruleSets) {
1550         return NULL;
1551     }
1552 
1553     // lazy-evaulate the collator
1554     if (collator == NULL && lenient) {
1555         // create a default collator based on the formatter's locale,
1556         // then pull out that collator's rules, append any additional
1557         // rules specified in the description, and create a _new_
1558         // collator based on the combinaiton of those rules
1559 
1560         UErrorCode status = U_ZERO_ERROR;
1561 
1562         Collator* temp = Collator::createInstance(locale, status);
1563         RuleBasedCollator* newCollator;
1564         if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) {
1565             if (lenientParseRules) {
1566                 UnicodeString rules(newCollator->getRules());
1567                 rules.append(*lenientParseRules);
1568 
1569                 newCollator = new RuleBasedCollator(rules, status);
1570                 // Exit if newCollator could not be created.
1571                 if (newCollator == NULL) {
1572                 	return NULL;
1573                 }
1574             } else {
1575                 temp = NULL;
1576             }
1577             if (U_SUCCESS(status)) {
1578                 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
1579                 // cast away const
1580                 ((RuleBasedNumberFormat*)this)->collator = newCollator;
1581             } else {
1582                 delete newCollator;
1583             }
1584         }
1585         delete temp;
1586     }
1587 #endif
1588 
1589     // if lenient-parse mode is off, this will be null
1590     // (see setLenientParseMode())
1591     return collator;
1592 }
1593 
1594 
1595 /**
1596  * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
1597  * instances owned by this formatter.  This object is lazily created: this function
1598  * creates it the first time it's called.
1599  * @return The DecimalFormatSymbols object that should be used by all DecimalFormat
1600  * instances owned by this formatter.
1601 */
1602 DecimalFormatSymbols*
getDecimalFormatSymbols() const1603 RuleBasedNumberFormat::getDecimalFormatSymbols() const
1604 {
1605     // lazy-evaluate the DecimalFormatSymbols object.  This object
1606     // is shared by all DecimalFormat instances belonging to this
1607     // formatter
1608     if (decimalFormatSymbols == NULL) {
1609         UErrorCode status = U_ZERO_ERROR;
1610         DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status);
1611         if (U_SUCCESS(status)) {
1612             ((RuleBasedNumberFormat*)this)->decimalFormatSymbols = temp;
1613         } else {
1614             delete temp;
1615         }
1616     }
1617     return decimalFormatSymbols;
1618 }
1619 
1620 U_NAMESPACE_END
1621 
1622 /* U_HAVE_RBNF */
1623 #endif
1624