1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 1997-2015, International Business Machines Corporation
6 * and others. All Rights Reserved.
7 *******************************************************************************
8 */
9
10 #include "unicode/utypes.h"
11 #include "utypeinfo.h" // for 'typeid' to work
12
13 #include "unicode/rbnf.h"
14
15 #if U_HAVE_RBNF
16
17 #include "unicode/normlzr.h"
18 #include "unicode/plurfmt.h"
19 #include "unicode/tblcoll.h"
20 #include "unicode/uchar.h"
21 #include "unicode/ucol.h"
22 #include "unicode/uloc.h"
23 #include "unicode/unum.h"
24 #include "unicode/ures.h"
25 #include "unicode/ustring.h"
26 #include "unicode/utf16.h"
27 #include "unicode/udata.h"
28 #include "unicode/udisplaycontext.h"
29 #include "unicode/brkiter.h"
30 #include "nfrs.h"
31
32 #include "cmemory.h"
33 #include "cstring.h"
34 #include "patternprops.h"
35 #include "uresimp.h"
36
37 // debugging
38 // #define RBNF_DEBUG
39
40 #ifdef RBNF_DEBUG
41 #include <stdio.h>
42 #endif
43
44 #define U_ICUDATA_RBNF U_ICUDATA_NAME U_TREE_SEPARATOR_STRING "rbnf"
45
46 static const UChar gPercentPercent[] =
47 {
48 0x25, 0x25, 0
49 }; /* "%%" */
50
51 // All urbnf objects are created through openRules, so we init all of the
52 // Unicode string constants required by rbnf, nfrs, or nfr here.
53 static const UChar gLenientParse[] =
54 {
55 0x25, 0x25, 0x6C, 0x65, 0x6E, 0x69, 0x65, 0x6E, 0x74, 0x2D, 0x70, 0x61, 0x72, 0x73, 0x65, 0x3A, 0
56 }; /* "%%lenient-parse:" */
57 static const UChar gSemiColon = 0x003B;
58 static const UChar gSemiPercent[] =
59 {
60 0x3B, 0x25, 0
61 }; /* ";%" */
62
// kMaxDouble is 2^44. It is formed as the square of 2^22 so that the integer
// shift stays well inside the range of a 32-bit int.
#define kSomeNumberOfBitsDiv2 22
#define kHalfMaxDouble ((double)(1 << kSomeNumberOfBitsDiv2))
#define kMaxDouble (kHalfMaxDouble * kHalfMaxDouble)
66
67 U_NAMESPACE_BEGIN
68
69 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedNumberFormat)
70
71 /*
72 This is a utility class. It does not use ICU's RTTI.
73 If ICU's RTTI is needed again, you can uncomment the RTTI code and derive from UObject.
74 Please make sure that intltest passes on Windows in Release mode,
75 since the string pooling per compilation unit will mess up how RTTI works.
76 The RTTI code was also removed due to lack of code coverage.
77 */
78 class LocalizationInfo : public UMemory {
79 protected:
80 virtual ~LocalizationInfo();
81 uint32_t refcount;
82
83 public:
LocalizationInfo()84 LocalizationInfo() : refcount(0) {}
85
ref(void)86 LocalizationInfo* ref(void) {
87 ++refcount;
88 return this;
89 }
90
unref(void)91 LocalizationInfo* unref(void) {
92 if (refcount && --refcount == 0) {
93 delete this;
94 }
95 return NULL;
96 }
97
98 virtual UBool operator==(const LocalizationInfo* rhs) const;
operator !=(const LocalizationInfo * rhs) const99 inline UBool operator!=(const LocalizationInfo* rhs) const { return !operator==(rhs); }
100
101 virtual int32_t getNumberOfRuleSets(void) const = 0;
102 virtual const UChar* getRuleSetName(int32_t index) const = 0;
103 virtual int32_t getNumberOfDisplayLocales(void) const = 0;
104 virtual const UChar* getLocaleName(int32_t index) const = 0;
105 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const = 0;
106
107 virtual int32_t indexForLocale(const UChar* locale) const;
108 virtual int32_t indexForRuleSet(const UChar* ruleset) const;
109
110 // virtual UClassID getDynamicClassID() const = 0;
111 // static UClassID getStaticClassID(void);
112 };
113
~LocalizationInfo()114 LocalizationInfo::~LocalizationInfo() {}
115
116 //UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(LocalizationInfo)
117
118 // if both strings are NULL, this returns TRUE
119 static UBool
streq(const UChar * lhs,const UChar * rhs)120 streq(const UChar* lhs, const UChar* rhs) {
121 if (rhs == lhs) {
122 return TRUE;
123 }
124 if (lhs && rhs) {
125 return u_strcmp(lhs, rhs) == 0;
126 }
127 return FALSE;
128 }
129
130 UBool
operator ==(const LocalizationInfo * rhs) const131 LocalizationInfo::operator==(const LocalizationInfo* rhs) const {
132 if (rhs) {
133 if (this == rhs) {
134 return TRUE;
135 }
136
137 int32_t rsc = getNumberOfRuleSets();
138 if (rsc == rhs->getNumberOfRuleSets()) {
139 for (int i = 0; i < rsc; ++i) {
140 if (!streq(getRuleSetName(i), rhs->getRuleSetName(i))) {
141 return FALSE;
142 }
143 }
144 int32_t dlc = getNumberOfDisplayLocales();
145 if (dlc == rhs->getNumberOfDisplayLocales()) {
146 for (int i = 0; i < dlc; ++i) {
147 const UChar* locale = getLocaleName(i);
148 int32_t ix = rhs->indexForLocale(locale);
149 // if no locale, ix is -1, getLocaleName returns null, so streq returns false
150 if (!streq(locale, rhs->getLocaleName(ix))) {
151 return FALSE;
152 }
153 for (int j = 0; j < rsc; ++j) {
154 if (!streq(getDisplayName(i, j), rhs->getDisplayName(ix, j))) {
155 return FALSE;
156 }
157 }
158 }
159 return TRUE;
160 }
161 }
162 }
163 return FALSE;
164 }
165
166 int32_t
indexForLocale(const UChar * locale) const167 LocalizationInfo::indexForLocale(const UChar* locale) const {
168 for (int i = 0; i < getNumberOfDisplayLocales(); ++i) {
169 if (streq(locale, getLocaleName(i))) {
170 return i;
171 }
172 }
173 return -1;
174 }
175
176 int32_t
indexForRuleSet(const UChar * ruleset) const177 LocalizationInfo::indexForRuleSet(const UChar* ruleset) const {
178 if (ruleset) {
179 for (int i = 0; i < getNumberOfRuleSets(); ++i) {
180 if (streq(ruleset, getRuleSetName(i))) {
181 return i;
182 }
183 }
184 }
185 return -1;
186 }
187
188
189 typedef void (*Fn_Deleter)(void*);
190
191 class VArray {
192 void** buf;
193 int32_t cap;
194 int32_t size;
195 Fn_Deleter deleter;
196 public:
VArray()197 VArray() : buf(NULL), cap(0), size(0), deleter(NULL) {}
198
VArray(Fn_Deleter del)199 VArray(Fn_Deleter del) : buf(NULL), cap(0), size(0), deleter(del) {}
200
~VArray()201 ~VArray() {
202 if (deleter) {
203 for (int i = 0; i < size; ++i) {
204 (*deleter)(buf[i]);
205 }
206 }
207 uprv_free(buf);
208 }
209
length()210 int32_t length() {
211 return size;
212 }
213
add(void * elem,UErrorCode & status)214 void add(void* elem, UErrorCode& status) {
215 if (U_SUCCESS(status)) {
216 if (size == cap) {
217 if (cap == 0) {
218 cap = 1;
219 } else if (cap < 256) {
220 cap *= 2;
221 } else {
222 cap += 256;
223 }
224 if (buf == NULL) {
225 buf = (void**)uprv_malloc(cap * sizeof(void*));
226 } else {
227 buf = (void**)uprv_realloc(buf, cap * sizeof(void*));
228 }
229 if (buf == NULL) {
230 // if we couldn't realloc, we leak the memory we've already allocated, but we're in deep trouble anyway
231 status = U_MEMORY_ALLOCATION_ERROR;
232 return;
233 }
234 void* start = &buf[size];
235 size_t count = (cap - size) * sizeof(void*);
236 uprv_memset(start, 0, count); // fill with nulls, just because
237 }
238 buf[size++] = elem;
239 }
240 }
241
release(void)242 void** release(void) {
243 void** result = buf;
244 buf = NULL;
245 cap = 0;
246 size = 0;
247 return result;
248 }
249 };
250
251 class LocDataParser;
252
253 class StringLocalizationInfo : public LocalizationInfo {
254 UChar* info;
255 UChar*** data;
256 int32_t numRuleSets;
257 int32_t numLocales;
258
259 friend class LocDataParser;
260
StringLocalizationInfo(UChar * i,UChar *** d,int32_t numRS,int32_t numLocs)261 StringLocalizationInfo(UChar* i, UChar*** d, int32_t numRS, int32_t numLocs)
262 : info(i), data(d), numRuleSets(numRS), numLocales(numLocs)
263 {
264 }
265
266 public:
267 static StringLocalizationInfo* create(const UnicodeString& info, UParseError& perror, UErrorCode& status);
268
269 virtual ~StringLocalizationInfo();
getNumberOfRuleSets(void) const270 virtual int32_t getNumberOfRuleSets(void) const { return numRuleSets; }
271 virtual const UChar* getRuleSetName(int32_t index) const;
getNumberOfDisplayLocales(void) const272 virtual int32_t getNumberOfDisplayLocales(void) const { return numLocales; }
273 virtual const UChar* getLocaleName(int32_t index) const;
274 virtual const UChar* getDisplayName(int32_t localeIndex, int32_t ruleIndex) const;
275
276 // virtual UClassID getDynamicClassID() const;
277 // static UClassID getStaticClassID(void);
278
279 private:
280 void init(UErrorCode& status) const;
281 };
282
283
// Punctuation recognized by the localization-data parser, as UTF-16 code units.
enum {
    SPACE       = 0x0020, /* ' ' */
    QUOTE       = 0x0022, /* '"' */
    TICK        = 0x0027, /* '\'' */
    COMMA       = 0x002C, /* ',' */
    OPEN_ANGLE  = 0x003C, /* '<' */
    CLOSE_ANGLE = 0x003E  /* '>' */
};
292
293 /**
294 * Utility for parsing a localization string and returning a StringLocalizationInfo*.
295 */
296 class LocDataParser {
297 UChar* data;
298 const UChar* e;
299 UChar* p;
300 UChar ch;
301 UParseError& pe;
302 UErrorCode& ec;
303
304 public:
LocDataParser(UParseError & parseError,UErrorCode & status)305 LocDataParser(UParseError& parseError, UErrorCode& status)
306 : data(NULL), e(NULL), p(NULL), ch(0xffff), pe(parseError), ec(status) {}
~LocDataParser()307 ~LocDataParser() {}
308
309 /*
310 * On a successful parse, return a StringLocalizationInfo*, otherwise delete locData, set perror and status,
311 * and return NULL. The StringLocalizationInfo will adopt locData if it is created.
312 */
313 StringLocalizationInfo* parse(UChar* data, int32_t len);
314
315 private:
316
inc(void)317 void inc(void) { ++p; ch = 0xffff; }
checkInc(UChar c)318 UBool checkInc(UChar c) { if (p < e && (ch == c || *p == c)) { inc(); return TRUE; } return FALSE; }
check(UChar c)319 UBool check(UChar c) { return p < e && (ch == c || *p == c); }
skipWhitespace(void)320 void skipWhitespace(void) { while (p < e && PatternProps::isWhiteSpace(ch != 0xffff ? ch : *p)) inc();}
inList(UChar c,const UChar * list) const321 UBool inList(UChar c, const UChar* list) const {
322 if (*list == SPACE && PatternProps::isWhiteSpace(c)) return TRUE;
323 while (*list && *list != c) ++list; return *list == c;
324 }
325 void parseError(const char* msg);
326
327 StringLocalizationInfo* doParse(void);
328
329 UChar** nextArray(int32_t& requiredLength);
330 UChar* nextString(void);
331 };
332
// ERROR(msg): record a parse error at the current position and return NULL
// from the enclosing function. The explanation text is only used in
// RBNF_DEBUG builds.
//
// The expansion is wrapped in do { } while (0) so that it behaves as a single
// statement. Without the wrapper, `if (cond) ERROR("...");` would execute the
// `return NULL` unconditionally.
#ifdef RBNF_DEBUG
#define ERROR(msg) do { parseError(msg); return NULL; } while (0)
#define EXPLANATION_ARG explanationArg
#else
#define ERROR(msg) do { parseError(NULL); return NULL; } while (0)
#define EXPLANATION_ARG
#endif
340
341
342 static const UChar DQUOTE_STOPLIST[] = {
343 QUOTE, 0
344 };
345
346 static const UChar SQUOTE_STOPLIST[] = {
347 TICK, 0
348 };
349
350 static const UChar NOQUOTE_STOPLIST[] = {
351 SPACE, COMMA, CLOSE_ANGLE, OPEN_ANGLE, TICK, QUOTE, 0
352 };
353
354 static void
DeleteFn(void * p)355 DeleteFn(void* p) {
356 uprv_free(p);
357 }
358
359 StringLocalizationInfo*
parse(UChar * _data,int32_t len)360 LocDataParser::parse(UChar* _data, int32_t len) {
361 if (U_FAILURE(ec)) {
362 if (_data) uprv_free(_data);
363 return NULL;
364 }
365
366 pe.line = 0;
367 pe.offset = -1;
368 pe.postContext[0] = 0;
369 pe.preContext[0] = 0;
370
371 if (_data == NULL) {
372 ec = U_ILLEGAL_ARGUMENT_ERROR;
373 return NULL;
374 }
375
376 if (len <= 0) {
377 ec = U_ILLEGAL_ARGUMENT_ERROR;
378 uprv_free(_data);
379 return NULL;
380 }
381
382 data = _data;
383 e = data + len;
384 p = _data;
385 ch = 0xffff;
386
387 return doParse();
388 }
389
390
391 StringLocalizationInfo*
doParse(void)392 LocDataParser::doParse(void) {
393 skipWhitespace();
394 if (!checkInc(OPEN_ANGLE)) {
395 ERROR("Missing open angle");
396 } else {
397 VArray array(DeleteFn);
398 UBool mightHaveNext = TRUE;
399 int32_t requiredLength = -1;
400 while (mightHaveNext) {
401 mightHaveNext = FALSE;
402 UChar** elem = nextArray(requiredLength);
403 skipWhitespace();
404 UBool haveComma = check(COMMA);
405 if (elem) {
406 array.add(elem, ec);
407 if (haveComma) {
408 inc();
409 mightHaveNext = TRUE;
410 }
411 } else if (haveComma) {
412 ERROR("Unexpected character");
413 }
414 }
415
416 skipWhitespace();
417 if (!checkInc(CLOSE_ANGLE)) {
418 if (check(OPEN_ANGLE)) {
419 ERROR("Missing comma in outer array");
420 } else {
421 ERROR("Missing close angle bracket in outer array");
422 }
423 }
424
425 skipWhitespace();
426 if (p != e) {
427 ERROR("Extra text after close of localization data");
428 }
429
430 array.add(NULL, ec);
431 if (U_SUCCESS(ec)) {
432 int32_t numLocs = array.length() - 2; // subtract first, NULL
433 UChar*** result = (UChar***)array.release();
434
435 return new StringLocalizationInfo(data, result, requiredLength-2, numLocs); // subtract first, NULL
436 }
437 }
438
439 ERROR("Unknown error");
440 }
441
442 UChar**
nextArray(int32_t & requiredLength)443 LocDataParser::nextArray(int32_t& requiredLength) {
444 if (U_FAILURE(ec)) {
445 return NULL;
446 }
447
448 skipWhitespace();
449 if (!checkInc(OPEN_ANGLE)) {
450 ERROR("Missing open angle");
451 }
452
453 VArray array;
454 UBool mightHaveNext = TRUE;
455 while (mightHaveNext) {
456 mightHaveNext = FALSE;
457 UChar* elem = nextString();
458 skipWhitespace();
459 UBool haveComma = check(COMMA);
460 if (elem) {
461 array.add(elem, ec);
462 if (haveComma) {
463 inc();
464 mightHaveNext = TRUE;
465 }
466 } else if (haveComma) {
467 ERROR("Unexpected comma");
468 }
469 }
470 skipWhitespace();
471 if (!checkInc(CLOSE_ANGLE)) {
472 if (check(OPEN_ANGLE)) {
473 ERROR("Missing close angle bracket in inner array");
474 } else {
475 ERROR("Missing comma in inner array");
476 }
477 }
478
479 array.add(NULL, ec);
480 if (U_SUCCESS(ec)) {
481 if (requiredLength == -1) {
482 requiredLength = array.length() + 1;
483 } else if (array.length() != requiredLength) {
484 ec = U_ILLEGAL_ARGUMENT_ERROR;
485 ERROR("Array not of required length");
486 }
487
488 return (UChar**)array.release();
489 }
490 ERROR("Unknown Error");
491 }
492
// Reads the next string at the current position, if there is one.
//
// A string is either quoted ("..." or '...', ended only by the matching
// quote) or unquoted (ended by any character in NOQUOTE_STOPLIST). The string
// is NUL-terminated in place inside the buffer and a pointer into the buffer
// is returned; nothing is allocated.
//
// Returns NULL without an error when no string starts here (end of input, or
// an unquoted string of length zero). Returns NULL after calling parseError()
// for: input ending before a terminator, an empty quoted string, or an
// unquoted string that runs straight into '<', a tick or a double quote.
UChar*
LocDataParser::nextString() {
    UChar* result = NULL;

    skipWhitespace();
    if (p < e) {
        const UChar* terminators;
        UChar c = *p;
        UBool haveQuote = c == QUOTE || c == TICK;
        if (haveQuote) {
            // skip the opening quote; only the same quote character closes the string
            inc();
            terminators = c == QUOTE ? DQUOTE_STOPLIST : SQUOTE_STOPLIST;
        } else {
            terminators = NOQUOTE_STOPLIST;
        }
        UChar* start = p;
        // scan to the first terminator (p is advanced directly, ch is not touched)
        while (p < e && !inList(*p, terminators)) ++p;
        if (p == e) {
            ERROR("Unexpected end of data");
        }

        UChar x = *p; // the terminator that stopped the scan
        if (p > start) {
            // remember the terminator in ch before overwriting it, so that
            // check()/skipWhitespace() still see it
            ch = x;
            *p = 0x0; // terminate by writing to data
            result = start; // just point into data
        }
        if (haveQuote) {
            if (x != c) {
                ERROR("Missing matching quote");
            } else if (p == start) {
                ERROR("Empty string");
            }
            // step past the closing quote (this also clears ch)
            inc();
        } else if (x == OPEN_ANGLE || x == TICK || x == QUOTE) {
            ERROR("Unexpected character in string");
        }
    }

    // ok for there to be no next string
    return result;
}
535
parseError(const char * EXPLANATION_ARG)536 void LocDataParser::parseError(const char* EXPLANATION_ARG)
537 {
538 if (!data) {
539 return;
540 }
541
542 const UChar* start = p - U_PARSE_CONTEXT_LEN - 1;
543 if (start < data) {
544 start = data;
545 }
546 for (UChar* x = p; --x >= start;) {
547 if (!*x) {
548 start = x+1;
549 break;
550 }
551 }
552 const UChar* limit = p + U_PARSE_CONTEXT_LEN - 1;
553 if (limit > e) {
554 limit = e;
555 }
556 u_strncpy(pe.preContext, start, (int32_t)(p-start));
557 pe.preContext[p-start] = 0;
558 u_strncpy(pe.postContext, p, (int32_t)(limit-p));
559 pe.postContext[limit-p] = 0;
560 pe.offset = (int32_t)(p - data);
561
562 #ifdef RBNF_DEBUG
563 fprintf(stderr, "%s at or near character %ld: ", EXPLANATION_ARG, p-data);
564
565 UnicodeString msg;
566 msg.append(start, p - start);
567 msg.append((UChar)0x002f); /* SOLIDUS/SLASH */
568 msg.append(p, limit-p);
569 msg.append(UNICODE_STRING_SIMPLE("'"));
570
571 char buf[128];
572 int32_t len = msg.extract(0, msg.length(), buf, 128);
573 if (len >= 128) {
574 buf[127] = 0;
575 } else {
576 buf[len] = 0;
577 }
578 fprintf(stderr, "%s\n", buf);
579 fflush(stderr);
580 #endif
581
582 uprv_free(data);
583 data = NULL;
584 p = NULL;
585 e = NULL;
586
587 if (U_SUCCESS(ec)) {
588 ec = U_PARSE_ERROR;
589 }
590 }
591
592 //UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringLocalizationInfo)
593
594 StringLocalizationInfo*
create(const UnicodeString & info,UParseError & perror,UErrorCode & status)595 StringLocalizationInfo::create(const UnicodeString& info, UParseError& perror, UErrorCode& status) {
596 if (U_FAILURE(status)) {
597 return NULL;
598 }
599
600 int32_t len = info.length();
601 if (len == 0) {
602 return NULL; // no error;
603 }
604
605 UChar* p = (UChar*)uprv_malloc(len * sizeof(UChar));
606 if (!p) {
607 status = U_MEMORY_ALLOCATION_ERROR;
608 return NULL;
609 }
610 info.extract(p, len, status);
611 if (!U_FAILURE(status)) {
612 status = U_ZERO_ERROR; // clear warning about non-termination
613 }
614
615 LocDataParser parser(perror, status);
616 return parser.parse(p, len);
617 }
618
~StringLocalizationInfo()619 StringLocalizationInfo::~StringLocalizationInfo() {
620 for (UChar*** p = (UChar***)data; *p; ++p) {
621 // remaining data is simply pointer into our unicode string data.
622 if (*p) uprv_free(*p);
623 }
624 if (data) uprv_free(data);
625 if (info) uprv_free(info);
626 }
627
628
629 const UChar*
getRuleSetName(int32_t index) const630 StringLocalizationInfo::getRuleSetName(int32_t index) const {
631 if (index >= 0 && index < getNumberOfRuleSets()) {
632 return data[0][index];
633 }
634 return NULL;
635 }
636
637 const UChar*
getLocaleName(int32_t index) const638 StringLocalizationInfo::getLocaleName(int32_t index) const {
639 if (index >= 0 && index < getNumberOfDisplayLocales()) {
640 return data[index+1][0];
641 }
642 return NULL;
643 }
644
645 const UChar*
getDisplayName(int32_t localeIndex,int32_t ruleIndex) const646 StringLocalizationInfo::getDisplayName(int32_t localeIndex, int32_t ruleIndex) const {
647 if (localeIndex >= 0 && localeIndex < getNumberOfDisplayLocales() &&
648 ruleIndex >= 0 && ruleIndex < getNumberOfRuleSets()) {
649 return data[localeIndex+1][ruleIndex+1];
650 }
651 return NULL;
652 }
653
654 // ----------
655
RuleBasedNumberFormat(const UnicodeString & description,const UnicodeString & locs,const Locale & alocale,UParseError & perror,UErrorCode & status)656 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
657 const UnicodeString& locs,
658 const Locale& alocale, UParseError& perror, UErrorCode& status)
659 : ruleSets(NULL)
660 , ruleSetDescriptions(NULL)
661 , numRuleSets(0)
662 , defaultRuleSet(NULL)
663 , locale(alocale)
664 , collator(NULL)
665 , decimalFormatSymbols(NULL)
666 , defaultInfinityRule(NULL)
667 , defaultNaNRule(NULL)
668 , lenient(FALSE)
669 , lenientParseRules(NULL)
670 , localizations(NULL)
671 , capitalizationInfoSet(FALSE)
672 , capitalizationForUIListMenu(FALSE)
673 , capitalizationForStandAlone(FALSE)
674 , capitalizationBrkIter(NULL)
675 {
676 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
677 init(description, locinfo, perror, status);
678 }
679
RuleBasedNumberFormat(const UnicodeString & description,const UnicodeString & locs,UParseError & perror,UErrorCode & status)680 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
681 const UnicodeString& locs,
682 UParseError& perror, UErrorCode& status)
683 : ruleSets(NULL)
684 , ruleSetDescriptions(NULL)
685 , numRuleSets(0)
686 , defaultRuleSet(NULL)
687 , locale(Locale::getDefault())
688 , collator(NULL)
689 , decimalFormatSymbols(NULL)
690 , defaultInfinityRule(NULL)
691 , defaultNaNRule(NULL)
692 , lenient(FALSE)
693 , lenientParseRules(NULL)
694 , localizations(NULL)
695 , capitalizationInfoSet(FALSE)
696 , capitalizationForUIListMenu(FALSE)
697 , capitalizationForStandAlone(FALSE)
698 , capitalizationBrkIter(NULL)
699 {
700 LocalizationInfo* locinfo = StringLocalizationInfo::create(locs, perror, status);
701 init(description, locinfo, perror, status);
702 }
703
RuleBasedNumberFormat(const UnicodeString & description,LocalizationInfo * info,const Locale & alocale,UParseError & perror,UErrorCode & status)704 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
705 LocalizationInfo* info,
706 const Locale& alocale, UParseError& perror, UErrorCode& status)
707 : ruleSets(NULL)
708 , ruleSetDescriptions(NULL)
709 , numRuleSets(0)
710 , defaultRuleSet(NULL)
711 , locale(alocale)
712 , collator(NULL)
713 , decimalFormatSymbols(NULL)
714 , defaultInfinityRule(NULL)
715 , defaultNaNRule(NULL)
716 , lenient(FALSE)
717 , lenientParseRules(NULL)
718 , localizations(NULL)
719 , capitalizationInfoSet(FALSE)
720 , capitalizationForUIListMenu(FALSE)
721 , capitalizationForStandAlone(FALSE)
722 , capitalizationBrkIter(NULL)
723 {
724 init(description, info, perror, status);
725 }
726
RuleBasedNumberFormat(const UnicodeString & description,UParseError & perror,UErrorCode & status)727 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
728 UParseError& perror,
729 UErrorCode& status)
730 : ruleSets(NULL)
731 , ruleSetDescriptions(NULL)
732 , numRuleSets(0)
733 , defaultRuleSet(NULL)
734 , locale(Locale::getDefault())
735 , collator(NULL)
736 , decimalFormatSymbols(NULL)
737 , defaultInfinityRule(NULL)
738 , defaultNaNRule(NULL)
739 , lenient(FALSE)
740 , lenientParseRules(NULL)
741 , localizations(NULL)
742 , capitalizationInfoSet(FALSE)
743 , capitalizationForUIListMenu(FALSE)
744 , capitalizationForStandAlone(FALSE)
745 , capitalizationBrkIter(NULL)
746 {
747 init(description, NULL, perror, status);
748 }
749
RuleBasedNumberFormat(const UnicodeString & description,const Locale & aLocale,UParseError & perror,UErrorCode & status)750 RuleBasedNumberFormat::RuleBasedNumberFormat(const UnicodeString& description,
751 const Locale& aLocale,
752 UParseError& perror,
753 UErrorCode& status)
754 : ruleSets(NULL)
755 , ruleSetDescriptions(NULL)
756 , numRuleSets(0)
757 , defaultRuleSet(NULL)
758 , locale(aLocale)
759 , collator(NULL)
760 , decimalFormatSymbols(NULL)
761 , defaultInfinityRule(NULL)
762 , defaultNaNRule(NULL)
763 , lenient(FALSE)
764 , lenientParseRules(NULL)
765 , localizations(NULL)
766 , capitalizationInfoSet(FALSE)
767 , capitalizationForUIListMenu(FALSE)
768 , capitalizationForStandAlone(FALSE)
769 , capitalizationBrkIter(NULL)
770 {
771 init(description, NULL, perror, status);
772 }
773
RuleBasedNumberFormat(URBNFRuleSetTag tag,const Locale & alocale,UErrorCode & status)774 RuleBasedNumberFormat::RuleBasedNumberFormat(URBNFRuleSetTag tag, const Locale& alocale, UErrorCode& status)
775 : ruleSets(NULL)
776 , ruleSetDescriptions(NULL)
777 , numRuleSets(0)
778 , defaultRuleSet(NULL)
779 , locale(alocale)
780 , collator(NULL)
781 , decimalFormatSymbols(NULL)
782 , defaultInfinityRule(NULL)
783 , defaultNaNRule(NULL)
784 , lenient(FALSE)
785 , lenientParseRules(NULL)
786 , localizations(NULL)
787 , capitalizationInfoSet(FALSE)
788 , capitalizationForUIListMenu(FALSE)
789 , capitalizationForStandAlone(FALSE)
790 , capitalizationBrkIter(NULL)
791 {
792 if (U_FAILURE(status)) {
793 return;
794 }
795
796 const char* rules_tag = "RBNFRules";
797 const char* fmt_tag = "";
798 switch (tag) {
799 case URBNF_SPELLOUT: fmt_tag = "SpelloutRules"; break;
800 case URBNF_ORDINAL: fmt_tag = "OrdinalRules"; break;
801 case URBNF_DURATION: fmt_tag = "DurationRules"; break;
802 case URBNF_NUMBERING_SYSTEM: fmt_tag = "NumberingSystemRules"; break;
803 default: status = U_ILLEGAL_ARGUMENT_ERROR; return;
804 }
805
806 // TODO: read localization info from resource
807 LocalizationInfo* locinfo = NULL;
808
809 UResourceBundle* nfrb = ures_open(U_ICUDATA_RBNF, locale.getName(), &status);
810 if (U_SUCCESS(status)) {
811 setLocaleIDs(ures_getLocaleByType(nfrb, ULOC_VALID_LOCALE, &status),
812 ures_getLocaleByType(nfrb, ULOC_ACTUAL_LOCALE, &status));
813
814 UResourceBundle* rbnfRules = ures_getByKeyWithFallback(nfrb, rules_tag, NULL, &status);
815 if (U_FAILURE(status)) {
816 ures_close(nfrb);
817 }
818 UResourceBundle* ruleSets = ures_getByKeyWithFallback(rbnfRules, fmt_tag, NULL, &status);
819 if (U_FAILURE(status)) {
820 ures_close(rbnfRules);
821 ures_close(nfrb);
822 return;
823 }
824
825 UnicodeString desc;
826 while (ures_hasNext(ruleSets)) {
827 desc.append(ures_getNextUnicodeString(ruleSets,NULL,&status));
828 }
829 UParseError perror;
830
831 init(desc, locinfo, perror, status);
832
833 ures_close(ruleSets);
834 ures_close(rbnfRules);
835 }
836 ures_close(nfrb);
837 }
838
// Copy constructor. Every member is first put into its empty state so that
// the dispose() inside operator= has nothing to release; the real copy is
// then done by the assignment operator.
RuleBasedNumberFormat::RuleBasedNumberFormat(const RuleBasedNumberFormat& rhs)
  : NumberFormat(rhs),
    ruleSets(NULL),
    ruleSetDescriptions(NULL),
    numRuleSets(0),
    defaultRuleSet(NULL),
    locale(rhs.locale),
    collator(NULL),
    decimalFormatSymbols(NULL),
    defaultInfinityRule(NULL),
    defaultNaNRule(NULL),
    lenient(FALSE),
    lenientParseRules(NULL),
    localizations(NULL),
    capitalizationInfoSet(FALSE),
    capitalizationForUIListMenu(FALSE),
    capitalizationForStandAlone(FALSE),
    capitalizationBrkIter(NULL)
{
    *this = rhs;
}
860
861 // --------
862
// Assignment. Rather than copying the parsed rule sets, this object is rebuilt
// from rhs's original rule description: existing state is disposed, then
// init() is run again with rhs's description and a new reference to rhs's
// localization info, and rhs's default rule set is re-selected by name.
// Self-assignment is a no-op.
RuleBasedNumberFormat&
RuleBasedNumberFormat::operator=(const RuleBasedNumberFormat& rhs)
{
    if (this == &rhs) {
        return *this;
    }
    NumberFormat::operator=(rhs);
    // NOTE(review): status is local and never inspected, so a failure in
    // init() or setDefaultRuleSet() is not visible to the caller.
    UErrorCode status = U_ZERO_ERROR;
    // release everything this object currently holds before rebuilding
    dispose();
    locale = rhs.locale;
    lenient = rhs.lenient;

    UParseError perror;
    // NOTE(review): dereferences rhs.getDecimalFormatSymbols() unchecked;
    // presumably it is never NULL for a constructed rhs -- confirm.
    setDecimalFormatSymbols(*rhs.getDecimalFormatSymbols());
    // the localization info is shared, not copied: take one more reference for this object
    init(rhs.originalDescription, rhs.localizations ? rhs.localizations->ref() : NULL, perror, status);
    setDefaultRuleSet(rhs.getDefaultRuleSetName(), status);

    capitalizationInfoSet = rhs.capitalizationInfoSet;
    capitalizationForUIListMenu = rhs.capitalizationForUIListMenu;
    capitalizationForStandAlone = rhs.capitalizationForStandAlone;
#if !UCONFIG_NO_BREAK_ITERATION
    // the break iterator is cloned so each formatter has its own
    capitalizationBrkIter = (rhs.capitalizationBrkIter!=NULL)? rhs.capitalizationBrkIter->clone(): NULL;
#endif

    return *this;
}
889
~RuleBasedNumberFormat()890 RuleBasedNumberFormat::~RuleBasedNumberFormat()
891 {
892 dispose();
893 }
894
895 Format*
clone(void) const896 RuleBasedNumberFormat::clone(void) const
897 {
898 return new RuleBasedNumberFormat(*this);
899 }
900
901 UBool
operator ==(const Format & other) const902 RuleBasedNumberFormat::operator==(const Format& other) const
903 {
904 if (this == &other) {
905 return TRUE;
906 }
907
908 if (typeid(*this) == typeid(other)) {
909 const RuleBasedNumberFormat& rhs = (const RuleBasedNumberFormat&)other;
910 // test for capitalization info equality is adequately handled
911 // by the NumberFormat test for fCapitalizationContext equality;
912 // the info here is just derived from that.
913 if (locale == rhs.locale &&
914 lenient == rhs.lenient &&
915 (localizations == NULL
916 ? rhs.localizations == NULL
917 : (rhs.localizations == NULL
918 ? FALSE
919 : *localizations == rhs.localizations))) {
920
921 NFRuleSet** p = ruleSets;
922 NFRuleSet** q = rhs.ruleSets;
923 if (p == NULL) {
924 return q == NULL;
925 } else if (q == NULL) {
926 return FALSE;
927 }
928 while (*p && *q && (**p == **q)) {
929 ++p;
930 ++q;
931 }
932 return *q == NULL && *p == NULL;
933 }
934 }
935
936 return FALSE;
937 }
938
939 UnicodeString
getRules() const940 RuleBasedNumberFormat::getRules() const
941 {
942 UnicodeString result;
943 if (ruleSets != NULL) {
944 for (NFRuleSet** p = ruleSets; *p; ++p) {
945 (*p)->appendRules(result);
946 }
947 }
948 return result;
949 }
950
951 UnicodeString
getRuleSetName(int32_t index) const952 RuleBasedNumberFormat::getRuleSetName(int32_t index) const
953 {
954 if (localizations) {
955 UnicodeString string(TRUE, localizations->getRuleSetName(index), (int32_t)-1);
956 return string;
957 }
958 else if (ruleSets) {
959 UnicodeString result;
960 for (NFRuleSet** p = ruleSets; *p; ++p) {
961 NFRuleSet* rs = *p;
962 if (rs->isPublic()) {
963 if (--index == -1) {
964 rs->getName(result);
965 return result;
966 }
967 }
968 }
969 }
970 UnicodeString empty;
971 return empty;
972 }
973
974 int32_t
getNumberOfRuleSetNames() const975 RuleBasedNumberFormat::getNumberOfRuleSetNames() const
976 {
977 int32_t result = 0;
978 if (localizations) {
979 result = localizations->getNumberOfRuleSets();
980 }
981 else if (ruleSets) {
982 for (NFRuleSet** p = ruleSets; *p; ++p) {
983 if ((**p).isPublic()) {
984 ++result;
985 }
986 }
987 }
988 return result;
989 }
990
991 int32_t
getNumberOfRuleSetDisplayNameLocales(void) const992 RuleBasedNumberFormat::getNumberOfRuleSetDisplayNameLocales(void) const {
993 if (localizations) {
994 return localizations->getNumberOfDisplayLocales();
995 }
996 return 0;
997 }
998
999 Locale
getRuleSetDisplayNameLocale(int32_t index,UErrorCode & status) const1000 RuleBasedNumberFormat::getRuleSetDisplayNameLocale(int32_t index, UErrorCode& status) const {
1001 if (U_FAILURE(status)) {
1002 return Locale("");
1003 }
1004 if (localizations && index >= 0 && index < localizations->getNumberOfDisplayLocales()) {
1005 UnicodeString name(TRUE, localizations->getLocaleName(index), -1);
1006 char buffer[64];
1007 int32_t cap = name.length() + 1;
1008 char* bp = buffer;
1009 if (cap > 64) {
1010 bp = (char *)uprv_malloc(cap);
1011 if (bp == NULL) {
1012 status = U_MEMORY_ALLOCATION_ERROR;
1013 return Locale("");
1014 }
1015 }
1016 name.extract(0, name.length(), bp, cap, UnicodeString::kInvariant);
1017 Locale retLocale(bp);
1018 if (bp != buffer) {
1019 uprv_free(bp);
1020 }
1021 return retLocale;
1022 }
1023 status = U_ILLEGAL_ARGUMENT_ERROR;
1024 Locale retLocale;
1025 return retLocale;
1026 }
1027
1028 UnicodeString
getRuleSetDisplayName(int32_t index,const Locale & localeParam)1029 RuleBasedNumberFormat::getRuleSetDisplayName(int32_t index, const Locale& localeParam) {
1030 if (localizations && index >= 0 && index < localizations->getNumberOfRuleSets()) {
1031 UnicodeString localeName(localeParam.getBaseName(), -1, UnicodeString::kInvariant);
1032 int32_t len = localeName.length();
1033 UChar* localeStr = localeName.getBuffer(len + 1);
1034 while (len >= 0) {
1035 localeStr[len] = 0;
1036 int32_t ix = localizations->indexForLocale(localeStr);
1037 if (ix >= 0) {
1038 UnicodeString name(TRUE, localizations->getDisplayName(ix, index), -1);
1039 return name;
1040 }
1041
1042 // trim trailing portion, skipping over ommitted sections
1043 do { --len;} while (len > 0 && localeStr[len] != 0x005f); // underscore
1044 while (len > 0 && localeStr[len-1] == 0x005F) --len;
1045 }
1046 UnicodeString name(TRUE, localizations->getRuleSetName(index), -1);
1047 return name;
1048 }
1049 UnicodeString bogus;
1050 bogus.setToBogus();
1051 return bogus;
1052 }
1053
1054 UnicodeString
getRuleSetDisplayName(const UnicodeString & ruleSetName,const Locale & localeParam)1055 RuleBasedNumberFormat::getRuleSetDisplayName(const UnicodeString& ruleSetName, const Locale& localeParam) {
1056 if (localizations) {
1057 UnicodeString rsn(ruleSetName);
1058 int32_t ix = localizations->indexForRuleSet(rsn.getTerminatedBuffer());
1059 return getRuleSetDisplayName(ix, localeParam);
1060 }
1061 UnicodeString bogus;
1062 bogus.setToBogus();
1063 return bogus;
1064 }
1065
1066 NFRuleSet*
findRuleSet(const UnicodeString & name,UErrorCode & status) const1067 RuleBasedNumberFormat::findRuleSet(const UnicodeString& name, UErrorCode& status) const
1068 {
1069 if (U_SUCCESS(status) && ruleSets) {
1070 for (NFRuleSet** p = ruleSets; *p; ++p) {
1071 NFRuleSet* rs = *p;
1072 if (rs->isNamed(name)) {
1073 return rs;
1074 }
1075 }
1076 status = U_ILLEGAL_ARGUMENT_ERROR;
1077 }
1078 return NULL;
1079 }
1080
1081 UnicodeString&
format(int32_t number,UnicodeString & toAppendTo,FieldPosition &) const1082 RuleBasedNumberFormat::format(int32_t number,
1083 UnicodeString& toAppendTo,
1084 FieldPosition& /* pos */) const
1085 {
1086 if (defaultRuleSet) {
1087 UErrorCode status = U_ZERO_ERROR;
1088 int32_t startPos = toAppendTo.length();
1089 defaultRuleSet->format((int64_t)number, toAppendTo, toAppendTo.length(), 0, status);
1090 adjustForCapitalizationContext(startPos, toAppendTo);
1091 }
1092 return toAppendTo;
1093 }
1094
1095
1096 UnicodeString&
format(int64_t number,UnicodeString & toAppendTo,FieldPosition &) const1097 RuleBasedNumberFormat::format(int64_t number,
1098 UnicodeString& toAppendTo,
1099 FieldPosition& /* pos */) const
1100 {
1101 if (defaultRuleSet) {
1102 UErrorCode status = U_ZERO_ERROR;
1103 int32_t startPos = toAppendTo.length();
1104 defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), 0, status);
1105 adjustForCapitalizationContext(startPos, toAppendTo);
1106 }
1107 return toAppendTo;
1108 }
1109
1110
1111 UnicodeString&
format(double number,UnicodeString & toAppendTo,FieldPosition &) const1112 RuleBasedNumberFormat::format(double number,
1113 UnicodeString& toAppendTo,
1114 FieldPosition& /* pos */) const
1115 {
1116 int32_t startPos = toAppendTo.length();
1117 if (defaultRuleSet) {
1118 UErrorCode status = U_ZERO_ERROR;
1119 defaultRuleSet->format(number, toAppendTo, toAppendTo.length(), 0, status);
1120 }
1121 return adjustForCapitalizationContext(startPos, toAppendTo);
1122 }
1123
1124
1125 UnicodeString&
format(int32_t number,const UnicodeString & ruleSetName,UnicodeString & toAppendTo,FieldPosition &,UErrorCode & status) const1126 RuleBasedNumberFormat::format(int32_t number,
1127 const UnicodeString& ruleSetName,
1128 UnicodeString& toAppendTo,
1129 FieldPosition& /* pos */,
1130 UErrorCode& status) const
1131 {
1132 // return format((int64_t)number, ruleSetName, toAppendTo, pos, status);
1133 if (U_SUCCESS(status)) {
1134 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1135 // throw new IllegalArgumentException("Can't use internal rule set");
1136 status = U_ILLEGAL_ARGUMENT_ERROR;
1137 } else {
1138 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1139 if (rs) {
1140 int32_t startPos = toAppendTo.length();
1141 rs->format((int64_t)number, toAppendTo, toAppendTo.length(), 0, status);
1142 adjustForCapitalizationContext(startPos, toAppendTo);
1143 }
1144 }
1145 }
1146 return toAppendTo;
1147 }
1148
1149
1150 UnicodeString&
format(int64_t number,const UnicodeString & ruleSetName,UnicodeString & toAppendTo,FieldPosition &,UErrorCode & status) const1151 RuleBasedNumberFormat::format(int64_t number,
1152 const UnicodeString& ruleSetName,
1153 UnicodeString& toAppendTo,
1154 FieldPosition& /* pos */,
1155 UErrorCode& status) const
1156 {
1157 if (U_SUCCESS(status)) {
1158 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1159 // throw new IllegalArgumentException("Can't use internal rule set");
1160 status = U_ILLEGAL_ARGUMENT_ERROR;
1161 } else {
1162 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1163 if (rs) {
1164 int32_t startPos = toAppendTo.length();
1165 rs->format(number, toAppendTo, toAppendTo.length(), 0, status);
1166 adjustForCapitalizationContext(startPos, toAppendTo);
1167 }
1168 }
1169 }
1170 return toAppendTo;
1171 }
1172
1173
1174 UnicodeString&
format(double number,const UnicodeString & ruleSetName,UnicodeString & toAppendTo,FieldPosition &,UErrorCode & status) const1175 RuleBasedNumberFormat::format(double number,
1176 const UnicodeString& ruleSetName,
1177 UnicodeString& toAppendTo,
1178 FieldPosition& /* pos */,
1179 UErrorCode& status) const
1180 {
1181 if (U_SUCCESS(status)) {
1182 if (ruleSetName.indexOf(gPercentPercent, 2, 0) == 0) {
1183 // throw new IllegalArgumentException("Can't use internal rule set");
1184 status = U_ILLEGAL_ARGUMENT_ERROR;
1185 } else {
1186 NFRuleSet *rs = findRuleSet(ruleSetName, status);
1187 if (rs) {
1188 int32_t startPos = toAppendTo.length();
1189 rs->format(number, toAppendTo, toAppendTo.length(), 0, status);
1190 adjustForCapitalizationContext(startPos, toAppendTo);
1191 }
1192 }
1193 }
1194 return toAppendTo;
1195 }
1196
1197 UnicodeString&
adjustForCapitalizationContext(int32_t startPos,UnicodeString & currentResult) const1198 RuleBasedNumberFormat::adjustForCapitalizationContext(int32_t startPos,
1199 UnicodeString& currentResult) const
1200 {
1201 #if !UCONFIG_NO_BREAK_ITERATION
1202 if (startPos==0 && currentResult.length() > 0) {
1203 // capitalize currentResult according to context
1204 UChar32 ch = currentResult.char32At(0);
1205 UErrorCode status = U_ZERO_ERROR;
1206 UDisplayContext capitalizationContext = getContext(UDISPCTX_TYPE_CAPITALIZATION, status);
1207 if ( u_islower(ch) && U_SUCCESS(status) && capitalizationBrkIter!= NULL &&
1208 ( capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
1209 (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
1210 (capitalizationContext==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
1211 // titlecase first word of currentResult, here use sentence iterator unlike current implementations
1212 // in LocaleDisplayNamesImpl::adjustForUsageAndContext and RelativeDateFormat::format
1213 currentResult.toTitle(capitalizationBrkIter, locale, U_TITLECASE_NO_LOWERCASE | U_TITLECASE_NO_BREAK_ADJUSTMENT);
1214 }
1215 }
1216 #endif
1217 return currentResult;
1218 }
1219
1220
1221 void
parse(const UnicodeString & text,Formattable & result,ParsePosition & parsePosition) const1222 RuleBasedNumberFormat::parse(const UnicodeString& text,
1223 Formattable& result,
1224 ParsePosition& parsePosition) const
1225 {
1226 if (!ruleSets) {
1227 parsePosition.setErrorIndex(0);
1228 return;
1229 }
1230
1231 UnicodeString workingText(text, parsePosition.getIndex());
1232 ParsePosition workingPos(0);
1233
1234 ParsePosition high_pp(0);
1235 Formattable high_result;
1236
1237 for (NFRuleSet** p = ruleSets; *p; ++p) {
1238 NFRuleSet *rp = *p;
1239 if (rp->isPublic() && rp->isParseable()) {
1240 ParsePosition working_pp(0);
1241 Formattable working_result;
1242
1243 rp->parse(workingText, working_pp, kMaxDouble, working_result);
1244 if (working_pp.getIndex() > high_pp.getIndex()) {
1245 high_pp = working_pp;
1246 high_result = working_result;
1247
1248 if (high_pp.getIndex() == workingText.length()) {
1249 break;
1250 }
1251 }
1252 }
1253 }
1254
1255 int32_t startIndex = parsePosition.getIndex();
1256 parsePosition.setIndex(startIndex + high_pp.getIndex());
1257 if (high_pp.getIndex() > 0) {
1258 parsePosition.setErrorIndex(-1);
1259 } else {
1260 int32_t errorIndex = (high_pp.getErrorIndex()>0)? high_pp.getErrorIndex(): 0;
1261 parsePosition.setErrorIndex(startIndex + errorIndex);
1262 }
1263 result = high_result;
1264 if (result.getType() == Formattable::kDouble) {
1265 double d = result.getDouble();
1266 if (!uprv_isNaN(d) && d == uprv_trunc(d) && INT32_MIN <= d && d <= INT32_MAX) {
1267 // Note: casting a double to an int when the double is too large or small
1268 // to fit the destination is undefined behavior. The explicit range checks,
1269 // above, are required. Just casting and checking the result value is undefined.
1270 result.setLong(static_cast<int32_t>(d));
1271 }
1272 }
1273 }
1274
1275 #if !UCONFIG_NO_COLLATION
1276
1277 void
setLenient(UBool enabled)1278 RuleBasedNumberFormat::setLenient(UBool enabled)
1279 {
1280 lenient = enabled;
1281 if (!enabled && collator) {
1282 delete collator;
1283 collator = NULL;
1284 }
1285 }
1286
1287 #endif
1288
1289 void
setDefaultRuleSet(const UnicodeString & ruleSetName,UErrorCode & status)1290 RuleBasedNumberFormat::setDefaultRuleSet(const UnicodeString& ruleSetName, UErrorCode& status) {
1291 if (U_SUCCESS(status)) {
1292 if (ruleSetName.isEmpty()) {
1293 if (localizations) {
1294 UnicodeString name(TRUE, localizations->getRuleSetName(0), -1);
1295 defaultRuleSet = findRuleSet(name, status);
1296 } else {
1297 initDefaultRuleSet();
1298 }
1299 } else if (ruleSetName.startsWith(UNICODE_STRING_SIMPLE("%%"))) {
1300 status = U_ILLEGAL_ARGUMENT_ERROR;
1301 } else {
1302 NFRuleSet* result = findRuleSet(ruleSetName, status);
1303 if (result != NULL) {
1304 defaultRuleSet = result;
1305 }
1306 }
1307 }
1308 }
1309
1310 UnicodeString
getDefaultRuleSetName() const1311 RuleBasedNumberFormat::getDefaultRuleSetName() const {
1312 UnicodeString result;
1313 if (defaultRuleSet && defaultRuleSet->isPublic()) {
1314 defaultRuleSet->getName(result);
1315 } else {
1316 result.setToBogus();
1317 }
1318 return result;
1319 }
1320
1321 void
initDefaultRuleSet()1322 RuleBasedNumberFormat::initDefaultRuleSet()
1323 {
1324 defaultRuleSet = NULL;
1325 if (!ruleSets) {
1326 return;
1327 }
1328
1329 const UnicodeString spellout(UNICODE_STRING_SIMPLE("%spellout-numbering"));
1330 const UnicodeString ordinal(UNICODE_STRING_SIMPLE("%digits-ordinal"));
1331 const UnicodeString duration(UNICODE_STRING_SIMPLE("%duration"));
1332
1333 NFRuleSet**p = &ruleSets[0];
1334 while (*p) {
1335 if ((*p)->isNamed(spellout) || (*p)->isNamed(ordinal) || (*p)->isNamed(duration)) {
1336 defaultRuleSet = *p;
1337 return;
1338 } else {
1339 ++p;
1340 }
1341 }
1342
1343 defaultRuleSet = *--p;
1344 if (!defaultRuleSet->isPublic()) {
1345 while (p != ruleSets) {
1346 if ((*--p)->isPublic()) {
1347 defaultRuleSet = *p;
1348 break;
1349 }
1350 }
1351 }
1352 }
1353
1354
1355 void
init(const UnicodeString & rules,LocalizationInfo * localizationInfos,UParseError & pErr,UErrorCode & status)1356 RuleBasedNumberFormat::init(const UnicodeString& rules, LocalizationInfo* localizationInfos,
1357 UParseError& pErr, UErrorCode& status)
1358 {
1359 // TODO: implement UParseError
1360 uprv_memset(&pErr, 0, sizeof(UParseError));
1361 // Note: this can leave ruleSets == NULL, so remaining code should check
1362 if (U_FAILURE(status)) {
1363 return;
1364 }
1365
1366 initializeDecimalFormatSymbols(status);
1367 initializeDefaultInfinityRule(status);
1368 initializeDefaultNaNRule(status);
1369 if (U_FAILURE(status)) {
1370 return;
1371 }
1372
1373 this->localizations = localizationInfos == NULL ? NULL : localizationInfos->ref();
1374
1375 UnicodeString description(rules);
1376 if (!description.length()) {
1377 status = U_MEMORY_ALLOCATION_ERROR;
1378 return;
1379 }
1380
1381 // start by stripping the trailing whitespace from all the rules
1382 // (this is all the whitespace follwing each semicolon in the
1383 // description). This allows us to look for rule-set boundaries
1384 // by searching for ";%" without having to worry about whitespace
1385 // between the ; and the %
1386 stripWhitespace(description);
1387
1388 // check to see if there's a set of lenient-parse rules. If there
1389 // is, pull them out into our temporary holding place for them,
1390 // and delete them from the description before the real desciption-
1391 // parsing code sees them
1392 int32_t lp = description.indexOf(gLenientParse, -1, 0);
1393 if (lp != -1) {
1394 // we've got to make sure we're not in the middle of a rule
1395 // (where "%%lenient-parse" would actually get treated as
1396 // rule text)
1397 if (lp == 0 || description.charAt(lp - 1) == gSemiColon) {
1398 // locate the beginning and end of the actual collation
1399 // rules (there may be whitespace between the name and
1400 // the first token in the description)
1401 int lpEnd = description.indexOf(gSemiPercent, 2, lp);
1402
1403 if (lpEnd == -1) {
1404 lpEnd = description.length() - 1;
1405 }
1406 int lpStart = lp + u_strlen(gLenientParse);
1407 while (PatternProps::isWhiteSpace(description.charAt(lpStart))) {
1408 ++lpStart;
1409 }
1410
1411 // copy out the lenient-parse rules and delete them
1412 // from the description
1413 lenientParseRules = new UnicodeString();
1414 /* test for NULL */
1415 if (lenientParseRules == 0) {
1416 status = U_MEMORY_ALLOCATION_ERROR;
1417 return;
1418 }
1419 lenientParseRules->setTo(description, lpStart, lpEnd - lpStart);
1420
1421 description.remove(lp, lpEnd + 1 - lp);
1422 }
1423 }
1424
1425 // pre-flight parsing the description and count the number of
1426 // rule sets (";%" marks the end of one rule set and the beginning
1427 // of the next)
1428 numRuleSets = 0;
1429 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, p)) {
1430 ++numRuleSets;
1431 ++p;
1432 }
1433 ++numRuleSets;
1434
1435 // our rule list is an array of the appropriate size
1436 ruleSets = (NFRuleSet **)uprv_malloc((numRuleSets + 1) * sizeof(NFRuleSet *));
1437 /* test for NULL */
1438 if (ruleSets == 0) {
1439 status = U_MEMORY_ALLOCATION_ERROR;
1440 return;
1441 }
1442
1443 for (int i = 0; i <= numRuleSets; ++i) {
1444 ruleSets[i] = NULL;
1445 }
1446
1447 // divide up the descriptions into individual rule-set descriptions
1448 // and store them in a temporary array. At each step, we also
1449 // new up a rule set, but all this does is initialize its name
1450 // and remove it from its description. We can't actually parse
1451 // the rest of the descriptions and finish initializing everything
1452 // because we have to know the names and locations of all the rule
1453 // sets before we can actually set everything up
1454 if(!numRuleSets) {
1455 status = U_ILLEGAL_ARGUMENT_ERROR;
1456 return;
1457 }
1458
1459 ruleSetDescriptions = new UnicodeString[numRuleSets];
1460 if (ruleSetDescriptions == 0) {
1461 status = U_MEMORY_ALLOCATION_ERROR;
1462 return;
1463 }
1464
1465 {
1466 int curRuleSet = 0;
1467 int32_t start = 0;
1468 for (int32_t p = description.indexOf(gSemiPercent, 2, 0); p != -1; p = description.indexOf(gSemiPercent, 2, start)) {
1469 ruleSetDescriptions[curRuleSet].setTo(description, start, p + 1 - start);
1470 ruleSets[curRuleSet] = new NFRuleSet(this, ruleSetDescriptions, curRuleSet, status);
1471 if (ruleSets[curRuleSet] == 0) {
1472 status = U_MEMORY_ALLOCATION_ERROR;
1473 return;
1474 }
1475 ++curRuleSet;
1476 start = p + 1;
1477 }
1478 ruleSetDescriptions[curRuleSet].setTo(description, start, description.length() - start);
1479 ruleSets[curRuleSet] = new NFRuleSet(this, ruleSetDescriptions, curRuleSet, status);
1480 if (ruleSets[curRuleSet] == 0) {
1481 status = U_MEMORY_ALLOCATION_ERROR;
1482 return;
1483 }
1484 }
1485
1486 // now we can take note of the formatter's default rule set, which
1487 // is the last public rule set in the description (it's the last
1488 // rather than the first so that a user can create a new formatter
1489 // from an existing formatter and change its default behavior just
1490 // by appending more rule sets to the end)
1491
1492 // {dlf} Initialization of a fraction rule set requires the default rule
1493 // set to be known. For purposes of initialization, this is always the
1494 // last public rule set, no matter what the localization data says.
1495 initDefaultRuleSet();
1496
1497 // finally, we can go back through the temporary descriptions
1498 // list and finish seting up the substructure (and we throw
1499 // away the temporary descriptions as we go)
1500 {
1501 for (int i = 0; i < numRuleSets; i++) {
1502 ruleSets[i]->parseRules(ruleSetDescriptions[i], status);
1503 }
1504 }
1505
1506 // Now that the rules are initialized, the 'real' default rule
1507 // set can be adjusted by the localization data.
1508
1509 // The C code keeps the localization array as is, rather than building
1510 // a separate array of the public rule set names, so we have less work
1511 // to do here-- but we still need to check the names.
1512
1513 if (localizationInfos) {
1514 // confirm the names, if any aren't in the rules, that's an error
1515 // it is ok if the rules contain public rule sets that are not in this list
1516 for (int32_t i = 0; i < localizationInfos->getNumberOfRuleSets(); ++i) {
1517 UnicodeString name(TRUE, localizationInfos->getRuleSetName(i), -1);
1518 NFRuleSet* rs = findRuleSet(name, status);
1519 if (rs == NULL) {
1520 break; // error
1521 }
1522 if (i == 0) {
1523 defaultRuleSet = rs;
1524 }
1525 }
1526 } else {
1527 defaultRuleSet = getDefaultRuleSet();
1528 }
1529 originalDescription = rules;
1530 }
1531
1532 // override the NumberFormat implementation in order to
1533 // lazily initialize relevant items
1534 void
setContext(UDisplayContext value,UErrorCode & status)1535 RuleBasedNumberFormat::setContext(UDisplayContext value, UErrorCode& status)
1536 {
1537 NumberFormat::setContext(value, status);
1538 if (U_SUCCESS(status)) {
1539 if (!capitalizationInfoSet &&
1540 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU || value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE)) {
1541 initCapitalizationContextInfo(locale);
1542 capitalizationInfoSet = TRUE;
1543 }
1544 #if !UCONFIG_NO_BREAK_ITERATION
1545 if ( capitalizationBrkIter == NULL && (value==UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE ||
1546 (value==UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForUIListMenu) ||
1547 (value==UDISPCTX_CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone)) ) {
1548 UErrorCode status = U_ZERO_ERROR;
1549 capitalizationBrkIter = BreakIterator::createSentenceInstance(locale, status);
1550 if (U_FAILURE(status)) {
1551 delete capitalizationBrkIter;
1552 capitalizationBrkIter = NULL;
1553 }
1554 }
1555 #endif
1556 }
1557 }
1558
1559 void
initCapitalizationContextInfo(const Locale & thelocale)1560 RuleBasedNumberFormat::initCapitalizationContextInfo(const Locale& thelocale)
1561 {
1562 #if !UCONFIG_NO_BREAK_ITERATION
1563 const char * localeID = (thelocale != NULL)? thelocale.getBaseName(): NULL;
1564 UErrorCode status = U_ZERO_ERROR;
1565 UResourceBundle *rb = ures_open(NULL, localeID, &status);
1566 rb = ures_getByKeyWithFallback(rb, "contextTransforms", rb, &status);
1567 rb = ures_getByKeyWithFallback(rb, "number-spellout", rb, &status);
1568 if (U_SUCCESS(status) && rb != NULL) {
1569 int32_t len = 0;
1570 const int32_t * intVector = ures_getIntVector(rb, &len, &status);
1571 if (U_SUCCESS(status) && intVector != NULL && len >= 2) {
1572 capitalizationForUIListMenu = intVector[0];
1573 capitalizationForStandAlone = intVector[1];
1574 }
1575 }
1576 ures_close(rb);
1577 #endif
1578 }
1579
1580 void
stripWhitespace(UnicodeString & description)1581 RuleBasedNumberFormat::stripWhitespace(UnicodeString& description)
1582 {
1583 // iterate through the characters...
1584 UnicodeString result;
1585
1586 int start = 0;
1587 while (start != -1 && start < description.length()) {
1588 // seek to the first non-whitespace character...
1589 while (start < description.length()
1590 && PatternProps::isWhiteSpace(description.charAt(start))) {
1591 ++start;
1592 }
1593
1594 // locate the next semicolon in the text and copy the text from
1595 // our current position up to that semicolon into the result
1596 int32_t p = description.indexOf(gSemiColon, start);
1597 if (p == -1) {
1598 // or if we don't find a semicolon, just copy the rest of
1599 // the string into the result
1600 result.append(description, start, description.length() - start);
1601 start = -1;
1602 }
1603 else if (p < description.length()) {
1604 result.append(description, start, p + 1 - start);
1605 start = p + 1;
1606 }
1607
1608 // when we get here, we've seeked off the end of the sring, and
1609 // we terminate the loop (we continue until *start* is -1 rather
1610 // than until *p* is -1, because otherwise we'd miss the last
1611 // rule in the description)
1612 else {
1613 start = -1;
1614 }
1615 }
1616
1617 description.setTo(result);
1618 }
1619
1620
1621 void
dispose()1622 RuleBasedNumberFormat::dispose()
1623 {
1624 if (ruleSets) {
1625 for (NFRuleSet** p = ruleSets; *p; ++p) {
1626 delete *p;
1627 }
1628 uprv_free(ruleSets);
1629 ruleSets = NULL;
1630 }
1631
1632 if (ruleSetDescriptions) {
1633 delete [] ruleSetDescriptions;
1634 ruleSetDescriptions = NULL;
1635 }
1636
1637 #if !UCONFIG_NO_COLLATION
1638 delete collator;
1639 #endif
1640 collator = NULL;
1641
1642 delete decimalFormatSymbols;
1643 decimalFormatSymbols = NULL;
1644
1645 delete defaultInfinityRule;
1646 defaultInfinityRule = NULL;
1647
1648 delete defaultNaNRule;
1649 defaultNaNRule = NULL;
1650
1651 delete lenientParseRules;
1652 lenientParseRules = NULL;
1653
1654 #if !UCONFIG_NO_BREAK_ITERATION
1655 delete capitalizationBrkIter;
1656 capitalizationBrkIter = NULL;
1657 #endif
1658
1659 if (localizations) {
1660 localizations = localizations->unref();
1661 }
1662 }
1663
1664
1665 //-----------------------------------------------------------------------
1666 // package-internal API
1667 //-----------------------------------------------------------------------
1668
1669 /**
1670 * Returns the collator to use for lenient parsing. The collator is lazily created:
1671 * this function creates it the first time it's called.
1672 * @return The collator to use for lenient parsing, or null if lenient parsing
1673 * is turned off.
1674 */
1675 const RuleBasedCollator*
getCollator() const1676 RuleBasedNumberFormat::getCollator() const
1677 {
1678 #if !UCONFIG_NO_COLLATION
1679 if (!ruleSets) {
1680 return NULL;
1681 }
1682
1683 // lazy-evaluate the collator
1684 if (collator == NULL && lenient) {
1685 // create a default collator based on the formatter's locale,
1686 // then pull out that collator's rules, append any additional
1687 // rules specified in the description, and create a _new_
1688 // collator based on the combinaiton of those rules
1689
1690 UErrorCode status = U_ZERO_ERROR;
1691
1692 Collator* temp = Collator::createInstance(locale, status);
1693 RuleBasedCollator* newCollator;
1694 if (U_SUCCESS(status) && (newCollator = dynamic_cast<RuleBasedCollator*>(temp)) != NULL) {
1695 if (lenientParseRules) {
1696 UnicodeString rules(newCollator->getRules());
1697 rules.append(*lenientParseRules);
1698
1699 newCollator = new RuleBasedCollator(rules, status);
1700 // Exit if newCollator could not be created.
1701 if (newCollator == NULL) {
1702 return NULL;
1703 }
1704 } else {
1705 temp = NULL;
1706 }
1707 if (U_SUCCESS(status)) {
1708 newCollator->setAttribute(UCOL_DECOMPOSITION_MODE, UCOL_ON, status);
1709 // cast away const
1710 ((RuleBasedNumberFormat*)this)->collator = newCollator;
1711 } else {
1712 delete newCollator;
1713 }
1714 }
1715 delete temp;
1716 }
1717 #endif
1718
1719 // if lenient-parse mode is off, this will be null
1720 // (see setLenientParseMode())
1721 return collator;
1722 }
1723
1724
1725 DecimalFormatSymbols*
initializeDecimalFormatSymbols(UErrorCode & status)1726 RuleBasedNumberFormat::initializeDecimalFormatSymbols(UErrorCode &status)
1727 {
1728 // lazy-evaluate the DecimalFormatSymbols object. This object
1729 // is shared by all DecimalFormat instances belonging to this
1730 // formatter
1731 if (decimalFormatSymbols == NULL) {
1732 DecimalFormatSymbols* temp = new DecimalFormatSymbols(locale, status);
1733 if (U_SUCCESS(status)) {
1734 decimalFormatSymbols = temp;
1735 }
1736 else {
1737 delete temp;
1738 }
1739 }
1740 return decimalFormatSymbols;
1741 }
1742
1743 /**
1744 * Returns the DecimalFormatSymbols object that should be used by all DecimalFormat
1745 * instances owned by this formatter.
1746 */
1747 const DecimalFormatSymbols*
getDecimalFormatSymbols() const1748 RuleBasedNumberFormat::getDecimalFormatSymbols() const
1749 {
1750 return decimalFormatSymbols;
1751 }
1752
1753 NFRule*
initializeDefaultInfinityRule(UErrorCode & status)1754 RuleBasedNumberFormat::initializeDefaultInfinityRule(UErrorCode &status)
1755 {
1756 if (U_FAILURE(status)) {
1757 return NULL;
1758 }
1759 if (defaultInfinityRule == NULL) {
1760 UnicodeString rule(UNICODE_STRING_SIMPLE("Inf: "));
1761 rule.append(getDecimalFormatSymbols()->getSymbol(DecimalFormatSymbols::kInfinitySymbol));
1762 NFRule* temp = new NFRule(this, rule, status);
1763 if (U_SUCCESS(status)) {
1764 defaultInfinityRule = temp;
1765 }
1766 else {
1767 delete temp;
1768 }
1769 }
1770 return defaultInfinityRule;
1771 }
1772
1773 const NFRule*
getDefaultInfinityRule() const1774 RuleBasedNumberFormat::getDefaultInfinityRule() const
1775 {
1776 return defaultInfinityRule;
1777 }
1778
1779 NFRule*
initializeDefaultNaNRule(UErrorCode & status)1780 RuleBasedNumberFormat::initializeDefaultNaNRule(UErrorCode &status)
1781 {
1782 if (U_FAILURE(status)) {
1783 return NULL;
1784 }
1785 if (defaultNaNRule == NULL) {
1786 UnicodeString rule(UNICODE_STRING_SIMPLE("NaN: "));
1787 rule.append(getDecimalFormatSymbols()->getSymbol(DecimalFormatSymbols::kNaNSymbol));
1788 NFRule* temp = new NFRule(this, rule, status);
1789 if (U_SUCCESS(status)) {
1790 defaultNaNRule = temp;
1791 }
1792 else {
1793 delete temp;
1794 }
1795 }
1796 return defaultNaNRule;
1797 }
1798
1799 const NFRule*
getDefaultNaNRule() const1800 RuleBasedNumberFormat::getDefaultNaNRule() const
1801 {
1802 return defaultNaNRule;
1803 }
1804
1805 // De-owning the current localized symbols and adopt the new symbols.
1806 void
adoptDecimalFormatSymbols(DecimalFormatSymbols * symbolsToAdopt)1807 RuleBasedNumberFormat::adoptDecimalFormatSymbols(DecimalFormatSymbols* symbolsToAdopt)
1808 {
1809 if (symbolsToAdopt == NULL) {
1810 return; // do not allow caller to set decimalFormatSymbols to NULL
1811 }
1812
1813 if (decimalFormatSymbols != NULL) {
1814 delete decimalFormatSymbols;
1815 }
1816
1817 decimalFormatSymbols = symbolsToAdopt;
1818
1819 {
1820 // Apply the new decimalFormatSymbols by reparsing the rulesets
1821 UErrorCode status = U_ZERO_ERROR;
1822
1823 delete defaultInfinityRule;
1824 defaultInfinityRule = NULL;
1825 initializeDefaultInfinityRule(status); // Reset with the new DecimalFormatSymbols
1826
1827 delete defaultNaNRule;
1828 defaultNaNRule = NULL;
1829 initializeDefaultNaNRule(status); // Reset with the new DecimalFormatSymbols
1830
1831 if (ruleSets) {
1832 for (int32_t i = 0; i < numRuleSets; i++) {
1833 ruleSets[i]->setDecimalFormatSymbols(*symbolsToAdopt, status);
1834 }
1835 }
1836 }
1837 }
1838
1839 // Setting the symbols is equlivalent to adopting a newly created localized symbols.
1840 void
setDecimalFormatSymbols(const DecimalFormatSymbols & symbols)1841 RuleBasedNumberFormat::setDecimalFormatSymbols(const DecimalFormatSymbols& symbols)
1842 {
1843 adoptDecimalFormatSymbols(new DecimalFormatSymbols(symbols));
1844 }
1845
1846 PluralFormat *
createPluralFormat(UPluralType pluralType,const UnicodeString & pattern,UErrorCode & status) const1847 RuleBasedNumberFormat::createPluralFormat(UPluralType pluralType,
1848 const UnicodeString &pattern,
1849 UErrorCode& status) const
1850 {
1851 return new PluralFormat(locale, pluralType, pattern, status);
1852 }
1853
1854 U_NAMESPACE_END
1855
1856 /* U_HAVE_RBNF */
1857 #endif
1858