1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 1997-2015, International Business Machines Corporation and *
6 * others. All Rights Reserved. *
7 *******************************************************************************
8 *
9 * File COMPACTDECIMALFORMAT.CPP
10 *
11 ********************************************************************************
12 */
13 #include "unicode/utypes.h"
14
15 #if !UCONFIG_NO_FORMATTING
16
17 #include "charstr.h"
18 #include "cstring.h"
19 #include "digitlst.h"
20 #include "mutex.h"
21 #include "unicode/compactdecimalformat.h"
22 #include "unicode/numsys.h"
23 #include "unicode/plurrule.h"
24 #include "unicode/ures.h"
25 #include "ucln_in.h"
26 #include "uhash.h"
27 #include "umutex.h"
28 #include "unicode/ures.h"
29 #include "uresimp.h"
30
31 // Maps locale name to CDFLocaleData struct.
32 static UHashtable* gCompactDecimalData = NULL;
33 static UMutex gCompactDecimalMetaLock = U_MUTEX_INITIALIZER;
34
35 U_NAMESPACE_BEGIN
36
37 static const int32_t MAX_DIGITS = 15;
38 static const char gOther[] = "other";
39 static const char gLatnTag[] = "latn";
40 static const char gNumberElementsTag[] = "NumberElements";
41 static const char gDecimalFormatTag[] = "decimalFormat";
42 static const char gPatternsShort[] = "patternsShort";
43 static const char gPatternsLong[] = "patternsLong";
44 static const char gLatnPath[] = "NumberElements/latn";
45
46 static const UChar u_0 = 0x30;
47 static const UChar u_apos = 0x27;
48
49 static const UChar kZero[] = {u_0};
50
// Scanner states used by fixQuotes to unescape single quotes.
enum QuoteState {
  OUTSIDE,       // not within a quoted run
  INSIDE_EMPTY,  // the previous character was the opening quote
  INSIDE_FULL    // within a quoted run, after at least one non-quote character
};
57
// NOTE(review): nothing in the visible part of this file references these
// values. They look like bit flags, so further values would presumably be
// 4, then 8, and so on - confirm before adding one.
enum FallbackFlags {
  ANY = 0,
  MUST = 1,
  NOT_ROOT = 2
};
64
65
66 // CDFUnit represents a prefix-suffix pair for a particular variant
67 // and log10 value.
68 struct CDFUnit : public UMemory {
69 UnicodeString prefix;
70 UnicodeString suffix;
CDFUnitCDFUnit71 inline CDFUnit() : prefix(), suffix() {
72 prefix.setToBogus();
73 }
~CDFUnitCDFUnit74 inline ~CDFUnit() {}
isSetCDFUnit75 inline UBool isSet() const {
76 return !prefix.isBogus();
77 }
markAsSetCDFUnit78 inline void markAsSet() {
79 prefix.remove();
80 }
81 };
82
83 // CDFLocaleStyleData contains formatting data for a particular locale
84 // and style.
85 class CDFLocaleStyleData : public UMemory {
86 public:
87 // What to divide by for each log10 value when formatting. These values
88 // will be powers of 10. For English, would be:
89 // 1, 1, 1, 1000, 1000, 1000, 1000000, 1000000, 1000000, 1000000000 ...
90 double divisors[MAX_DIGITS];
91 // Maps plural variants to CDFUnit[MAX_DIGITS] arrays.
92 // To format a number x,
93 // first compute log10(x). Compute displayNum = (x / divisors[log10(x)]).
94 // Compute the plural variant for displayNum
95 // (e.g zero, one, two, few, many, other).
96 // Compute cdfUnits = unitsByVariant[pluralVariant].
97 // Prefix and suffix to use at cdfUnits[log10(x)]
98 UHashtable* unitsByVariant;
99 // A flag for whether or not this CDFLocaleStyleData was loaded from the
100 // Latin numbering system as a fallback from the locale numbering system.
101 // This value is meaningless if the object is bogus or empty.
102 UBool fromFallback;
CDFLocaleStyleData()103 inline CDFLocaleStyleData() : unitsByVariant(NULL), fromFallback(FALSE) {
104 uprv_memset(divisors, 0, sizeof(divisors));
105 }
106 ~CDFLocaleStyleData();
107 // Init initializes this object.
108 void Init(UErrorCode& status);
isBogus() const109 inline UBool isBogus() const {
110 return unitsByVariant == NULL;
111 }
112 void setToBogus();
isEmpty()113 UBool isEmpty() {
114 return unitsByVariant == NULL || unitsByVariant->count == 0;
115 }
116 private:
117 CDFLocaleStyleData(const CDFLocaleStyleData&);
118 CDFLocaleStyleData& operator=(const CDFLocaleStyleData&);
119 };
120
121 // CDFLocaleData contains formatting data for a particular locale.
122 struct CDFLocaleData : public UMemory {
123 CDFLocaleStyleData shortData;
124 CDFLocaleStyleData longData;
CDFLocaleDataCDFLocaleData125 inline CDFLocaleData() : shortData(), longData() { }
~CDFLocaleDataCDFLocaleData126 inline ~CDFLocaleData() { }
127 // Init initializes this object.
128 void Init(UErrorCode& status);
129 };
130
131 U_NAMESPACE_END
132
133 U_CDECL_BEGIN
134
cdf_cleanup(void)135 static UBool U_CALLCONV cdf_cleanup(void) {
136 if (gCompactDecimalData != NULL) {
137 uhash_close(gCompactDecimalData);
138 gCompactDecimalData = NULL;
139 }
140 return TRUE;
141 }
142
deleteCDFUnits(void * ptr)143 static void U_CALLCONV deleteCDFUnits(void* ptr) {
144 delete [] (icu::CDFUnit*) ptr;
145 }
146
deleteCDFLocaleData(void * ptr)147 static void U_CALLCONV deleteCDFLocaleData(void* ptr) {
148 delete (icu::CDFLocaleData*) ptr;
149 }
150
151 U_CDECL_END
152
153 U_NAMESPACE_BEGIN
154
155 static UBool divisors_equal(const double* lhs, const double* rhs);
156 static const CDFLocaleStyleData* getCDFLocaleStyleData(const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status);
157
158 static const CDFLocaleStyleData* extractDataByStyleEnum(const CDFLocaleData& data, UNumberCompactStyle style, UErrorCode& status);
159 static CDFLocaleData* loadCDFLocaleData(const Locale& inLocale, UErrorCode& status);
160 static void load(const Locale& inLocale, CDFLocaleData* result, UErrorCode& status);
161 static int32_t populatePrefixSuffix(const char* variant, int32_t log10Value, const UnicodeString& formatStr, UHashtable* result, UBool overwrite, UErrorCode& status);
162 static double calculateDivisor(double power10, int32_t numZeros);
163 static UBool onlySpaces(UnicodeString u);
164 static void fixQuotes(UnicodeString& s);
165 static void checkForOtherVariants(CDFLocaleStyleData* result, UErrorCode& status);
166 static void fillInMissing(CDFLocaleStyleData* result);
167 static int32_t computeLog10(double x, UBool inRange);
168 static CDFUnit* createCDFUnit(const char* variant, int32_t log10Value, UHashtable* table, UErrorCode& status);
169 static const CDFUnit* getCDFUnitFallback(const UHashtable* table, const UnicodeString& variant, int32_t log10Value);
170
// ICU RTTI boilerplate for CompactDecimalFormat, generated by the macro.
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CompactDecimalFormat)
172
173 CompactDecimalFormat::CompactDecimalFormat(
174 const DecimalFormat& decimalFormat,
175 const UHashtable* unitsByVariant,
176 const double* divisors,
177 PluralRules* pluralRules)
178 : DecimalFormat(decimalFormat), _unitsByVariant(unitsByVariant), _divisors(divisors), _pluralRules(pluralRules) {
179 }
180
// Copy constructor. The unit table and divisor pointers are shared with
// source; the plural rules are cloned so that each object owns its own.
CompactDecimalFormat::CompactDecimalFormat(const CompactDecimalFormat& source)
    : DecimalFormat(source),
      _unitsByVariant(source._unitsByVariant),
      _divisors(source._divisors),
      _pluralRules(source._pluralRules->clone()) {
}
184
185 CompactDecimalFormat* U_EXPORT2
createInstance(const Locale & inLocale,UNumberCompactStyle style,UErrorCode & status)186 CompactDecimalFormat::createInstance(
187 const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status) {
188 LocalPointer<DecimalFormat> decfmt((DecimalFormat*) NumberFormat::makeInstance(inLocale, UNUM_DECIMAL, TRUE, status));
189 if (U_FAILURE(status)) {
190 return NULL;
191 }
192 LocalPointer<PluralRules> pluralRules(PluralRules::forLocale(inLocale, status));
193 if (U_FAILURE(status)) {
194 return NULL;
195 }
196 const CDFLocaleStyleData* data = getCDFLocaleStyleData(inLocale, style, status);
197 if (U_FAILURE(status)) {
198 return NULL;
199 }
200 CompactDecimalFormat* result =
201 new CompactDecimalFormat(*decfmt, data->unitsByVariant, data->divisors, pluralRules.getAlias());
202 if (result == NULL) {
203 status = U_MEMORY_ALLOCATION_ERROR;
204 return NULL;
205 }
206 pluralRules.orphan();
207 result->setMaximumSignificantDigits(3);
208 result->setSignificantDigitsUsed(TRUE);
209 result->setGroupingUsed(FALSE);
210 return result;
211 }
212
213 CompactDecimalFormat&
operator =(const CompactDecimalFormat & rhs)214 CompactDecimalFormat::operator=(const CompactDecimalFormat& rhs) {
215 if (this != &rhs) {
216 DecimalFormat::operator=(rhs);
217 _unitsByVariant = rhs._unitsByVariant;
218 _divisors = rhs._divisors;
219 delete _pluralRules;
220 _pluralRules = rhs._pluralRules->clone();
221 }
222 return *this;
223 }
224
~CompactDecimalFormat()225 CompactDecimalFormat::~CompactDecimalFormat() {
226 delete _pluralRules;
227 }
228
229
230 Format*
clone(void) const231 CompactDecimalFormat::clone(void) const {
232 return new CompactDecimalFormat(*this);
233 }
234
235 UBool
operator ==(const Format & that) const236 CompactDecimalFormat::operator==(const Format& that) const {
237 if (this == &that) {
238 return TRUE;
239 }
240 return (DecimalFormat::operator==(that) && eqHelper((const CompactDecimalFormat&) that));
241 }
242
243 UBool
eqHelper(const CompactDecimalFormat & that) const244 CompactDecimalFormat::eqHelper(const CompactDecimalFormat& that) const {
245 return uhash_equals(_unitsByVariant, that._unitsByVariant) && divisors_equal(_divisors, that._divisors) && (*_pluralRules == *that._pluralRules);
246 }
247
248 UnicodeString&
format(double number,UnicodeString & appendTo,FieldPosition & pos) const249 CompactDecimalFormat::format(
250 double number,
251 UnicodeString& appendTo,
252 FieldPosition& pos) const {
253 UErrorCode status = U_ZERO_ERROR;
254 return format(number, appendTo, pos, status);
255 }
256
257 UnicodeString&
format(double number,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const258 CompactDecimalFormat::format(
259 double number,
260 UnicodeString& appendTo,
261 FieldPosition& pos,
262 UErrorCode &status) const {
263 if (U_FAILURE(status)) {
264 return appendTo;
265 }
266 DigitList orig, rounded;
267 orig.set(number);
268 UBool isNegative;
269 _round(orig, rounded, isNegative, status);
270 if (U_FAILURE(status)) {
271 return appendTo;
272 }
273 double roundedDouble = rounded.getDouble();
274 if (isNegative) {
275 roundedDouble = -roundedDouble;
276 }
277 int32_t baseIdx = computeLog10(roundedDouble, TRUE);
278 double numberToFormat = roundedDouble / _divisors[baseIdx];
279 UnicodeString variant = _pluralRules->select(numberToFormat);
280 if (isNegative) {
281 numberToFormat = -numberToFormat;
282 }
283 const CDFUnit* unit = getCDFUnitFallback(_unitsByVariant, variant, baseIdx);
284 appendTo += unit->prefix;
285 DecimalFormat::format(numberToFormat, appendTo, pos);
286 appendTo += unit->suffix;
287 return appendTo;
288 }
289
290 UnicodeString&
format(double,UnicodeString & appendTo,FieldPositionIterator *,UErrorCode & status) const291 CompactDecimalFormat::format(
292 double /* number */,
293 UnicodeString& appendTo,
294 FieldPositionIterator* /* posIter */,
295 UErrorCode& status) const {
296 status = U_UNSUPPORTED_ERROR;
297 return appendTo;
298 }
299
300 UnicodeString&
format(int32_t number,UnicodeString & appendTo,FieldPosition & pos) const301 CompactDecimalFormat::format(
302 int32_t number,
303 UnicodeString& appendTo,
304 FieldPosition& pos) const {
305 return format((double) number, appendTo, pos);
306 }
307
308 UnicodeString&
format(int32_t number,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const309 CompactDecimalFormat::format(
310 int32_t number,
311 UnicodeString& appendTo,
312 FieldPosition& pos,
313 UErrorCode &status) const {
314 return format((double) number, appendTo, pos, status);
315 }
316
317 UnicodeString&
format(int32_t,UnicodeString & appendTo,FieldPositionIterator *,UErrorCode & status) const318 CompactDecimalFormat::format(
319 int32_t /* number */,
320 UnicodeString& appendTo,
321 FieldPositionIterator* /* posIter */,
322 UErrorCode& status) const {
323 status = U_UNSUPPORTED_ERROR;
324 return appendTo;
325 }
326
327 UnicodeString&
format(int64_t number,UnicodeString & appendTo,FieldPosition & pos) const328 CompactDecimalFormat::format(
329 int64_t number,
330 UnicodeString& appendTo,
331 FieldPosition& pos) const {
332 return format((double) number, appendTo, pos);
333 }
334
335 UnicodeString&
format(int64_t number,UnicodeString & appendTo,FieldPosition & pos,UErrorCode & status) const336 CompactDecimalFormat::format(
337 int64_t number,
338 UnicodeString& appendTo,
339 FieldPosition& pos,
340 UErrorCode &status) const {
341 return format((double) number, appendTo, pos, status);
342 }
343
344 UnicodeString&
format(int64_t,UnicodeString & appendTo,FieldPositionIterator *,UErrorCode & status) const345 CompactDecimalFormat::format(
346 int64_t /* number */,
347 UnicodeString& appendTo,
348 FieldPositionIterator* /* posIter */,
349 UErrorCode& status) const {
350 status = U_UNSUPPORTED_ERROR;
351 return appendTo;
352 }
353
354 UnicodeString&
format(StringPiece,UnicodeString & appendTo,FieldPositionIterator *,UErrorCode & status) const355 CompactDecimalFormat::format(
356 StringPiece /* number */,
357 UnicodeString& appendTo,
358 FieldPositionIterator* /* posIter */,
359 UErrorCode& status) const {
360 status = U_UNSUPPORTED_ERROR;
361 return appendTo;
362 }
363
364 UnicodeString&
format(const DigitList &,UnicodeString & appendTo,FieldPositionIterator *,UErrorCode & status) const365 CompactDecimalFormat::format(
366 const DigitList& /* number */,
367 UnicodeString& appendTo,
368 FieldPositionIterator* /* posIter */,
369 UErrorCode& status) const {
370 status = U_UNSUPPORTED_ERROR;
371 return appendTo;
372 }
373
374 UnicodeString&
format(const DigitList &,UnicodeString & appendTo,FieldPosition &,UErrorCode & status) const375 CompactDecimalFormat::format(const DigitList& /* number */,
376 UnicodeString& appendTo,
377 FieldPosition& /* pos */,
378 UErrorCode& status) const {
379 status = U_UNSUPPORTED_ERROR;
380 return appendTo;
381 }
382
383 void
parse(const UnicodeString &,Formattable &,ParsePosition &) const384 CompactDecimalFormat::parse(
385 const UnicodeString& /* text */,
386 Formattable& /* result */,
387 ParsePosition& /* parsePosition */) const {
388 }
389
390 void
parse(const UnicodeString &,Formattable &,UErrorCode & status) const391 CompactDecimalFormat::parse(
392 const UnicodeString& /* text */,
393 Formattable& /* result */,
394 UErrorCode& status) const {
395 status = U_UNSUPPORTED_ERROR;
396 }
397
398 CurrencyAmount*
parseCurrency(const UnicodeString &,ParsePosition &) const399 CompactDecimalFormat::parseCurrency(
400 const UnicodeString& /* text */,
401 ParsePosition& /* pos */) const {
402 return NULL;
403 }
404
Init(UErrorCode & status)405 void CDFLocaleStyleData::Init(UErrorCode& status) {
406 if (unitsByVariant != NULL) {
407 return;
408 }
409 unitsByVariant = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status);
410 if (U_FAILURE(status)) {
411 return;
412 }
413 uhash_setKeyDeleter(unitsByVariant, uprv_free);
414 uhash_setValueDeleter(unitsByVariant, deleteCDFUnits);
415 }
416
~CDFLocaleStyleData()417 CDFLocaleStyleData::~CDFLocaleStyleData() {
418 setToBogus();
419 }
420
setToBogus()421 void CDFLocaleStyleData::setToBogus() {
422 if (unitsByVariant != NULL) {
423 uhash_close(unitsByVariant);
424 unitsByVariant = NULL;
425 }
426 }
427
Init(UErrorCode & status)428 void CDFLocaleData::Init(UErrorCode& status) {
429 shortData.Init(status);
430 if (U_FAILURE(status)) {
431 return;
432 }
433 longData.Init(status);
434 }
435
436 // Helper method for operator=
divisors_equal(const double * lhs,const double * rhs)437 static UBool divisors_equal(const double* lhs, const double* rhs) {
438 for (int32_t i = 0; i < MAX_DIGITS; ++i) {
439 if (lhs[i] != rhs[i]) {
440 return FALSE;
441 }
442 }
443 return TRUE;
444 }
445
446 // getCDFLocaleStyleData returns pointer to formatting data for given locale and
447 // style within the global cache. On cache miss, getCDFLocaleStyleData loads
448 // the data from CLDR into the global cache before returning the pointer. If a
449 // UNUM_LONG data is requested for a locale, and that locale does not have
450 // UNUM_LONG data, getCDFLocaleStyleData will fall back to UNUM_SHORT data for
451 // that locale.
getCDFLocaleStyleData(const Locale & inLocale,UNumberCompactStyle style,UErrorCode & status)452 static const CDFLocaleStyleData* getCDFLocaleStyleData(const Locale& inLocale, UNumberCompactStyle style, UErrorCode& status) {
453 if (U_FAILURE(status)) {
454 return NULL;
455 }
456 CDFLocaleData* result = NULL;
457 const char* key = inLocale.getName();
458 {
459 Mutex lock(&gCompactDecimalMetaLock);
460 if (gCompactDecimalData == NULL) {
461 gCompactDecimalData = uhash_open(uhash_hashChars, uhash_compareChars, NULL, &status);
462 if (U_FAILURE(status)) {
463 return NULL;
464 }
465 uhash_setKeyDeleter(gCompactDecimalData, uprv_free);
466 uhash_setValueDeleter(gCompactDecimalData, deleteCDFLocaleData);
467 ucln_i18n_registerCleanup(UCLN_I18N_CDFINFO, cdf_cleanup);
468 } else {
469 result = (CDFLocaleData*) uhash_get(gCompactDecimalData, key);
470 }
471 }
472 if (result != NULL) {
473 return extractDataByStyleEnum(*result, style, status);
474 }
475
476 result = loadCDFLocaleData(inLocale, status);
477 if (U_FAILURE(status)) {
478 return NULL;
479 }
480
481 {
482 Mutex lock(&gCompactDecimalMetaLock);
483 CDFLocaleData* temp = (CDFLocaleData*) uhash_get(gCompactDecimalData, key);
484 if (temp != NULL) {
485 delete result;
486 result = temp;
487 } else {
488 uhash_put(gCompactDecimalData, uprv_strdup(key), (void*) result, &status);
489 if (U_FAILURE(status)) {
490 return NULL;
491 }
492 }
493 }
494 return extractDataByStyleEnum(*result, style, status);
495 }
496
extractDataByStyleEnum(const CDFLocaleData & data,UNumberCompactStyle style,UErrorCode & status)497 static const CDFLocaleStyleData* extractDataByStyleEnum(const CDFLocaleData& data, UNumberCompactStyle style, UErrorCode& status) {
498 switch (style) {
499 case UNUM_SHORT:
500 return &data.shortData;
501 case UNUM_LONG:
502 if (!data.longData.isBogus()) {
503 return &data.longData;
504 }
505 return &data.shortData;
506 default:
507 status = U_ILLEGAL_ARGUMENT_ERROR;
508 return NULL;
509 }
510 }
511
512 // loadCDFLocaleData loads formatting data from CLDR for a given locale. The
513 // caller owns the returned pointer.
loadCDFLocaleData(const Locale & inLocale,UErrorCode & status)514 static CDFLocaleData* loadCDFLocaleData(const Locale& inLocale, UErrorCode& status) {
515 if (U_FAILURE(status)) {
516 return NULL;
517 }
518 CDFLocaleData* result = new CDFLocaleData;
519 if (result == NULL) {
520 status = U_MEMORY_ALLOCATION_ERROR;
521 return NULL;
522 }
523 result->Init(status);
524 if (U_FAILURE(status)) {
525 delete result;
526 return NULL;
527 }
528
529 load(inLocale, result, status);
530
531 if (U_FAILURE(status)) {
532 delete result;
533 return NULL;
534 }
535 return result;
536 }
537
538 namespace {
539
540 struct CmptDecDataSink : public ResourceSink {
541
542 CDFLocaleData& dataBundle; // Where to save values when they are read
543 UBool isLatin; // Whether or not we are traversing the Latin tree
544 UBool isFallback; // Whether or not we are traversing the Latin tree as fallback
545
546 enum EPatternsTableKey { PATTERNS_SHORT, PATTERNS_LONG };
547 enum EFormatsTableKey { DECIMAL_FORMAT, CURRENCY_FORMAT };
548
549 /*
550 * NumberElements{ <-- top (numbering system table)
551 * latn{ <-- patternsTable (one per numbering system)
552 * patternsLong{ <-- formatsTable (one per pattern)
553 * decimalFormat{ <-- powersOfTenTable (one per format)
554 * 1000{ <-- pluralVariantsTable (one per power of ten)
555 * one{"0 thousand"} <-- plural variant and template
556 */
557
CmptDecDataSink__anonc7fee4260111::CmptDecDataSink558 CmptDecDataSink(CDFLocaleData& _dataBundle)
559 : dataBundle(_dataBundle), isLatin(FALSE), isFallback(FALSE) {}
560 virtual ~CmptDecDataSink();
561
put__anonc7fee4260111::CmptDecDataSink562 virtual void put(const char *key, ResourceValue &value, UBool isRoot, UErrorCode &errorCode) {
563 // SPECIAL CASE: Don't consume root in the non-Latin numbering system
564 if (isRoot && !isLatin) { return; }
565
566 ResourceTable patternsTable = value.getTable(errorCode);
567 if (U_FAILURE(errorCode)) { return; }
568 for (int i1 = 0; patternsTable.getKeyAndValue(i1, key, value); ++i1) {
569
570 // Check for patternsShort or patternsLong
571 EPatternsTableKey patternsTableKey;
572 if (uprv_strcmp(key, gPatternsShort) == 0) {
573 patternsTableKey = PATTERNS_SHORT;
574 } else if (uprv_strcmp(key, gPatternsLong) == 0) {
575 patternsTableKey = PATTERNS_LONG;
576 } else {
577 continue;
578 }
579
580 // Traverse into the formats table
581 ResourceTable formatsTable = value.getTable(errorCode);
582 if (U_FAILURE(errorCode)) { return; }
583 for (int i2 = 0; formatsTable.getKeyAndValue(i2, key, value); ++i2) {
584
585 // Check for decimalFormat or currencyFormat
586 EFormatsTableKey formatsTableKey;
587 if (uprv_strcmp(key, gDecimalFormatTag) == 0) {
588 formatsTableKey = DECIMAL_FORMAT;
589 // TODO: Enable this statement when currency support is added
590 // } else if (uprv_strcmp(key, gCurrencyFormat) == 0) {
591 // formatsTableKey = CURRENCY_FORMAT;
592 } else {
593 continue;
594 }
595
596 // Set the current style and destination based on the two keys
597 UNumberCompactStyle style;
598 CDFLocaleStyleData* destination = NULL;
599 if (patternsTableKey == PATTERNS_LONG
600 && formatsTableKey == DECIMAL_FORMAT) {
601 style = UNUM_LONG;
602 destination = &dataBundle.longData;
603 } else if (patternsTableKey == PATTERNS_SHORT
604 && formatsTableKey == DECIMAL_FORMAT) {
605 style = UNUM_SHORT;
606 destination = &dataBundle.shortData;
607 // TODO: Enable the following statements when currency support is added
608 // } else if (patternsTableKey == PATTERNS_SHORT
609 // && formatsTableKey == CURRENCY_FORMAT) {
610 // style = UNUM_SHORT_CURRENCY; // or whatever the enum gets named
611 // destination = &dataBundle.shortCurrencyData;
612 // } else {
613 // // Silently ignore this case
614 // continue;
615 }
616
617 // SPECIAL CASE: RULES FOR WHETHER OR NOT TO CONSUME THIS TABLE:
618 // 1) Don't consume longData if shortData was consumed from the non-Latin
619 // locale numbering system
620 // 2) Don't consume longData for the first time if this is the root bundle and
621 // shortData is already populated from a more specific locale. Note that if
622 // both longData and shortData are both only in root, longData will be
623 // consumed since it is alphabetically before shortData in the bundle.
624 if (isFallback
625 && style == UNUM_LONG
626 && !dataBundle.shortData.isEmpty()
627 && !dataBundle.shortData.fromFallback) {
628 continue;
629 }
630 if (isRoot
631 && style == UNUM_LONG
632 && dataBundle.longData.isEmpty()
633 && !dataBundle.shortData.isEmpty()) {
634 continue;
635 }
636
637 // Set the "fromFallback" flag on the data object
638 destination->fromFallback = isFallback;
639
640 // Traverse into the powers of ten table
641 ResourceTable powersOfTenTable = value.getTable(errorCode);
642 if (U_FAILURE(errorCode)) { return; }
643 for (int i3 = 0; powersOfTenTable.getKeyAndValue(i3, key, value); ++i3) {
644
645 // The key will always be some even power of 10. e.g 10000.
646 char* endPtr = NULL;
647 double power10 = uprv_strtod(key, &endPtr);
648 if (*endPtr != 0) {
649 errorCode = U_INTERNAL_PROGRAM_ERROR;
650 return;
651 }
652 int32_t log10Value = computeLog10(power10, FALSE);
653
654 // Silently ignore divisors that are too big.
655 if (log10Value >= MAX_DIGITS) continue;
656
657 // Iterate over the plural variants ("one", "other", etc)
658 ResourceTable pluralVariantsTable = value.getTable(errorCode);
659 if (U_FAILURE(errorCode)) { return; }
660 for (int i4 = 0; pluralVariantsTable.getKeyAndValue(i4, key, value); ++i4) {
661 const char* pluralVariant = key;
662 const UnicodeString formatStr = value.getUnicodeString(errorCode);
663
664 // Copy the data into the in-memory data bundle (do not overwrite
665 // existing values)
666 int32_t numZeros = populatePrefixSuffix(
667 pluralVariant, log10Value, formatStr,
668 destination->unitsByVariant, FALSE, errorCode);
669
670 // If populatePrefixSuffix returns -1, it means that this key has been
671 // encountered already.
672 if (numZeros < 0) {
673 continue;
674 }
675
676 // Set the divisor, which is based on the number of zeros in the template
677 // string. If the divisor from here is different from the one previously
678 // stored, it means that the number of zeros in different plural variants
679 // differs; throw an exception.
680 // TODO: How should I check for floating-point errors here?
681 // Is there a good reason why "divisor" is double and not long like Java?
682 double divisor = calculateDivisor(power10, numZeros);
683 if (destination->divisors[log10Value] != 0.0
684 && destination->divisors[log10Value] != divisor) {
685 errorCode = U_INTERNAL_PROGRAM_ERROR;
686 return;
687 }
688 destination->divisors[log10Value] = divisor;
689 }
690 }
691 }
692 }
693 }
694 };
695
696 // Virtual destructors must be defined out of line.
~CmptDecDataSink()697 CmptDecDataSink::~CmptDecDataSink() {}
698
699 } // namespace
700
load(const Locale & inLocale,CDFLocaleData * result,UErrorCode & status)701 static void load(const Locale& inLocale, CDFLocaleData* result, UErrorCode& status) {
702 LocalPointer<NumberingSystem> ns(NumberingSystem::createInstance(inLocale, status));
703 if (U_FAILURE(status)) {
704 return;
705 }
706 const char* nsName = ns->getName();
707
708 LocalUResourceBundlePointer resource(ures_open(NULL, inLocale.getName(), &status));
709 if (U_FAILURE(status)) {
710 return;
711 }
712 CmptDecDataSink sink(*result);
713 sink.isFallback = FALSE;
714
715 // First load the number elements data if nsName is not Latin.
716 if (uprv_strcmp(nsName, gLatnTag) != 0) {
717 sink.isLatin = FALSE;
718 CharString path;
719 path.append(gNumberElementsTag, status)
720 .append('/', status)
721 .append(nsName, status);
722 ures_getAllItemsWithFallback(resource.getAlias(), path.data(), sink, status);
723 if (status == U_MISSING_RESOURCE_ERROR) {
724 // Silently ignore and use Latin
725 status = U_ZERO_ERROR;
726 } else if (U_FAILURE(status)) {
727 return;
728 }
729 sink.isFallback = TRUE;
730 }
731
732 // Now load Latin.
733 sink.isLatin = TRUE;
734 ures_getAllItemsWithFallback(resource.getAlias(), gLatnPath, sink, status);
735 if (U_FAILURE(status)) return;
736
737 // If longData is empty, default it to be equal to shortData
738 if (result->longData.isEmpty()) {
739 result->longData.setToBogus();
740 }
741
742 // Check for "other" variants in each of the three data classes, and resolve missing elements.
743
744 if (!result->longData.isBogus()) {
745 checkForOtherVariants(&result->longData, status);
746 if (U_FAILURE(status)) return;
747 fillInMissing(&result->longData);
748 }
749
750 checkForOtherVariants(&result->shortData, status);
751 if (U_FAILURE(status)) return;
752 fillInMissing(&result->shortData);
753
754 // TODO: Enable this statement when currency support is added
755 // checkForOtherVariants(&result->shortCurrencyData, status);
756 // if (U_FAILURE(status)) return;
757 // fillInMissing(&result->shortCurrencyData);
758 }
759
760 // populatePrefixSuffix Adds a specific prefix-suffix pair to result for a
761 // given variant and log10 value.
762 // variant is 'zero', 'one', 'two', 'few', 'many', or 'other'.
763 // formatStr is the format string from which the prefix and suffix are
764 // extracted. It is usually of form 'Pefix 000 suffix'.
765 // populatePrefixSuffix returns the number of 0's found in formatStr
766 // before the decimal point.
767 // In the special case that formatStr contains only spaces for prefix
768 // and suffix, populatePrefixSuffix returns log10Value + 1.
populatePrefixSuffix(const char * variant,int32_t log10Value,const UnicodeString & formatStr,UHashtable * result,UBool overwrite,UErrorCode & status)769 static int32_t populatePrefixSuffix(
770 const char* variant, int32_t log10Value, const UnicodeString& formatStr, UHashtable* result, UBool overwrite, UErrorCode& status) {
771 if (U_FAILURE(status)) {
772 return 0;
773 }
774 int32_t firstIdx = formatStr.indexOf(kZero, UPRV_LENGTHOF(kZero), 0);
775 // We must have 0's in format string.
776 if (firstIdx == -1) {
777 status = U_INTERNAL_PROGRAM_ERROR;
778 return 0;
779 }
780 int32_t lastIdx = formatStr.lastIndexOf(kZero, UPRV_LENGTHOF(kZero), firstIdx);
781 CDFUnit* unit = createCDFUnit(variant, log10Value, result, status);
782 if (U_FAILURE(status)) {
783 return 0;
784 }
785
786 // Return -1 if we are not overwriting an existing value
787 if (unit->isSet() && !overwrite) {
788 return -1;
789 }
790 unit->markAsSet();
791
792 // Everything up to first 0 is the prefix
793 unit->prefix = formatStr.tempSubString(0, firstIdx);
794 fixQuotes(unit->prefix);
795 // Everything beyond the last 0 is the suffix
796 unit->suffix = formatStr.tempSubString(lastIdx + 1);
797 fixQuotes(unit->suffix);
798
799 // If there is effectively no prefix or suffix, ignore the actual number of
800 // 0's and act as if the number of 0's matches the size of the number.
801 if (onlySpaces(unit->prefix) && onlySpaces(unit->suffix)) {
802 return log10Value + 1;
803 }
804
805 // Calculate number of zeros before decimal point
806 int32_t idx = firstIdx + 1;
807 while (idx <= lastIdx && formatStr.charAt(idx) == u_0) {
808 ++idx;
809 }
810 return (idx - firstIdx);
811 }
812
// Calculates a divisor from the magnitude and the number of zeros in the
// template string: power10 divided by 10 once for every zero beyond the
// first. With numZeros <= 1, power10 is returned unchanged.
static double calculateDivisor(double power10, int32_t numZeros) {
  double divisor = power10;
  for (int32_t zerosLeft = numZeros; zerosLeft > 1; --zerosLeft) {
    divisor /= 10.0;
  }
  return divisor;
}
822
onlySpaces(UnicodeString u)823 static UBool onlySpaces(UnicodeString u) {
824 return u.trim().length() == 0;
825 }
826
827 // fixQuotes unescapes single quotes. Don''t -> Don't. Letter 'j' -> Letter j.
828 // Modifies s in place.
fixQuotes(UnicodeString & s)829 static void fixQuotes(UnicodeString& s) {
830 QuoteState state = OUTSIDE;
831 int32_t len = s.length();
832 int32_t dest = 0;
833 for (int32_t i = 0; i < len; ++i) {
834 UChar ch = s.charAt(i);
835 if (ch == u_apos) {
836 if (state == INSIDE_EMPTY) {
837 s.setCharAt(dest, ch);
838 ++dest;
839 }
840 } else {
841 s.setCharAt(dest, ch);
842 ++dest;
843 }
844
845 // Update state
846 switch (state) {
847 case OUTSIDE:
848 state = ch == u_apos ? INSIDE_EMPTY : OUTSIDE;
849 break;
850 case INSIDE_EMPTY:
851 case INSIDE_FULL:
852 state = ch == u_apos ? OUTSIDE : INSIDE_FULL;
853 break;
854 default:
855 break;
856 }
857 }
858 s.truncate(dest);
859 }
860
861 // Checks to make sure that an "other" variant is present in all
862 // powers of 10.
checkForOtherVariants(CDFLocaleStyleData * result,UErrorCode & status)863 static void checkForOtherVariants(CDFLocaleStyleData* result,
864 UErrorCode& status) {
865 if (result == NULL || result->unitsByVariant == NULL) {
866 return;
867 }
868
869 const CDFUnit* otherByBase =
870 (const CDFUnit*) uhash_get(result->unitsByVariant, gOther);
871 if (otherByBase == NULL) {
872 status = U_INTERNAL_PROGRAM_ERROR;
873 return;
874 }
875
876 // Check all other plural variants, and make sure that if
877 // any of them are populated, then other is also populated
878 int32_t pos = UHASH_FIRST;
879 const UHashElement* element;
880 while ((element = uhash_nextElement(result->unitsByVariant, &pos)) != NULL) {
881 CDFUnit* variantsByBase = (CDFUnit*) element->value.pointer;
882 if (variantsByBase == otherByBase) continue;
883 for (int32_t log10Value = 0; log10Value < MAX_DIGITS; ++log10Value) {
884 if (variantsByBase[log10Value].isSet()
885 && !otherByBase[log10Value].isSet()) {
886 status = U_INTERNAL_PROGRAM_ERROR;
887 return;
888 }
889 }
890 }
891 }
892
893 // fillInMissing ensures that the data in result is complete.
894 // result data is complete if for each variant in result, there exists
895 // a prefix-suffix pair for each log10 value and there also exists
896 // a divisor for each log10 value.
897 //
898 // First this function figures out for which log10 values, the other
899 // variant already had data. These are the same log10 values defined
900 // in CLDR.
901 //
902 // For each log10 value not defined in CLDR, it uses the divisor for
903 // the last defined log10 value or 1.
904 //
905 // Then for each variant, it does the following. For each log10
906 // value not defined in CLDR, copy the prefix-suffix pair from the
907 // previous log10 value. If log10 value is defined in CLDR but is
908 // missing from given variant, copy the prefix-suffix pair for that
909 // log10 value from the 'other' variant.
fillInMissing(CDFLocaleStyleData * result)910 static void fillInMissing(CDFLocaleStyleData* result) {
911 const CDFUnit* otherUnits =
912 (const CDFUnit*) uhash_get(result->unitsByVariant, gOther);
913 UBool definedInCLDR[MAX_DIGITS];
914 double lastDivisor = 1.0;
915 for (int32_t i = 0; i < MAX_DIGITS; ++i) {
916 if (!otherUnits[i].isSet()) {
917 result->divisors[i] = lastDivisor;
918 definedInCLDR[i] = FALSE;
919 } else {
920 lastDivisor = result->divisors[i];
921 definedInCLDR[i] = TRUE;
922 }
923 }
924 // Iterate over each variant.
925 int32_t pos = UHASH_FIRST;
926 const UHashElement* element = uhash_nextElement(result->unitsByVariant, &pos);
927 for (;element != NULL; element = uhash_nextElement(result->unitsByVariant, &pos)) {
928 CDFUnit* units = (CDFUnit*) element->value.pointer;
929 for (int32_t i = 0; i < MAX_DIGITS; ++i) {
930 if (definedInCLDR[i]) {
931 if (!units[i].isSet()) {
932 units[i] = otherUnits[i];
933 }
934 } else {
935 if (i == 0) {
936 units[0].markAsSet();
937 } else {
938 units[i] = units[i - 1];
939 }
940 }
941 }
942 }
943 }
944
945 // computeLog10 computes floor(log10(x)). If inRange is TRUE, the biggest
946 // value computeLog10 will return MAX_DIGITS -1 even for
947 // numbers > 10^MAX_DIGITS. If inRange is FALSE, computeLog10 will return
948 // up to MAX_DIGITS.
computeLog10(double x,UBool inRange)949 static int32_t computeLog10(double x, UBool inRange) {
950 int32_t result = 0;
951 int32_t max = inRange ? MAX_DIGITS - 1 : MAX_DIGITS;
952 while (x >= 10.0) {
953 x /= 10.0;
954 ++result;
955 if (result == max) {
956 break;
957 }
958 }
959 return result;
960 }
961
962 // createCDFUnit returns a pointer to the prefix-suffix pair for a given
963 // variant and log10 value within table. If no such prefix-suffix pair is
964 // stored in table, one is created within table before returning pointer.
createCDFUnit(const char * variant,int32_t log10Value,UHashtable * table,UErrorCode & status)965 static CDFUnit* createCDFUnit(const char* variant, int32_t log10Value, UHashtable* table, UErrorCode& status) {
966 if (U_FAILURE(status)) {
967 return NULL;
968 }
969 CDFUnit *cdfUnit = (CDFUnit*) uhash_get(table, variant);
970 if (cdfUnit == NULL) {
971 cdfUnit = new CDFUnit[MAX_DIGITS];
972 if (cdfUnit == NULL) {
973 status = U_MEMORY_ALLOCATION_ERROR;
974 return NULL;
975 }
976 uhash_put(table, uprv_strdup(variant), cdfUnit, &status);
977 if (U_FAILURE(status)) {
978 return NULL;
979 }
980 }
981 CDFUnit* result = &cdfUnit[log10Value];
982 return result;
983 }
984
985 // getCDFUnitFallback returns a pointer to the prefix-suffix pair for a given
986 // variant and log10 value within table. If the given variant doesn't exist, it
987 // falls back to the OTHER variant. Therefore, this method will always return
988 // some non-NULL value.
getCDFUnitFallback(const UHashtable * table,const UnicodeString & variant,int32_t log10Value)989 static const CDFUnit* getCDFUnitFallback(const UHashtable* table, const UnicodeString& variant, int32_t log10Value) {
990 CharString cvariant;
991 UErrorCode status = U_ZERO_ERROR;
992 const CDFUnit *cdfUnit = NULL;
993 cvariant.appendInvariantChars(variant, status);
994 if (!U_FAILURE(status)) {
995 cdfUnit = (const CDFUnit*) uhash_get(table, cvariant.data());
996 }
997 if (cdfUnit == NULL) {
998 cdfUnit = (const CDFUnit*) uhash_get(table, gOther);
999 }
1000 return &cdfUnit[log10Value];
1001 }
1002
1003 U_NAMESPACE_END
1004 #endif
1005