1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include "unicode/utypes.h"
5
6 #if !UCONFIG_NO_FORMATTING
7
8 #include "cstring.h"
9 #include "unicode/ures.h"
10 #include "uresimp.h"
11 #include "charstr.h"
12 #include "number_formatimpl.h"
13 #include "unicode/numfmt.h"
14 #include "number_patternstring.h"
15 #include "number_utils.h"
16 #include "unicode/numberformatter.h"
17 #include "unicode/dcfmtsym.h"
18 #include "number_scientific.h"
19 #include "number_compact.h"
20 #include "uresimp.h"
21 #include "ureslocs.h"
22
23 using namespace icu;
24 using namespace icu::number;
25 using namespace icu::number::impl;
26
27 namespace {
28
29 struct CurrencyFormatInfoResult {
30 bool exists;
31 const char16_t* pattern;
32 const char16_t* decimalSeparator;
33 const char16_t* groupingSeparator;
34 };
35
36 CurrencyFormatInfoResult
getCurrencyFormatInfo(const Locale & locale,const char * isoCode,UErrorCode & status)37 getCurrencyFormatInfo(const Locale& locale, const char* isoCode, UErrorCode& status) {
38 // TODO: Load this data in a centralized location like ICU4J?
39 // TODO: Move this into the CurrencySymbols class?
40 // TODO: Parts of this same data are loaded in dcfmtsym.cpp; should clean up.
41 CurrencyFormatInfoResult result = {false, nullptr, nullptr, nullptr};
42 if (U_FAILURE(status)) { return result; }
43 CharString key;
44 key.append("Currencies/", status);
45 key.append(isoCode, status);
46 UErrorCode localStatus = status;
47 LocalUResourceBundlePointer bundle(ures_open(U_ICUDATA_CURR, locale.getName(), &localStatus));
48 ures_getByKeyWithFallback(bundle.getAlias(), key.data(), bundle.getAlias(), &localStatus);
49 if (U_SUCCESS(localStatus) &&
50 ures_getSize(bundle.getAlias()) > 2) { // the length is 3 if more data is present
51 ures_getByIndex(bundle.getAlias(), 2, bundle.getAlias(), &localStatus);
52 int32_t dummy;
53 result.exists = true;
54 result.pattern = ures_getStringByIndex(bundle.getAlias(), 0, &dummy, &localStatus);
55 result.decimalSeparator = ures_getStringByIndex(bundle.getAlias(), 1, &dummy, &localStatus);
56 result.groupingSeparator = ures_getStringByIndex(bundle.getAlias(), 2, &dummy, &localStatus);
57 status = localStatus;
58 } else if (localStatus != U_MISSING_RESOURCE_ERROR) {
59 status = localStatus;
60 }
61 return result;
62 }
63
64 } // namespace
65
66
67 MicroPropsGenerator::~MicroPropsGenerator() = default;
68
69
NumberFormatterImpl(const MacroProps & macros,UErrorCode & status)70 NumberFormatterImpl::NumberFormatterImpl(const MacroProps& macros, UErrorCode& status)
71 : NumberFormatterImpl(macros, true, status) {
72 }
73
formatStatic(const MacroProps & macros,DecimalQuantity & inValue,NumberStringBuilder & outString,UErrorCode & status)74 int32_t NumberFormatterImpl::formatStatic(const MacroProps& macros, DecimalQuantity& inValue,
75 NumberStringBuilder& outString, UErrorCode& status) {
76 NumberFormatterImpl impl(macros, false, status);
77 MicroProps& micros = impl.preProcessUnsafe(inValue, status);
78 if (U_FAILURE(status)) { return 0; }
79 int32_t length = writeNumber(micros, inValue, outString, 0, status);
80 length += writeAffixes(micros, outString, 0, length, status);
81 return length;
82 }
83
getPrefixSuffixStatic(const MacroProps & macros,int8_t signum,StandardPlural::Form plural,NumberStringBuilder & outString,UErrorCode & status)84 int32_t NumberFormatterImpl::getPrefixSuffixStatic(const MacroProps& macros, int8_t signum,
85 StandardPlural::Form plural,
86 NumberStringBuilder& outString, UErrorCode& status) {
87 NumberFormatterImpl impl(macros, false, status);
88 return impl.getPrefixSuffixUnsafe(signum, plural, outString, status);
89 }
90
// NOTE: C++ SPECIFIC DIFFERENCE FROM JAVA:
// The "safe" apply method uses a new MicroProps. In the MicroPropsGenerator, fMicros is copied into the new instance.
// The "unsafe" method simply re-uses fMicros, eliminating the extra copy operation.
// See MicroProps::processQuantity() for details.
95
format(DecimalQuantity & inValue,NumberStringBuilder & outString,UErrorCode & status) const96 int32_t NumberFormatterImpl::format(DecimalQuantity& inValue, NumberStringBuilder& outString,
97 UErrorCode& status) const {
98 MicroProps micros;
99 preProcess(inValue, micros, status);
100 if (U_FAILURE(status)) { return 0; }
101 int32_t length = writeNumber(micros, inValue, outString, 0, status);
102 length += writeAffixes(micros, outString, 0, length, status);
103 return length;
104 }
105
preProcess(DecimalQuantity & inValue,MicroProps & microsOut,UErrorCode & status) const106 void NumberFormatterImpl::preProcess(DecimalQuantity& inValue, MicroProps& microsOut,
107 UErrorCode& status) const {
108 if (U_FAILURE(status)) { return; }
109 if (fMicroPropsGenerator == nullptr) {
110 status = U_INTERNAL_PROGRAM_ERROR;
111 return;
112 }
113 fMicroPropsGenerator->processQuantity(inValue, microsOut, status);
114 microsOut.rounder.apply(inValue, status);
115 microsOut.integerWidth.apply(inValue, status);
116 }
117
preProcessUnsafe(DecimalQuantity & inValue,UErrorCode & status)118 MicroProps& NumberFormatterImpl::preProcessUnsafe(DecimalQuantity& inValue, UErrorCode& status) {
119 if (U_FAILURE(status)) {
120 return fMicros; // must always return a value
121 }
122 if (fMicroPropsGenerator == nullptr) {
123 status = U_INTERNAL_PROGRAM_ERROR;
124 return fMicros; // must always return a value
125 }
126 fMicroPropsGenerator->processQuantity(inValue, fMicros, status);
127 fMicros.rounder.apply(inValue, status);
128 fMicros.integerWidth.apply(inValue, status);
129 return fMicros;
130 }
131
getPrefixSuffix(int8_t signum,StandardPlural::Form plural,NumberStringBuilder & outString,UErrorCode & status) const132 int32_t NumberFormatterImpl::getPrefixSuffix(int8_t signum, StandardPlural::Form plural,
133 NumberStringBuilder& outString, UErrorCode& status) const {
134 if (U_FAILURE(status)) { return 0; }
135 // #13453: DecimalFormat wants the affixes from the pattern only (modMiddle, aka pattern modifier).
136 // Safe path: use fImmutablePatternModifier.
137 const Modifier* modifier = fImmutablePatternModifier->getModifier(signum, plural);
138 modifier->apply(outString, 0, 0, status);
139 if (U_FAILURE(status)) { return 0; }
140 return modifier->getPrefixLength();
141 }
142
getPrefixSuffixUnsafe(int8_t signum,StandardPlural::Form plural,NumberStringBuilder & outString,UErrorCode & status)143 int32_t NumberFormatterImpl::getPrefixSuffixUnsafe(int8_t signum, StandardPlural::Form plural,
144 NumberStringBuilder& outString, UErrorCode& status) {
145 if (U_FAILURE(status)) { return 0; }
146 // #13453: DecimalFormat wants the affixes from the pattern only (modMiddle, aka pattern modifier).
147 // Unsafe path: use fPatternModifier.
148 fPatternModifier->setNumberProperties(signum, plural);
149 fPatternModifier->apply(outString, 0, 0, status);
150 if (U_FAILURE(status)) { return 0; }
151 return fPatternModifier->getPrefixLength();
152 }
153
NumberFormatterImpl(const MacroProps & macros,bool safe,UErrorCode & status)154 NumberFormatterImpl::NumberFormatterImpl(const MacroProps& macros, bool safe, UErrorCode& status) {
155 fMicroPropsGenerator = macrosToMicroGenerator(macros, safe, status);
156 }
157
158 //////////
159
160 const MicroPropsGenerator*
macrosToMicroGenerator(const MacroProps & macros,bool safe,UErrorCode & status)161 NumberFormatterImpl::macrosToMicroGenerator(const MacroProps& macros, bool safe, UErrorCode& status) {
162 if (U_FAILURE(status)) { return nullptr; }
163 const MicroPropsGenerator* chain = &fMicros;
164
165 // Check that macros is error-free before continuing.
166 if (macros.copyErrorTo(status)) {
167 return nullptr;
168 }
169
170 // TODO: Accept currency symbols from DecimalFormatSymbols?
171
172 // Pre-compute a few values for efficiency.
173 bool isCurrency = utils::unitIsCurrency(macros.unit);
174 bool isNoUnit = utils::unitIsNoUnit(macros.unit);
175 bool isPercent = isNoUnit && utils::unitIsPercent(macros.unit);
176 bool isPermille = isNoUnit && utils::unitIsPermille(macros.unit);
177 bool isCldrUnit = !isCurrency && !isNoUnit;
178 bool isAccounting =
179 macros.sign == UNUM_SIGN_ACCOUNTING || macros.sign == UNUM_SIGN_ACCOUNTING_ALWAYS ||
180 macros.sign == UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO;
181 CurrencyUnit currency(nullptr, status);
182 if (isCurrency) {
183 currency = CurrencyUnit(macros.unit, status); // Restore CurrencyUnit from MeasureUnit
184 }
185 const CurrencySymbols* currencySymbols;
186 if (macros.currencySymbols != nullptr) {
187 // Used by the DecimalFormat code path
188 currencySymbols = macros.currencySymbols;
189 } else {
190 fWarehouse.fCurrencySymbols = {currency, macros.locale, status};
191 currencySymbols = &fWarehouse.fCurrencySymbols;
192 }
193 UNumberUnitWidth unitWidth = UNUM_UNIT_WIDTH_SHORT;
194 if (macros.unitWidth != UNUM_UNIT_WIDTH_COUNT) {
195 unitWidth = macros.unitWidth;
196 }
197
198 // Select the numbering system.
199 LocalPointer<const NumberingSystem> nsLocal;
200 const NumberingSystem* ns;
201 if (macros.symbols.isNumberingSystem()) {
202 ns = macros.symbols.getNumberingSystem();
203 } else {
204 // TODO: Is there a way to avoid creating the NumberingSystem object?
205 ns = NumberingSystem::createInstance(macros.locale, status);
206 // Give ownership to the function scope.
207 nsLocal.adoptInstead(ns);
208 }
209 const char* nsName = U_SUCCESS(status) ? ns->getName() : "latn";
210
211 // Resolve the symbols. Do this here because currency may need to customize them.
212 if (macros.symbols.isDecimalFormatSymbols()) {
213 fMicros.symbols = macros.symbols.getDecimalFormatSymbols();
214 } else {
215 fMicros.symbols = new DecimalFormatSymbols(macros.locale, *ns, status);
216 // Give ownership to the NumberFormatterImpl.
217 fSymbols.adoptInstead(fMicros.symbols);
218 }
219
220 // Load and parse the pattern string. It is used for grouping sizes and affixes only.
221 // If we are formatting currency, check for a currency-specific pattern.
222 const char16_t* pattern = nullptr;
223 if (isCurrency) {
224 CurrencyFormatInfoResult info = getCurrencyFormatInfo(
225 macros.locale, currency.getSubtype(), status);
226 if (info.exists) {
227 pattern = info.pattern;
228 // It's clunky to clone an object here, but this code is not frequently executed.
229 auto* symbols = new DecimalFormatSymbols(*fMicros.symbols);
230 fMicros.symbols = symbols;
231 fSymbols.adoptInstead(symbols);
232 symbols->setSymbol(
233 DecimalFormatSymbols::ENumberFormatSymbol::kMonetarySeparatorSymbol,
234 UnicodeString(info.decimalSeparator),
235 FALSE);
236 symbols->setSymbol(
237 DecimalFormatSymbols::ENumberFormatSymbol::kMonetaryGroupingSeparatorSymbol,
238 UnicodeString(info.groupingSeparator),
239 FALSE);
240 }
241 }
242 if (pattern == nullptr) {
243 CldrPatternStyle patternStyle;
244 if (isPercent || isPermille) {
245 patternStyle = CLDR_PATTERN_STYLE_PERCENT;
246 } else if (!isCurrency || unitWidth == UNUM_UNIT_WIDTH_FULL_NAME) {
247 patternStyle = CLDR_PATTERN_STYLE_DECIMAL;
248 } else if (isAccounting) {
249 // NOTE: Although ACCOUNTING and ACCOUNTING_ALWAYS are only supported in currencies right now,
250 // the API contract allows us to add support to other units in the future.
251 patternStyle = CLDR_PATTERN_STYLE_ACCOUNTING;
252 } else {
253 patternStyle = CLDR_PATTERN_STYLE_CURRENCY;
254 }
255 pattern = utils::getPatternForStyle(macros.locale, nsName, patternStyle, status);
256 }
257 auto patternInfo = new ParsedPatternInfo();
258 fPatternInfo.adoptInstead(patternInfo);
259 PatternParser::parseToPatternInfo(UnicodeString(pattern), *patternInfo, status);
260
261 /////////////////////////////////////////////////////////////////////////////////////
262 /// START POPULATING THE DEFAULT MICROPROPS AND BUILDING THE MICROPROPS GENERATOR ///
263 /////////////////////////////////////////////////////////////////////////////////////
264
265 // Multiplier
266 if (macros.scale.isValid()) {
267 fMicros.helpers.multiplier.setAndChain(macros.scale, chain);
268 chain = &fMicros.helpers.multiplier;
269 }
270
271 // Rounding strategy
272 Precision precision;
273 if (!macros.precision.isBogus()) {
274 precision = macros.precision;
275 } else if (macros.notation.fType == Notation::NTN_COMPACT) {
276 precision = Precision::integer().withMinDigits(2);
277 } else if (isCurrency) {
278 precision = Precision::currency(UCURR_USAGE_STANDARD);
279 } else {
280 precision = Precision::maxFraction(6);
281 }
282 UNumberFormatRoundingMode roundingMode;
283 if (macros.roundingMode != kDefaultMode) {
284 roundingMode = macros.roundingMode;
285 } else {
286 // Temporary until ICU 64
287 roundingMode = precision.fRoundingMode;
288 }
289 fMicros.rounder = {precision, roundingMode, currency, status};
290
291 // Grouping strategy
292 if (!macros.grouper.isBogus()) {
293 fMicros.grouping = macros.grouper;
294 } else if (macros.notation.fType == Notation::NTN_COMPACT) {
295 // Compact notation uses minGrouping by default since ICU 59
296 fMicros.grouping = Grouper::forStrategy(UNUM_GROUPING_MIN2);
297 } else {
298 fMicros.grouping = Grouper::forStrategy(UNUM_GROUPING_AUTO);
299 }
300 fMicros.grouping.setLocaleData(*fPatternInfo, macros.locale);
301
302 // Padding strategy
303 if (!macros.padder.isBogus()) {
304 fMicros.padding = macros.padder;
305 } else {
306 fMicros.padding = Padder::none();
307 }
308
309 // Integer width
310 if (!macros.integerWidth.isBogus()) {
311 fMicros.integerWidth = macros.integerWidth;
312 } else {
313 fMicros.integerWidth = IntegerWidth::standard();
314 }
315
316 // Sign display
317 if (macros.sign != UNUM_SIGN_COUNT) {
318 fMicros.sign = macros.sign;
319 } else {
320 fMicros.sign = UNUM_SIGN_AUTO;
321 }
322
323 // Decimal mark display
324 if (macros.decimal != UNUM_DECIMAL_SEPARATOR_COUNT) {
325 fMicros.decimal = macros.decimal;
326 } else {
327 fMicros.decimal = UNUM_DECIMAL_SEPARATOR_AUTO;
328 }
329
330 // Use monetary separator symbols
331 fMicros.useCurrency = isCurrency;
332
333 // Inner modifier (scientific notation)
334 if (macros.notation.fType == Notation::NTN_SCIENTIFIC) {
335 fScientificHandler.adoptInstead(new ScientificHandler(¯os.notation, fMicros.symbols, chain));
336 chain = fScientificHandler.getAlias();
337 } else {
338 // No inner modifier required
339 fMicros.modInner = &fMicros.helpers.emptyStrongModifier;
340 }
341
342 // Middle modifier (patterns, positive/negative, currency symbols, percent)
343 auto patternModifier = new MutablePatternModifier(false);
344 fPatternModifier.adoptInstead(patternModifier);
345 patternModifier->setPatternInfo(
346 macros.affixProvider != nullptr ? macros.affixProvider
347 : static_cast<const AffixPatternProvider*>(fPatternInfo.getAlias()));
348 patternModifier->setPatternAttributes(fMicros.sign, isPermille);
349 if (patternModifier->needsPlurals()) {
350 patternModifier->setSymbols(
351 fMicros.symbols,
352 currencySymbols,
353 unitWidth,
354 resolvePluralRules(macros.rules, macros.locale, status));
355 } else {
356 patternModifier->setSymbols(fMicros.symbols, currencySymbols, unitWidth, nullptr);
357 }
358 if (safe) {
359 fImmutablePatternModifier.adoptInstead(patternModifier->createImmutableAndChain(chain, status));
360 chain = fImmutablePatternModifier.getAlias();
361 } else {
362 patternModifier->addToChain(chain);
363 chain = patternModifier;
364 }
365
366 // Outer modifier (CLDR units and currency long names)
367 if (isCldrUnit) {
368 fLongNameHandler.adoptInstead(
369 LongNameHandler::forMeasureUnit(
370 macros.locale,
371 macros.unit,
372 macros.perUnit,
373 unitWidth,
374 resolvePluralRules(macros.rules, macros.locale, status),
375 chain,
376 status));
377 chain = fLongNameHandler.getAlias();
378 } else if (isCurrency && unitWidth == UNUM_UNIT_WIDTH_FULL_NAME) {
379 fLongNameHandler.adoptInstead(
380 LongNameHandler::forCurrencyLongNames(
381 macros.locale,
382 currency,
383 resolvePluralRules(macros.rules, macros.locale, status),
384 chain,
385 status));
386 chain = fLongNameHandler.getAlias();
387 } else {
388 // No outer modifier required
389 fMicros.modOuter = &fMicros.helpers.emptyWeakModifier;
390 }
391
392 // Compact notation
393 // NOTE: Compact notation can (but might not) override the middle modifier and rounding.
394 // It therefore needs to go at the end of the chain.
395 if (macros.notation.fType == Notation::NTN_COMPACT) {
396 CompactType compactType = (isCurrency && unitWidth != UNUM_UNIT_WIDTH_FULL_NAME)
397 ? CompactType::TYPE_CURRENCY : CompactType::TYPE_DECIMAL;
398 fCompactHandler.adoptInstead(
399 new CompactHandler(
400 macros.notation.fUnion.compactStyle,
401 macros.locale,
402 nsName,
403 compactType,
404 resolvePluralRules(macros.rules, macros.locale, status),
405 safe ? patternModifier : nullptr,
406 chain,
407 status));
408 chain = fCompactHandler.getAlias();
409 }
410
411 return chain;
412 }
413
414 const PluralRules*
resolvePluralRules(const PluralRules * rulesPtr,const Locale & locale,UErrorCode & status)415 NumberFormatterImpl::resolvePluralRules(const PluralRules* rulesPtr, const Locale& locale,
416 UErrorCode& status) {
417 if (rulesPtr != nullptr) {
418 return rulesPtr;
419 }
420 // Lazily create PluralRules
421 if (fRules.isNull()) {
422 fRules.adoptInstead(PluralRules::forLocale(locale, status));
423 }
424 return fRules.getAlias();
425 }
426
writeAffixes(const MicroProps & micros,NumberStringBuilder & string,int32_t start,int32_t end,UErrorCode & status)427 int32_t NumberFormatterImpl::writeAffixes(const MicroProps& micros, NumberStringBuilder& string,
428 int32_t start, int32_t end, UErrorCode& status) {
429 // Always apply the inner modifier (which is "strong").
430 int32_t length = micros.modInner->apply(string, start, end, status);
431 if (micros.padding.isValid()) {
432 length += micros.padding
433 .padAndApply(*micros.modMiddle, *micros.modOuter, string, start, length + end, status);
434 } else {
435 length += micros.modMiddle->apply(string, start, length + end, status);
436 length += micros.modOuter->apply(string, start, length + end, status);
437 }
438 return length;
439 }
440
writeNumber(const MicroProps & micros,DecimalQuantity & quantity,NumberStringBuilder & string,int32_t index,UErrorCode & status)441 int32_t NumberFormatterImpl::writeNumber(const MicroProps& micros, DecimalQuantity& quantity,
442 NumberStringBuilder& string, int32_t index,
443 UErrorCode& status) {
444 int32_t length = 0;
445 if (quantity.isInfinite()) {
446 length += string.insert(
447 length + index,
448 micros.symbols->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kInfinitySymbol),
449 UNUM_INTEGER_FIELD,
450 status);
451
452 } else if (quantity.isNaN()) {
453 length += string.insert(
454 length + index,
455 micros.symbols->getSymbol(DecimalFormatSymbols::ENumberFormatSymbol::kNaNSymbol),
456 UNUM_INTEGER_FIELD,
457 status);
458
459 } else {
460 // Add the integer digits
461 length += writeIntegerDigits(micros, quantity, string, length + index, status);
462
463 // Add the decimal point
464 if (quantity.getLowerDisplayMagnitude() < 0 || micros.decimal == UNUM_DECIMAL_SEPARATOR_ALWAYS) {
465 length += string.insert(
466 length + index,
467 micros.useCurrency ? micros.symbols->getSymbol(
468 DecimalFormatSymbols::ENumberFormatSymbol::kMonetarySeparatorSymbol) : micros
469 .symbols
470 ->getSymbol(
471 DecimalFormatSymbols::ENumberFormatSymbol::kDecimalSeparatorSymbol),
472 UNUM_DECIMAL_SEPARATOR_FIELD,
473 status);
474 }
475
476 // Add the fraction digits
477 length += writeFractionDigits(micros, quantity, string, length + index, status);
478 }
479
480 return length;
481 }
482
writeIntegerDigits(const MicroProps & micros,DecimalQuantity & quantity,NumberStringBuilder & string,int32_t index,UErrorCode & status)483 int32_t NumberFormatterImpl::writeIntegerDigits(const MicroProps& micros, DecimalQuantity& quantity,
484 NumberStringBuilder& string, int32_t index,
485 UErrorCode& status) {
486 int length = 0;
487 int integerCount = quantity.getUpperDisplayMagnitude() + 1;
488 for (int i = 0; i < integerCount; i++) {
489 // Add grouping separator
490 if (micros.grouping.groupAtPosition(i, quantity)) {
491 length += string.insert(
492 index,
493 micros.useCurrency ? micros.symbols->getSymbol(
494 DecimalFormatSymbols::ENumberFormatSymbol::kMonetaryGroupingSeparatorSymbol)
495 : micros.symbols->getSymbol(
496 DecimalFormatSymbols::ENumberFormatSymbol::kGroupingSeparatorSymbol),
497 UNUM_GROUPING_SEPARATOR_FIELD,
498 status);
499 }
500
501 // Get and append the next digit value
502 int8_t nextDigit = quantity.getDigit(i);
503 length += utils::insertDigitFromSymbols(
504 string, index, nextDigit, *micros.symbols, UNUM_INTEGER_FIELD, status);
505 }
506 return length;
507 }
508
writeFractionDigits(const MicroProps & micros,DecimalQuantity & quantity,NumberStringBuilder & string,int32_t index,UErrorCode & status)509 int32_t NumberFormatterImpl::writeFractionDigits(const MicroProps& micros, DecimalQuantity& quantity,
510 NumberStringBuilder& string, int32_t index,
511 UErrorCode& status) {
512 int length = 0;
513 int fractionCount = -quantity.getLowerDisplayMagnitude();
514 for (int i = 0; i < fractionCount; i++) {
515 // Get and append the next digit value
516 int8_t nextDigit = quantity.getDigit(-i - 1);
517 length += utils::insertDigitFromSymbols(
518 string, length + index, nextDigit, *micros.symbols, UNUM_FRACTION_FIELD, status);
519 }
520 return length;
521 }
522
523 #endif /* #if !UCONFIG_NO_FORMATTING */
524