• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 
8 #include "umutex.h"
9 #include "ucln_cmn.h"
10 #include "ucln_in.h"
11 #include "number_modifiers.h"
12 
13 using namespace icu;
14 using namespace icu::number;
15 using namespace icu::number::impl;
16 
17 namespace {
18 
19 // TODO: This is copied from simpleformatter.cpp
20 const int32_t ARG_NUM_LIMIT = 0x100;
21 
22 // These are the default currency spacing UnicodeSets in CLDR.
23 // Pre-compute them for performance.
24 // The Java unit test testCurrencySpacingPatternStability() will start failing if these change in CLDR.
25 icu::UInitOnce gDefaultCurrencySpacingInitOnce {};
26 
27 UnicodeSet *UNISET_DIGIT = nullptr;
28 UnicodeSet *UNISET_NOTSZ = nullptr;
29 
cleanupDefaultCurrencySpacing()30 UBool U_CALLCONV cleanupDefaultCurrencySpacing() {
31     delete UNISET_DIGIT;
32     UNISET_DIGIT = nullptr;
33     delete UNISET_NOTSZ;
34     UNISET_NOTSZ = nullptr;
35     gDefaultCurrencySpacingInitOnce.reset();
36     return true;
37 }
38 
initDefaultCurrencySpacing(UErrorCode & status)39 void U_CALLCONV initDefaultCurrencySpacing(UErrorCode &status) {
40     ucln_i18n_registerCleanup(UCLN_I18N_CURRENCY_SPACING, cleanupDefaultCurrencySpacing);
41     UNISET_DIGIT = new UnicodeSet(UnicodeString(u"[:digit:]"), status);
42     UNISET_NOTSZ = new UnicodeSet(UnicodeString(u"[[:^S:]&[:^Z:]]"), status);
43     if (UNISET_DIGIT == nullptr || UNISET_NOTSZ == nullptr) {
44         status = U_MEMORY_ALLOCATION_ERROR;
45         return;
46     }
47     UNISET_DIGIT->freeze();
48     UNISET_NOTSZ->freeze();
49 }
50 
51 }  // namespace
52 
53 
54 Modifier::~Modifier() = default;
55 
Parameters()56 Modifier::Parameters::Parameters()
57         : obj(nullptr) {}
58 
Parameters(const ModifierStore * _obj,Signum _signum,StandardPlural::Form _plural)59 Modifier::Parameters::Parameters(
60     const ModifierStore* _obj, Signum _signum, StandardPlural::Form _plural)
61         : obj(_obj), signum(_signum), plural(_plural) {}
62 
semanticallyEquivalent(const Modifier & other) const63 bool Modifier::semanticallyEquivalent(const Modifier& other) const {
64     Parameters paramsThis;
65     Parameters paramsOther;
66     getParameters(paramsThis);
67     other.getParameters(paramsOther);
68     if (paramsThis.obj == nullptr && paramsOther.obj == nullptr) {
69         return strictEquals(other);
70     } else if (paramsThis.obj == nullptr || paramsOther.obj == nullptr) {
71         return false;
72     }
73     for (size_t i=0; i<SIGNUM_COUNT; i++) {
74         auto signum = static_cast<Signum>(i);
75         for (size_t j=0; j<StandardPlural::COUNT; j++) {
76             auto plural = static_cast<StandardPlural::Form>(j);
77             const auto* mod1 = paramsThis.obj->getModifier(signum, plural);
78             const auto* mod2 = paramsOther.obj->getModifier(signum, plural);
79             if (mod1 == mod2) {
80                 // Equal pointers
81                 continue;
82             } else if (mod1 == nullptr || mod2 == nullptr) {
83                 // One pointer is null but not the other
84                 return false;
85             } else if (!mod1->strictEquals(*mod2)) {
86                 // The modifiers are NOT equivalent
87                 return false;
88             } else {
89                 // The modifiers are equivalent
90                 continue;
91             }
92         }
93     }
94     return true;
95 }
96 
97 
98 ModifierStore::~ModifierStore() = default;
99 
~AdoptingSignumModifierStore()100 AdoptingSignumModifierStore::~AdoptingSignumModifierStore()  {
101     for (const Modifier *mod : mods) {
102         delete mod;
103     }
104 }
105 
106 AdoptingSignumModifierStore&
operator =(AdoptingSignumModifierStore && other)107 AdoptingSignumModifierStore::operator=(AdoptingSignumModifierStore&& other) noexcept {
108     for (size_t i=0; i<SIGNUM_COUNT; i++) {
109         this->mods[i] = other.mods[i];
110         other.mods[i] = nullptr;
111     }
112     return *this;
113 }
114 
115 
apply(FormattedStringBuilder & output,int leftIndex,int rightIndex,UErrorCode & status) const116 int32_t ConstantAffixModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
117                                      UErrorCode &status) const {
118     // Insert the suffix first since inserting the prefix will change the rightIndex
119     int length = output.insert(rightIndex, fSuffix, fField, status);
120     length += output.insert(leftIndex, fPrefix, fField, status);
121     return length;
122 }
123 
getPrefixLength() const124 int32_t ConstantAffixModifier::getPrefixLength() const {
125     return fPrefix.length();
126 }
127 
getCodePointCount() const128 int32_t ConstantAffixModifier::getCodePointCount() const {
129     return fPrefix.countChar32() + fSuffix.countChar32();
130 }
131 
isStrong() const132 bool ConstantAffixModifier::isStrong() const {
133     return fStrong;
134 }
135 
containsField(Field field) const136 bool ConstantAffixModifier::containsField(Field field) const {
137     (void)field;
138     // This method is not currently used.
139     UPRV_UNREACHABLE_EXIT;
140 }
141 
getParameters(Parameters & output) const142 void ConstantAffixModifier::getParameters(Parameters& output) const {
143     (void)output;
144     // This method is not currently used.
145     UPRV_UNREACHABLE_EXIT;
146 }
147 
strictEquals(const Modifier & other) const148 bool ConstantAffixModifier::strictEquals(const Modifier& other) const {
149     const auto* _other = dynamic_cast<const ConstantAffixModifier*>(&other);
150     if (_other == nullptr) {
151         return false;
152     }
153     return fPrefix == _other->fPrefix
154         && fSuffix == _other->fSuffix
155         && fField == _other->fField
156         && fStrong == _other->fStrong;
157 }
158 
159 
SimpleModifier(const SimpleFormatter & simpleFormatter,Field field,bool strong)160 SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong)
161         : SimpleModifier(simpleFormatter, field, strong, {}) {}
162 
SimpleModifier(const SimpleFormatter & simpleFormatter,Field field,bool strong,const Modifier::Parameters parameters)163 SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong,
164                                const Modifier::Parameters parameters)
165         : fCompiledPattern(simpleFormatter.compiledPattern), fField(field), fStrong(strong),
166           fParameters(parameters) {
167     int32_t argLimit = SimpleFormatter::getArgumentLimit(
168             fCompiledPattern.getBuffer(), fCompiledPattern.length());
169     if (argLimit == 0) {
170         // No arguments in compiled pattern
171         fPrefixLength = fCompiledPattern.charAt(1) - ARG_NUM_LIMIT;
172         U_ASSERT(2 + fPrefixLength == fCompiledPattern.length());
173         // Set suffixOffset = -1 to indicate no arguments in compiled pattern.
174         fSuffixOffset = -1;
175         fSuffixLength = 0;
176     } else {
177         U_ASSERT(argLimit == 1);
178         if (fCompiledPattern.charAt(1) != 0) {
179             // Found prefix
180             fPrefixLength = fCompiledPattern.charAt(1) - ARG_NUM_LIMIT;
181             fSuffixOffset = 3 + fPrefixLength;
182         } else {
183             // No prefix
184             fPrefixLength = 0;
185             fSuffixOffset = 2;
186         }
187         if (3 + fPrefixLength < fCompiledPattern.length()) {
188             // Found suffix
189             fSuffixLength = fCompiledPattern.charAt(fSuffixOffset) - ARG_NUM_LIMIT;
190         } else {
191             // No suffix
192             fSuffixLength = 0;
193         }
194     }
195 }
196 
SimpleModifier()197 SimpleModifier::SimpleModifier()
198         : fField(kUndefinedField), fStrong(false), fPrefixLength(0), fSuffixLength(0) {
199 }
200 
apply(FormattedStringBuilder & output,int leftIndex,int rightIndex,UErrorCode & status) const201 int32_t SimpleModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
202                               UErrorCode &status) const {
203     return formatAsPrefixSuffix(output, leftIndex, rightIndex, status);
204 }
205 
getPrefixLength() const206 int32_t SimpleModifier::getPrefixLength() const {
207     return fPrefixLength;
208 }
209 
getCodePointCount() const210 int32_t SimpleModifier::getCodePointCount() const {
211     int32_t count = 0;
212     if (fPrefixLength > 0) {
213         count += fCompiledPattern.countChar32(2, fPrefixLength);
214     }
215     if (fSuffixLength > 0) {
216         count += fCompiledPattern.countChar32(1 + fSuffixOffset, fSuffixLength);
217     }
218     return count;
219 }
220 
isStrong() const221 bool SimpleModifier::isStrong() const {
222     return fStrong;
223 }
224 
containsField(Field field) const225 bool SimpleModifier::containsField(Field field) const {
226     (void)field;
227     // This method is not currently used.
228     UPRV_UNREACHABLE_EXIT;
229 }
230 
getParameters(Parameters & output) const231 void SimpleModifier::getParameters(Parameters& output) const {
232     output = fParameters;
233 }
234 
strictEquals(const Modifier & other) const235 bool SimpleModifier::strictEquals(const Modifier& other) const {
236     const auto* _other = dynamic_cast<const SimpleModifier*>(&other);
237     if (_other == nullptr) {
238         return false;
239     }
240     return fCompiledPattern == _other->fCompiledPattern
241         && fField == _other->fField
242         && fStrong == _other->fStrong;
243 }
244 
245 
246 int32_t
formatAsPrefixSuffix(FormattedStringBuilder & result,int32_t startIndex,int32_t endIndex,UErrorCode & status) const247 SimpleModifier::formatAsPrefixSuffix(FormattedStringBuilder &result, int32_t startIndex, int32_t endIndex,
248                                      UErrorCode &status) const {
249     if (fSuffixOffset == -1 && fPrefixLength + fSuffixLength > 0) {
250         // There is no argument for the inner number; overwrite the entire segment with our string.
251         return result.splice(startIndex, endIndex, fCompiledPattern, 2, 2 + fPrefixLength, fField, status);
252     } else {
253         if (fPrefixLength > 0) {
254             result.insert(startIndex, fCompiledPattern, 2, 2 + fPrefixLength, fField, status);
255         }
256         if (fSuffixLength > 0) {
257             result.insert(
258                     endIndex + fPrefixLength,
259                     fCompiledPattern,
260                     1 + fSuffixOffset,
261                     1 + fSuffixOffset + fSuffixLength,
262                     fField,
263                     status);
264         }
265         return fPrefixLength + fSuffixLength;
266     }
267 }
268 
269 
270 int32_t
formatTwoArgPattern(const SimpleFormatter & compiled,FormattedStringBuilder & result,int32_t index,int32_t * outPrefixLength,int32_t * outSuffixLength,Field field,UErrorCode & status)271 SimpleModifier::formatTwoArgPattern(const SimpleFormatter& compiled, FormattedStringBuilder& result,
272                                     int32_t index, int32_t* outPrefixLength, int32_t* outSuffixLength,
273                                     Field field, UErrorCode& status) {
274     const UnicodeString& compiledPattern = compiled.compiledPattern;
275     int32_t argLimit = SimpleFormatter::getArgumentLimit(
276             compiledPattern.getBuffer(), compiledPattern.length());
277     if (argLimit != 2) {
278         status = U_INTERNAL_PROGRAM_ERROR;
279         return 0;
280     }
281     int32_t offset = 1; // offset into compiledPattern
282     int32_t length = 0; // chars added to result
283 
284     int32_t prefixLength = compiledPattern.charAt(offset);
285     offset++;
286     if (prefixLength < ARG_NUM_LIMIT) {
287         // No prefix
288         prefixLength = 0;
289     } else {
290         prefixLength -= ARG_NUM_LIMIT;
291         result.insert(index + length, compiledPattern, offset, offset + prefixLength, field, status);
292         offset += prefixLength;
293         length += prefixLength;
294         offset++;
295     }
296 
297     int32_t infixLength = compiledPattern.charAt(offset);
298     offset++;
299     if (infixLength < ARG_NUM_LIMIT) {
300         // No infix
301         infixLength = 0;
302     } else {
303         infixLength -= ARG_NUM_LIMIT;
304         result.insert(index + length, compiledPattern, offset, offset + infixLength, field, status);
305         offset += infixLength;
306         length += infixLength;
307         offset++;
308     }
309 
310     int32_t suffixLength;
311     if (offset == compiledPattern.length()) {
312         // No suffix
313         suffixLength = 0;
314     } else {
315         suffixLength = compiledPattern.charAt(offset) -  ARG_NUM_LIMIT;
316         offset++;
317         result.insert(index + length, compiledPattern, offset, offset + suffixLength, field, status);
318         length += suffixLength;
319     }
320 
321     *outPrefixLength = prefixLength;
322     *outSuffixLength = suffixLength;
323 
324     return length;
325 }
326 
327 
apply(FormattedStringBuilder & output,int leftIndex,int rightIndex,UErrorCode & status) const328 int32_t ConstantMultiFieldModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
329                                           UErrorCode &status) const {
330     int32_t length = output.insert(leftIndex, fPrefix, status);
331     if (fOverwrite) {
332         length += output.splice(
333             leftIndex + length,
334             rightIndex + length,
335             UnicodeString(), 0, 0,
336             kUndefinedField, status);
337     }
338     length += output.insert(rightIndex + length, fSuffix, status);
339     return length;
340 }
341 
getPrefixLength() const342 int32_t ConstantMultiFieldModifier::getPrefixLength() const {
343     return fPrefix.length();
344 }
345 
getCodePointCount() const346 int32_t ConstantMultiFieldModifier::getCodePointCount() const {
347     return fPrefix.codePointCount() + fSuffix.codePointCount();
348 }
349 
isStrong() const350 bool ConstantMultiFieldModifier::isStrong() const {
351     return fStrong;
352 }
353 
containsField(Field field) const354 bool ConstantMultiFieldModifier::containsField(Field field) const {
355     return fPrefix.containsField(field) || fSuffix.containsField(field);
356 }
357 
getParameters(Parameters & output) const358 void ConstantMultiFieldModifier::getParameters(Parameters& output) const {
359     output = fParameters;
360 }
361 
strictEquals(const Modifier & other) const362 bool ConstantMultiFieldModifier::strictEquals(const Modifier& other) const {
363     const auto* _other = dynamic_cast<const ConstantMultiFieldModifier*>(&other);
364     if (_other == nullptr) {
365         return false;
366     }
367     return fPrefix.contentEquals(_other->fPrefix)
368         && fSuffix.contentEquals(_other->fSuffix)
369         && fOverwrite == _other->fOverwrite
370         && fStrong == _other->fStrong;
371 }
372 
373 
CurrencySpacingEnabledModifier(const FormattedStringBuilder & prefix,const FormattedStringBuilder & suffix,bool overwrite,bool strong,const DecimalFormatSymbols & symbols,UErrorCode & status)374 CurrencySpacingEnabledModifier::CurrencySpacingEnabledModifier(const FormattedStringBuilder &prefix,
375                                                                const FormattedStringBuilder &suffix,
376                                                                bool overwrite,
377                                                                bool strong,
378                                                                const DecimalFormatSymbols &symbols,
379                                                                UErrorCode &status)
380         : ConstantMultiFieldModifier(prefix, suffix, overwrite, strong) {
381     // Check for currency spacing. Do not build the UnicodeSets unless there is
382     // a currency code point at a boundary.
383     if (prefix.length() > 0 && prefix.fieldAt(prefix.length() - 1) == Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD)) {
384         int prefixCp = prefix.getLastCodePoint();
385         UnicodeSet prefixUnicodeSet = getUnicodeSet(symbols, IN_CURRENCY, PREFIX, status);
386         if (prefixUnicodeSet.contains(prefixCp)) {
387             fAfterPrefixUnicodeSet = getUnicodeSet(symbols, IN_NUMBER, PREFIX, status);
388             fAfterPrefixUnicodeSet.freeze();
389             fAfterPrefixInsert = getInsertString(symbols, PREFIX, status);
390         } else {
391             fAfterPrefixUnicodeSet.setToBogus();
392             fAfterPrefixInsert.setToBogus();
393         }
394     } else {
395         fAfterPrefixUnicodeSet.setToBogus();
396         fAfterPrefixInsert.setToBogus();
397     }
398     if (suffix.length() > 0 && suffix.fieldAt(0) == Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD)) {
399         int suffixCp = suffix.getFirstCodePoint();
400         UnicodeSet suffixUnicodeSet = getUnicodeSet(symbols, IN_CURRENCY, SUFFIX, status);
401         if (suffixUnicodeSet.contains(suffixCp)) {
402             fBeforeSuffixUnicodeSet = getUnicodeSet(symbols, IN_NUMBER, SUFFIX, status);
403             fBeforeSuffixUnicodeSet.freeze();
404             fBeforeSuffixInsert = getInsertString(symbols, SUFFIX, status);
405         } else {
406             fBeforeSuffixUnicodeSet.setToBogus();
407             fBeforeSuffixInsert.setToBogus();
408         }
409     } else {
410         fBeforeSuffixUnicodeSet.setToBogus();
411         fBeforeSuffixInsert.setToBogus();
412     }
413 }
414 
apply(FormattedStringBuilder & output,int leftIndex,int rightIndex,UErrorCode & status) const415 int32_t CurrencySpacingEnabledModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
416                                               UErrorCode &status) const {
417     // Currency spacing logic
418     int length = 0;
419     if (rightIndex - leftIndex > 0 && !fAfterPrefixUnicodeSet.isBogus() &&
420         fAfterPrefixUnicodeSet.contains(output.codePointAt(leftIndex))) {
421         // TODO: Should we use the CURRENCY field here?
422         length += output.insert(
423             leftIndex,
424             fAfterPrefixInsert,
425             kUndefinedField,
426             status);
427     }
428     if (rightIndex - leftIndex > 0 && !fBeforeSuffixUnicodeSet.isBogus() &&
429         fBeforeSuffixUnicodeSet.contains(output.codePointBefore(rightIndex))) {
430         // TODO: Should we use the CURRENCY field here?
431         length += output.insert(
432             rightIndex + length,
433             fBeforeSuffixInsert,
434             kUndefinedField,
435             status);
436     }
437 
438     // Call super for the remaining logic
439     length += ConstantMultiFieldModifier::apply(output, leftIndex, rightIndex + length, status);
440     return length;
441 }
442 
443 int32_t
applyCurrencySpacing(FormattedStringBuilder & output,int32_t prefixStart,int32_t prefixLen,int32_t suffixStart,int32_t suffixLen,const DecimalFormatSymbols & symbols,UErrorCode & status)444 CurrencySpacingEnabledModifier::applyCurrencySpacing(FormattedStringBuilder &output, int32_t prefixStart,
445                                                      int32_t prefixLen, int32_t suffixStart,
446                                                      int32_t suffixLen,
447                                                      const DecimalFormatSymbols &symbols,
448                                                      UErrorCode &status) {
449     int length = 0;
450     bool hasPrefix = (prefixLen > 0);
451     bool hasSuffix = (suffixLen > 0);
452     bool hasNumber = (suffixStart - prefixStart - prefixLen > 0); // could be empty string
453     if (hasPrefix && hasNumber) {
454         length += applyCurrencySpacingAffix(output, prefixStart + prefixLen, PREFIX, symbols, status);
455     }
456     if (hasSuffix && hasNumber) {
457         length += applyCurrencySpacingAffix(output, suffixStart + length, SUFFIX, symbols, status);
458     }
459     return length;
460 }
461 
462 int32_t
applyCurrencySpacingAffix(FormattedStringBuilder & output,int32_t index,EAffix affix,const DecimalFormatSymbols & symbols,UErrorCode & status)463 CurrencySpacingEnabledModifier::applyCurrencySpacingAffix(FormattedStringBuilder &output, int32_t index,
464                                                           EAffix affix,
465                                                           const DecimalFormatSymbols &symbols,
466                                                           UErrorCode &status) {
467     // NOTE: For prefix, output.fieldAt(index-1) gets the last field type in the prefix.
468     // This works even if the last code point in the prefix is 2 code units because the
469     // field value gets populated to both indices in the field array.
470     Field affixField = (affix == PREFIX) ? output.fieldAt(index - 1) : output.fieldAt(index);
471     if (affixField != Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD)) {
472         return 0;
473     }
474     int affixCp = (affix == PREFIX) ? output.codePointBefore(index) : output.codePointAt(index);
475     UnicodeSet affixUniset = getUnicodeSet(symbols, IN_CURRENCY, affix, status);
476     if (!affixUniset.contains(affixCp)) {
477         return 0;
478     }
479     int numberCp = (affix == PREFIX) ? output.codePointAt(index) : output.codePointBefore(index);
480     UnicodeSet numberUniset = getUnicodeSet(symbols, IN_NUMBER, affix, status);
481     if (!numberUniset.contains(numberCp)) {
482         return 0;
483     }
484     UnicodeString spacingString = getInsertString(symbols, affix, status);
485 
486     // NOTE: This next line *inserts* the spacing string, triggering an arraycopy.
487     // It would be more efficient if this could be done before affixes were attached,
488     // so that it could be prepended/appended instead of inserted.
489     // However, the build code path is more efficient, and this is the most natural
490     // place to put currency spacing in the non-build code path.
491     // TODO: Should we use the CURRENCY field here?
492     return output.insert(index, spacingString, kUndefinedField, status);
493 }
494 
495 UnicodeSet
getUnicodeSet(const DecimalFormatSymbols & symbols,EPosition position,EAffix affix,UErrorCode & status)496 CurrencySpacingEnabledModifier::getUnicodeSet(const DecimalFormatSymbols &symbols, EPosition position,
497                                               EAffix affix, UErrorCode &status) {
498     // Ensure the static defaults are initialized:
499     umtx_initOnce(gDefaultCurrencySpacingInitOnce, &initDefaultCurrencySpacing, status);
500     if (U_FAILURE(status)) {
501         return {};
502     }
503 
504     const UnicodeString& pattern = symbols.getPatternForCurrencySpacing(
505             position == IN_CURRENCY ? UNUM_CURRENCY_MATCH : UNUM_CURRENCY_SURROUNDING_MATCH,
506             affix == SUFFIX,
507             status);
508     if (pattern.compare(u"[:digit:]", -1) == 0) {
509         return *UNISET_DIGIT;
510     } else if (pattern.compare(u"[[:^S:]&[:^Z:]]", -1) == 0) {
511         return *UNISET_NOTSZ;
512     } else {
513         return UnicodeSet(pattern, status);
514     }
515 }
516 
517 UnicodeString
getInsertString(const DecimalFormatSymbols & symbols,EAffix affix,UErrorCode & status)518 CurrencySpacingEnabledModifier::getInsertString(const DecimalFormatSymbols &symbols, EAffix affix,
519                                                 UErrorCode &status) {
520     return symbols.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, affix == SUFFIX, status);
521 }
522 
523 #endif /* #if !UCONFIG_NO_FORMATTING */
524