• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 
8 // Allow implicit conversion from char16_t* to UnicodeString for this file:
9 // Helpful in toString methods and elsewhere.
10 #define UNISTR_FROM_STRING_EXPLICIT
11 
12 #include "unicode/numberrangeformatter.h"
13 #include "numrange_impl.h"
14 #include "patternprops.h"
15 #include "pluralranges.h"
16 #include "uresimp.h"
17 #include "util.h"
18 
19 using namespace icu;
20 using namespace icu::number;
21 using namespace icu::number::impl;
22 
23 namespace {
24 
25 // Helper function for 2-dimensional switch statement
identity2d(UNumberRangeIdentityFallback a,UNumberRangeIdentityResult b)26 constexpr int8_t identity2d(UNumberRangeIdentityFallback a, UNumberRangeIdentityResult b) {
27     return static_cast<int8_t>(a) | (static_cast<int8_t>(b) << 4);
28 }
29 
30 
// Locale pattern data consumed by the number range formatter.
struct NumberRangeData {
    // Two-argument pattern that joins the two endpoints of a range. It is read
    // from the "range" entry of a miscPatterns resource table, with a
    // hard-coded default used when no such entry is found.
    SimpleFormatter rangePattern;
    // Note: approximatelyPattern is unused since ICU 69.
    // SimpleFormatter approximatelyPattern;
};
36 
37 class NumberRangeDataSink : public ResourceSink {
38   public:
NumberRangeDataSink(NumberRangeData & data)39     NumberRangeDataSink(NumberRangeData& data) : fData(data) {}
40 
put(const char * key,ResourceValue & value,UBool,UErrorCode & status)41     void put(const char* key, ResourceValue& value, UBool /*noFallback*/, UErrorCode& status) override {
42         ResourceTable miscTable = value.getTable(status);
43         if (U_FAILURE(status)) { return; }
44         for (int i = 0; miscTable.getKeyAndValue(i, key, value); i++) {
45             if (uprv_strcmp(key, "range") == 0) {
46                 if (hasRangeData()) {
47                     continue; // have already seen this pattern
48                 }
49                 fData.rangePattern = {value.getUnicodeString(status), status};
50             }
51             /*
52             // Note: approximatelyPattern is unused since ICU 69.
53             else if (uprv_strcmp(key, "approximately") == 0) {
54                 if (hasApproxData()) {
55                     continue; // have already seen this pattern
56                 }
57                 fData.approximatelyPattern = {value.getUnicodeString(status), status};
58             }
59             */
60         }
61     }
62 
hasRangeData()63     bool hasRangeData() {
64         return fData.rangePattern.getArgumentLimit() != 0;
65     }
66 
67     /*
68     // Note: approximatelyPattern is unused since ICU 69.
69     bool hasApproxData() {
70         return fData.approximatelyPattern.getArgumentLimit() != 0;
71     }
72     */
73 
isComplete()74     bool isComplete() {
75         return hasRangeData() /* && hasApproxData() */;
76     }
77 
fillInDefaults(UErrorCode & status)78     void fillInDefaults(UErrorCode& status) {
79         if (!hasRangeData()) {
80             fData.rangePattern = {u"{0}–{1}", status};
81         }
82         /*
83         if (!hasApproxData()) {
84             fData.approximatelyPattern = {u"~{0}", status};
85         }
86         */
87     }
88 
89   private:
90     NumberRangeData& fData;
91 };
92 
getNumberRangeData(const char * localeName,const char * nsName,NumberRangeData & data,UErrorCode & status)93 void getNumberRangeData(const char* localeName, const char* nsName, NumberRangeData& data, UErrorCode& status) {
94     if (U_FAILURE(status)) { return; }
95     LocalUResourceBundlePointer rb(ures_open(nullptr, localeName, &status));
96     if (U_FAILURE(status)) { return; }
97     NumberRangeDataSink sink(data);
98 
99     CharString dataPath;
100     dataPath.append("NumberElements/", -1, status);
101     dataPath.append(nsName, -1, status);
102     dataPath.append("/miscPatterns", -1, status);
103     if (U_FAILURE(status)) { return; }
104 
105     UErrorCode localStatus = U_ZERO_ERROR;
106     ures_getAllItemsWithFallback(rb.getAlias(), dataPath.data(), sink, localStatus);
107     if (U_FAILURE(localStatus) && localStatus != U_MISSING_RESOURCE_ERROR) {
108         status = localStatus;
109         return;
110     }
111 
112     // Fall back to latn if necessary
113     if (!sink.isComplete()) {
114         ures_getAllItemsWithFallback(rb.getAlias(), "NumberElements/latn/miscPatterns", sink, status);
115     }
116 
117     sink.fillInDefaults(status);
118 }
119 
120 } // namespace
121 
122 
123 
NumberRangeFormatterImpl(const RangeMacroProps & macros,UErrorCode & status)124 NumberRangeFormatterImpl::NumberRangeFormatterImpl(const RangeMacroProps& macros, UErrorCode& status)
125     : formatterImpl1(macros.formatter1.fMacros, status),
126       formatterImpl2(macros.formatter2.fMacros, status),
127       fSameFormatters(macros.singleFormatter),
128       fCollapse(macros.collapse),
129       fIdentityFallback(macros.identityFallback),
130       fApproximatelyFormatter(status) {
131 
132     const char* nsName = formatterImpl1.getRawMicroProps().nsName;
133     if (!fSameFormatters && uprv_strcmp(nsName, formatterImpl2.getRawMicroProps().nsName) != 0) {
134         status = U_ILLEGAL_ARGUMENT_ERROR;
135         return;
136     }
137 
138     NumberRangeData data;
139     getNumberRangeData(macros.locale.getName(), nsName, data, status);
140     if (U_FAILURE(status)) { return; }
141     fRangeFormatter = data.rangePattern;
142 
143     if (fSameFormatters && (
144             fIdentityFallback == UNUM_IDENTITY_FALLBACK_APPROXIMATELY ||
145             fIdentityFallback == UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE)) {
146         MacroProps approximatelyMacros(macros.formatter1.fMacros);
147         approximatelyMacros.approximately = true;
148         // Use in-place construction because NumberFormatterImpl has internal self-pointers
149         fApproximatelyFormatter.~NumberFormatterImpl();
150         new (&fApproximatelyFormatter) NumberFormatterImpl(approximatelyMacros, status);
151     }
152 
153     // TODO: Get locale from PluralRules instead?
154     fPluralRanges = StandardPluralRanges::forLocale(macros.locale, status);
155     if (U_FAILURE(status)) { return; }
156 }
157 
format(UFormattedNumberRangeData & data,bool equalBeforeRounding,UErrorCode & status) const158 void NumberRangeFormatterImpl::format(UFormattedNumberRangeData& data, bool equalBeforeRounding, UErrorCode& status) const {
159     if (U_FAILURE(status)) {
160         return;
161     }
162 
163     MicroProps micros1;
164     MicroProps micros2;
165     formatterImpl1.preProcess(data.quantity1, micros1, status);
166     if (fSameFormatters) {
167         formatterImpl1.preProcess(data.quantity2, micros2, status);
168     } else {
169         formatterImpl2.preProcess(data.quantity2, micros2, status);
170     }
171     if (U_FAILURE(status)) {
172         return;
173     }
174 
175     // If any of the affixes are different, an identity is not possible
176     // and we must use formatRange().
177     // TODO: Write this as MicroProps operator==() ?
178     // TODO: Avoid the redundancy of these equality operations with the
179     // ones in formatRange?
180     if (!micros1.modInner->semanticallyEquivalent(*micros2.modInner)
181             || !micros1.modMiddle->semanticallyEquivalent(*micros2.modMiddle)
182             || !micros1.modOuter->semanticallyEquivalent(*micros2.modOuter)) {
183         formatRange(data, micros1, micros2, status);
184         data.identityResult = UNUM_IDENTITY_RESULT_NOT_EQUAL;
185         return;
186     }
187 
188     // Check for identity
189     if (equalBeforeRounding) {
190         data.identityResult = UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING;
191     } else if (data.quantity1 == data.quantity2) {
192         data.identityResult = UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING;
193     } else {
194         data.identityResult = UNUM_IDENTITY_RESULT_NOT_EQUAL;
195     }
196 
197     switch (identity2d(fIdentityFallback, data.identityResult)) {
198         case identity2d(UNUM_IDENTITY_FALLBACK_RANGE,
199                         UNUM_IDENTITY_RESULT_NOT_EQUAL):
200         case identity2d(UNUM_IDENTITY_FALLBACK_RANGE,
201                         UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
202         case identity2d(UNUM_IDENTITY_FALLBACK_RANGE,
203                         UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
204         case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY,
205                         UNUM_IDENTITY_RESULT_NOT_EQUAL):
206         case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE,
207                         UNUM_IDENTITY_RESULT_NOT_EQUAL):
208         case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE,
209                         UNUM_IDENTITY_RESULT_NOT_EQUAL):
210             formatRange(data, micros1, micros2, status);
211             break;
212 
213         case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY,
214                         UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
215         case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY,
216                         UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
217         case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE,
218                         UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
219             formatApproximately(data, micros1, micros2, status);
220             break;
221 
222         case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE,
223                         UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
224         case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE,
225                         UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
226         case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE,
227                         UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
228             formatSingleValue(data, micros1, micros2, status);
229             break;
230 
231         default:
232             UPRV_UNREACHABLE_EXIT;
233     }
234 }
235 
236 
formatSingleValue(UFormattedNumberRangeData & data,MicroProps & micros1,MicroProps & micros2,UErrorCode & status) const237 void NumberRangeFormatterImpl::formatSingleValue(UFormattedNumberRangeData& data,
238                                                  MicroProps& micros1, MicroProps& micros2,
239                                                  UErrorCode& status) const {
240     if (U_FAILURE(status)) { return; }
241     if (fSameFormatters) {
242         int32_t length = NumberFormatterImpl::writeNumber(micros1.simple, data.quantity1, data.getStringRef(), 0, status);
243         NumberFormatterImpl::writeAffixes(micros1, data.getStringRef(), 0, length, status);
244     } else {
245         formatRange(data, micros1, micros2, status);
246     }
247 }
248 
249 
formatApproximately(UFormattedNumberRangeData & data,MicroProps & micros1,MicroProps & micros2,UErrorCode & status) const250 void NumberRangeFormatterImpl::formatApproximately (UFormattedNumberRangeData& data,
251                                                     MicroProps& micros1, MicroProps& micros2,
252                                                     UErrorCode& status) const {
253     if (U_FAILURE(status)) { return; }
254     if (fSameFormatters) {
255         // Re-format using the approximately formatter:
256         MicroProps microsAppx;
257         data.quantity1.resetExponent();
258         fApproximatelyFormatter.preProcess(data.quantity1, microsAppx, status);
259         int32_t length = NumberFormatterImpl::writeNumber(microsAppx.simple, data.quantity1, data.getStringRef(), 0, status);
260         length += microsAppx.modInner->apply(data.getStringRef(), 0, length, status);
261         length += microsAppx.modMiddle->apply(data.getStringRef(), 0, length, status);
262         microsAppx.modOuter->apply(data.getStringRef(), 0, length, status);
263     } else {
264         formatRange(data, micros1, micros2, status);
265     }
266 }
267 
268 
formatRange(UFormattedNumberRangeData & data,MicroProps & micros1,MicroProps & micros2,UErrorCode & status) const269 void NumberRangeFormatterImpl::formatRange(UFormattedNumberRangeData& data,
270                                            MicroProps& micros1, MicroProps& micros2,
271                                            UErrorCode& status) const {
272     if (U_FAILURE(status)) { return; }
273 
274     // modInner is always notation (scientific); collapsable in ALL.
275     // modOuter is always units; collapsable in ALL, AUTO, and UNIT.
276     // modMiddle could be either; collapsable in ALL and sometimes AUTO and UNIT.
277     // Never collapse an outer mod but not an inner mod.
278     bool collapseOuter, collapseMiddle, collapseInner;
279     switch (fCollapse) {
280         case UNUM_RANGE_COLLAPSE_ALL:
281         case UNUM_RANGE_COLLAPSE_AUTO:
282         case UNUM_RANGE_COLLAPSE_UNIT:
283         {
284             // OUTER MODIFIER
285             collapseOuter = micros1.modOuter->semanticallyEquivalent(*micros2.modOuter);
286 
287             if (!collapseOuter) {
288                 // Never collapse inner mods if outer mods are not collapsable
289                 collapseMiddle = false;
290                 collapseInner = false;
291                 break;
292             }
293 
294             // MIDDLE MODIFIER
295             collapseMiddle = micros1.modMiddle->semanticallyEquivalent(*micros2.modMiddle);
296 
297             if (!collapseMiddle) {
298                 // Never collapse inner mods if outer mods are not collapsable
299                 collapseInner = false;
300                 break;
301             }
302 
303             // MIDDLE MODIFIER HEURISTICS
304             // (could disable collapsing of the middle modifier)
305             // The modifiers are equal by this point, so we can look at just one of them.
306             const Modifier* mm = micros1.modMiddle;
307             if (fCollapse == UNUM_RANGE_COLLAPSE_UNIT) {
308                 // Only collapse if the modifier is a unit.
309                 // TODO: Make a better way to check for a unit?
310                 // TODO: Handle case where the modifier has both notation and unit (compact currency)?
311                 if (!mm->containsField({UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD})
312                         && !mm->containsField({UFIELD_CATEGORY_NUMBER, UNUM_PERCENT_FIELD})) {
313                     collapseMiddle = false;
314                 }
315             } else if (fCollapse == UNUM_RANGE_COLLAPSE_AUTO) {
316                 // Heuristic as of ICU 63: collapse only if the modifier is more than one code point.
317                 if (mm->getCodePointCount() <= 1) {
318                     collapseMiddle = false;
319                 }
320             }
321 
322             if (!collapseMiddle || fCollapse != UNUM_RANGE_COLLAPSE_ALL) {
323                 collapseInner = false;
324                 break;
325             }
326 
327             // INNER MODIFIER
328             collapseInner = micros1.modInner->semanticallyEquivalent(*micros2.modInner);
329 
330             // All done checking for collapsibility.
331             break;
332         }
333 
334         default:
335             collapseOuter = false;
336             collapseMiddle = false;
337             collapseInner = false;
338             break;
339     }
340 
341     FormattedStringBuilder& string = data.getStringRef();
342     int32_t lengthPrefix = 0;
343     int32_t length1 = 0;
344     int32_t lengthInfix = 0;
345     int32_t length2 = 0;
346     int32_t lengthSuffix = 0;
347 
348     // Use #define so that these are evaluated at the call site.
349     #define UPRV_INDEX_0 (lengthPrefix)
350     #define UPRV_INDEX_1 (lengthPrefix + length1)
351     #define UPRV_INDEX_2 (lengthPrefix + length1 + lengthInfix)
352     #define UPRV_INDEX_3 (lengthPrefix + length1 + lengthInfix + length2)
353     #define UPRV_INDEX_4 (lengthPrefix + length1 + lengthInfix + length2 + lengthSuffix)
354 
355     int32_t lengthRange = SimpleModifier::formatTwoArgPattern(
356         fRangeFormatter,
357         string,
358         0,
359         &lengthPrefix,
360         &lengthSuffix,
361         kUndefinedField,
362         status);
363     if (U_FAILURE(status)) { return; }
364     lengthInfix = lengthRange - lengthPrefix - lengthSuffix;
365     U_ASSERT(lengthInfix > 0);
366 
367     // SPACING HEURISTIC
368     // Add spacing unless all modifiers are collapsed.
369     // TODO: add API to control this?
370     // TODO: Use a data-driven heuristic like currency spacing?
371     // TODO: Use Unicode [:whitespace:] instead of PatternProps whitespace? (consider speed implications)
372     {
373         bool repeatInner = !collapseInner && micros1.modInner->getCodePointCount() > 0;
374         bool repeatMiddle = !collapseMiddle && micros1.modMiddle->getCodePointCount() > 0;
375         bool repeatOuter = !collapseOuter && micros1.modOuter->getCodePointCount() > 0;
376         if (repeatInner || repeatMiddle || repeatOuter) {
377             // Add spacing if there is not already spacing
378             if (!PatternProps::isWhiteSpace(string.charAt(UPRV_INDEX_1))) {
379                 lengthInfix += string.insertCodePoint(UPRV_INDEX_1, u'\u0020', kUndefinedField, status);
380             }
381             if (!PatternProps::isWhiteSpace(string.charAt(UPRV_INDEX_2 - 1))) {
382                 lengthInfix += string.insertCodePoint(UPRV_INDEX_2, u'\u0020', kUndefinedField, status);
383             }
384         }
385     }
386 
387     length1 += NumberFormatterImpl::writeNumber(micros1.simple, data.quantity1, string, UPRV_INDEX_0, status);
388     // ICU-21684: Write the second number to a temp string to avoid repeated insert operations
389     FormattedStringBuilder tempString;
390     NumberFormatterImpl::writeNumber(micros2.simple, data.quantity2, tempString, 0, status);
391     length2 += string.insert(UPRV_INDEX_2, tempString, status);
392 
393     // TODO: Support padding?
394 
395     if (collapseInner) {
396         const Modifier& mod = resolveModifierPlurals(*micros1.modInner, *micros2.modInner);
397         lengthSuffix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_4, status);
398         lengthPrefix += mod.getPrefixLength();
399         lengthSuffix -= mod.getPrefixLength();
400     } else {
401         length1 += micros1.modInner->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
402         length2 += micros2.modInner->apply(string, UPRV_INDEX_2, UPRV_INDEX_4, status);
403     }
404 
405     if (collapseMiddle) {
406         const Modifier& mod = resolveModifierPlurals(*micros1.modMiddle, *micros2.modMiddle);
407         lengthSuffix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_4, status);
408         lengthPrefix += mod.getPrefixLength();
409         lengthSuffix -= mod.getPrefixLength();
410     } else {
411         length1 += micros1.modMiddle->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
412         length2 += micros2.modMiddle->apply(string, UPRV_INDEX_2, UPRV_INDEX_4, status);
413     }
414 
415     if (collapseOuter) {
416         const Modifier& mod = resolveModifierPlurals(*micros1.modOuter, *micros2.modOuter);
417         lengthSuffix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_4, status);
418         lengthPrefix += mod.getPrefixLength();
419         lengthSuffix -= mod.getPrefixLength();
420     } else {
421         length1 += micros1.modOuter->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
422         length2 += micros2.modOuter->apply(string, UPRV_INDEX_2, UPRV_INDEX_4, status);
423     }
424 
425     // Now that all pieces are added, save the span info.
426     data.appendSpanInfo(UFIELD_CATEGORY_NUMBER_RANGE_SPAN, 0, UPRV_INDEX_0, length1, status);
427     data.appendSpanInfo(UFIELD_CATEGORY_NUMBER_RANGE_SPAN, 1, UPRV_INDEX_2, length2, status);
428 }
429 
430 
431 const Modifier&
resolveModifierPlurals(const Modifier & first,const Modifier & second) const432 NumberRangeFormatterImpl::resolveModifierPlurals(const Modifier& first, const Modifier& second) const {
433     Modifier::Parameters parameters;
434     first.getParameters(parameters);
435     if (parameters.obj == nullptr) {
436         // No plural form; return a fallback (e.g., the first)
437         return first;
438     }
439     StandardPlural::Form firstPlural = parameters.plural;
440 
441     second.getParameters(parameters);
442     if (parameters.obj == nullptr) {
443         // No plural form; return a fallback (e.g., the first)
444         return first;
445     }
446     StandardPlural::Form secondPlural = parameters.plural;
447 
448     // Get the required plural form from data
449     StandardPlural::Form resultPlural = fPluralRanges.resolve(firstPlural, secondPlural);
450 
451     // Get and return the new Modifier
452     const Modifier* mod = parameters.obj->getModifier(parameters.signum, resultPlural);
453     U_ASSERT(mod != nullptr);
454     return *mod;
455 }
456 
457 
458 
459 #endif /* #if !UCONFIG_NO_FORMATTING */
460