• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 
8 // Allow implicit conversion from char16_t* to UnicodeString for this file:
9 // Helpful in toString methods and elsewhere.
10 #define UNISTR_FROM_STRING_EXPLICIT
11 
12 #include "unicode/numberrangeformatter.h"
13 #include "numrange_impl.h"
14 #include "patternprops.h"
15 #include "uresimp.h"
16 #include "util.h"
17 
18 using namespace icu;
19 using namespace icu::number;
20 using namespace icu::number::impl;
21 
22 namespace {
23 
24 // Helper function for 2-dimensional switch statement
identity2d(UNumberRangeIdentityFallback a,UNumberRangeIdentityResult b)25 constexpr int8_t identity2d(UNumberRangeIdentityFallback a, UNumberRangeIdentityResult b) {
26     return static_cast<int8_t>(a) | (static_cast<int8_t>(b) << 4);
27 }
28 
29 
// Holds the two locale patterns used by the range formatter in this file.
// The loading code in this file treats a pattern whose getArgumentLimit()
// is 0 as "not loaded yet".
struct NumberRangeData {
    // Pattern joining the two endpoints of a range (resource key "range").
    SimpleFormatter rangePattern;
    // Pattern marking a single value as approximate (resource key "approximately").
    SimpleFormatter approximatelyPattern;
};
34 
35 class NumberRangeDataSink : public ResourceSink {
36   public:
NumberRangeDataSink(NumberRangeData & data)37     NumberRangeDataSink(NumberRangeData& data) : fData(data) {}
38 
put(const char * key,ResourceValue & value,UBool,UErrorCode & status)39     void put(const char* key, ResourceValue& value, UBool /*noFallback*/, UErrorCode& status) U_OVERRIDE {
40         ResourceTable miscTable = value.getTable(status);
41         if (U_FAILURE(status)) { return; }
42         for (int i = 0; miscTable.getKeyAndValue(i, key, value); i++) {
43             if (uprv_strcmp(key, "range") == 0) {
44                 if (fData.rangePattern.getArgumentLimit() != 0) {
45                     continue; // have already seen this pattern
46                 }
47                 fData.rangePattern = {value.getUnicodeString(status), status};
48             } else if (uprv_strcmp(key, "approximately") == 0) {
49                 if (fData.approximatelyPattern.getArgumentLimit() != 0) {
50                     continue; // have already seen this pattern
51                 }
52                 fData.approximatelyPattern = {value.getUnicodeString(status), status};
53             }
54         }
55     }
56 
57   private:
58     NumberRangeData& fData;
59 };
60 
getNumberRangeData(const char * localeName,const char * nsName,NumberRangeData & data,UErrorCode & status)61 void getNumberRangeData(const char* localeName, const char* nsName, NumberRangeData& data, UErrorCode& status) {
62     if (U_FAILURE(status)) { return; }
63     LocalUResourceBundlePointer rb(ures_open(NULL, localeName, &status));
64     if (U_FAILURE(status)) { return; }
65     NumberRangeDataSink sink(data);
66 
67     CharString dataPath;
68     dataPath.append("NumberElements/", -1, status);
69     dataPath.append(nsName, -1, status);
70     dataPath.append("/miscPatterns", -1, status);
71     ures_getAllItemsWithFallback(rb.getAlias(), dataPath.data(), sink, status);
72     if (U_FAILURE(status)) { return; }
73 
74     // TODO: Is it necessary to manually fall back to latn, or does the data sink take care of that?
75 
76     if (data.rangePattern.getArgumentLimit() == 0) {
77         // No data!
78         data.rangePattern = {u"{0}–{1}", status};
79     }
80     if (data.approximatelyPattern.getArgumentLimit() == 0) {
81         // No data!
82         data.approximatelyPattern = {u"~{0}", status};
83     }
84 }
85 
86 class PluralRangesDataSink : public ResourceSink {
87   public:
PluralRangesDataSink(StandardPluralRanges & output)88     PluralRangesDataSink(StandardPluralRanges& output) : fOutput(output) {}
89 
put(const char *,ResourceValue & value,UBool,UErrorCode & status)90     void put(const char* /*key*/, ResourceValue& value, UBool /*noFallback*/, UErrorCode& status) U_OVERRIDE {
91         ResourceArray entriesArray = value.getArray(status);
92         if (U_FAILURE(status)) { return; }
93         fOutput.setCapacity(entriesArray.getSize());
94         for (int i = 0; entriesArray.getValue(i, value); i++) {
95             ResourceArray pluralFormsArray = value.getArray(status);
96             if (U_FAILURE(status)) { return; }
97             pluralFormsArray.getValue(0, value);
98             StandardPlural::Form first = StandardPlural::fromString(value.getUnicodeString(status), status);
99             if (U_FAILURE(status)) { return; }
100             pluralFormsArray.getValue(1, value);
101             StandardPlural::Form second = StandardPlural::fromString(value.getUnicodeString(status), status);
102             if (U_FAILURE(status)) { return; }
103             pluralFormsArray.getValue(2, value);
104             StandardPlural::Form result = StandardPlural::fromString(value.getUnicodeString(status), status);
105             if (U_FAILURE(status)) { return; }
106             fOutput.addPluralRange(first, second, result);
107         }
108     }
109 
110   private:
111     StandardPluralRanges& fOutput;
112 };
113 
getPluralRangesData(const Locale & locale,StandardPluralRanges & output,UErrorCode & status)114 void getPluralRangesData(const Locale& locale, StandardPluralRanges& output, UErrorCode& status) {
115     if (U_FAILURE(status)) { return; }
116     LocalUResourceBundlePointer rb(ures_openDirect(nullptr, "pluralRanges", &status));
117     if (U_FAILURE(status)) { return; }
118 
119     CharString dataPath;
120     dataPath.append("locales/", -1, status);
121     dataPath.append(locale.getLanguage(), -1, status);
122     if (U_FAILURE(status)) { return; }
123     int32_t setLen;
124     // Not all languages are covered: fail gracefully
125     UErrorCode internalStatus = U_ZERO_ERROR;
126     const UChar* set = ures_getStringByKeyWithFallback(rb.getAlias(), dataPath.data(), &setLen, &internalStatus);
127     if (U_FAILURE(internalStatus)) { return; }
128 
129     dataPath.clear();
130     dataPath.append("rules/", -1, status);
131     dataPath.appendInvariantChars(set, setLen, status);
132     if (U_FAILURE(status)) { return; }
133     PluralRangesDataSink sink(output);
134     ures_getAllItemsWithFallback(rb.getAlias(), dataPath.data(), sink, status);
135     if (U_FAILURE(status)) { return; }
136 }
137 
138 } // namespace
139 
140 
// Populates this object with the plural-range data for `locale` by delegating
// to getPluralRangesData(); errors are reported through `status`.
void StandardPluralRanges::initialize(const Locale& locale, UErrorCode& status) {
    getPluralRangesData(locale, *this, status);
}
144 
addPluralRange(StandardPlural::Form first,StandardPlural::Form second,StandardPlural::Form result)145 void StandardPluralRanges::addPluralRange(
146         StandardPlural::Form first,
147         StandardPlural::Form second,
148         StandardPlural::Form result) {
149     U_ASSERT(fTriplesLen < fTriples.getCapacity());
150     fTriples[fTriplesLen] = {first, second, result};
151     fTriplesLen++;
152 }
153 
setCapacity(int32_t length)154 void StandardPluralRanges::setCapacity(int32_t length) {
155     if (length > fTriples.getCapacity()) {
156         fTriples.resize(length, 0);
157     }
158 }
159 
160 StandardPlural::Form
resolve(StandardPlural::Form first,StandardPlural::Form second) const161 StandardPluralRanges::resolve(StandardPlural::Form first, StandardPlural::Form second) const {
162     for (int32_t i=0; i<fTriplesLen; i++) {
163         const auto& triple = fTriples[i];
164         if (triple.first == first && triple.second == second) {
165             return triple.result;
166         }
167     }
168     // Default fallback
169     return StandardPlural::OTHER;
170 }
171 
172 
NumberRangeFormatterImpl(const RangeMacroProps & macros,UErrorCode & status)173 NumberRangeFormatterImpl::NumberRangeFormatterImpl(const RangeMacroProps& macros, UErrorCode& status)
174     : formatterImpl1(macros.formatter1.fMacros, status),
175       formatterImpl2(macros.formatter2.fMacros, status),
176       fSameFormatters(macros.singleFormatter),
177       fCollapse(macros.collapse),
178       fIdentityFallback(macros.identityFallback) {
179 
180     // TODO: As of this writing (ICU 63), there is no locale that has different number miscPatterns
181     // based on numbering system.  Therefore, data is loaded only from latn.  If this changes,
182     // this part of the code should be updated to load from the local numbering system.
183     // The numbering system could come from the one specified in the NumberFormatter passed to
184     // numberFormatterBoth() or similar.
185     // See ICU-20144
186 
187     NumberRangeData data;
188     getNumberRangeData(macros.locale.getName(), "latn", data, status);
189     if (U_FAILURE(status)) { return; }
190     fRangeFormatter = data.rangePattern;
191     fApproximatelyModifier = {data.approximatelyPattern, UNUM_FIELD_COUNT, false};
192 
193     // TODO: Get locale from PluralRules instead?
194     fPluralRanges.initialize(macros.locale, status);
195     if (U_FAILURE(status)) { return; }
196 }
197 
format(UFormattedNumberRangeData & data,bool equalBeforeRounding,UErrorCode & status) const198 void NumberRangeFormatterImpl::format(UFormattedNumberRangeData& data, bool equalBeforeRounding, UErrorCode& status) const {
199     if (U_FAILURE(status)) {
200         return;
201     }
202 
203     MicroProps micros1;
204     MicroProps micros2;
205     formatterImpl1.preProcess(data.quantity1, micros1, status);
206     if (fSameFormatters) {
207         formatterImpl1.preProcess(data.quantity2, micros2, status);
208     } else {
209         formatterImpl2.preProcess(data.quantity2, micros2, status);
210     }
211     if (U_FAILURE(status)) {
212         return;
213     }
214 
215     // If any of the affixes are different, an identity is not possible
216     // and we must use formatRange().
217     // TODO: Write this as MicroProps operator==() ?
218     // TODO: Avoid the redundancy of these equality operations with the
219     // ones in formatRange?
220     if (!micros1.modInner->semanticallyEquivalent(*micros2.modInner)
221             || !micros1.modMiddle->semanticallyEquivalent(*micros2.modMiddle)
222             || !micros1.modOuter->semanticallyEquivalent(*micros2.modOuter)) {
223         formatRange(data, micros1, micros2, status);
224         data.identityResult = UNUM_IDENTITY_RESULT_NOT_EQUAL;
225         return;
226     }
227 
228     // Check for identity
229     if (equalBeforeRounding) {
230         data.identityResult = UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING;
231     } else if (data.quantity1 == data.quantity2) {
232         data.identityResult = UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING;
233     } else {
234         data.identityResult = UNUM_IDENTITY_RESULT_NOT_EQUAL;
235     }
236 
237     switch (identity2d(fIdentityFallback, data.identityResult)) {
238         case identity2d(UNUM_IDENTITY_FALLBACK_RANGE,
239                         UNUM_IDENTITY_RESULT_NOT_EQUAL):
240         case identity2d(UNUM_IDENTITY_FALLBACK_RANGE,
241                         UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
242         case identity2d(UNUM_IDENTITY_FALLBACK_RANGE,
243                         UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
244         case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY,
245                         UNUM_IDENTITY_RESULT_NOT_EQUAL):
246         case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE,
247                         UNUM_IDENTITY_RESULT_NOT_EQUAL):
248         case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE,
249                         UNUM_IDENTITY_RESULT_NOT_EQUAL):
250             formatRange(data, micros1, micros2, status);
251             break;
252 
253         case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY,
254                         UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
255         case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY,
256                         UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
257         case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE,
258                         UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
259             formatApproximately(data, micros1, micros2, status);
260             break;
261 
262         case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE,
263                         UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
264         case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE,
265                         UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
266         case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE,
267                         UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
268             formatSingleValue(data, micros1, micros2, status);
269             break;
270 
271         default:
272             U_ASSERT(false);
273             break;
274     }
275 }
276 
277 
formatSingleValue(UFormattedNumberRangeData & data,MicroProps & micros1,MicroProps & micros2,UErrorCode & status) const278 void NumberRangeFormatterImpl::formatSingleValue(UFormattedNumberRangeData& data,
279                                                  MicroProps& micros1, MicroProps& micros2,
280                                                  UErrorCode& status) const {
281     if (U_FAILURE(status)) { return; }
282     if (fSameFormatters) {
283         int32_t length = NumberFormatterImpl::writeNumber(micros1, data.quantity1, data.string, 0, status);
284         NumberFormatterImpl::writeAffixes(micros1, data.string, 0, length, status);
285     } else {
286         formatRange(data, micros1, micros2, status);
287     }
288 }
289 
290 
formatApproximately(UFormattedNumberRangeData & data,MicroProps & micros1,MicroProps & micros2,UErrorCode & status) const291 void NumberRangeFormatterImpl::formatApproximately (UFormattedNumberRangeData& data,
292                                                     MicroProps& micros1, MicroProps& micros2,
293                                                     UErrorCode& status) const {
294     if (U_FAILURE(status)) { return; }
295     if (fSameFormatters) {
296         int32_t length = NumberFormatterImpl::writeNumber(micros1, data.quantity1, data.string, 0, status);
297         // HEURISTIC: Desired modifier order: inner, middle, approximately, outer.
298         length += micros1.modInner->apply(data.string, 0, length, status);
299         length += micros1.modMiddle->apply(data.string, 0, length, status);
300         length += fApproximatelyModifier.apply(data.string, 0, length, status);
301         micros1.modOuter->apply(data.string, 0, length, status);
302     } else {
303         formatRange(data, micros1, micros2, status);
304     }
305 }
306 
307 
formatRange(UFormattedNumberRangeData & data,MicroProps & micros1,MicroProps & micros2,UErrorCode & status) const308 void NumberRangeFormatterImpl::formatRange(UFormattedNumberRangeData& data,
309                                            MicroProps& micros1, MicroProps& micros2,
310                                            UErrorCode& status) const {
311     if (U_FAILURE(status)) { return; }
312 
313     // modInner is always notation (scientific); collapsable in ALL.
314     // modOuter is always units; collapsable in ALL, AUTO, and UNIT.
315     // modMiddle could be either; collapsable in ALL and sometimes AUTO and UNIT.
316     // Never collapse an outer mod but not an inner mod.
317     bool collapseOuter, collapseMiddle, collapseInner;
318     switch (fCollapse) {
319         case UNUM_RANGE_COLLAPSE_ALL:
320         case UNUM_RANGE_COLLAPSE_AUTO:
321         case UNUM_RANGE_COLLAPSE_UNIT:
322         {
323             // OUTER MODIFIER
324             collapseOuter = micros1.modOuter->semanticallyEquivalent(*micros2.modOuter);
325 
326             if (!collapseOuter) {
327                 // Never collapse inner mods if outer mods are not collapsable
328                 collapseMiddle = false;
329                 collapseInner = false;
330                 break;
331             }
332 
333             // MIDDLE MODIFIER
334             collapseMiddle = micros1.modMiddle->semanticallyEquivalent(*micros2.modMiddle);
335 
336             if (!collapseMiddle) {
337                 // Never collapse inner mods if outer mods are not collapsable
338                 collapseInner = false;
339                 break;
340             }
341 
342             // MIDDLE MODIFIER HEURISTICS
343             // (could disable collapsing of the middle modifier)
344             // The modifiers are equal by this point, so we can look at just one of them.
345             const Modifier* mm = micros1.modMiddle;
346             if (fCollapse == UNUM_RANGE_COLLAPSE_UNIT) {
347                 // Only collapse if the modifier is a unit.
348                 // TODO: Make a better way to check for a unit?
349                 // TODO: Handle case where the modifier has both notation and unit (compact currency)?
350                 if (!mm->containsField(UNUM_CURRENCY_FIELD) && !mm->containsField(UNUM_PERCENT_FIELD)) {
351                     collapseMiddle = false;
352                 }
353             } else if (fCollapse == UNUM_RANGE_COLLAPSE_AUTO) {
354                 // Heuristic as of ICU 63: collapse only if the modifier is more than one code point.
355                 if (mm->getCodePointCount() <= 1) {
356                     collapseMiddle = false;
357                 }
358             }
359 
360             if (!collapseMiddle || fCollapse != UNUM_RANGE_COLLAPSE_ALL) {
361                 collapseInner = false;
362                 break;
363             }
364 
365             // INNER MODIFIER
366             collapseInner = micros1.modInner->semanticallyEquivalent(*micros2.modInner);
367 
368             // All done checking for collapsability.
369             break;
370         }
371 
372         default:
373             collapseOuter = false;
374             collapseMiddle = false;
375             collapseInner = false;
376             break;
377     }
378 
379     NumberStringBuilder& string = data.string;
380     int32_t lengthPrefix = 0;
381     int32_t length1 = 0;
382     int32_t lengthInfix = 0;
383     int32_t length2 = 0;
384     int32_t lengthSuffix = 0;
385 
386     // Use #define so that these are evaluated at the call site.
387     #define UPRV_INDEX_0 (lengthPrefix)
388     #define UPRV_INDEX_1 (lengthPrefix + length1)
389     #define UPRV_INDEX_2 (lengthPrefix + length1 + lengthInfix)
390     #define UPRV_INDEX_3 (lengthPrefix + length1 + lengthInfix + length2)
391 
392     int32_t lengthRange = SimpleModifier::formatTwoArgPattern(
393         fRangeFormatter,
394         string,
395         0,
396         &lengthPrefix,
397         &lengthSuffix,
398         UNUM_FIELD_COUNT,
399         status);
400     if (U_FAILURE(status)) { return; }
401     lengthInfix = lengthRange - lengthPrefix - lengthSuffix;
402     U_ASSERT(lengthInfix > 0);
403 
404     // SPACING HEURISTIC
405     // Add spacing unless all modifiers are collapsed.
406     // TODO: add API to control this?
407     // TODO: Use a data-driven heuristic like currency spacing?
408     // TODO: Use Unicode [:whitespace:] instead of PatternProps whitespace? (consider speed implications)
409     {
410         bool repeatInner = !collapseInner && micros1.modInner->getCodePointCount() > 0;
411         bool repeatMiddle = !collapseMiddle && micros1.modMiddle->getCodePointCount() > 0;
412         bool repeatOuter = !collapseOuter && micros1.modOuter->getCodePointCount() > 0;
413         if (repeatInner || repeatMiddle || repeatOuter) {
414             // Add spacing if there is not already spacing
415             if (!PatternProps::isWhiteSpace(string.charAt(UPRV_INDEX_1))) {
416                 lengthInfix += string.insertCodePoint(UPRV_INDEX_1, u'\u0020', UNUM_FIELD_COUNT, status);
417             }
418             if (!PatternProps::isWhiteSpace(string.charAt(UPRV_INDEX_2 - 1))) {
419                 lengthInfix += string.insertCodePoint(UPRV_INDEX_2, u'\u0020', UNUM_FIELD_COUNT, status);
420             }
421         }
422     }
423 
424     length1 += NumberFormatterImpl::writeNumber(micros1, data.quantity1, string, UPRV_INDEX_0, status);
425     length2 += NumberFormatterImpl::writeNumber(micros2, data.quantity2, string, UPRV_INDEX_2, status);
426 
427     // TODO: Support padding?
428 
429     if (collapseInner) {
430         // Note: this is actually a mix of prefix and suffix, but adding to infix length works
431         const Modifier& mod = resolveModifierPlurals(*micros1.modInner, *micros2.modInner);
432         lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status);
433     } else {
434         length1 += micros1.modInner->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
435         length2 += micros2.modInner->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status);
436     }
437 
438     if (collapseMiddle) {
439         // Note: this is actually a mix of prefix and suffix, but adding to infix length works
440         const Modifier& mod = resolveModifierPlurals(*micros1.modMiddle, *micros2.modMiddle);
441         lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status);
442     } else {
443         length1 += micros1.modMiddle->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
444         length2 += micros2.modMiddle->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status);
445     }
446 
447     if (collapseOuter) {
448         // Note: this is actually a mix of prefix and suffix, but adding to infix length works
449         const Modifier& mod = resolveModifierPlurals(*micros1.modOuter, *micros2.modOuter);
450         lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status);
451     } else {
452         length1 += micros1.modOuter->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
453         length2 += micros2.modOuter->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status);
454     }
455 }
456 
457 
458 const Modifier&
resolveModifierPlurals(const Modifier & first,const Modifier & second) const459 NumberRangeFormatterImpl::resolveModifierPlurals(const Modifier& first, const Modifier& second) const {
460     Modifier::Parameters parameters;
461     first.getParameters(parameters);
462     if (parameters.obj == nullptr) {
463         // No plural form; return a fallback (e.g., the first)
464         return first;
465     }
466     StandardPlural::Form firstPlural = parameters.plural;
467 
468     second.getParameters(parameters);
469     if (parameters.obj == nullptr) {
470         // No plural form; return a fallback (e.g., the first)
471         return first;
472     }
473     StandardPlural::Form secondPlural = parameters.plural;
474 
475     // Get the required plural form from data
476     StandardPlural::Form resultPlural = fPluralRanges.resolve(firstPlural, secondPlural);
477 
478     // Get and return the new Modifier
479     const Modifier* mod = parameters.obj->getModifier(parameters.signum, resultPlural);
480     U_ASSERT(mod != nullptr);
481     return *mod;
482 }
483 
484 
485 
486 #endif /* #if !UCONFIG_NO_FORMATTING */
487