1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include "unicode/utypes.h"
5
6 #if !UCONFIG_NO_FORMATTING
7
8 // Allow implicit conversion from char16_t* to UnicodeString for this file:
9 // Helpful in toString methods and elsewhere.
10 #define UNISTR_FROM_STRING_EXPLICIT
11
12 #include "unicode/numberrangeformatter.h"
13 #include "numrange_impl.h"
14 #include "patternprops.h"
15 #include "uresimp.h"
16 #include "util.h"
17
18 using namespace icu;
19 using namespace icu::number;
20 using namespace icu::number::impl;
21
22 namespace {
23
24 // Helper function for 2-dimensional switch statement
identity2d(UNumberRangeIdentityFallback a,UNumberRangeIdentityResult b)25 constexpr int8_t identity2d(UNumberRangeIdentityFallback a, UNumberRangeIdentityResult b) {
26 return static_cast<int8_t>(a) | (static_cast<int8_t>(b) << 4);
27 }
28
29
30 struct NumberRangeData {
31 SimpleFormatter rangePattern;
32 SimpleFormatter approximatelyPattern;
33 };
34
35 class NumberRangeDataSink : public ResourceSink {
36 public:
NumberRangeDataSink(NumberRangeData & data)37 NumberRangeDataSink(NumberRangeData& data) : fData(data) {}
38
put(const char * key,ResourceValue & value,UBool,UErrorCode & status)39 void put(const char* key, ResourceValue& value, UBool /*noFallback*/, UErrorCode& status) U_OVERRIDE {
40 ResourceTable miscTable = value.getTable(status);
41 if (U_FAILURE(status)) { return; }
42 for (int i = 0; miscTable.getKeyAndValue(i, key, value); i++) {
43 if (uprv_strcmp(key, "range") == 0) {
44 if (fData.rangePattern.getArgumentLimit() != 0) {
45 continue; // have already seen this pattern
46 }
47 fData.rangePattern = {value.getUnicodeString(status), status};
48 } else if (uprv_strcmp(key, "approximately") == 0) {
49 if (fData.approximatelyPattern.getArgumentLimit() != 0) {
50 continue; // have already seen this pattern
51 }
52 fData.approximatelyPattern = {value.getUnicodeString(status), status};
53 }
54 }
55 }
56
57 private:
58 NumberRangeData& fData;
59 };
60
getNumberRangeData(const char * localeName,const char * nsName,NumberRangeData & data,UErrorCode & status)61 void getNumberRangeData(const char* localeName, const char* nsName, NumberRangeData& data, UErrorCode& status) {
62 if (U_FAILURE(status)) { return; }
63 LocalUResourceBundlePointer rb(ures_open(NULL, localeName, &status));
64 if (U_FAILURE(status)) { return; }
65 NumberRangeDataSink sink(data);
66
67 CharString dataPath;
68 dataPath.append("NumberElements/", -1, status);
69 dataPath.append(nsName, -1, status);
70 dataPath.append("/miscPatterns", -1, status);
71 ures_getAllItemsWithFallback(rb.getAlias(), dataPath.data(), sink, status);
72 if (U_FAILURE(status)) { return; }
73
74 // TODO: Is it necessary to manually fall back to latn, or does the data sink take care of that?
75
76 if (data.rangePattern.getArgumentLimit() == 0) {
77 // No data!
78 data.rangePattern = {u"{0}–{1}", status};
79 }
80 if (data.approximatelyPattern.getArgumentLimit() == 0) {
81 // No data!
82 data.approximatelyPattern = {u"~{0}", status};
83 }
84 }
85
86 class PluralRangesDataSink : public ResourceSink {
87 public:
PluralRangesDataSink(StandardPluralRanges & output)88 PluralRangesDataSink(StandardPluralRanges& output) : fOutput(output) {}
89
put(const char *,ResourceValue & value,UBool,UErrorCode & status)90 void put(const char* /*key*/, ResourceValue& value, UBool /*noFallback*/, UErrorCode& status) U_OVERRIDE {
91 ResourceArray entriesArray = value.getArray(status);
92 if (U_FAILURE(status)) { return; }
93 fOutput.setCapacity(entriesArray.getSize());
94 for (int i = 0; entriesArray.getValue(i, value); i++) {
95 ResourceArray pluralFormsArray = value.getArray(status);
96 if (U_FAILURE(status)) { return; }
97 pluralFormsArray.getValue(0, value);
98 StandardPlural::Form first = StandardPlural::fromString(value.getUnicodeString(status), status);
99 if (U_FAILURE(status)) { return; }
100 pluralFormsArray.getValue(1, value);
101 StandardPlural::Form second = StandardPlural::fromString(value.getUnicodeString(status), status);
102 if (U_FAILURE(status)) { return; }
103 pluralFormsArray.getValue(2, value);
104 StandardPlural::Form result = StandardPlural::fromString(value.getUnicodeString(status), status);
105 if (U_FAILURE(status)) { return; }
106 fOutput.addPluralRange(first, second, result);
107 }
108 }
109
110 private:
111 StandardPluralRanges& fOutput;
112 };
113
getPluralRangesData(const Locale & locale,StandardPluralRanges & output,UErrorCode & status)114 void getPluralRangesData(const Locale& locale, StandardPluralRanges& output, UErrorCode& status) {
115 if (U_FAILURE(status)) { return; }
116 LocalUResourceBundlePointer rb(ures_openDirect(nullptr, "pluralRanges", &status));
117 if (U_FAILURE(status)) { return; }
118
119 CharString dataPath;
120 dataPath.append("locales/", -1, status);
121 dataPath.append(locale.getLanguage(), -1, status);
122 if (U_FAILURE(status)) { return; }
123 int32_t setLen;
124 // Not all languages are covered: fail gracefully
125 UErrorCode internalStatus = U_ZERO_ERROR;
126 const UChar* set = ures_getStringByKeyWithFallback(rb.getAlias(), dataPath.data(), &setLen, &internalStatus);
127 if (U_FAILURE(internalStatus)) { return; }
128
129 dataPath.clear();
130 dataPath.append("rules/", -1, status);
131 dataPath.appendInvariantChars(set, setLen, status);
132 if (U_FAILURE(status)) { return; }
133 PluralRangesDataSink sink(output);
134 ures_getAllItemsWithFallback(rb.getAlias(), dataPath.data(), sink, status);
135 if (U_FAILURE(status)) { return; }
136 }
137
138 } // namespace
139
140
initialize(const Locale & locale,UErrorCode & status)141 void StandardPluralRanges::initialize(const Locale& locale, UErrorCode& status) {
142 getPluralRangesData(locale, *this, status);
143 }
144
addPluralRange(StandardPlural::Form first,StandardPlural::Form second,StandardPlural::Form result)145 void StandardPluralRanges::addPluralRange(
146 StandardPlural::Form first,
147 StandardPlural::Form second,
148 StandardPlural::Form result) {
149 U_ASSERT(fTriplesLen < fTriples.getCapacity());
150 fTriples[fTriplesLen] = {first, second, result};
151 fTriplesLen++;
152 }
153
setCapacity(int32_t length)154 void StandardPluralRanges::setCapacity(int32_t length) {
155 if (length > fTriples.getCapacity()) {
156 fTriples.resize(length, 0);
157 }
158 }
159
160 StandardPlural::Form
resolve(StandardPlural::Form first,StandardPlural::Form second) const161 StandardPluralRanges::resolve(StandardPlural::Form first, StandardPlural::Form second) const {
162 for (int32_t i=0; i<fTriplesLen; i++) {
163 const auto& triple = fTriples[i];
164 if (triple.first == first && triple.second == second) {
165 return triple.result;
166 }
167 }
168 // Default fallback
169 return StandardPlural::OTHER;
170 }
171
172
NumberRangeFormatterImpl(const RangeMacroProps & macros,UErrorCode & status)173 NumberRangeFormatterImpl::NumberRangeFormatterImpl(const RangeMacroProps& macros, UErrorCode& status)
174 : formatterImpl1(macros.formatter1.fMacros, status),
175 formatterImpl2(macros.formatter2.fMacros, status),
176 fSameFormatters(macros.singleFormatter),
177 fCollapse(macros.collapse),
178 fIdentityFallback(macros.identityFallback) {
179
180 // TODO: As of this writing (ICU 63), there is no locale that has different number miscPatterns
181 // based on numbering system. Therefore, data is loaded only from latn. If this changes,
182 // this part of the code should be updated to load from the local numbering system.
183 // The numbering system could come from the one specified in the NumberFormatter passed to
184 // numberFormatterBoth() or similar.
185 // See ICU-20144
186
187 NumberRangeData data;
188 getNumberRangeData(macros.locale.getName(), "latn", data, status);
189 if (U_FAILURE(status)) { return; }
190 fRangeFormatter = data.rangePattern;
191 fApproximatelyModifier = {data.approximatelyPattern, UNUM_FIELD_COUNT, false};
192
193 // TODO: Get locale from PluralRules instead?
194 fPluralRanges.initialize(macros.locale, status);
195 if (U_FAILURE(status)) { return; }
196 }
197
format(UFormattedNumberRangeData & data,bool equalBeforeRounding,UErrorCode & status) const198 void NumberRangeFormatterImpl::format(UFormattedNumberRangeData& data, bool equalBeforeRounding, UErrorCode& status) const {
199 if (U_FAILURE(status)) {
200 return;
201 }
202
203 MicroProps micros1;
204 MicroProps micros2;
205 formatterImpl1.preProcess(data.quantity1, micros1, status);
206 if (fSameFormatters) {
207 formatterImpl1.preProcess(data.quantity2, micros2, status);
208 } else {
209 formatterImpl2.preProcess(data.quantity2, micros2, status);
210 }
211 if (U_FAILURE(status)) {
212 return;
213 }
214
215 // If any of the affixes are different, an identity is not possible
216 // and we must use formatRange().
217 // TODO: Write this as MicroProps operator==() ?
218 // TODO: Avoid the redundancy of these equality operations with the
219 // ones in formatRange?
220 if (!micros1.modInner->semanticallyEquivalent(*micros2.modInner)
221 || !micros1.modMiddle->semanticallyEquivalent(*micros2.modMiddle)
222 || !micros1.modOuter->semanticallyEquivalent(*micros2.modOuter)) {
223 formatRange(data, micros1, micros2, status);
224 data.identityResult = UNUM_IDENTITY_RESULT_NOT_EQUAL;
225 return;
226 }
227
228 // Check for identity
229 if (equalBeforeRounding) {
230 data.identityResult = UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING;
231 } else if (data.quantity1 == data.quantity2) {
232 data.identityResult = UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING;
233 } else {
234 data.identityResult = UNUM_IDENTITY_RESULT_NOT_EQUAL;
235 }
236
237 switch (identity2d(fIdentityFallback, data.identityResult)) {
238 case identity2d(UNUM_IDENTITY_FALLBACK_RANGE,
239 UNUM_IDENTITY_RESULT_NOT_EQUAL):
240 case identity2d(UNUM_IDENTITY_FALLBACK_RANGE,
241 UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
242 case identity2d(UNUM_IDENTITY_FALLBACK_RANGE,
243 UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
244 case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY,
245 UNUM_IDENTITY_RESULT_NOT_EQUAL):
246 case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE,
247 UNUM_IDENTITY_RESULT_NOT_EQUAL):
248 case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE,
249 UNUM_IDENTITY_RESULT_NOT_EQUAL):
250 formatRange(data, micros1, micros2, status);
251 break;
252
253 case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY,
254 UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
255 case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY,
256 UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
257 case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE,
258 UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
259 formatApproximately(data, micros1, micros2, status);
260 break;
261
262 case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE,
263 UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
264 case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE,
265 UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
266 case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE,
267 UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
268 formatSingleValue(data, micros1, micros2, status);
269 break;
270
271 default:
272 U_ASSERT(false);
273 break;
274 }
275 }
276
277
formatSingleValue(UFormattedNumberRangeData & data,MicroProps & micros1,MicroProps & micros2,UErrorCode & status) const278 void NumberRangeFormatterImpl::formatSingleValue(UFormattedNumberRangeData& data,
279 MicroProps& micros1, MicroProps& micros2,
280 UErrorCode& status) const {
281 if (U_FAILURE(status)) { return; }
282 if (fSameFormatters) {
283 int32_t length = NumberFormatterImpl::writeNumber(micros1, data.quantity1, data.string, 0, status);
284 NumberFormatterImpl::writeAffixes(micros1, data.string, 0, length, status);
285 } else {
286 formatRange(data, micros1, micros2, status);
287 }
288 }
289
290
formatApproximately(UFormattedNumberRangeData & data,MicroProps & micros1,MicroProps & micros2,UErrorCode & status) const291 void NumberRangeFormatterImpl::formatApproximately (UFormattedNumberRangeData& data,
292 MicroProps& micros1, MicroProps& micros2,
293 UErrorCode& status) const {
294 if (U_FAILURE(status)) { return; }
295 if (fSameFormatters) {
296 int32_t length = NumberFormatterImpl::writeNumber(micros1, data.quantity1, data.string, 0, status);
297 // HEURISTIC: Desired modifier order: inner, middle, approximately, outer.
298 length += micros1.modInner->apply(data.string, 0, length, status);
299 length += micros1.modMiddle->apply(data.string, 0, length, status);
300 length += fApproximatelyModifier.apply(data.string, 0, length, status);
301 micros1.modOuter->apply(data.string, 0, length, status);
302 } else {
303 formatRange(data, micros1, micros2, status);
304 }
305 }
306
307
formatRange(UFormattedNumberRangeData & data,MicroProps & micros1,MicroProps & micros2,UErrorCode & status) const308 void NumberRangeFormatterImpl::formatRange(UFormattedNumberRangeData& data,
309 MicroProps& micros1, MicroProps& micros2,
310 UErrorCode& status) const {
311 if (U_FAILURE(status)) { return; }
312
313 // modInner is always notation (scientific); collapsable in ALL.
314 // modOuter is always units; collapsable in ALL, AUTO, and UNIT.
315 // modMiddle could be either; collapsable in ALL and sometimes AUTO and UNIT.
316 // Never collapse an outer mod but not an inner mod.
317 bool collapseOuter, collapseMiddle, collapseInner;
318 switch (fCollapse) {
319 case UNUM_RANGE_COLLAPSE_ALL:
320 case UNUM_RANGE_COLLAPSE_AUTO:
321 case UNUM_RANGE_COLLAPSE_UNIT:
322 {
323 // OUTER MODIFIER
324 collapseOuter = micros1.modOuter->semanticallyEquivalent(*micros2.modOuter);
325
326 if (!collapseOuter) {
327 // Never collapse inner mods if outer mods are not collapsable
328 collapseMiddle = false;
329 collapseInner = false;
330 break;
331 }
332
333 // MIDDLE MODIFIER
334 collapseMiddle = micros1.modMiddle->semanticallyEquivalent(*micros2.modMiddle);
335
336 if (!collapseMiddle) {
337 // Never collapse inner mods if outer mods are not collapsable
338 collapseInner = false;
339 break;
340 }
341
342 // MIDDLE MODIFIER HEURISTICS
343 // (could disable collapsing of the middle modifier)
344 // The modifiers are equal by this point, so we can look at just one of them.
345 const Modifier* mm = micros1.modMiddle;
346 if (fCollapse == UNUM_RANGE_COLLAPSE_UNIT) {
347 // Only collapse if the modifier is a unit.
348 // TODO: Make a better way to check for a unit?
349 // TODO: Handle case where the modifier has both notation and unit (compact currency)?
350 if (!mm->containsField(UNUM_CURRENCY_FIELD) && !mm->containsField(UNUM_PERCENT_FIELD)) {
351 collapseMiddle = false;
352 }
353 } else if (fCollapse == UNUM_RANGE_COLLAPSE_AUTO) {
354 // Heuristic as of ICU 63: collapse only if the modifier is more than one code point.
355 if (mm->getCodePointCount() <= 1) {
356 collapseMiddle = false;
357 }
358 }
359
360 if (!collapseMiddle || fCollapse != UNUM_RANGE_COLLAPSE_ALL) {
361 collapseInner = false;
362 break;
363 }
364
365 // INNER MODIFIER
366 collapseInner = micros1.modInner->semanticallyEquivalent(*micros2.modInner);
367
368 // All done checking for collapsability.
369 break;
370 }
371
372 default:
373 collapseOuter = false;
374 collapseMiddle = false;
375 collapseInner = false;
376 break;
377 }
378
379 NumberStringBuilder& string = data.string;
380 int32_t lengthPrefix = 0;
381 int32_t length1 = 0;
382 int32_t lengthInfix = 0;
383 int32_t length2 = 0;
384 int32_t lengthSuffix = 0;
385
386 // Use #define so that these are evaluated at the call site.
387 #define UPRV_INDEX_0 (lengthPrefix)
388 #define UPRV_INDEX_1 (lengthPrefix + length1)
389 #define UPRV_INDEX_2 (lengthPrefix + length1 + lengthInfix)
390 #define UPRV_INDEX_3 (lengthPrefix + length1 + lengthInfix + length2)
391
392 int32_t lengthRange = SimpleModifier::formatTwoArgPattern(
393 fRangeFormatter,
394 string,
395 0,
396 &lengthPrefix,
397 &lengthSuffix,
398 UNUM_FIELD_COUNT,
399 status);
400 if (U_FAILURE(status)) { return; }
401 lengthInfix = lengthRange - lengthPrefix - lengthSuffix;
402 U_ASSERT(lengthInfix > 0);
403
404 // SPACING HEURISTIC
405 // Add spacing unless all modifiers are collapsed.
406 // TODO: add API to control this?
407 // TODO: Use a data-driven heuristic like currency spacing?
408 // TODO: Use Unicode [:whitespace:] instead of PatternProps whitespace? (consider speed implications)
409 {
410 bool repeatInner = !collapseInner && micros1.modInner->getCodePointCount() > 0;
411 bool repeatMiddle = !collapseMiddle && micros1.modMiddle->getCodePointCount() > 0;
412 bool repeatOuter = !collapseOuter && micros1.modOuter->getCodePointCount() > 0;
413 if (repeatInner || repeatMiddle || repeatOuter) {
414 // Add spacing if there is not already spacing
415 if (!PatternProps::isWhiteSpace(string.charAt(UPRV_INDEX_1))) {
416 lengthInfix += string.insertCodePoint(UPRV_INDEX_1, u'\u0020', UNUM_FIELD_COUNT, status);
417 }
418 if (!PatternProps::isWhiteSpace(string.charAt(UPRV_INDEX_2 - 1))) {
419 lengthInfix += string.insertCodePoint(UPRV_INDEX_2, u'\u0020', UNUM_FIELD_COUNT, status);
420 }
421 }
422 }
423
424 length1 += NumberFormatterImpl::writeNumber(micros1, data.quantity1, string, UPRV_INDEX_0, status);
425 length2 += NumberFormatterImpl::writeNumber(micros2, data.quantity2, string, UPRV_INDEX_2, status);
426
427 // TODO: Support padding?
428
429 if (collapseInner) {
430 // Note: this is actually a mix of prefix and suffix, but adding to infix length works
431 const Modifier& mod = resolveModifierPlurals(*micros1.modInner, *micros2.modInner);
432 lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status);
433 } else {
434 length1 += micros1.modInner->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
435 length2 += micros2.modInner->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status);
436 }
437
438 if (collapseMiddle) {
439 // Note: this is actually a mix of prefix and suffix, but adding to infix length works
440 const Modifier& mod = resolveModifierPlurals(*micros1.modMiddle, *micros2.modMiddle);
441 lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status);
442 } else {
443 length1 += micros1.modMiddle->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
444 length2 += micros2.modMiddle->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status);
445 }
446
447 if (collapseOuter) {
448 // Note: this is actually a mix of prefix and suffix, but adding to infix length works
449 const Modifier& mod = resolveModifierPlurals(*micros1.modOuter, *micros2.modOuter);
450 lengthInfix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_3, status);
451 } else {
452 length1 += micros1.modOuter->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
453 length2 += micros2.modOuter->apply(string, UPRV_INDEX_2, UPRV_INDEX_3, status);
454 }
455 }
456
457
458 const Modifier&
resolveModifierPlurals(const Modifier & first,const Modifier & second) const459 NumberRangeFormatterImpl::resolveModifierPlurals(const Modifier& first, const Modifier& second) const {
460 Modifier::Parameters parameters;
461 first.getParameters(parameters);
462 if (parameters.obj == nullptr) {
463 // No plural form; return a fallback (e.g., the first)
464 return first;
465 }
466 StandardPlural::Form firstPlural = parameters.plural;
467
468 second.getParameters(parameters);
469 if (parameters.obj == nullptr) {
470 // No plural form; return a fallback (e.g., the first)
471 return first;
472 }
473 StandardPlural::Form secondPlural = parameters.plural;
474
475 // Get the required plural form from data
476 StandardPlural::Form resultPlural = fPluralRanges.resolve(firstPlural, secondPlural);
477
478 // Get and return the new Modifier
479 const Modifier* mod = parameters.obj->getModifier(parameters.signum, resultPlural);
480 U_ASSERT(mod != nullptr);
481 return *mod;
482 }
483
484
485
486 #endif /* #if !UCONFIG_NO_FORMATTING */
487