1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include "unicode/utypes.h"
5
6 #if !UCONFIG_NO_FORMATTING
7
8 // Allow implicit conversion from char16_t* to UnicodeString for this file:
9 // Helpful in toString methods and elsewhere.
10 #define UNISTR_FROM_STRING_EXPLICIT
11
12 #include "unicode/numberrangeformatter.h"
13 #include "numrange_impl.h"
14 #include "patternprops.h"
15 #include "pluralranges.h"
16 #include "uresimp.h"
17 #include "util.h"
18
19 using namespace icu;
20 using namespace icu::number;
21 using namespace icu::number::impl;
22
23 namespace {
24
25 // Helper function for 2-dimensional switch statement
identity2d(UNumberRangeIdentityFallback a,UNumberRangeIdentityResult b)26 constexpr int8_t identity2d(UNumberRangeIdentityFallback a, UNumberRangeIdentityResult b) {
27 return static_cast<int8_t>(a) | (static_cast<int8_t>(b) << 4);
28 }
29
30
31 struct NumberRangeData {
32 SimpleFormatter rangePattern;
33 // Note: approximatelyPattern is unused since ICU 69.
34 // SimpleFormatter approximatelyPattern;
35 };
36
37 class NumberRangeDataSink : public ResourceSink {
38 public:
NumberRangeDataSink(NumberRangeData & data)39 NumberRangeDataSink(NumberRangeData& data) : fData(data) {}
40
put(const char * key,ResourceValue & value,UBool,UErrorCode & status)41 void put(const char* key, ResourceValue& value, UBool /*noFallback*/, UErrorCode& status) override {
42 ResourceTable miscTable = value.getTable(status);
43 if (U_FAILURE(status)) { return; }
44 for (int i = 0; miscTable.getKeyAndValue(i, key, value); i++) {
45 if (uprv_strcmp(key, "range") == 0) {
46 if (hasRangeData()) {
47 continue; // have already seen this pattern
48 }
49 fData.rangePattern = {value.getUnicodeString(status), status};
50 }
51 /*
52 // Note: approximatelyPattern is unused since ICU 69.
53 else if (uprv_strcmp(key, "approximately") == 0) {
54 if (hasApproxData()) {
55 continue; // have already seen this pattern
56 }
57 fData.approximatelyPattern = {value.getUnicodeString(status), status};
58 }
59 */
60 }
61 }
62
hasRangeData()63 bool hasRangeData() {
64 return fData.rangePattern.getArgumentLimit() != 0;
65 }
66
67 /*
68 // Note: approximatelyPattern is unused since ICU 69.
69 bool hasApproxData() {
70 return fData.approximatelyPattern.getArgumentLimit() != 0;
71 }
72 */
73
isComplete()74 bool isComplete() {
75 return hasRangeData() /* && hasApproxData() */;
76 }
77
fillInDefaults(UErrorCode & status)78 void fillInDefaults(UErrorCode& status) {
79 if (!hasRangeData()) {
80 fData.rangePattern = {u"{0}–{1}", status};
81 }
82 /*
83 if (!hasApproxData()) {
84 fData.approximatelyPattern = {u"~{0}", status};
85 }
86 */
87 }
88
89 private:
90 NumberRangeData& fData;
91 };
92
getNumberRangeData(const char * localeName,const char * nsName,NumberRangeData & data,UErrorCode & status)93 void getNumberRangeData(const char* localeName, const char* nsName, NumberRangeData& data, UErrorCode& status) {
94 if (U_FAILURE(status)) { return; }
95 LocalUResourceBundlePointer rb(ures_open(nullptr, localeName, &status));
96 if (U_FAILURE(status)) { return; }
97 NumberRangeDataSink sink(data);
98
99 CharString dataPath;
100 dataPath.append("NumberElements/", -1, status);
101 dataPath.append(nsName, -1, status);
102 dataPath.append("/miscPatterns", -1, status);
103 if (U_FAILURE(status)) { return; }
104
105 UErrorCode localStatus = U_ZERO_ERROR;
106 ures_getAllItemsWithFallback(rb.getAlias(), dataPath.data(), sink, localStatus);
107 if (U_FAILURE(localStatus) && localStatus != U_MISSING_RESOURCE_ERROR) {
108 status = localStatus;
109 return;
110 }
111
112 // Fall back to latn if necessary
113 if (!sink.isComplete()) {
114 ures_getAllItemsWithFallback(rb.getAlias(), "NumberElements/latn/miscPatterns", sink, status);
115 }
116
117 sink.fillInDefaults(status);
118 }
119
120 } // namespace
121
122
123
NumberRangeFormatterImpl(const RangeMacroProps & macros,UErrorCode & status)124 NumberRangeFormatterImpl::NumberRangeFormatterImpl(const RangeMacroProps& macros, UErrorCode& status)
125 : formatterImpl1(macros.formatter1.fMacros, status),
126 formatterImpl2(macros.formatter2.fMacros, status),
127 fSameFormatters(macros.singleFormatter),
128 fCollapse(macros.collapse),
129 fIdentityFallback(macros.identityFallback),
130 fApproximatelyFormatter(status) {
131
132 const char* nsName = formatterImpl1.getRawMicroProps().nsName;
133 if (!fSameFormatters && uprv_strcmp(nsName, formatterImpl2.getRawMicroProps().nsName) != 0) {
134 status = U_ILLEGAL_ARGUMENT_ERROR;
135 return;
136 }
137
138 NumberRangeData data;
139 getNumberRangeData(macros.locale.getName(), nsName, data, status);
140 if (U_FAILURE(status)) { return; }
141 fRangeFormatter = data.rangePattern;
142
143 if (fSameFormatters && (
144 fIdentityFallback == UNUM_IDENTITY_FALLBACK_APPROXIMATELY ||
145 fIdentityFallback == UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE)) {
146 MacroProps approximatelyMacros(macros.formatter1.fMacros);
147 approximatelyMacros.approximately = true;
148 // Use in-place construction because NumberFormatterImpl has internal self-pointers
149 fApproximatelyFormatter.~NumberFormatterImpl();
150 new (&fApproximatelyFormatter) NumberFormatterImpl(approximatelyMacros, status);
151 }
152
153 // TODO: Get locale from PluralRules instead?
154 fPluralRanges = StandardPluralRanges::forLocale(macros.locale, status);
155 if (U_FAILURE(status)) { return; }
156 }
157
format(UFormattedNumberRangeData & data,bool equalBeforeRounding,UErrorCode & status) const158 void NumberRangeFormatterImpl::format(UFormattedNumberRangeData& data, bool equalBeforeRounding, UErrorCode& status) const {
159 if (U_FAILURE(status)) {
160 return;
161 }
162
163 MicroProps micros1;
164 MicroProps micros2;
165 formatterImpl1.preProcess(data.quantity1, micros1, status);
166 if (fSameFormatters) {
167 formatterImpl1.preProcess(data.quantity2, micros2, status);
168 } else {
169 formatterImpl2.preProcess(data.quantity2, micros2, status);
170 }
171 if (U_FAILURE(status)) {
172 return;
173 }
174
175 // If any of the affixes are different, an identity is not possible
176 // and we must use formatRange().
177 // TODO: Write this as MicroProps operator==() ?
178 // TODO: Avoid the redundancy of these equality operations with the
179 // ones in formatRange?
180 if (!micros1.modInner->semanticallyEquivalent(*micros2.modInner)
181 || !micros1.modMiddle->semanticallyEquivalent(*micros2.modMiddle)
182 || !micros1.modOuter->semanticallyEquivalent(*micros2.modOuter)) {
183 formatRange(data, micros1, micros2, status);
184 data.identityResult = UNUM_IDENTITY_RESULT_NOT_EQUAL;
185 return;
186 }
187
188 // Check for identity
189 if (equalBeforeRounding) {
190 data.identityResult = UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING;
191 } else if (data.quantity1 == data.quantity2) {
192 data.identityResult = UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING;
193 } else {
194 data.identityResult = UNUM_IDENTITY_RESULT_NOT_EQUAL;
195 }
196
197 switch (identity2d(fIdentityFallback, data.identityResult)) {
198 case identity2d(UNUM_IDENTITY_FALLBACK_RANGE,
199 UNUM_IDENTITY_RESULT_NOT_EQUAL):
200 case identity2d(UNUM_IDENTITY_FALLBACK_RANGE,
201 UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
202 case identity2d(UNUM_IDENTITY_FALLBACK_RANGE,
203 UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
204 case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY,
205 UNUM_IDENTITY_RESULT_NOT_EQUAL):
206 case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE,
207 UNUM_IDENTITY_RESULT_NOT_EQUAL):
208 case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE,
209 UNUM_IDENTITY_RESULT_NOT_EQUAL):
210 formatRange(data, micros1, micros2, status);
211 break;
212
213 case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY,
214 UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
215 case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY,
216 UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
217 case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE,
218 UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
219 formatApproximately(data, micros1, micros2, status);
220 break;
221
222 case identity2d(UNUM_IDENTITY_FALLBACK_APPROXIMATELY_OR_SINGLE_VALUE,
223 UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
224 case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE,
225 UNUM_IDENTITY_RESULT_EQUAL_AFTER_ROUNDING):
226 case identity2d(UNUM_IDENTITY_FALLBACK_SINGLE_VALUE,
227 UNUM_IDENTITY_RESULT_EQUAL_BEFORE_ROUNDING):
228 formatSingleValue(data, micros1, micros2, status);
229 break;
230
231 default:
232 UPRV_UNREACHABLE_EXIT;
233 }
234 }
235
236
formatSingleValue(UFormattedNumberRangeData & data,MicroProps & micros1,MicroProps & micros2,UErrorCode & status) const237 void NumberRangeFormatterImpl::formatSingleValue(UFormattedNumberRangeData& data,
238 MicroProps& micros1, MicroProps& micros2,
239 UErrorCode& status) const {
240 if (U_FAILURE(status)) { return; }
241 if (fSameFormatters) {
242 int32_t length = NumberFormatterImpl::writeNumber(micros1.simple, data.quantity1, data.getStringRef(), 0, status);
243 NumberFormatterImpl::writeAffixes(micros1, data.getStringRef(), 0, length, status);
244 } else {
245 formatRange(data, micros1, micros2, status);
246 }
247 }
248
249
formatApproximately(UFormattedNumberRangeData & data,MicroProps & micros1,MicroProps & micros2,UErrorCode & status) const250 void NumberRangeFormatterImpl::formatApproximately (UFormattedNumberRangeData& data,
251 MicroProps& micros1, MicroProps& micros2,
252 UErrorCode& status) const {
253 if (U_FAILURE(status)) { return; }
254 if (fSameFormatters) {
255 // Re-format using the approximately formatter:
256 MicroProps microsAppx;
257 data.quantity1.resetExponent();
258 fApproximatelyFormatter.preProcess(data.quantity1, microsAppx, status);
259 int32_t length = NumberFormatterImpl::writeNumber(microsAppx.simple, data.quantity1, data.getStringRef(), 0, status);
260 length += microsAppx.modInner->apply(data.getStringRef(), 0, length, status);
261 length += microsAppx.modMiddle->apply(data.getStringRef(), 0, length, status);
262 microsAppx.modOuter->apply(data.getStringRef(), 0, length, status);
263 } else {
264 formatRange(data, micros1, micros2, status);
265 }
266 }
267
268
formatRange(UFormattedNumberRangeData & data,MicroProps & micros1,MicroProps & micros2,UErrorCode & status) const269 void NumberRangeFormatterImpl::formatRange(UFormattedNumberRangeData& data,
270 MicroProps& micros1, MicroProps& micros2,
271 UErrorCode& status) const {
272 if (U_FAILURE(status)) { return; }
273
274 // modInner is always notation (scientific); collapsable in ALL.
275 // modOuter is always units; collapsable in ALL, AUTO, and UNIT.
276 // modMiddle could be either; collapsable in ALL and sometimes AUTO and UNIT.
277 // Never collapse an outer mod but not an inner mod.
278 bool collapseOuter, collapseMiddle, collapseInner;
279 switch (fCollapse) {
280 case UNUM_RANGE_COLLAPSE_ALL:
281 case UNUM_RANGE_COLLAPSE_AUTO:
282 case UNUM_RANGE_COLLAPSE_UNIT:
283 {
284 // OUTER MODIFIER
285 collapseOuter = micros1.modOuter->semanticallyEquivalent(*micros2.modOuter);
286
287 if (!collapseOuter) {
288 // Never collapse inner mods if outer mods are not collapsable
289 collapseMiddle = false;
290 collapseInner = false;
291 break;
292 }
293
294 // MIDDLE MODIFIER
295 collapseMiddle = micros1.modMiddle->semanticallyEquivalent(*micros2.modMiddle);
296
297 if (!collapseMiddle) {
298 // Never collapse inner mods if outer mods are not collapsable
299 collapseInner = false;
300 break;
301 }
302
303 // MIDDLE MODIFIER HEURISTICS
304 // (could disable collapsing of the middle modifier)
305 // The modifiers are equal by this point, so we can look at just one of them.
306 const Modifier* mm = micros1.modMiddle;
307 if (fCollapse == UNUM_RANGE_COLLAPSE_UNIT) {
308 // Only collapse if the modifier is a unit.
309 // TODO: Make a better way to check for a unit?
310 // TODO: Handle case where the modifier has both notation and unit (compact currency)?
311 if (!mm->containsField({UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD})
312 && !mm->containsField({UFIELD_CATEGORY_NUMBER, UNUM_PERCENT_FIELD})) {
313 collapseMiddle = false;
314 }
315 } else if (fCollapse == UNUM_RANGE_COLLAPSE_AUTO) {
316 // Heuristic as of ICU 63: collapse only if the modifier is more than one code point.
317 if (mm->getCodePointCount() <= 1) {
318 collapseMiddle = false;
319 }
320 }
321
322 if (!collapseMiddle || fCollapse != UNUM_RANGE_COLLAPSE_ALL) {
323 collapseInner = false;
324 break;
325 }
326
327 // INNER MODIFIER
328 collapseInner = micros1.modInner->semanticallyEquivalent(*micros2.modInner);
329
330 // All done checking for collapsibility.
331 break;
332 }
333
334 default:
335 collapseOuter = false;
336 collapseMiddle = false;
337 collapseInner = false;
338 break;
339 }
340
341 FormattedStringBuilder& string = data.getStringRef();
342 int32_t lengthPrefix = 0;
343 int32_t length1 = 0;
344 int32_t lengthInfix = 0;
345 int32_t length2 = 0;
346 int32_t lengthSuffix = 0;
347
348 // Use #define so that these are evaluated at the call site.
349 #define UPRV_INDEX_0 (lengthPrefix)
350 #define UPRV_INDEX_1 (lengthPrefix + length1)
351 #define UPRV_INDEX_2 (lengthPrefix + length1 + lengthInfix)
352 #define UPRV_INDEX_3 (lengthPrefix + length1 + lengthInfix + length2)
353 #define UPRV_INDEX_4 (lengthPrefix + length1 + lengthInfix + length2 + lengthSuffix)
354
355 int32_t lengthRange = SimpleModifier::formatTwoArgPattern(
356 fRangeFormatter,
357 string,
358 0,
359 &lengthPrefix,
360 &lengthSuffix,
361 kUndefinedField,
362 status);
363 if (U_FAILURE(status)) { return; }
364 lengthInfix = lengthRange - lengthPrefix - lengthSuffix;
365 U_ASSERT(lengthInfix > 0);
366
367 // SPACING HEURISTIC
368 // Add spacing unless all modifiers are collapsed.
369 // TODO: add API to control this?
370 // TODO: Use a data-driven heuristic like currency spacing?
371 // TODO: Use Unicode [:whitespace:] instead of PatternProps whitespace? (consider speed implications)
372 {
373 bool repeatInner = !collapseInner && micros1.modInner->getCodePointCount() > 0;
374 bool repeatMiddle = !collapseMiddle && micros1.modMiddle->getCodePointCount() > 0;
375 bool repeatOuter = !collapseOuter && micros1.modOuter->getCodePointCount() > 0;
376 if (repeatInner || repeatMiddle || repeatOuter) {
377 // Add spacing if there is not already spacing
378 if (!PatternProps::isWhiteSpace(string.charAt(UPRV_INDEX_1))) {
379 lengthInfix += string.insertCodePoint(UPRV_INDEX_1, u'\u0020', kUndefinedField, status);
380 }
381 if (!PatternProps::isWhiteSpace(string.charAt(UPRV_INDEX_2 - 1))) {
382 lengthInfix += string.insertCodePoint(UPRV_INDEX_2, u'\u0020', kUndefinedField, status);
383 }
384 }
385 }
386
387 length1 += NumberFormatterImpl::writeNumber(micros1.simple, data.quantity1, string, UPRV_INDEX_0, status);
388 // ICU-21684: Write the second number to a temp string to avoid repeated insert operations
389 FormattedStringBuilder tempString;
390 NumberFormatterImpl::writeNumber(micros2.simple, data.quantity2, tempString, 0, status);
391 length2 += string.insert(UPRV_INDEX_2, tempString, status);
392
393 // TODO: Support padding?
394
395 if (collapseInner) {
396 const Modifier& mod = resolveModifierPlurals(*micros1.modInner, *micros2.modInner);
397 lengthSuffix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_4, status);
398 lengthPrefix += mod.getPrefixLength();
399 lengthSuffix -= mod.getPrefixLength();
400 } else {
401 length1 += micros1.modInner->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
402 length2 += micros2.modInner->apply(string, UPRV_INDEX_2, UPRV_INDEX_4, status);
403 }
404
405 if (collapseMiddle) {
406 const Modifier& mod = resolveModifierPlurals(*micros1.modMiddle, *micros2.modMiddle);
407 lengthSuffix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_4, status);
408 lengthPrefix += mod.getPrefixLength();
409 lengthSuffix -= mod.getPrefixLength();
410 } else {
411 length1 += micros1.modMiddle->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
412 length2 += micros2.modMiddle->apply(string, UPRV_INDEX_2, UPRV_INDEX_4, status);
413 }
414
415 if (collapseOuter) {
416 const Modifier& mod = resolveModifierPlurals(*micros1.modOuter, *micros2.modOuter);
417 lengthSuffix += mod.apply(string, UPRV_INDEX_0, UPRV_INDEX_4, status);
418 lengthPrefix += mod.getPrefixLength();
419 lengthSuffix -= mod.getPrefixLength();
420 } else {
421 length1 += micros1.modOuter->apply(string, UPRV_INDEX_0, UPRV_INDEX_1, status);
422 length2 += micros2.modOuter->apply(string, UPRV_INDEX_2, UPRV_INDEX_4, status);
423 }
424
425 // Now that all pieces are added, save the span info.
426 data.appendSpanInfo(UFIELD_CATEGORY_NUMBER_RANGE_SPAN, 0, UPRV_INDEX_0, length1, status);
427 data.appendSpanInfo(UFIELD_CATEGORY_NUMBER_RANGE_SPAN, 1, UPRV_INDEX_2, length2, status);
428 }
429
430
431 const Modifier&
resolveModifierPlurals(const Modifier & first,const Modifier & second) const432 NumberRangeFormatterImpl::resolveModifierPlurals(const Modifier& first, const Modifier& second) const {
433 Modifier::Parameters parameters;
434 first.getParameters(parameters);
435 if (parameters.obj == nullptr) {
436 // No plural form; return a fallback (e.g., the first)
437 return first;
438 }
439 StandardPlural::Form firstPlural = parameters.plural;
440
441 second.getParameters(parameters);
442 if (parameters.obj == nullptr) {
443 // No plural form; return a fallback (e.g., the first)
444 return first;
445 }
446 StandardPlural::Form secondPlural = parameters.plural;
447
448 // Get the required plural form from data
449 StandardPlural::Form resultPlural = fPluralRanges.resolve(firstPlural, secondPlural);
450
451 // Get and return the new Modifier
452 const Modifier* mod = parameters.obj->getModifier(parameters.signum, resultPlural);
453 U_ASSERT(mod != nullptr);
454 return *mod;
455 }
456
457
458
459 #endif /* #if !UCONFIG_NO_FORMATTING */
460