1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include "unicode/utypes.h"
5
6 #if !UCONFIG_NO_FORMATTING
7
8 #include <cstdlib>
9
10 #include "unicode/simpleformatter.h"
11 #include "unicode/ures.h"
12 #include "unicode/plurrule.h"
13 #include "unicode/strenum.h"
14 #include "ureslocs.h"
15 #include "charstr.h"
16 #include "uresimp.h"
17 #include "measunit_impl.h"
18 #include "number_longnames.h"
19 #include "number_microprops.h"
20 #include <algorithm>
21 #include "cstring.h"
22 #include "util.h"
23 #include "sharedpluralrules.h"
24
25 using namespace icu;
26 using namespace icu::number;
27 using namespace icu::number::impl;
28
29 namespace {
30
31 /**
32 * Display Name (this format has no placeholder).
33 *
34 * Used as an index into the LongNameHandler::simpleFormats array. Units
35 * resources cover the normal set of PluralRules keys, as well as `dnam` and
36 * `per` forms.
37 */
38 constexpr int32_t DNAM_INDEX = StandardPlural::Form::COUNT;
39 /**
40 * "per" form (e.g. "{0} per day" is day's "per" form).
41 *
42 * Used as an index into the LongNameHandler::simpleFormats array. Units
43 * resources cover the normal set of PluralRules keys, as well as `dnam` and
44 * `per` forms.
45 */
46 constexpr int32_t PER_INDEX = StandardPlural::Form::COUNT + 1;
47 /**
48 * Gender of the word, in languages with grammatical gender.
49 */
50 constexpr int32_t GENDER_INDEX = StandardPlural::Form::COUNT + 2;
51 // Number of keys in the array populated by PluralTableSink.
52 constexpr int32_t ARRAY_LENGTH = StandardPlural::Form::COUNT + 3;
53
54 // TODO(icu-units#28): load this list from resources, after creating a "&set"
55 // function for use in ldml2icu rules.
56 const int32_t GENDER_COUNT = 7;
57 const char *gGenders[GENDER_COUNT] = {"animate", "common", "feminine", "inanimate",
58 "masculine", "neuter", "personal"};
59
60 // Converts a UnicodeString to a const char*, either pointing to a string in
61 // gGenders, or pointing to an empty string if an appropriate string was not
62 // found.
getGenderString(UnicodeString uGender,UErrorCode status)63 const char *getGenderString(UnicodeString uGender, UErrorCode status) {
64 if (uGender.length() == 0) {
65 return "";
66 }
67 CharString gender;
68 gender.appendInvariantChars(uGender, status);
69 if (U_FAILURE(status)) {
70 return "";
71 }
72 int32_t first = 0;
73 int32_t last = GENDER_COUNT;
74 while (first < last) {
75 int32_t mid = (first + last) / 2;
76 int32_t cmp = uprv_strcmp(gender.data(), gGenders[mid]);
77 if (cmp == 0) {
78 return gGenders[mid];
79 } else if (cmp > 0) {
80 first = mid + 1;
81 } else if (cmp < 0) {
82 last = mid;
83 }
84 }
85 // We don't return an error in case our gGenders list is incomplete in
86 // production.
87 //
88 // TODO(icu-units#28): a unit test checking all locales' genders are covered
89 // by gGenders? Else load a complete list of genders found in
90 // grammaticalFeatures in an initOnce.
91 return "";
92 }
93
94 // Returns the array index that corresponds to the given pluralKeyword.
getIndex(const char * pluralKeyword,UErrorCode & status)95 int32_t getIndex(const char* pluralKeyword, UErrorCode& status) {
96 // pluralKeyword can also be "dnam", "per", or "gender"
97 switch (*pluralKeyword) {
98 case 'd':
99 if (uprv_strcmp(pluralKeyword + 1, "nam") == 0) {
100 return DNAM_INDEX;
101 }
102 break;
103 case 'g':
104 if (uprv_strcmp(pluralKeyword + 1, "ender") == 0) {
105 return GENDER_INDEX;
106 }
107 break;
108 case 'p':
109 if (uprv_strcmp(pluralKeyword + 1, "er") == 0) {
110 return PER_INDEX;
111 }
112 break;
113 default:
114 break;
115 }
116 StandardPlural::Form plural = StandardPlural::fromString(pluralKeyword, status);
117 return plural;
118 }
119
120 // Selects a string out of the `strings` array which corresponds to the
121 // specified plural form, with fallback to the OTHER form.
122 //
123 // The `strings` array must have ARRAY_LENGTH items: one corresponding to each
124 // of the plural forms, plus a display name ("dnam") and a "per" form.
getWithPlural(const UnicodeString * strings,StandardPlural::Form plural,UErrorCode & status)125 UnicodeString getWithPlural(
126 const UnicodeString* strings,
127 StandardPlural::Form plural,
128 UErrorCode& status) {
129 UnicodeString result = strings[plural];
130 if (result.isBogus()) {
131 result = strings[StandardPlural::Form::OTHER];
132 }
133 if (result.isBogus()) {
134 // There should always be data in the "other" plural variant.
135 status = U_INTERNAL_PROGRAM_ERROR;
136 }
137 return result;
138 }
139
// Where the "{0}" placeholder sits inside a unit pattern, as reported by
// extractCorePattern():
//   PH_NONE      - no "{0}" at the start, at the end, or in between.
//   PH_BEGINNING - the pattern starts with "{0}".
//   PH_MIDDLE    - "{0}" occurs somewhere other than the very start or end.
//   PH_END       - the pattern ends with "{0}".
// NOTE(review): PH_EMPTY is not assigned by extractCorePattern(); presumably
// it marks a pattern that has not been classified yet - confirm with callers.
enum PlaceholderPosition { PH_EMPTY, PH_NONE, PH_BEGINNING, PH_MIDDLE, PH_END };
141
142 /**
143 * Returns three outputs extracted from pattern.
144 *
145 * @param coreUnit is extracted as per Extract(...) in the spec:
146 * https://unicode.org/reports/tr35/tr35-general.html#compound-units
147 * @param PlaceholderPosition indicates where in the string the placeholder was
148 * found.
149 * @param joinerChar Iff the placeholder was at the beginning or end, joinerChar
150 * contains the space character (if any) that separated the placeholder from
151 * the rest of the pattern. Otherwise, joinerChar is set to NUL. Only one
152 * space character is considered.
153 */
extractCorePattern(const UnicodeString & pattern,UnicodeString & coreUnit,PlaceholderPosition & placeholderPosition,char16_t & joinerChar)154 void extractCorePattern(const UnicodeString &pattern,
155 UnicodeString &coreUnit,
156 PlaceholderPosition &placeholderPosition,
157 char16_t &joinerChar) {
158 joinerChar = 0;
159 int32_t len = pattern.length();
160 if (pattern.startsWith(u"{0}", 3)) {
161 placeholderPosition = PH_BEGINNING;
162 if (u_isJavaSpaceChar(pattern[3])) {
163 joinerChar = pattern[3];
164 coreUnit.setTo(pattern, 4, len - 4);
165 } else {
166 coreUnit.setTo(pattern, 3, len - 3);
167 }
168 } else if (pattern.endsWith(u"{0}", 3)) {
169 placeholderPosition = PH_END;
170 if (u_isJavaSpaceChar(pattern[len - 4])) {
171 coreUnit.setTo(pattern, 0, len - 4);
172 joinerChar = pattern[len - 4];
173 } else {
174 coreUnit.setTo(pattern, 0, len - 3);
175 }
176 } else if (pattern.indexOf(u"{0}", 3, 1, len - 2) == -1) {
177 placeholderPosition = PH_NONE;
178 coreUnit = pattern;
179 } else {
180 placeholderPosition = PH_MIDDLE;
181 coreUnit = pattern;
182 }
183 }
184
185 //////////////////////////
186 /// BEGIN DATA LOADING ///
187 //////////////////////////
188
189 // Gets the gender of a built-in unit: unit must be a built-in. Returns an empty
190 // string both in case of unknown gender and in case of unknown unit.
191 UnicodeString
getGenderForBuiltin(const Locale & locale,const MeasureUnit & builtinUnit,UErrorCode & status)192 getGenderForBuiltin(const Locale &locale, const MeasureUnit &builtinUnit, UErrorCode &status) {
193 LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status));
194 if (U_FAILURE(status)) { return {}; }
195
196 // Map duration-year-person, duration-week-person, etc. to duration-year, duration-week, ...
197 // TODO(ICU-20400): Get duration-*-person data properly with aliases.
198 StringPiece subtypeForResource;
199 int32_t subtypeLen = static_cast<int32_t>(uprv_strlen(builtinUnit.getSubtype()));
200 if (subtypeLen > 7 && uprv_strcmp(builtinUnit.getSubtype() + subtypeLen - 7, "-person") == 0) {
201 subtypeForResource = {builtinUnit.getSubtype(), subtypeLen - 7};
202 } else {
203 subtypeForResource = builtinUnit.getSubtype();
204 }
205
206 CharString key;
207 key.append("units/", status);
208 key.append(builtinUnit.getType(), status);
209 key.append("/", status);
210 key.append(subtypeForResource, status);
211 key.append("/gender", status);
212
213 UErrorCode localStatus = status;
214 int32_t resultLen = 0;
215 const char16_t *result =
216 ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &resultLen, &localStatus);
217 if (U_SUCCESS(localStatus)) {
218 status = localStatus;
219 return UnicodeString(true, result, resultLen);
220 } else {
221 // TODO(icu-units#28): "$unitRes/gender" does not exist. Do we want to
222 // check whether the parent "$unitRes" exists? Then we could return
223 // U_MISSING_RESOURCE_ERROR for incorrect usage (e.g. builtinUnit not
224 // being a builtin).
225 return {};
226 }
227 }
228
229 // Loads data from a resource tree with paths matching
230 // $key/$pluralForm/$gender/$case, with lateral inheritance for missing cases
231 // and genders.
232 //
233 // An InflectedPluralSink is configured to load data for a specific gender and
234 // case. It loads all plural forms, because selection between plural forms is
235 // dependent upon the value being formatted.
236 //
237 // See data/unit/de.txt and data/unit/fr.txt for examples - take a look at
238 // units/compound/power2: German has case, French has differences for gender,
239 // but no case.
240 //
241 // TODO(icu-units#138): Conceptually similar to PluralTableSink, however the
242 // tree structures are different. After homogenizing the structures, we may be
243 // able to unify the two classes.
244 //
245 // TODO: Spec violation: expects presence of "count" - does not fallback to an
246 // absent "count"! If this fallback were added, getCompoundValue could be
247 // superseded?
248 class InflectedPluralSink : public ResourceSink {
249 public:
250 // Accepts `char*` rather than StringPiece because
251 // ResourceTable::findValue(...) requires a null-terminated `char*`.
252 //
253 // NOTE: outArray MUST have a length of at least ARRAY_LENGTH. No bounds
254 // checking is performed.
InflectedPluralSink(const char * gender,const char * caseVariant,UnicodeString * outArray)255 explicit InflectedPluralSink(const char *gender, const char *caseVariant, UnicodeString *outArray)
256 : gender(gender), caseVariant(caseVariant), outArray(outArray) {
257 // Initialize the array to bogus strings.
258 for (int32_t i = 0; i < ARRAY_LENGTH; i++) {
259 outArray[i].setToBogus();
260 }
261 }
262
263 // See ResourceSink::put().
put(const char * key,ResourceValue & value,UBool,UErrorCode & status)264 void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
265 int32_t pluralIndex = getIndex(key, status);
266 if (U_FAILURE(status)) { return; }
267 if (!outArray[pluralIndex].isBogus()) {
268 // We already have a pattern
269 return;
270 }
271 ResourceTable genderTable = value.getTable(status);
272 ResourceTable caseTable; // This instance has to outlive `value`
273 if (loadForPluralForm(genderTable, caseTable, value, status)) {
274 outArray[pluralIndex] = value.getUnicodeString(status);
275 }
276 }
277
278 private:
279 // Tries to load data for the configured gender from `genderTable`. Returns
280 // true if found, returning the data in `value`. The returned data will be
281 // for the configured gender if found, falling back to "neuter" and
282 // no-gender if not. The caseTable parameter holds the intermediate
283 // ResourceTable for the sake of lifetime management.
loadForPluralForm(const ResourceTable & genderTable,ResourceTable & caseTable,ResourceValue & value,UErrorCode & status)284 bool loadForPluralForm(const ResourceTable &genderTable,
285 ResourceTable &caseTable,
286 ResourceValue &value,
287 UErrorCode &status) {
288 if (uprv_strcmp(gender, "") != 0) {
289 if (loadForGender(genderTable, gender, caseTable, value, status)) {
290 return true;
291 }
292 if (uprv_strcmp(gender, "neuter") != 0 &&
293 loadForGender(genderTable, "neuter", caseTable, value, status)) {
294 return true;
295 }
296 }
297 if (loadForGender(genderTable, "_", caseTable, value, status)) {
298 return true;
299 }
300 return false;
301 }
302
303 // Tries to load data for the given gender from `genderTable`. Returns true
304 // if found, returning the data in `value`. The returned data will be for
305 // the configured case if found, falling back to "nominative" and no-case if
306 // not.
loadForGender(const ResourceTable & genderTable,const char * genderVal,ResourceTable & caseTable,ResourceValue & value,UErrorCode & status)307 bool loadForGender(const ResourceTable &genderTable,
308 const char *genderVal,
309 ResourceTable &caseTable,
310 ResourceValue &value,
311 UErrorCode &status) {
312 if (!genderTable.findValue(genderVal, value)) {
313 return false;
314 }
315 caseTable = value.getTable(status);
316 if (uprv_strcmp(caseVariant, "") != 0) {
317 if (loadForCase(caseTable, caseVariant, value)) {
318 return true;
319 }
320 if (uprv_strcmp(caseVariant, "nominative") != 0 &&
321 loadForCase(caseTable, "nominative", value)) {
322 return true;
323 }
324 }
325 if (loadForCase(caseTable, "_", value)) {
326 return true;
327 }
328 return false;
329 }
330
331 // Tries to load data for the given case from `caseTable`. Returns true if
332 // found, returning the data in `value`.
loadForCase(const ResourceTable & caseTable,const char * caseValue,ResourceValue & value)333 bool loadForCase(const ResourceTable &caseTable, const char *caseValue, ResourceValue &value) {
334 if (!caseTable.findValue(caseValue, value)) {
335 return false;
336 }
337 return true;
338 }
339
340 const char *gender;
341 const char *caseVariant;
342 UnicodeString *outArray;
343 };
344
345 // Fetches localised formatting patterns for the given subKey. See documentation
346 // for InflectedPluralSink for details.
347 //
348 // Data is loaded for the appropriate unit width, with missing data filled in
349 // from unitsShort.
getInflectedMeasureData(StringPiece subKey,const Locale & locale,const UNumberUnitWidth & width,const char * gender,const char * caseVariant,UnicodeString * outArray,UErrorCode & status)350 void getInflectedMeasureData(StringPiece subKey,
351 const Locale &locale,
352 const UNumberUnitWidth &width,
353 const char *gender,
354 const char *caseVariant,
355 UnicodeString *outArray,
356 UErrorCode &status) {
357 InflectedPluralSink sink(gender, caseVariant, outArray);
358 LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status));
359 if (U_FAILURE(status)) { return; }
360
361 CharString key;
362 key.append("units", status);
363 if (width == UNUM_UNIT_WIDTH_NARROW) {
364 key.append("Narrow", status);
365 } else if (width == UNUM_UNIT_WIDTH_SHORT) {
366 key.append("Short", status);
367 }
368 key.append("/", status);
369 key.append(subKey, status);
370
371 UErrorCode localStatus = status;
372 ures_getAllChildrenWithFallback(unitsBundle.getAlias(), key.data(), sink, localStatus);
373 if (width == UNUM_UNIT_WIDTH_SHORT) {
374 status = localStatus;
375 return;
376 }
377 }
378
379 class PluralTableSink : public ResourceSink {
380 public:
381 // NOTE: outArray MUST have a length of at least ARRAY_LENGTH. No bounds
382 // checking is performed.
PluralTableSink(UnicodeString * outArray)383 explicit PluralTableSink(UnicodeString *outArray) : outArray(outArray) {
384 // Initialize the array to bogus strings.
385 for (int32_t i = 0; i < ARRAY_LENGTH; i++) {
386 outArray[i].setToBogus();
387 }
388 }
389
put(const char * key,ResourceValue & value,UBool,UErrorCode & status)390 void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
391 if (uprv_strcmp(key, "case") == 0) {
392 return;
393 }
394 int32_t index = getIndex(key, status);
395 if (U_FAILURE(status)) { return; }
396 if (!outArray[index].isBogus()) {
397 return;
398 }
399 outArray[index] = value.getUnicodeString(status);
400 if (U_FAILURE(status)) { return; }
401 }
402
403 private:
404 UnicodeString *outArray;
405 };
406
/**
 * Populates outArray with `locale`-specific values for `unit` through use of
 * PluralTableSink. Only the set of basic units are supported!
 *
 * Reading from resources *unitsNarrow* and *unitsShort* (for width
 * UNUM_UNIT_WIDTH_NARROW), or just *unitsShort* (for width
 * UNUM_UNIT_WIDTH_SHORT). For other widths, it reads just "units".
 *
 * NOTE(review): the body below reads the patterns from exactly one table per
 * call ("unitsNarrow", "unitsShort" or "units"); no additional unitsShort pass
 * for the narrow width is visible in this function - confirm whether the
 * paragraph above is still accurate.
 *
 * Error reporting: failures while opening bundles or building keys are
 * reported through `status`. The result of the pattern lookup itself is
 * reported only for UNUM_UNIT_WIDTH_SHORT; the gender lookup and the
 * case-specific lookup never modify `status`.
 *
 * @param locale the locale whose unit data is loaded.
 * @param unit must be a built-in unit, i.e. must have a type and subtype,
 * listed in gTypes and gSubTypes in measunit.cpp.
 * @param width selects the resource table, as described above.
 * @param unitDisplayCase the empty string and "nominative" are treated the
 * same. For other cases, strings for the requested case are used if found.
 * (For any missing case-specific data, we fall back to nominative.) Only
 * consulted for UNUM_UNIT_WIDTH_FULL_NAME. Must not be null.
 * @param outArray must be of fixed length ARRAY_LENGTH. All entries are reset
 * to bogus first, even if loading subsequently fails.
 * @param status receives errors as described above.
 */
void getMeasureData(const Locale &locale,
                    const MeasureUnit &unit,
                    const UNumberUnitWidth &width,
                    const char *unitDisplayCase,
                    UnicodeString *outArray,
                    UErrorCode &status) {
    // Constructing the sink resets every outArray entry to bogus.
    PluralTableSink sink(outArray);
    LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status));
    if (U_FAILURE(status)) { return; }

    // subKey becomes "/<type>/<subtype>"; the table name is prepended later.
    CharString subKey;
    subKey.append("/", status);
    subKey.append(unit.getType(), status);
    subKey.append("/", status);

    // Check if unitSubType is an alias or not.
    LocalUResourceBundlePointer aliasBundle(ures_open(U_ICUDATA_ALIAS, "metadata", &status));

    // The alias lookup uses its own status: "not an alias" is the common case
    // and must not be reported to the caller.
    UErrorCode aliasStatus = status;
    StackUResourceBundle aliasFillIn;
    CharString aliasKey;
    aliasKey.append("alias/unit/", aliasStatus);
    aliasKey.append(unit.getSubtype(), aliasStatus);
    aliasKey.append("/replacement", aliasStatus);
    ures_getByKeyWithFallback(aliasBundle.getAlias(), aliasKey.data(), aliasFillIn.getAlias(),
                              &aliasStatus);
    CharString unitSubType;
    if (!U_FAILURE(aliasStatus)) {
        // This means the subType is an alias. Then, replace unitSubType with the replacement.
        auto replacement = ures_getUnicodeString(aliasFillIn.getAlias(), &status);
        unitSubType.appendInvariantChars(replacement, status);
    } else {
        unitSubType.append(unit.getSubtype(), status);
    }

    // Map duration-year-person, duration-week-person, etc. to duration-year, duration-week, ...
    // TODO(ICU-20400): Get duration-*-person data properly with aliases.
    int32_t subtypeLen = static_cast<int32_t>(uprv_strlen(unitSubType.data()));
    if (subtypeLen > 7 && uprv_strcmp(unitSubType.data() + subtypeLen - 7, "-person") == 0) {
        subKey.append({unitSubType.data(), subtypeLen - 7}, status);
    } else {
        subKey.append({unitSubType.data(), subtypeLen}, status);
    }

    // For widths other than full-name, the gender is fetched explicitly from
    // the "units" table ("units<subKey>/gender"). A local status is used, so a
    // missing gender is not reported as an error.
    if (width != UNUM_UNIT_WIDTH_FULL_NAME) {
        UErrorCode localStatus = status;
        CharString genderKey;
        genderKey.append("units", localStatus);
        genderKey.append(subKey, localStatus);
        genderKey.append("/gender", localStatus);
        StackUResourceBundle fillIn;
        ures_getByKeyWithFallback(unitsBundle.getAlias(), genderKey.data(), fillIn.getAlias(),
                                  &localStatus);
        outArray[GENDER_INDEX] = ures_getUnicodeString(fillIn.getAlias(), &localStatus);
    }

    // key is "units[Narrow|Short]/<type>/<subtype>".
    CharString key;
    key.append("units", status);
    if (width == UNUM_UNIT_WIDTH_NARROW) {
        key.append("Narrow", status);
    } else if (width == UNUM_UNIT_WIDTH_SHORT) {
        key.append("Short", status);
    }
    key.append(subKey, status);

    // Grab desired case first, if available. Then grab no-case data to fill in
    // the gaps.
    if (width == UNUM_UNIT_WIDTH_FULL_NAME && unitDisplayCase[0] != 0) {
        CharString caseKey;
        caseKey.append(key, status);
        caseKey.append("/case/", status);
        caseKey.append(unitDisplayCase, status);

        // Errors from the case-specific pass are deliberately dropped: the
        // no-case pass below fills whatever is still bogus.
        UErrorCode localStatus = U_ZERO_ERROR;
        // TODO(icu-units#138): our fallback logic is not spec-compliant:
        // lateral fallback should happen before locale fallback. Switch to
        // getInflectedMeasureData after homogenizing data format? Find a unit
        // test case that demonstrates the incorrect fallback logic (via
        // regional variant of an inflected language?)
        ures_getAllChildrenWithFallback(unitsBundle.getAlias(), caseKey.data(), sink, localStatus);
    }

    // TODO(icu-units#138): our fallback logic is not spec-compliant: we
    // check the given case, then go straight to the no-case data. The spec
    // states we should first look for case="nominative". As part of #138,
    // either get the spec changed, or add unit tests that warn us if
    // case="nominative" data differs from no-case data?
    UErrorCode localStatus = U_ZERO_ERROR;
    ures_getAllChildrenWithFallback(unitsBundle.getAlias(), key.data(), sink, localStatus);
    // Only the short width propagates a failure of the pattern lookup; for
    // other widths localStatus is discarded when the function returns.
    if (width == UNUM_UNIT_WIDTH_SHORT) {
        if (U_FAILURE(localStatus)) {
            status = localStatus;
        }
        return;
    }
}
518
519 // NOTE: outArray MUST have a length of at least ARRAY_LENGTH.
getCurrencyLongNameData(const Locale & locale,const CurrencyUnit & currency,UnicodeString * outArray,UErrorCode & status)520 void getCurrencyLongNameData(const Locale &locale, const CurrencyUnit ¤cy, UnicodeString *outArray,
521 UErrorCode &status) {
522 // In ICU4J, this method gets a CurrencyData from CurrencyData.provider.
523 // TODO(ICU4J): Implement this without going through CurrencyData, like in ICU4C?
524 PluralTableSink sink(outArray);
525 // Here all outArray entries are bogus.
526 LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_CURR, locale.getName(), &status));
527 if (U_FAILURE(status)) { return; }
528 ures_getAllChildrenWithFallback(unitsBundle.getAlias(), "CurrencyUnitPatterns", sink, status);
529 if (U_FAILURE(status)) { return; }
530 // Here the outArray[] entries are filled in with any CurrencyUnitPatterns data for locale,
531 // or if there is no CurrencyUnitPatterns data for locale since the patterns all inherited
532 // from the "other" pattern in root (which is true for many locales in CLDR 46), then only
533 // the "other" entry has a currency pattern. So now what we do is: For all valid plural keywords
534 // for the locale, if the corresponding outArray[] entry is bogus, fill it in from the "other"
535 // entry. In the longer run, clients of this should instead consider using CurrencyPluralInfo
536 // (see i18n/unicode/currpinf.h).
537 UErrorCode localStatus = U_ZERO_ERROR;
538 const SharedPluralRules *pr = PluralRules::createSharedInstance(
539 locale, UPLURAL_TYPE_CARDINAL, localStatus);
540 if (U_SUCCESS(localStatus)) {
541 LocalPointer<StringEnumeration> keywords((*pr)->getKeywords(localStatus), localStatus);
542 if (U_SUCCESS(localStatus)) {
543 const char* keyword;
544 while (((keyword = keywords->next(nullptr, localStatus)) != nullptr) && U_SUCCESS(localStatus)) {
545 int32_t index = StandardPlural::indexOrOtherIndexFromString(keyword);
546 if (index != StandardPlural::Form::OTHER && outArray[index].isBogus()) {
547 outArray[index].setTo(outArray[StandardPlural::Form::OTHER]);
548 }
549 }
550 }
551 pr->removeRef();
552 }
553
554 for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) {
555 UnicodeString &pattern = outArray[i];
556 if (pattern.isBogus()) {
557 continue;
558 }
559 int32_t longNameLen = 0;
560 const char16_t *longName = ucurr_getPluralName(
561 currency.getISOCurrency(),
562 locale.getName(),
563 nullptr /* isChoiceFormat */,
564 StandardPlural::getKeyword(static_cast<StandardPlural::Form>(i)),
565 &longNameLen,
566 &status);
567 // Example pattern from data: "{0} {1}"
568 // Example output after find-and-replace: "{0} US dollars"
569 pattern.findAndReplace(UnicodeString(u"{1}"), UnicodeString(longName, longNameLen));
570 }
571 }
572
getCompoundValue(StringPiece compoundKey,const Locale & locale,const UNumberUnitWidth & width,UErrorCode & status)573 UnicodeString getCompoundValue(StringPiece compoundKey,
574 const Locale &locale,
575 const UNumberUnitWidth &width,
576 UErrorCode &status) {
577 LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status));
578 if (U_FAILURE(status)) { return {}; }
579 CharString key;
580 key.append("units", status);
581 if (width == UNUM_UNIT_WIDTH_NARROW) {
582 key.append("Narrow", status);
583 } else if (width == UNUM_UNIT_WIDTH_SHORT) {
584 key.append("Short", status);
585 }
586 key.append("/compound/", status);
587 key.append(compoundKey, status);
588
589 UErrorCode localStatus = status;
590 int32_t len = 0;
591 const char16_t *ptr =
592 ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &len, &localStatus);
593 if (U_FAILURE(localStatus) && width != UNUM_UNIT_WIDTH_SHORT) {
594 // Fall back to short, which contains more compound data
595 key.clear();
596 key.append("unitsShort/compound/", status);
597 key.append(compoundKey, status);
598 ptr = ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &len, &status);
599 } else {
600 status = localStatus;
601 }
602 if (U_FAILURE(status)) {
603 return {};
604 }
605 return UnicodeString(ptr, len);
606 }
607
608 /**
609 * Loads and applies deriveComponent rules from CLDR's grammaticalFeatures.xml.
610 *
611 * Consider a deriveComponent rule that looks like this:
612 *
613 * <deriveComponent feature="case" structure="per" value0="compound" value1="nominative"/>
614 *
615 * Instantiating an instance as follows:
616 *
617 * DerivedComponents d(loc, "case", "per");
618 *
619 * Applying the rule in the XML element above, `d.value0("foo")` will be "foo",
620 * and `d.value1("foo")` will be "nominative".
621 *
622 * The values returned by value0(...) and value1(...) are valid only while the
623 * instance exists. In case of any kind of failure, value0(...) and value1(...)
624 * will return "".
625 */
626 class DerivedComponents {
627 public:
628 /**
629 * Constructor.
630 *
631 * The feature and structure parameters must be null-terminated. The string
632 * referenced by compoundValue must exist for longer than the
633 * DerivedComponents instance.
634 */
DerivedComponents(const Locale & locale,const char * feature,const char * structure)635 DerivedComponents(const Locale &locale, const char *feature, const char *structure) {
636 StackUResourceBundle derivationsBundle, stackBundle;
637 ures_openDirectFillIn(derivationsBundle.getAlias(), nullptr, "grammaticalFeatures", &status);
638 ures_getByKey(derivationsBundle.getAlias(), "grammaticalData", derivationsBundle.getAlias(),
639 &status);
640 ures_getByKey(derivationsBundle.getAlias(), "derivations", derivationsBundle.getAlias(),
641 &status);
642 if (U_FAILURE(status)) {
643 return;
644 }
645 UErrorCode localStatus = U_ZERO_ERROR;
646 // TODO(icu-units#28): use standard normal locale resolution algorithms
647 // rather than just grabbing language:
648 ures_getByKey(derivationsBundle.getAlias(), locale.getLanguage(), stackBundle.getAlias(),
649 &localStatus);
650 // TODO(icu-units#28):
651 // - code currently assumes if the locale exists, the rules are there -
652 // instead of falling back to root when the requested rule is missing.
653 // - investigate ures.h functions, see if one that uses res_findResource()
654 // might be better (or use res_findResource directly), or maybe help
655 // improve ures documentation to guide function selection?
656 if (localStatus == U_MISSING_RESOURCE_ERROR) {
657 ures_getByKey(derivationsBundle.getAlias(), "root", stackBundle.getAlias(), &status);
658 } else {
659 status = localStatus;
660 }
661 ures_getByKey(stackBundle.getAlias(), "component", stackBundle.getAlias(), &status);
662 ures_getByKey(stackBundle.getAlias(), feature, stackBundle.getAlias(), &status);
663 ures_getByKey(stackBundle.getAlias(), structure, stackBundle.getAlias(), &status);
664 UnicodeString val0 = ures_getUnicodeStringByIndex(stackBundle.getAlias(), 0, &status);
665 UnicodeString val1 = ures_getUnicodeStringByIndex(stackBundle.getAlias(), 1, &status);
666 if (U_SUCCESS(status)) {
667 if (val0.compare(UnicodeString(u"compound")) == 0) {
668 compound0_ = true;
669 } else {
670 compound0_ = false;
671 value0_.appendInvariantChars(val0, status);
672 }
673 if (val1.compare(UnicodeString(u"compound")) == 0) {
674 compound1_ = true;
675 } else {
676 compound1_ = false;
677 value1_.appendInvariantChars(val1, status);
678 }
679 }
680 }
681
682 // Returns a StringPiece that is only valid as long as the instance exists.
value0(const StringPiece compoundValue) const683 StringPiece value0(const StringPiece compoundValue) const {
684 return compound0_ ? compoundValue : value0_.toStringPiece();
685 }
686
687 // Returns a StringPiece that is only valid as long as the instance exists.
value1(const StringPiece compoundValue) const688 StringPiece value1(const StringPiece compoundValue) const {
689 return compound1_ ? compoundValue : value1_.toStringPiece();
690 }
691
692 // Returns a char* that is only valid as long as the instance exists.
value0(const char * compoundValue) const693 const char *value0(const char *compoundValue) const {
694 return compound0_ ? compoundValue : value0_.data();
695 }
696
697 // Returns a char* that is only valid as long as the instance exists.
value1(const char * compoundValue) const698 const char *value1(const char *compoundValue) const {
699 return compound1_ ? compoundValue : value1_.data();
700 }
701
702 private:
703 UErrorCode status = U_ZERO_ERROR;
704
705 // Holds strings referred to by value0 and value1;
706 bool compound0_ = false, compound1_ = false;
707 CharString value0_, value1_;
708 };
709
710 // TODO(icu-units#28): test somehow? Associate with an ICU ticket for adding
711 // testsuite support for testing with synthetic data?
712 /**
713 * Loads and returns the value in rules that look like these:
714 *
715 * <deriveCompound feature="gender" structure="per" value="0"/>
716 * <deriveCompound feature="gender" structure="times" value="1"/>
717 *
718 * Currently a fake example, but spec compliant:
719 * <deriveCompound feature="gender" structure="power" value="feminine"/>
720 *
721 * NOTE: If U_FAILURE(status), returns an empty string.
722 */
723 UnicodeString
getDeriveCompoundRule(Locale locale,const char * feature,const char * structure,UErrorCode & status)724 getDeriveCompoundRule(Locale locale, const char *feature, const char *structure, UErrorCode &status) {
725 StackUResourceBundle derivationsBundle, stackBundle;
726 ures_openDirectFillIn(derivationsBundle.getAlias(), nullptr, "grammaticalFeatures", &status);
727 ures_getByKey(derivationsBundle.getAlias(), "grammaticalData", derivationsBundle.getAlias(),
728 &status);
729 ures_getByKey(derivationsBundle.getAlias(), "derivations", derivationsBundle.getAlias(), &status);
730 // TODO: use standard normal locale resolution algorithms rather than just grabbing language:
731 ures_getByKey(derivationsBundle.getAlias(), locale.getLanguage(), stackBundle.getAlias(), &status);
732 // TODO:
733 // - code currently assumes if the locale exists, the rules are there -
734 // instead of falling back to root when the requested rule is missing.
735 // - investigate ures.h functions, see if one that uses res_findResource()
736 // might be better (or use res_findResource directly), or maybe help
737 // improve ures documentation to guide function selection?
738 if (status == U_MISSING_RESOURCE_ERROR) {
739 status = U_ZERO_ERROR;
740 ures_getByKey(derivationsBundle.getAlias(), "root", stackBundle.getAlias(), &status);
741 }
742 ures_getByKey(stackBundle.getAlias(), "compound", stackBundle.getAlias(), &status);
743 ures_getByKey(stackBundle.getAlias(), feature, stackBundle.getAlias(), &status);
744 UnicodeString uVal = ures_getUnicodeStringByKey(stackBundle.getAlias(), structure, &status);
745 if (U_FAILURE(status)) {
746 return {};
747 }
748 U_ASSERT(!uVal.isBogus());
749 return uVal;
750 }
751
752 // Returns the gender string for structures following these rules:
753 //
754 // <deriveCompound feature="gender" structure="per" value="0"/>
755 // <deriveCompound feature="gender" structure="times" value="1"/>
756 //
757 // Fake example:
758 // <deriveCompound feature="gender" structure="power" value="feminine"/>
759 //
760 // data0 and data1 should be pattern arrays (UnicodeString[ARRAY_SIZE]) that
761 // correspond to value="0" and value="1".
762 //
763 // Pass a nullptr to data1 if the structure has no concept of value="1" (e.g.
764 // "prefix" doesn't).
getDerivedGender(Locale locale,const char * structure,UnicodeString * data0,UnicodeString * data1,UErrorCode & status)765 UnicodeString getDerivedGender(Locale locale,
766 const char *structure,
767 UnicodeString *data0,
768 UnicodeString *data1,
769 UErrorCode &status) {
770 UnicodeString val = getDeriveCompoundRule(locale, "gender", structure, status);
771 if (val.length() == 1) {
772 switch (val[0]) {
773 case u'0':
774 return data0[GENDER_INDEX];
775 case u'1':
776 if (data1 == nullptr) {
777 return {};
778 }
779 return data1[GENDER_INDEX];
780 }
781 }
782 return val;
783 }
784
785 ////////////////////////
786 /// END DATA LOADING ///
787 ////////////////////////
788
789 // TODO: promote this somewhere? It's based on patternprops.cpp' trimWhitespace
trimSpaceChars(const char16_t * s,int32_t & length)790 const char16_t *trimSpaceChars(const char16_t *s, int32_t &length) {
791 if (length <= 0 || (!u_isJavaSpaceChar(s[0]) && !u_isJavaSpaceChar(s[length - 1]))) {
792 return s;
793 }
794 int32_t start = 0;
795 int32_t limit = length;
796 while (start < limit && u_isJavaSpaceChar(s[start])) {
797 ++start;
798 }
799 if (start < limit) {
800 // There is non-white space at start; we will not move limit below that,
801 // so we need not test start<limit in the loop.
802 while (u_isJavaSpaceChar(s[limit - 1])) {
803 --limit;
804 }
805 }
806 length = limit - start;
807 return s + start;
808 }
809
810 /**
811 * Calculates the gender of an arbitrary unit: this is the *second*
812 * implementation of an algorithm to do this:
813 *
814 * Gender is also calculated in "processPatternTimes": that code path is "bottom
815 * up", loading the gender for every component of a compound unit (at the same
816 * time as loading the Long Names formatting patterns), even if the gender is
817 * unneeded, then combining the single units' genders into the compound unit's
818 * gender, according to the rules. This algorithm does a lazier "top-down"
819 * evaluation, starting with the compound unit, calculating which single unit's
820 * gender is needed by breaking it down according to the rules, and then loading
821 * only the gender of the one single unit who's gender is needed.
822 *
823 * For future refactorings:
824 * 1. we could drop processPatternTimes' gender calculation and just call this
825 * function: for UNUM_UNIT_WIDTH_FULL_NAME, the unit gender is in the very
826 * same table as the formatting patterns, so loading it then may be
827 * efficient. For other unit widths however, it needs to be explicitly looked
828 * up anyway.
829 * 2. alternatively, if CLDR is providing all the genders we need such that we
830 * don't need to calculate them in ICU anymore, we could drop this function
831 * and keep only processPatternTimes' calculation. (And optimise it a bit?)
832 *
833 * @param locale The desired locale.
834 * @param unit The measure unit to calculate the gender for.
835 * @return The gender string for the unit, or an empty string if unknown or
836 * ungendered.
837 */
calculateGenderForUnit(const Locale & locale,const MeasureUnit & unit,UErrorCode & status)838 UnicodeString calculateGenderForUnit(const Locale &locale, const MeasureUnit &unit, UErrorCode &status) {
839 MeasureUnitImpl impl;
840 const MeasureUnitImpl& mui = MeasureUnitImpl::forMeasureUnit(unit, impl, status);
841 int32_t singleUnitIndex = 0;
842 if (mui.complexity == UMEASURE_UNIT_COMPOUND) {
843 int32_t startSlice = 0;
844 // inclusive
845 int32_t endSlice = mui.singleUnits.length()-1;
846 U_ASSERT(endSlice > 0); // Else it would not be COMPOUND
847 if (mui.singleUnits[endSlice]->dimensionality < 0) {
848 // We have a -per- construct
849 UnicodeString perRule = getDeriveCompoundRule(locale, "gender", "per", status);
850 if (perRule.length() != 1) {
851 // Fixed gender for -per- units
852 return perRule;
853 }
854 if (perRule[0] == u'1') {
855 // Find the start of the denominator. We already know there is one.
856 while (mui.singleUnits[startSlice]->dimensionality >= 0) {
857 startSlice++;
858 }
859 } else {
860 // Find the end of the numerator
861 while (endSlice >= 0 && mui.singleUnits[endSlice]->dimensionality < 0) {
862 endSlice--;
863 }
864 if (endSlice < 0) {
865 // We have only a denominator, e.g. "per-second".
866 // TODO(icu-units#28): find out what gender to use in the
867 // absence of a first value - mentioned in CLDR-14253.
868 return {};
869 }
870 }
871 }
872 if (endSlice > startSlice) {
873 // We have a -times- construct
874 UnicodeString timesRule = getDeriveCompoundRule(locale, "gender", "times", status);
875 if (timesRule.length() != 1) {
876 // Fixed gender for -times- units
877 return timesRule;
878 }
879 if (timesRule[0] == u'0') {
880 endSlice = startSlice;
881 } else {
882 // We assume timesRule[0] == u'1'
883 startSlice = endSlice;
884 }
885 }
886 U_ASSERT(startSlice == endSlice);
887 singleUnitIndex = startSlice;
888 } else if (mui.complexity == UMEASURE_UNIT_MIXED) {
889 status = U_INTERNAL_PROGRAM_ERROR;
890 return {};
891 } else {
892 U_ASSERT(mui.complexity == UMEASURE_UNIT_SINGLE);
893 U_ASSERT(mui.singleUnits.length() == 1);
894 }
895
896 // Now we know which singleUnit's gender we want
897 const SingleUnitImpl *singleUnit = mui.singleUnits[singleUnitIndex];
898 // Check for any power-prefix gender override:
899 if (std::abs(singleUnit->dimensionality) != 1) {
900 UnicodeString powerRule = getDeriveCompoundRule(locale, "gender", "power", status);
901 if (powerRule.length() != 1) {
902 // Fixed gender for -powN- units
903 return powerRule;
904 }
905 // powerRule[0] == u'0'; u'1' not currently in spec.
906 }
907 // Check for any SI and binary prefix gender override:
908 if (std::abs(singleUnit->dimensionality) != 1) {
909 UnicodeString prefixRule = getDeriveCompoundRule(locale, "gender", "prefix", status);
910 if (prefixRule.length() != 1) {
911 // Fixed gender for -powN- units
912 return prefixRule;
913 }
914 // prefixRule[0] == u'0'; u'1' not currently in spec.
915 }
916 // Now we've boiled it down to the gender of one simple unit identifier:
917 return getGenderForBuiltin(locale, MeasureUnit::forIdentifier(singleUnit->getSimpleUnitID(), status),
918 status);
919 }
920
maybeCalculateGender(const Locale & locale,const MeasureUnit & unitRef,UnicodeString * outArray,UErrorCode & status)921 void maybeCalculateGender(const Locale &locale,
922 const MeasureUnit &unitRef,
923 UnicodeString *outArray,
924 UErrorCode &status) {
925 if (outArray[GENDER_INDEX].isBogus()) {
926 UnicodeString meterGender = getGenderForBuiltin(locale, MeasureUnit::getMeter(), status);
927 if (meterGender.isEmpty()) {
928 // No gender for meter: assume ungendered language
929 return;
930 }
931 // We have a gendered language, but are lacking gender for unitRef.
932 outArray[GENDER_INDEX] = calculateGenderForUnit(locale, unitRef, status);
933 }
934 }
935
936 } // namespace
937
forMeasureUnit(const Locale & loc,const MeasureUnit & unitRef,const UNumberUnitWidth & width,const char * unitDisplayCase,const PluralRules * rules,const MicroPropsGenerator * parent,LongNameHandler * fillIn,UErrorCode & status)938 void LongNameHandler::forMeasureUnit(const Locale &loc,
939 const MeasureUnit &unitRef,
940 const UNumberUnitWidth &width,
941 const char *unitDisplayCase,
942 const PluralRules *rules,
943 const MicroPropsGenerator *parent,
944 LongNameHandler *fillIn,
945 UErrorCode &status) {
946 // From https://unicode.org/reports/tr35/tr35-general.html#compound-units -
947 // Points 1 and 2 are mostly handled by MeasureUnit:
948 //
949 // 1. If the unitId is empty or invalid, fail
950 // 2. Put the unitId into normalized order
951 U_ASSERT(fillIn != nullptr);
952
953 if (uprv_strcmp(unitRef.getType(), "") != 0) {
954 // Handling built-in units:
955 //
956 // 3. Set result to be getValue(unitId with length, pluralCategory, caseVariant)
957 // - If result is not empty, return it
958 UnicodeString simpleFormats[ARRAY_LENGTH];
959 getMeasureData(loc, unitRef, width, unitDisplayCase, simpleFormats, status);
960 maybeCalculateGender(loc, unitRef, simpleFormats, status);
961 if (U_FAILURE(status)) {
962 return;
963 }
964 fillIn->rules = rules;
965 fillIn->parent = parent;
966 fillIn->simpleFormatsToModifiers(simpleFormats,
967 {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status);
968 if (!simpleFormats[GENDER_INDEX].isBogus()) {
969 fillIn->gender = getGenderString(simpleFormats[GENDER_INDEX], status);
970 }
971 return;
972
973 // TODO(icu-units#145): figure out why this causes a failure in
974 // format/MeasureFormatTest/TestIndividualPluralFallback and other
975 // tests, when it should have been an alternative for the lines above:
976
977 // forArbitraryUnit(loc, unitRef, width, unitDisplayCase, fillIn, status);
978 // fillIn->rules = rules;
979 // fillIn->parent = parent;
980 // return;
981 } else {
982 // Check if it is a MeasureUnit this constructor handles: this
983 // constructor does not handle mixed units
984 U_ASSERT(unitRef.getComplexity(status) != UMEASURE_UNIT_MIXED);
985 forArbitraryUnit(loc, unitRef, width, unitDisplayCase, fillIn, status);
986 fillIn->rules = rules;
987 fillIn->parent = parent;
988 return;
989 }
990 }
991
// Builds the plural-form modifiers and the gender for unitRef by splitting it
// into a numerator and a denominator, running processPatternTimes() on each
// part and combining the results. The outcome is stored in fillIn.
//
// Returns without doing anything if status is already a failure. Sets status
// to U_INTERNAL_PROGRAM_ERROR if fillIn is nullptr. This function does not
// assign fillIn->rules or fillIn->parent.
void LongNameHandler::forArbitraryUnit(const Locale &loc,
                                       const MeasureUnit &unitRef,
                                       const UNumberUnitWidth &width,
                                       const char *unitDisplayCase,
                                       LongNameHandler *fillIn,
                                       UErrorCode &status) {
    if (U_FAILURE(status)) {
        return;
    }
    if (fillIn == nullptr) {
        status = U_INTERNAL_PROGRAM_ERROR;
        return;
    }

    // Numbered list items are from the algorithms at
    // https://unicode.org/reports/tr35/tr35-general.html#compound-units:
    //
    // 4. Divide the unitId into numerator (the part before the "-per-") and
    //    denominator (the part after the "-per-"). If both are empty, fail
    MeasureUnitImpl unit;
    MeasureUnitImpl perUnit;
    {
        MeasureUnitImpl fullUnit = MeasureUnitImpl::forMeasureUnitMaybeCopy(unitRef, status);
        if (U_FAILURE(status)) {
            return;
        }
        for (int32_t i = 0; i < fullUnit.singleUnits.length(); i++) {
            SingleUnitImpl *subUnit = fullUnit.singleUnits[i];
            if (subUnit->dimensionality > 0) {
                unit.appendSingleUnit(*subUnit, status);
            } else {
                // Flip the sign before appending, so that perUnit holds its
                // single units with non-negative dimensionality.
                subUnit->dimensionality *= -1;
                perUnit.appendSingleUnit(*subUnit, status);
            }
        }
    }

    // TODO(icu-units#28): check placeholder logic, see if it needs to be
    // present here instead of only in processPatternTimes:
    //
    // 5. Set both globalPlaceholder and globalPlaceholderPosition to be empty

    DerivedComponents derivedPerCases(loc, "case", "per");

    // 6. numeratorUnitString
    UnicodeString numeratorUnitData[ARRAY_LENGTH];
    processPatternTimes(std::move(unit), loc, width, derivedPerCases.value0(unitDisplayCase),
                        numeratorUnitData, status);

    // 7. denominatorUnitString
    UnicodeString denominatorUnitData[ARRAY_LENGTH];
    processPatternTimes(std::move(perUnit), loc, width, derivedPerCases.value1(unitDisplayCase),
                        denominatorUnitData, status);

    // TODO(icu-units#139):
    // - implement DerivedComponents for "plural/times" and "plural/power":
    //   French has different rules, we'll be producing the wrong results
    //   currently. (Prove via tests!)
    // - implement DerivedComponents for "plural/per", "plural/prefix",
    //   "case/times", "case/power", and "case/prefix" - although they're
    //   currently hardcoded. Languages with different rules are surely on the
    //   way.
    //
    // Currently we only use "case/per", "plural/times", "case/times", and
    // "case/power".
    //
    // This may have impact on multiSimpleFormatsToModifiers(...) below too?
    // These rules are currently (ICU 69) all the same and hard-coded below.
    UnicodeString perUnitPattern;
    if (!denominatorUnitData[PER_INDEX].isBogus()) {
        // If we have no denominator, we obtain the empty string:
        // processPatternTimes returns early for an empty unit and leaves the
        // freshly constructed (empty, non-bogus) array entries untouched.
        perUnitPattern = denominatorUnitData[PER_INDEX];
    } else {
        // 8. Set perPattern to be getValue([per], locale, length)
        UnicodeString rawPerUnitFormat = getCompoundValue("per", loc, width, status);
        // rawPerUnitFormat is something like "{0} per {1}"; we need to substitute in the secondary unit.
        SimpleFormatter perPatternFormatter(rawPerUnitFormat, 2, 2, status);
        if (U_FAILURE(status)) {
            return;
        }
        // Plural and placeholder handling for 7. denominatorUnitString:
        // TODO(icu-units#139): hardcoded:
        // <deriveComponent feature="plural" structure="per" value0="compound" value1="one"/>
        UnicodeString denominatorFormat =
            getWithPlural(denominatorUnitData, StandardPlural::Form::ONE, status);
        // Some "one" pattern may not contain "{0}". For example in "ar" or "ne" locale.
        SimpleFormatter denominatorFormatter(denominatorFormat, 0, 1, status);
        if (U_FAILURE(status)) {
            return;
        }
        UnicodeString denominatorPattern = denominatorFormatter.getTextWithNoArguments();
        int32_t trimmedLen = denominatorPattern.length();
        const char16_t *trimmed = trimSpaceChars(denominatorPattern.getBuffer(), trimmedLen);
        // NOTE(review): trimmed points into denominatorPattern's buffer, and
        // this constructor form presumably aliases that memory rather than
        // copying it - confirm. If so, denominatorPattern must stay alive for
        // as long as denominatorString is used.
        UnicodeString denominatorString(false, trimmed, trimmedLen);
        // 9. If the denominatorString is empty, set result to
        //    [numeratorString], otherwise set result to format(perPattern,
        //    numeratorString, denominatorString)
        //
        // TODO(icu-units#28): Why does UnicodeString need to be explicit in the
        // following line?
        perPatternFormatter.format(UnicodeString(u"{0}"), denominatorString, perUnitPattern, status);
        if (U_FAILURE(status)) {
            return;
        }
    }
    // An empty perUnitPattern means the numerator patterns are used on their
    // own; otherwise each numerator pattern is combined with perUnitPattern.
    if (perUnitPattern.length() == 0) {
        fillIn->simpleFormatsToModifiers(numeratorUnitData,
                                         {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status);
    } else {
        fillIn->multiSimpleFormatsToModifiers(numeratorUnitData, perUnitPattern,
                                              {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status);
    }

    // Gender
    //
    // TODO(icu-units#28): find out what gender to use in the absence of a first
    // value - e.g. what's the gender of "per-second"? Mentioned in CLDR-14253.
    //
    // gender/per deriveCompound rules don't say:
    // <deriveCompound feature="gender" structure="per" value="0"/> <!-- gender(gram-per-meter) ← gender(gram) -->
    fillIn->gender = getGenderString(
        getDerivedGender(loc, "per", numeratorUnitData, denominatorUnitData, status), status);
}
1115
processPatternTimes(MeasureUnitImpl && productUnit,Locale loc,const UNumberUnitWidth & width,const char * caseVariant,UnicodeString * outArray,UErrorCode & status)1116 void LongNameHandler::processPatternTimes(MeasureUnitImpl &&productUnit,
1117 Locale loc,
1118 const UNumberUnitWidth &width,
1119 const char *caseVariant,
1120 UnicodeString *outArray,
1121 UErrorCode &status) {
1122 if (U_FAILURE(status)) {
1123 return;
1124 }
1125 if (productUnit.complexity == UMEASURE_UNIT_MIXED) {
1126 // These are handled by MixedUnitLongNameHandler
1127 status = U_UNSUPPORTED_ERROR;
1128 return;
1129 }
1130
1131 #if U_DEBUG
1132 for (int32_t pluralIndex = 0; pluralIndex < ARRAY_LENGTH; pluralIndex++) {
1133 U_ASSERT(outArray[pluralIndex].length() == 0);
1134 U_ASSERT(!outArray[pluralIndex].isBogus());
1135 }
1136 #endif
1137
1138 if (productUnit.identifier.isEmpty()) {
1139 // TODO(icu-units#28): consider when serialize should be called.
1140 // identifier might also be empty for MeasureUnit().
1141 productUnit.serialize(status);
1142 }
1143 if (U_FAILURE(status)) {
1144 return;
1145 }
1146 if (productUnit.identifier.length() == 0) {
1147 // MeasureUnit(): no units: return empty strings.
1148 return;
1149 }
1150
1151 MeasureUnit builtinUnit;
1152 if (MeasureUnit::findBySubType(productUnit.identifier.toStringPiece(), &builtinUnit)) {
1153 // TODO(icu-units#145): spec doesn't cover builtin-per-builtin, it
1154 // breaks them all down. Do we want to drop this?
1155 // - findBySubType isn't super efficient, if we skip it and go to basic
1156 // singles, we don't have to construct MeasureUnit's anymore.
1157 // - Check all the existing unit tests that fail without this: is it due
1158 // to incorrect fallback via getMeasureData?
1159 // - Do those unit tests cover this code path representatively?
1160 if (builtinUnit != MeasureUnit()) {
1161 getMeasureData(loc, builtinUnit, width, caseVariant, outArray, status);
1162 maybeCalculateGender(loc, builtinUnit, outArray, status);
1163 }
1164 return;
1165 }
1166
1167 // 2. Set timesPattern to be getValue(times, locale, length)
1168 UnicodeString timesPattern = getCompoundValue("times", loc, width, status);
1169 SimpleFormatter timesPatternFormatter(timesPattern, 2, 2, status);
1170 if (U_FAILURE(status)) {
1171 return;
1172 }
1173
1174 PlaceholderPosition globalPlaceholder[ARRAY_LENGTH];
1175 char16_t globalJoinerChar = 0;
1176 // Numbered list items are from the algorithms at
1177 // https://unicode.org/reports/tr35/tr35-general.html#compound-units:
1178 //
1179 // pattern(...) point 5:
1180 // - Set both globalPlaceholder and globalPlaceholderPosition to be empty
1181 //
1182 // 3. Set result to be empty
1183 for (int32_t pluralIndex = 0; pluralIndex < ARRAY_LENGTH; pluralIndex++) {
1184 // Initial state: empty string pattern, via all falling back to OTHER:
1185 if (pluralIndex == StandardPlural::Form::OTHER) {
1186 outArray[pluralIndex].remove();
1187 } else {
1188 outArray[pluralIndex].setToBogus();
1189 }
1190 globalPlaceholder[pluralIndex] = PH_EMPTY;
1191 }
1192
1193 // Empty string represents "compound" (propagate the plural form).
1194 const char *pluralCategory = "";
1195 DerivedComponents derivedTimesPlurals(loc, "plural", "times");
1196 DerivedComponents derivedTimesCases(loc, "case", "times");
1197 DerivedComponents derivedPowerCases(loc, "case", "power");
1198
1199 // 4. For each single_unit in product_unit
1200 for (int32_t singleUnitIndex = 0; singleUnitIndex < productUnit.singleUnits.length();
1201 singleUnitIndex++) {
1202 SingleUnitImpl *singleUnit = productUnit.singleUnits[singleUnitIndex];
1203 const char *singlePluralCategory;
1204 const char *singleCaseVariant;
1205 // TODO(icu-units#28): ensure we have unit tests that change/fail if we
1206 // assign incorrect case variants here:
1207 if (singleUnitIndex < productUnit.singleUnits.length() - 1) {
1208 // 4.1. If hasMultiple
1209 singlePluralCategory = derivedTimesPlurals.value0(pluralCategory);
1210 singleCaseVariant = derivedTimesCases.value0(caseVariant);
1211 pluralCategory = derivedTimesPlurals.value1(pluralCategory);
1212 caseVariant = derivedTimesCases.value1(caseVariant);
1213 } else {
1214 singlePluralCategory = derivedTimesPlurals.value1(pluralCategory);
1215 singleCaseVariant = derivedTimesCases.value1(caseVariant);
1216 }
1217
1218 // 4.2. Get the gender of that single_unit
1219 MeasureUnit simpleUnit;
1220 if (!MeasureUnit::findBySubType(singleUnit->getSimpleUnitID(), &simpleUnit)) {
1221 // Ideally all simple units should be known, but they're not:
1222 // 100-kilometer is internally treated as a simple unit, but it is
1223 // not a built-in unit and does not have formatting data in CLDR 39.
1224 //
1225 // TODO(icu-units#28): test (desirable) invariants in unit tests.
1226 status = U_UNSUPPORTED_ERROR;
1227 return;
1228 }
1229 const char *gender = getGenderString(getGenderForBuiltin(loc, simpleUnit, status), status);
1230
1231 // 4.3. If singleUnit starts with a dimensionality_prefix, such as 'square-'
1232 U_ASSERT(singleUnit->dimensionality > 0);
1233 int32_t dimensionality = singleUnit->dimensionality;
1234 UnicodeString dimensionalityPrefixPatterns[ARRAY_LENGTH];
1235 if (dimensionality != 1) {
1236 // 4.3.1. set dimensionalityPrefixPattern to be
1237 // getValue(that dimensionality_prefix, locale, length, singlePluralCategory, singleCaseVariant, gender),
1238 // such as "{0} kwadratowym"
1239 CharString dimensionalityKey("compound/power", status);
1240 dimensionalityKey.appendNumber(dimensionality, status);
1241 getInflectedMeasureData(dimensionalityKey.toStringPiece(), loc, width, gender,
1242 singleCaseVariant, dimensionalityPrefixPatterns, status);
1243 if (U_FAILURE(status)) {
1244 // At the time of writing, only pow2 and pow3 are supported.
1245 // Attempting to format other powers results in a
1246 // U_RESOURCE_TYPE_MISMATCH. We convert the error if we
1247 // understand it:
1248 if (status == U_RESOURCE_TYPE_MISMATCH && dimensionality > 3) {
1249 status = U_UNSUPPORTED_ERROR;
1250 }
1251 return;
1252 }
1253
1254 // TODO(icu-units#139):
1255 // 4.3.2. set singlePluralCategory to be power0(singlePluralCategory)
1256
1257 // 4.3.3. set singleCaseVariant to be power0(singleCaseVariant)
1258 singleCaseVariant = derivedPowerCases.value0(singleCaseVariant);
1259 // 4.3.4. remove the dimensionality_prefix from singleUnit
1260 singleUnit->dimensionality = 1;
1261 }
1262
1263 // 4.4. if singleUnit starts with an si_prefix, such as 'centi'
1264 UMeasurePrefix prefix = singleUnit->unitPrefix;
1265 UnicodeString prefixPattern;
1266 if (prefix != UMEASURE_PREFIX_ONE) {
1267 // 4.4.1. set siPrefixPattern to be getValue(that si_prefix, locale,
1268 // length), such as "centy{0}"
1269 CharString prefixKey;
1270 // prefixKey looks like "1024p3" or "10p-2":
1271 prefixKey.appendNumber(umeas_getPrefixBase(prefix), status);
1272 prefixKey.append('p', status);
1273 prefixKey.appendNumber(umeas_getPrefixPower(prefix), status);
1274 // Contains a pattern like "centy{0}".
1275 prefixPattern = getCompoundValue(prefixKey.toStringPiece(), loc, width, status);
1276
1277 // 4.4.2. set singlePluralCategory to be prefix0(singlePluralCategory)
1278 //
1279 // TODO(icu-units#139): that refers to these rules:
1280 // <deriveComponent feature="plural" structure="prefix" value0="one" value1="compound"/>
1281 // though I'm not sure what other value they might end up having.
1282 //
1283 // 4.4.3. set singleCaseVariant to be prefix0(singleCaseVariant)
1284 //
1285 // TODO(icu-units#139): that refers to:
1286 // <deriveComponent feature="case" structure="prefix" value0="nominative"
1287 // value1="compound"/> but the prefix (value0) doesn't have case, the rest simply
1288 // propagates.
1289
1290 // 4.4.4. remove the si_prefix from singleUnit
1291 singleUnit->unitPrefix = UMEASURE_PREFIX_ONE;
1292 }
1293
1294 // 4.5. Set corePattern to be the getValue(singleUnit, locale, length,
1295 // singlePluralCategory, singleCaseVariant), such as "{0} metrem"
1296 UnicodeString singleUnitArray[ARRAY_LENGTH];
1297 // At this point we are left with a Simple Unit:
1298 U_ASSERT(uprv_strcmp(singleUnit->build(status).getIdentifier(), singleUnit->getSimpleUnitID()) ==
1299 0);
1300 getMeasureData(loc, singleUnit->build(status), width, singleCaseVariant, singleUnitArray,
1301 status);
1302 if (U_FAILURE(status)) {
1303 // Shouldn't happen if we have data for all single units
1304 return;
1305 }
1306
1307 // Calculate output gender
1308 if (!singleUnitArray[GENDER_INDEX].isBogus()) {
1309 U_ASSERT(!singleUnitArray[GENDER_INDEX].isEmpty());
1310 UnicodeString uVal;
1311
1312 if (prefix != UMEASURE_PREFIX_ONE) {
1313 singleUnitArray[GENDER_INDEX] =
1314 getDerivedGender(loc, "prefix", singleUnitArray, nullptr, status);
1315 }
1316
1317 if (dimensionality != 1) {
1318 singleUnitArray[GENDER_INDEX] =
1319 getDerivedGender(loc, "power", singleUnitArray, nullptr, status);
1320 }
1321
1322 UnicodeString timesGenderRule = getDeriveCompoundRule(loc, "gender", "times", status);
1323 if (timesGenderRule.length() == 1) {
1324 switch (timesGenderRule[0]) {
1325 case u'0':
1326 if (singleUnitIndex == 0) {
1327 U_ASSERT(outArray[GENDER_INDEX].isBogus());
1328 outArray[GENDER_INDEX] = singleUnitArray[GENDER_INDEX];
1329 }
1330 break;
1331 case u'1':
1332 if (singleUnitIndex == productUnit.singleUnits.length() - 1) {
1333 U_ASSERT(outArray[GENDER_INDEX].isBogus());
1334 outArray[GENDER_INDEX] = singleUnitArray[GENDER_INDEX];
1335 }
1336 }
1337 } else {
1338 if (outArray[GENDER_INDEX].isBogus()) {
1339 outArray[GENDER_INDEX] = timesGenderRule;
1340 }
1341 }
1342 }
1343
1344 // Calculate resulting patterns for each plural form
1345 for (int32_t pluralIndex = 0; pluralIndex < StandardPlural::Form::COUNT; pluralIndex++) {
1346 StandardPlural::Form plural = static_cast<StandardPlural::Form>(pluralIndex);
1347
1348 // singleUnitArray[pluralIndex] looks something like "{0} Meter"
1349 if (outArray[pluralIndex].isBogus()) {
1350 if (singleUnitArray[pluralIndex].isBogus()) {
1351 // Let the usual plural fallback mechanism take care of this
1352 // plural form
1353 continue;
1354 } else {
1355 // Since our singleUnit can have a plural form that outArray
1356 // doesn't yet have (relying on fallback to OTHER), we start
1357 // by grabbing it with the normal plural fallback mechanism
1358 outArray[pluralIndex] = getWithPlural(outArray, plural, status);
1359 if (U_FAILURE(status)) {
1360 return;
1361 }
1362 }
1363 }
1364
1365 if (uprv_strcmp(singlePluralCategory, "") != 0) {
1366 plural = static_cast<StandardPlural::Form>(getIndex(singlePluralCategory, status));
1367 }
1368
1369 // 4.6. Extract(corePattern, coreUnit, placeholder, placeholderPosition) from that pattern.
1370 UnicodeString coreUnit;
1371 PlaceholderPosition placeholderPosition;
1372 char16_t joinerChar;
1373 extractCorePattern(getWithPlural(singleUnitArray, plural, status), coreUnit,
1374 placeholderPosition, joinerChar);
1375
1376 // 4.7 If the position is middle, then fail
1377 if (placeholderPosition == PH_MIDDLE) {
1378 status = U_UNSUPPORTED_ERROR;
1379 return;
1380 }
1381
1382 // 4.8. If globalPlaceholder is empty
1383 if (globalPlaceholder[pluralIndex] == PH_EMPTY) {
1384 globalPlaceholder[pluralIndex] = placeholderPosition;
1385 globalJoinerChar = joinerChar;
1386 } else {
1387 // Expect all units involved to have the same placeholder position
1388 U_ASSERT(globalPlaceholder[pluralIndex] == placeholderPosition);
1389 // TODO(icu-units#28): Do we want to add a unit test that checks
1390 // for consistent joiner chars? Probably not, given how
1391 // inconsistent they are. File a CLDR ticket with examples?
1392 }
1393 // Now coreUnit would be just "Meter"
1394
1395 // 4.9. If siPrefixPattern is not empty
1396 if (prefix != UMEASURE_PREFIX_ONE) {
1397 SimpleFormatter prefixCompiled(prefixPattern, 1, 1, status);
1398 if (U_FAILURE(status)) {
1399 return;
1400 }
1401
1402 // 4.9.1. Set coreUnit to be the combineLowercasing(locale, length, siPrefixPattern,
1403 // coreUnit)
1404 UnicodeString tmp;
1405 // combineLowercasing(locale, length, prefixPattern, coreUnit)
1406 //
1407 // TODO(icu-units#28): run this only if prefixPattern does not
1408 // contain space characters - do languages "as", "bn", "hi",
1409 // "kk", etc have concepts of upper and lower case?:
1410 if (width == UNUM_UNIT_WIDTH_FULL_NAME) {
1411 coreUnit.toLower(loc);
1412 }
1413 prefixCompiled.format(coreUnit, tmp, status);
1414 if (U_FAILURE(status)) {
1415 return;
1416 }
1417 coreUnit = tmp;
1418 }
1419
1420 // 4.10. If dimensionalityPrefixPattern is not empty
1421 if (dimensionality != 1) {
1422 SimpleFormatter dimensionalityCompiled(
1423 getWithPlural(dimensionalityPrefixPatterns, plural, status), 1, 1, status);
1424 if (U_FAILURE(status)) {
1425 return;
1426 }
1427
1428 // 4.10.1. Set coreUnit to be the combineLowercasing(locale, length,
1429 // dimensionalityPrefixPattern, coreUnit)
1430 UnicodeString tmp;
1431 // combineLowercasing(locale, length, prefixPattern, coreUnit)
1432 //
1433 // TODO(icu-units#28): run this only if prefixPattern does not
1434 // contain space characters - do languages "as", "bn", "hi",
1435 // "kk", etc have concepts of upper and lower case?:
1436 if (width == UNUM_UNIT_WIDTH_FULL_NAME) {
1437 coreUnit.toLower(loc);
1438 }
1439 dimensionalityCompiled.format(coreUnit, tmp, status);
1440 if (U_FAILURE(status)) {
1441 return;
1442 }
1443 coreUnit = tmp;
1444 }
1445
1446 if (outArray[pluralIndex].length() == 0) {
1447 // 4.11. If the result is empty, set result to be coreUnit
1448 outArray[pluralIndex] = coreUnit;
1449 } else {
1450 // 4.12. Otherwise set result to be format(timesPattern, result, coreUnit)
1451 UnicodeString tmp;
1452 timesPatternFormatter.format(outArray[pluralIndex], coreUnit, tmp, status);
1453 outArray[pluralIndex] = tmp;
1454 }
1455 }
1456 }
1457 for (int32_t pluralIndex = 0; pluralIndex < StandardPlural::Form::COUNT; pluralIndex++) {
1458 if (globalPlaceholder[pluralIndex] == PH_BEGINNING) {
1459 UnicodeString tmp;
1460 tmp.append(u"{0}", 3);
1461 if (globalJoinerChar != 0) {
1462 tmp.append(globalJoinerChar);
1463 }
1464 tmp.append(outArray[pluralIndex]);
1465 outArray[pluralIndex] = tmp;
1466 } else if (globalPlaceholder[pluralIndex] == PH_END) {
1467 if (globalJoinerChar != 0) {
1468 outArray[pluralIndex].append(globalJoinerChar);
1469 }
1470 outArray[pluralIndex].append(u"{0}", 3);
1471 }
1472 }
1473 }
1474
getUnitDisplayName(const Locale & loc,const MeasureUnit & unit,UNumberUnitWidth width,UErrorCode & status)1475 UnicodeString LongNameHandler::getUnitDisplayName(
1476 const Locale& loc,
1477 const MeasureUnit& unit,
1478 UNumberUnitWidth width,
1479 UErrorCode& status) {
1480 if (U_FAILURE(status)) {
1481 return ICU_Utility::makeBogusString();
1482 }
1483 UnicodeString simpleFormats[ARRAY_LENGTH];
1484 getMeasureData(loc, unit, width, "", simpleFormats, status);
1485 return simpleFormats[DNAM_INDEX];
1486 }
1487
getUnitPattern(const Locale & loc,const MeasureUnit & unit,UNumberUnitWidth width,StandardPlural::Form pluralForm,UErrorCode & status)1488 UnicodeString LongNameHandler::getUnitPattern(
1489 const Locale& loc,
1490 const MeasureUnit& unit,
1491 UNumberUnitWidth width,
1492 StandardPlural::Form pluralForm,
1493 UErrorCode& status) {
1494 if (U_FAILURE(status)) {
1495 return ICU_Utility::makeBogusString();
1496 }
1497 UnicodeString simpleFormats[ARRAY_LENGTH];
1498 getMeasureData(loc, unit, width, "", simpleFormats, status);
1499 // The above already handles fallback from other widths to short
1500 if (U_FAILURE(status)) {
1501 return ICU_Utility::makeBogusString();
1502 }
1503 // Now handle fallback from other plural forms to OTHER
1504 return (!(simpleFormats[pluralForm]).isBogus())? simpleFormats[pluralForm]:
1505 simpleFormats[StandardPlural::Form::OTHER];
1506 }
1507
forCurrencyLongNames(const Locale & loc,const CurrencyUnit & currency,const PluralRules * rules,const MicroPropsGenerator * parent,UErrorCode & status)1508 LongNameHandler* LongNameHandler::forCurrencyLongNames(const Locale &loc, const CurrencyUnit ¤cy,
1509 const PluralRules *rules,
1510 const MicroPropsGenerator *parent,
1511 UErrorCode &status) {
1512 LocalPointer<LongNameHandler> result(new LongNameHandler(rules, parent), status);
1513 if (U_FAILURE(status)) {
1514 return nullptr;
1515 }
1516 UnicodeString simpleFormats[ARRAY_LENGTH];
1517 getCurrencyLongNameData(loc, currency, simpleFormats, status);
1518 if (U_FAILURE(status)) { return nullptr; }
1519 result->simpleFormatsToModifiers(simpleFormats, {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD}, status);
1520 // TODO(icu-units#28): currency gender?
1521 return result.orphan();
1522 }
1523
simpleFormatsToModifiers(const UnicodeString * simpleFormats,Field field,UErrorCode & status)1524 void LongNameHandler::simpleFormatsToModifiers(const UnicodeString *simpleFormats, Field field,
1525 UErrorCode &status) {
1526 for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) {
1527 StandardPlural::Form plural = static_cast<StandardPlural::Form>(i);
1528 UnicodeString simpleFormat = getWithPlural(simpleFormats, plural, status);
1529 if (U_FAILURE(status)) { return; }
1530 SimpleFormatter compiledFormatter(simpleFormat, 0, 1, status);
1531 if (U_FAILURE(status)) { return; }
1532 fModifiers[i] = SimpleModifier(compiledFormatter, field, false, {this, SIGNUM_POS_ZERO, plural});
1533 }
1534 }
1535
multiSimpleFormatsToModifiers(const UnicodeString * leadFormats,UnicodeString trailFormat,Field field,UErrorCode & status)1536 void LongNameHandler::multiSimpleFormatsToModifiers(const UnicodeString *leadFormats, UnicodeString trailFormat,
1537 Field field, UErrorCode &status) {
1538 SimpleFormatter trailCompiled(trailFormat, 1, 1, status);
1539 if (U_FAILURE(status)) { return; }
1540 for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) {
1541 StandardPlural::Form plural = static_cast<StandardPlural::Form>(i);
1542 UnicodeString leadFormat = getWithPlural(leadFormats, plural, status);
1543 if (U_FAILURE(status)) { return; }
1544 UnicodeString compoundFormat;
1545 if (leadFormat.length() == 0) {
1546 compoundFormat = trailFormat;
1547 } else {
1548 trailCompiled.format(leadFormat, compoundFormat, status);
1549 if (U_FAILURE(status)) { return; }
1550 }
1551 SimpleFormatter compoundCompiled(compoundFormat, 0, 1, status);
1552 if (U_FAILURE(status)) { return; }
1553 fModifiers[i] = SimpleModifier(compoundCompiled, field, false, {this, SIGNUM_POS_ZERO, plural});
1554 }
1555 }
1556
processQuantity(DecimalQuantity & quantity,MicroProps & micros,UErrorCode & status) const1557 void LongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps µs,
1558 UErrorCode &status) const {
1559 if (parent != nullptr) {
1560 parent->processQuantity(quantity, micros, status);
1561 }
1562 StandardPlural::Form pluralForm = utils::getPluralSafe(micros.rounder, rules, quantity, status);
1563 micros.modOuter = &fModifiers[pluralForm];
1564 micros.gender = gender;
1565 }
1566
getModifier(Signum,StandardPlural::Form plural) const1567 const Modifier* LongNameHandler::getModifier(Signum /*signum*/, StandardPlural::Form plural) const {
1568 return &fModifiers[plural];
1569 }
1570
forMeasureUnit(const Locale & loc,const MeasureUnit & mixedUnit,const UNumberUnitWidth & width,const char * unitDisplayCase,const PluralRules * rules,const MicroPropsGenerator * parent,MixedUnitLongNameHandler * fillIn,UErrorCode & status)1571 void MixedUnitLongNameHandler::forMeasureUnit(const Locale &loc,
1572 const MeasureUnit &mixedUnit,
1573 const UNumberUnitWidth &width,
1574 const char *unitDisplayCase,
1575 const PluralRules *rules,
1576 const MicroPropsGenerator *parent,
1577 MixedUnitLongNameHandler *fillIn,
1578 UErrorCode &status) {
1579 U_ASSERT(mixedUnit.getComplexity(status) == UMEASURE_UNIT_MIXED);
1580 U_ASSERT(fillIn != nullptr);
1581 if (U_FAILURE(status)) {
1582 return;
1583 }
1584
1585 MeasureUnitImpl temp;
1586 const MeasureUnitImpl &impl = MeasureUnitImpl::forMeasureUnit(mixedUnit, temp, status);
1587 // Defensive, for production code:
1588 if (impl.complexity != UMEASURE_UNIT_MIXED) {
1589 // Should be using the normal LongNameHandler
1590 status = U_UNSUPPORTED_ERROR;
1591 return;
1592 }
1593
1594 fillIn->fMixedUnitCount = impl.singleUnits.length();
1595 fillIn->fMixedUnitData.adoptInstead(new UnicodeString[fillIn->fMixedUnitCount * ARRAY_LENGTH]);
1596 for (int32_t i = 0; i < fillIn->fMixedUnitCount; i++) {
1597 // Grab data for each of the components.
1598 UnicodeString *unitData = &fillIn->fMixedUnitData[i * ARRAY_LENGTH];
1599 // TODO(CLDR-14582): check from the CLDR-14582 ticket whether this
1600 // propagation of unitDisplayCase is correct:
1601 getMeasureData(loc, impl.singleUnits[i]->build(status), width, unitDisplayCase, unitData,
1602 status);
1603 // TODO(ICU-21494): if we add support for gender for mixed units, we may
1604 // need maybeCalculateGender() here.
1605 }
1606
1607 // TODO(icu-units#120): Make sure ICU doesn't output zero-valued
1608 // high-magnitude fields
1609 // * for mixed units count N, produce N listFormatters, one for each subset
1610 // that might be formatted.
1611 UListFormatterWidth listWidth = ULISTFMT_WIDTH_SHORT;
1612 if (width == UNUM_UNIT_WIDTH_NARROW) {
1613 listWidth = ULISTFMT_WIDTH_NARROW;
1614 } else if (width == UNUM_UNIT_WIDTH_FULL_NAME) {
1615 // This might be the same as SHORT in most languages:
1616 listWidth = ULISTFMT_WIDTH_WIDE;
1617 }
1618 fillIn->fListFormatter.adoptInsteadAndCheckErrorCode(
1619 ListFormatter::createInstance(loc, ULISTFMT_TYPE_UNITS, listWidth, status), status);
1620 // TODO(ICU-21494): grab gender of each unit, calculate the gender
1621 // associated with this list formatter, save it for later.
1622 fillIn->rules = rules;
1623 fillIn->parent = parent;
1624
1625 // We need a localised NumberFormatter for the numbers of the bigger units
1626 // (providing Arabic numerals, for example).
1627 fillIn->fNumberFormatter = NumberFormatter::withLocale(loc);
1628 }
1629
processQuantity(DecimalQuantity & quantity,MicroProps & micros,UErrorCode & status) const1630 void MixedUnitLongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps µs,
1631 UErrorCode &status) const {
1632 U_ASSERT(fMixedUnitCount > 1);
1633 if (parent != nullptr) {
1634 parent->processQuantity(quantity, micros, status);
1635 }
1636 micros.modOuter = getMixedUnitModifier(quantity, micros, status);
1637 }
1638
getMixedUnitModifier(DecimalQuantity & quantity,MicroProps & micros,UErrorCode & status) const1639 const Modifier *MixedUnitLongNameHandler::getMixedUnitModifier(DecimalQuantity &quantity,
1640 MicroProps µs,
1641 UErrorCode &status) const {
1642 if (micros.mixedMeasuresCount == 0) {
1643 U_ASSERT(micros.mixedMeasuresCount > 0); // Mixed unit: we must have more than one unit value
1644 status = U_UNSUPPORTED_ERROR;
1645 return µs.helpers.emptyWeakModifier;
1646 }
1647
1648 // Algorithm:
1649 //
1650 // For the mixed-units measurement of: "3 yard, 1 foot, 2.6 inch", we should
1651 // find "3 yard" and "1 foot" in micros.mixedMeasures.
1652 //
1653 // Obtain long-names with plural forms corresponding to measure values:
1654 // * {0} yards, {0} foot, {0} inches
1655 //
1656 // Format the integer values appropriately and modify with the format
1657 // strings:
1658 // - 3 yards, 1 foot
1659 //
1660 // Use ListFormatter to combine, with one placeholder:
1661 // - 3 yards, 1 foot and {0} inches
1662 //
1663 // Return a SimpleModifier for this pattern, letting the rest of the
1664 // pipeline take care of the remaining inches.
1665
1666 LocalArray<UnicodeString> outputMeasuresList(new UnicodeString[fMixedUnitCount], status);
1667 if (U_FAILURE(status)) {
1668 return µs.helpers.emptyWeakModifier;
1669 }
1670
1671 StandardPlural::Form quantityPlural = StandardPlural::Form::OTHER;
1672 for (int32_t i = 0; i < micros.mixedMeasuresCount; i++) {
1673 DecimalQuantity fdec;
1674
1675 // If numbers are negative, only the first number needs to have its
1676 // negative sign formatted.
1677 int64_t number = i > 0 ? std::abs(micros.mixedMeasures[i]) : micros.mixedMeasures[i];
1678
1679 if (micros.indexOfQuantity == i) { // Insert placeholder for `quantity`
1680 // If quantity is not the first value and quantity is negative
1681 if (micros.indexOfQuantity > 0 && quantity.isNegative()) {
1682 quantity.negate();
1683 }
1684
1685 StandardPlural::Form quantityPlural =
1686 utils::getPluralSafe(micros.rounder, rules, quantity, status);
1687 UnicodeString quantityFormatWithPlural =
1688 getWithPlural(&fMixedUnitData[i * ARRAY_LENGTH], quantityPlural, status);
1689 SimpleFormatter quantityFormatter(quantityFormatWithPlural, 0, 1, status);
1690 quantityFormatter.format(UnicodeString(u"{0}"), outputMeasuresList[i], status);
1691 } else {
1692 fdec.setToLong(number);
1693 StandardPlural::Form pluralForm = utils::getStandardPlural(rules, fdec);
1694 UnicodeString simpleFormat =
1695 getWithPlural(&fMixedUnitData[i * ARRAY_LENGTH], pluralForm, status);
1696 SimpleFormatter compiledFormatter(simpleFormat, 0, 1, status);
1697 UnicodeString num;
1698 auto appendable = UnicodeStringAppendable(num);
1699
1700 fNumberFormatter.formatDecimalQuantity(fdec, status).appendTo(appendable, status);
1701 compiledFormatter.format(num, outputMeasuresList[i], status);
1702 }
1703 }
1704
1705 // TODO(ICU-21494): implement gender for lists of mixed units. Presumably we
1706 // can set micros.gender to the gender associated with the list formatter in
1707 // use below (once we have correct support for that). And then document this
1708 // appropriately? "getMixedUnitModifier" doesn't sound like it would do
1709 // something like this.
1710
1711 // Combine list into a "premixed" pattern
1712 UnicodeString premixedFormatPattern;
1713 fListFormatter->format(outputMeasuresList.getAlias(), fMixedUnitCount, premixedFormatPattern,
1714 status);
1715 SimpleFormatter premixedCompiled(premixedFormatPattern, 0, 1, status);
1716 if (U_FAILURE(status)) {
1717 return µs.helpers.emptyWeakModifier;
1718 }
1719
1720 micros.helpers.mixedUnitModifier =
1721 SimpleModifier(premixedCompiled, kUndefinedField, false, {this, SIGNUM_POS_ZERO, quantityPlural});
1722 return µs.helpers.mixedUnitModifier;
1723 }
1724
getModifier(Signum,StandardPlural::Form) const1725 const Modifier *MixedUnitLongNameHandler::getModifier(Signum /*signum*/,
1726 StandardPlural::Form /*plural*/) const {
1727 // TODO(icu-units#28): investigate this method when investigating where
1728 // ModifierStore::getModifier() gets used. To be sure it remains
1729 // unreachable:
1730 UPRV_UNREACHABLE_EXIT;
1731 return nullptr;
1732 }
1733
forMeasureUnits(const Locale & loc,const MaybeStackVector<MeasureUnit> & units,const UNumberUnitWidth & width,const char * unitDisplayCase,const PluralRules * rules,const MicroPropsGenerator * parent,UErrorCode & status)1734 LongNameMultiplexer *LongNameMultiplexer::forMeasureUnits(const Locale &loc,
1735 const MaybeStackVector<MeasureUnit> &units,
1736 const UNumberUnitWidth &width,
1737 const char *unitDisplayCase,
1738 const PluralRules *rules,
1739 const MicroPropsGenerator *parent,
1740 UErrorCode &status) {
1741 LocalPointer<LongNameMultiplexer> result(new LongNameMultiplexer(parent), status);
1742 if (U_FAILURE(status)) {
1743 return nullptr;
1744 }
1745 U_ASSERT(units.length() > 0);
1746 if (result->fHandlers.resize(units.length()) == nullptr) {
1747 status = U_MEMORY_ALLOCATION_ERROR;
1748 return nullptr;
1749 }
1750 result->fMeasureUnits.adoptInstead(new MeasureUnit[units.length()]);
1751 for (int32_t i = 0, length = units.length(); i < length; i++) {
1752 const MeasureUnit &unit = *units[i];
1753 result->fMeasureUnits[i] = unit;
1754 if (unit.getComplexity(status) == UMEASURE_UNIT_MIXED) {
1755 MixedUnitLongNameHandler *mlnh = result->fMixedUnitHandlers.createAndCheckErrorCode(status);
1756 MixedUnitLongNameHandler::forMeasureUnit(loc, unit, width, unitDisplayCase, rules, nullptr,
1757 mlnh, status);
1758 result->fHandlers[i] = mlnh;
1759 } else {
1760 LongNameHandler *lnh = result->fLongNameHandlers.createAndCheckErrorCode(status);
1761 LongNameHandler::forMeasureUnit(loc, unit, width, unitDisplayCase, rules, nullptr, lnh, status);
1762 result->fHandlers[i] = lnh;
1763 }
1764 if (U_FAILURE(status)) {
1765 return nullptr;
1766 }
1767 }
1768 return result.orphan();
1769 }
1770
processQuantity(DecimalQuantity & quantity,MicroProps & micros,UErrorCode & status) const1771 void LongNameMultiplexer::processQuantity(DecimalQuantity &quantity, MicroProps µs,
1772 UErrorCode &status) const {
1773 // We call parent->processQuantity() from the Multiplexer, instead of
1774 // letting LongNameHandler handle it: we don't know which LongNameHandler to
1775 // call until we've called the parent!
1776 fParent->processQuantity(quantity, micros, status);
1777
1778 // Call the correct LongNameHandler based on outputUnit
1779 for (int i = 0; i < fHandlers.getCapacity(); i++) {
1780 if (fMeasureUnits[i] == micros.outputUnit) {
1781 fHandlers[i]->processQuantity(quantity, micros, status);
1782 return;
1783 }
1784 }
1785 if (U_FAILURE(status)) {
1786 return;
1787 }
1788 // We shouldn't receive any outputUnit for which we haven't already got a
1789 // LongNameHandler:
1790 status = U_INTERNAL_PROGRAM_ERROR;
1791 }
1792
1793 #endif /* #if !UCONFIG_NO_FORMATTING */
1794