• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 
8 #include <cstdlib>
9 
10 #include "unicode/simpleformatter.h"
11 #include "unicode/ures.h"
12 #include "unicode/plurrule.h"
13 #include "unicode/strenum.h"
14 #include "ureslocs.h"
15 #include "charstr.h"
16 #include "uresimp.h"
17 #include "measunit_impl.h"
18 #include "number_longnames.h"
19 #include "number_microprops.h"
20 #include <algorithm>
21 #include "cstring.h"
22 #include "util.h"
23 #include "sharedpluralrules.h"
24 
25 using namespace icu;
26 using namespace icu::number;
27 using namespace icu::number::impl;
28 
29 namespace {
30 
31 /**
32  * Display Name (this format has no placeholder).
33  *
34  * Used as an index into the LongNameHandler::simpleFormats array. Units
35  * resources cover the normal set of PluralRules keys, as well as `dnam` and
36  * `per` forms.
37  */
38 constexpr int32_t DNAM_INDEX = StandardPlural::Form::COUNT;
39 /**
40  * "per" form (e.g. "{0} per day" is day's "per" form).
41  *
42  * Used as an index into the LongNameHandler::simpleFormats array. Units
43  * resources cover the normal set of PluralRules keys, as well as `dnam` and
44  * `per` forms.
45  */
46 constexpr int32_t PER_INDEX = StandardPlural::Form::COUNT + 1;
47 /**
48  * Gender of the word, in languages with grammatical gender.
49  */
50 constexpr int32_t GENDER_INDEX = StandardPlural::Form::COUNT + 2;
51 // Number of keys in the array populated by PluralTableSink.
52 constexpr int32_t ARRAY_LENGTH = StandardPlural::Form::COUNT + 3;
53 
54 // TODO(icu-units#28): load this list from resources, after creating a "&set"
55 // function for use in ldml2icu rules.
56 const int32_t GENDER_COUNT = 7;
57 const char *gGenders[GENDER_COUNT] = {"animate",   "common", "feminine", "inanimate",
58                                       "masculine", "neuter", "personal"};
59 
60 // Converts a UnicodeString to a const char*, either pointing to a string in
61 // gGenders, or pointing to an empty string if an appropriate string was not
62 // found.
getGenderString(UnicodeString uGender,UErrorCode status)63 const char *getGenderString(UnicodeString uGender, UErrorCode status) {
64     if (uGender.length() == 0) {
65         return "";
66     }
67     CharString gender;
68     gender.appendInvariantChars(uGender, status);
69     if (U_FAILURE(status)) {
70         return "";
71     }
72     int32_t first = 0;
73     int32_t last = GENDER_COUNT;
74     while (first < last) {
75         int32_t mid = (first + last) / 2;
76         int32_t cmp = uprv_strcmp(gender.data(), gGenders[mid]);
77         if (cmp == 0) {
78             return gGenders[mid];
79         } else if (cmp > 0) {
80             first = mid + 1;
81         } else if (cmp < 0) {
82             last = mid;
83         }
84     }
85     // We don't return an error in case our gGenders list is incomplete in
86     // production.
87     //
88     // TODO(icu-units#28): a unit test checking all locales' genders are covered
89     // by gGenders? Else load a complete list of genders found in
90     // grammaticalFeatures in an initOnce.
91     return "";
92 }
93 
94 // Returns the array index that corresponds to the given pluralKeyword.
getIndex(const char * pluralKeyword,UErrorCode & status)95 int32_t getIndex(const char* pluralKeyword, UErrorCode& status) {
96     // pluralKeyword can also be "dnam", "per", or "gender"
97     switch (*pluralKeyword) {
98     case 'd':
99         if (uprv_strcmp(pluralKeyword + 1, "nam") == 0) {
100             return DNAM_INDEX;
101         }
102         break;
103     case 'g':
104         if (uprv_strcmp(pluralKeyword + 1, "ender") == 0) {
105             return GENDER_INDEX;
106         }
107         break;
108     case 'p':
109         if (uprv_strcmp(pluralKeyword + 1, "er") == 0) {
110             return PER_INDEX;
111         }
112         break;
113     default:
114         break;
115     }
116     StandardPlural::Form plural = StandardPlural::fromString(pluralKeyword, status);
117     return plural;
118 }
119 
120 // Selects a string out of the `strings` array which corresponds to the
121 // specified plural form, with fallback to the OTHER form.
122 //
123 // The `strings` array must have ARRAY_LENGTH items: one corresponding to each
124 // of the plural forms, plus a display name ("dnam") and a "per" form.
getWithPlural(const UnicodeString * strings,StandardPlural::Form plural,UErrorCode & status)125 UnicodeString getWithPlural(
126         const UnicodeString* strings,
127         StandardPlural::Form plural,
128         UErrorCode& status) {
129     UnicodeString result = strings[plural];
130     if (result.isBogus()) {
131         result = strings[StandardPlural::Form::OTHER];
132     }
133     if (result.isBogus()) {
134         // There should always be data in the "other" plural variant.
135         status = U_INTERNAL_PROGRAM_ERROR;
136     }
137     return result;
138 }
139 
// Location of the "{0}" placeholder within a unit pattern, as reported by
// extractCorePattern().
enum PlaceholderPosition {
    PH_EMPTY,     // NOTE(review): never assigned by extractCorePattern(); presumably "nothing extracted yet" - confirm
    PH_NONE,      // the pattern contains no placeholder
    PH_BEGINNING, // the pattern starts with the placeholder
    PH_MIDDLE,    // the placeholder is neither at the start nor at the end
    PH_END        // the pattern ends with the placeholder
};
141 
142 /**
143  * Returns three outputs extracted from pattern.
144  *
145  * @param coreUnit is extracted as per Extract(...) in the spec:
146  *   https://unicode.org/reports/tr35/tr35-general.html#compound-units
147  * @param PlaceholderPosition indicates where in the string the placeholder was
148  *   found.
149  * @param joinerChar Iff the placeholder was at the beginning or end, joinerChar
150  *   contains the space character (if any) that separated the placeholder from
151  *   the rest of the pattern. Otherwise, joinerChar is set to NUL. Only one
152  *   space character is considered.
153  */
extractCorePattern(const UnicodeString & pattern,UnicodeString & coreUnit,PlaceholderPosition & placeholderPosition,char16_t & joinerChar)154 void extractCorePattern(const UnicodeString &pattern,
155                         UnicodeString &coreUnit,
156                         PlaceholderPosition &placeholderPosition,
157                         char16_t &joinerChar) {
158     joinerChar = 0;
159     int32_t len = pattern.length();
160     if (pattern.startsWith(u"{0}", 3)) {
161         placeholderPosition = PH_BEGINNING;
162         if (u_isJavaSpaceChar(pattern[3])) {
163             joinerChar = pattern[3];
164             coreUnit.setTo(pattern, 4, len - 4);
165         } else {
166             coreUnit.setTo(pattern, 3, len - 3);
167         }
168     } else if (pattern.endsWith(u"{0}", 3)) {
169         placeholderPosition = PH_END;
170         if (u_isJavaSpaceChar(pattern[len - 4])) {
171             coreUnit.setTo(pattern, 0, len - 4);
172             joinerChar = pattern[len - 4];
173         } else {
174             coreUnit.setTo(pattern, 0, len - 3);
175         }
176     } else if (pattern.indexOf(u"{0}", 3, 1, len - 2) == -1) {
177         placeholderPosition = PH_NONE;
178         coreUnit = pattern;
179     } else {
180         placeholderPosition = PH_MIDDLE;
181         coreUnit = pattern;
182     }
183 }
184 
185 //////////////////////////
186 /// BEGIN DATA LOADING ///
187 //////////////////////////
188 
189 // Gets the gender of a built-in unit: unit must be a built-in. Returns an empty
190 // string both in case of unknown gender and in case of unknown unit.
191 UnicodeString
getGenderForBuiltin(const Locale & locale,const MeasureUnit & builtinUnit,UErrorCode & status)192 getGenderForBuiltin(const Locale &locale, const MeasureUnit &builtinUnit, UErrorCode &status) {
193     LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status));
194     if (U_FAILURE(status)) { return {}; }
195 
196     // Map duration-year-person, duration-week-person, etc. to duration-year, duration-week, ...
197     // TODO(ICU-20400): Get duration-*-person data properly with aliases.
198     StringPiece subtypeForResource;
199     int32_t subtypeLen = static_cast<int32_t>(uprv_strlen(builtinUnit.getSubtype()));
200     if (subtypeLen > 7 && uprv_strcmp(builtinUnit.getSubtype() + subtypeLen - 7, "-person") == 0) {
201         subtypeForResource = {builtinUnit.getSubtype(), subtypeLen - 7};
202     } else {
203         subtypeForResource = builtinUnit.getSubtype();
204     }
205 
206     CharString key;
207     key.append("units/", status);
208     key.append(builtinUnit.getType(), status);
209     key.append("/", status);
210     key.append(subtypeForResource, status);
211     key.append("/gender", status);
212 
213     UErrorCode localStatus = status;
214     int32_t resultLen = 0;
215     const char16_t *result =
216         ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &resultLen, &localStatus);
217     if (U_SUCCESS(localStatus)) {
218         status = localStatus;
219         return UnicodeString(true, result, resultLen);
220     } else {
221         // TODO(icu-units#28): "$unitRes/gender" does not exist. Do we want to
222         // check whether the parent "$unitRes" exists? Then we could return
223         // U_MISSING_RESOURCE_ERROR for incorrect usage (e.g. builtinUnit not
224         // being a builtin).
225         return {};
226     }
227 }
228 
229 // Loads data from a resource tree with paths matching
230 // $key/$pluralForm/$gender/$case, with lateral inheritance for missing cases
231 // and genders.
232 //
233 // An InflectedPluralSink is configured to load data for a specific gender and
234 // case. It loads all plural forms, because selection between plural forms is
235 // dependent upon the value being formatted.
236 //
237 // See data/unit/de.txt and data/unit/fr.txt for examples - take a look at
238 // units/compound/power2: German has case, French has differences for gender,
239 // but no case.
240 //
241 // TODO(icu-units#138): Conceptually similar to PluralTableSink, however the
242 // tree structures are different. After homogenizing the structures, we may be
243 // able to unify the two classes.
244 //
245 // TODO: Spec violation: expects presence of "count" - does not fallback to an
246 // absent "count"! If this fallback were added, getCompoundValue could be
247 // superseded?
248 class InflectedPluralSink : public ResourceSink {
249   public:
250     // Accepts `char*` rather than StringPiece because
251     // ResourceTable::findValue(...) requires a null-terminated `char*`.
252     //
253     // NOTE: outArray MUST have a length of at least ARRAY_LENGTH. No bounds
254     // checking is performed.
InflectedPluralSink(const char * gender,const char * caseVariant,UnicodeString * outArray)255     explicit InflectedPluralSink(const char *gender, const char *caseVariant, UnicodeString *outArray)
256         : gender(gender), caseVariant(caseVariant), outArray(outArray) {
257         // Initialize the array to bogus strings.
258         for (int32_t i = 0; i < ARRAY_LENGTH; i++) {
259             outArray[i].setToBogus();
260         }
261     }
262 
263     // See ResourceSink::put().
put(const char * key,ResourceValue & value,UBool,UErrorCode & status)264     void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
265         int32_t pluralIndex = getIndex(key, status);
266         if (U_FAILURE(status)) { return; }
267         if (!outArray[pluralIndex].isBogus()) {
268             // We already have a pattern
269             return;
270         }
271         ResourceTable genderTable = value.getTable(status);
272         ResourceTable caseTable; // This instance has to outlive `value`
273         if (loadForPluralForm(genderTable, caseTable, value, status)) {
274             outArray[pluralIndex] = value.getUnicodeString(status);
275         }
276     }
277 
278   private:
279     // Tries to load data for the configured gender from `genderTable`. Returns
280     // true if found, returning the data in `value`. The returned data will be
281     // for the configured gender if found, falling back to "neuter" and
282     // no-gender if not. The caseTable parameter holds the intermediate
283     // ResourceTable for the sake of lifetime management.
loadForPluralForm(const ResourceTable & genderTable,ResourceTable & caseTable,ResourceValue & value,UErrorCode & status)284     bool loadForPluralForm(const ResourceTable &genderTable,
285                            ResourceTable &caseTable,
286                            ResourceValue &value,
287                            UErrorCode &status) {
288         if (uprv_strcmp(gender, "") != 0) {
289             if (loadForGender(genderTable, gender, caseTable, value, status)) {
290                 return true;
291             }
292             if (uprv_strcmp(gender, "neuter") != 0 &&
293                 loadForGender(genderTable, "neuter", caseTable, value, status)) {
294                 return true;
295             }
296         }
297         if (loadForGender(genderTable, "_", caseTable, value, status)) {
298             return true;
299         }
300         return false;
301     }
302 
303     // Tries to load data for the given gender from `genderTable`. Returns true
304     // if found, returning the data in `value`. The returned data will be for
305     // the configured case if found, falling back to "nominative" and no-case if
306     // not.
loadForGender(const ResourceTable & genderTable,const char * genderVal,ResourceTable & caseTable,ResourceValue & value,UErrorCode & status)307     bool loadForGender(const ResourceTable &genderTable,
308                        const char *genderVal,
309                        ResourceTable &caseTable,
310                        ResourceValue &value,
311                        UErrorCode &status) {
312         if (!genderTable.findValue(genderVal, value)) {
313             return false;
314         }
315         caseTable = value.getTable(status);
316         if (uprv_strcmp(caseVariant, "") != 0) {
317             if (loadForCase(caseTable, caseVariant, value)) {
318                 return true;
319             }
320             if (uprv_strcmp(caseVariant, "nominative") != 0 &&
321                 loadForCase(caseTable, "nominative", value)) {
322                 return true;
323             }
324         }
325         if (loadForCase(caseTable, "_", value)) {
326             return true;
327         }
328         return false;
329     }
330 
331     // Tries to load data for the given case from `caseTable`. Returns true if
332     // found, returning the data in `value`.
loadForCase(const ResourceTable & caseTable,const char * caseValue,ResourceValue & value)333     bool loadForCase(const ResourceTable &caseTable, const char *caseValue, ResourceValue &value) {
334         if (!caseTable.findValue(caseValue, value)) {
335             return false;
336         }
337         return true;
338     }
339 
340     const char *gender;
341     const char *caseVariant;
342     UnicodeString *outArray;
343 };
344 
345 // Fetches localised formatting patterns for the given subKey. See documentation
346 // for InflectedPluralSink for details.
347 //
348 // Data is loaded for the appropriate unit width, with missing data filled in
349 // from unitsShort.
getInflectedMeasureData(StringPiece subKey,const Locale & locale,const UNumberUnitWidth & width,const char * gender,const char * caseVariant,UnicodeString * outArray,UErrorCode & status)350 void getInflectedMeasureData(StringPiece subKey,
351                              const Locale &locale,
352                              const UNumberUnitWidth &width,
353                              const char *gender,
354                              const char *caseVariant,
355                              UnicodeString *outArray,
356                              UErrorCode &status) {
357     InflectedPluralSink sink(gender, caseVariant, outArray);
358     LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status));
359     if (U_FAILURE(status)) { return; }
360 
361     CharString key;
362     key.append("units", status);
363     if (width == UNUM_UNIT_WIDTH_NARROW) {
364         key.append("Narrow", status);
365     } else if (width == UNUM_UNIT_WIDTH_SHORT) {
366         key.append("Short", status);
367     }
368     key.append("/", status);
369     key.append(subKey, status);
370 
371     UErrorCode localStatus = status;
372     ures_getAllChildrenWithFallback(unitsBundle.getAlias(), key.data(), sink, localStatus);
373     if (width == UNUM_UNIT_WIDTH_SHORT) {
374         status = localStatus;
375         return;
376     }
377 }
378 
379 class PluralTableSink : public ResourceSink {
380   public:
381     // NOTE: outArray MUST have a length of at least ARRAY_LENGTH. No bounds
382     // checking is performed.
PluralTableSink(UnicodeString * outArray)383     explicit PluralTableSink(UnicodeString *outArray) : outArray(outArray) {
384         // Initialize the array to bogus strings.
385         for (int32_t i = 0; i < ARRAY_LENGTH; i++) {
386             outArray[i].setToBogus();
387         }
388     }
389 
put(const char * key,ResourceValue & value,UBool,UErrorCode & status)390     void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
391         if (uprv_strcmp(key, "case") == 0) {
392             return;
393         }
394         int32_t index = getIndex(key, status);
395         if (U_FAILURE(status)) { return; }
396         if (!outArray[index].isBogus()) {
397             return;
398         }
399         outArray[index] = value.getUnicodeString(status);
400         if (U_FAILURE(status)) { return; }
401     }
402 
403   private:
404     UnicodeString *outArray;
405 };
406 
407 /**
408  * Populates outArray with `locale`-specific values for `unit` through use of
409  * PluralTableSink. Only the set of basic units are supported!
410  *
411  * Reading from resources *unitsNarrow* and *unitsShort* (for width
412  * UNUM_UNIT_WIDTH_NARROW), or just *unitsShort* (for width
413  * UNUM_UNIT_WIDTH_SHORT). For other widths, it reads just "units".
414  *
415  * @param unit must be a built-in unit, i.e. must have a type and subtype,
416  *     listed in gTypes and gSubTypes in measunit.cpp.
417  * @param unitDisplayCase the empty string and "nominative" are treated the
418  *     same. For other cases, strings for the requested case are used if found.
419  *     (For any missing case-specific data, we fall back to nominative.)
420  * @param outArray must be of fixed length ARRAY_LENGTH.
421  */
getMeasureData(const Locale & locale,const MeasureUnit & unit,const UNumberUnitWidth & width,const char * unitDisplayCase,UnicodeString * outArray,UErrorCode & status)422 void getMeasureData(const Locale &locale,
423                     const MeasureUnit &unit,
424                     const UNumberUnitWidth &width,
425                     const char *unitDisplayCase,
426                     UnicodeString *outArray,
427                     UErrorCode &status) {
428     PluralTableSink sink(outArray);
429     LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status));
430     if (U_FAILURE(status)) { return; }
431 
432     CharString subKey;
433     subKey.append("/", status);
434     subKey.append(unit.getType(), status);
435     subKey.append("/", status);
436 
437     // Check if unitSubType is an alias or not.
438     LocalUResourceBundlePointer aliasBundle(ures_open(U_ICUDATA_ALIAS, "metadata", &status));
439 
440     UErrorCode aliasStatus = status;
441     StackUResourceBundle aliasFillIn;
442     CharString aliasKey;
443     aliasKey.append("alias/unit/", aliasStatus);
444     aliasKey.append(unit.getSubtype(), aliasStatus);
445     aliasKey.append("/replacement", aliasStatus);
446     ures_getByKeyWithFallback(aliasBundle.getAlias(), aliasKey.data(), aliasFillIn.getAlias(),
447                               &aliasStatus);
448     CharString unitSubType;
449     if (!U_FAILURE(aliasStatus)) {
450         // This means the subType is an alias. Then, replace unitSubType with the replacement.
451         auto replacement = ures_getUnicodeString(aliasFillIn.getAlias(), &status);
452         unitSubType.appendInvariantChars(replacement, status);
453     } else {
454         unitSubType.append(unit.getSubtype(), status);
455     }
456 
457     // Map duration-year-person, duration-week-person, etc. to duration-year, duration-week, ...
458     // TODO(ICU-20400): Get duration-*-person data properly with aliases.
459     int32_t subtypeLen = static_cast<int32_t>(uprv_strlen(unitSubType.data()));
460     if (subtypeLen > 7 && uprv_strcmp(unitSubType.data() + subtypeLen - 7, "-person") == 0) {
461         subKey.append({unitSubType.data(), subtypeLen - 7}, status);
462     } else {
463         subKey.append({unitSubType.data(), subtypeLen}, status);
464     }
465 
466     if (width != UNUM_UNIT_WIDTH_FULL_NAME) {
467         UErrorCode localStatus = status;
468         CharString genderKey;
469         genderKey.append("units", localStatus);
470         genderKey.append(subKey, localStatus);
471         genderKey.append("/gender", localStatus);
472         StackUResourceBundle fillIn;
473         ures_getByKeyWithFallback(unitsBundle.getAlias(), genderKey.data(), fillIn.getAlias(),
474                                   &localStatus);
475         outArray[GENDER_INDEX] = ures_getUnicodeString(fillIn.getAlias(), &localStatus);
476     }
477 
478     CharString key;
479     key.append("units", status);
480     if (width == UNUM_UNIT_WIDTH_NARROW) {
481         key.append("Narrow", status);
482     } else if (width == UNUM_UNIT_WIDTH_SHORT) {
483         key.append("Short", status);
484     }
485     key.append(subKey, status);
486 
487     // Grab desired case first, if available. Then grab no-case data to fill in
488     // the gaps.
489     if (width == UNUM_UNIT_WIDTH_FULL_NAME && unitDisplayCase[0] != 0) {
490         CharString caseKey;
491         caseKey.append(key, status);
492         caseKey.append("/case/", status);
493         caseKey.append(unitDisplayCase, status);
494 
495         UErrorCode localStatus = U_ZERO_ERROR;
496         // TODO(icu-units#138): our fallback logic is not spec-compliant:
497         // lateral fallback should happen before locale fallback. Switch to
498         // getInflectedMeasureData after homogenizing data format? Find a unit
499         // test case that demonstrates the incorrect fallback logic (via
500         // regional variant of an inflected language?)
501         ures_getAllChildrenWithFallback(unitsBundle.getAlias(), caseKey.data(), sink, localStatus);
502     }
503 
504     // TODO(icu-units#138): our fallback logic is not spec-compliant: we
505     // check the given case, then go straight to the no-case data. The spec
506     // states we should first look for case="nominative". As part of #138,
507     // either get the spec changed, or add unit tests that warn us if
508     // case="nominative" data differs from no-case data?
509     UErrorCode localStatus = U_ZERO_ERROR;
510     ures_getAllChildrenWithFallback(unitsBundle.getAlias(), key.data(), sink, localStatus);
511     if (width == UNUM_UNIT_WIDTH_SHORT) {
512         if (U_FAILURE(localStatus)) {
513             status = localStatus;
514         }
515         return;
516     }
517 }
518 
519 // NOTE: outArray MUST have a length of at least ARRAY_LENGTH.
getCurrencyLongNameData(const Locale & locale,const CurrencyUnit & currency,UnicodeString * outArray,UErrorCode & status)520 void getCurrencyLongNameData(const Locale &locale, const CurrencyUnit &currency, UnicodeString *outArray,
521                              UErrorCode &status) {
522     // In ICU4J, this method gets a CurrencyData from CurrencyData.provider.
523     // TODO(ICU4J): Implement this without going through CurrencyData, like in ICU4C?
524     PluralTableSink sink(outArray);
525     // Here all outArray entries are bogus.
526     LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_CURR, locale.getName(), &status));
527     if (U_FAILURE(status)) { return; }
528     ures_getAllChildrenWithFallback(unitsBundle.getAlias(), "CurrencyUnitPatterns", sink, status);
529     if (U_FAILURE(status)) { return; }
530     // Here the outArray[] entries are filled in with any CurrencyUnitPatterns data for locale,
531     // or if there is no CurrencyUnitPatterns data for locale since the patterns all inherited
532     // from the "other" pattern in root (which is true for many locales in CLDR 46), then only
533     // the "other" entry has a currency pattern. So now what we do is: For all valid plural keywords
534     // for the locale, if the corresponding outArray[] entry is bogus, fill it in from the "other"
535     // entry. In the longer run, clients of this should instead consider using CurrencyPluralInfo
536     // (see i18n/unicode/currpinf.h).
537     UErrorCode localStatus = U_ZERO_ERROR;
538     const SharedPluralRules *pr = PluralRules::createSharedInstance(
539             locale, UPLURAL_TYPE_CARDINAL, localStatus);
540     if (U_SUCCESS(localStatus)) {
541         LocalPointer<StringEnumeration> keywords((*pr)->getKeywords(localStatus), localStatus);
542         if (U_SUCCESS(localStatus)) {
543             const char* keyword;
544             while (((keyword = keywords->next(nullptr, localStatus)) != nullptr) && U_SUCCESS(localStatus)) {
545                 int32_t index = StandardPlural::indexOrOtherIndexFromString(keyword);
546                 if (index != StandardPlural::Form::OTHER && outArray[index].isBogus()) {
547                     outArray[index].setTo(outArray[StandardPlural::Form::OTHER]);
548                 }
549             }
550         }
551         pr->removeRef();
552     }
553 
554     for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) {
555         UnicodeString &pattern = outArray[i];
556         if (pattern.isBogus()) {
557             continue;
558         }
559         int32_t longNameLen = 0;
560         const char16_t *longName = ucurr_getPluralName(
561                 currency.getISOCurrency(),
562                 locale.getName(),
563                 nullptr /* isChoiceFormat */,
564                 StandardPlural::getKeyword(static_cast<StandardPlural::Form>(i)),
565                 &longNameLen,
566                 &status);
567         // Example pattern from data: "{0} {1}"
568         // Example output after find-and-replace: "{0} US dollars"
569         pattern.findAndReplace(UnicodeString(u"{1}"), UnicodeString(longName, longNameLen));
570     }
571 }
572 
getCompoundValue(StringPiece compoundKey,const Locale & locale,const UNumberUnitWidth & width,UErrorCode & status)573 UnicodeString getCompoundValue(StringPiece compoundKey,
574                                const Locale &locale,
575                                const UNumberUnitWidth &width,
576                                UErrorCode &status) {
577     LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status));
578     if (U_FAILURE(status)) { return {}; }
579     CharString key;
580     key.append("units", status);
581     if (width == UNUM_UNIT_WIDTH_NARROW) {
582         key.append("Narrow", status);
583     } else if (width == UNUM_UNIT_WIDTH_SHORT) {
584         key.append("Short", status);
585     }
586     key.append("/compound/", status);
587     key.append(compoundKey, status);
588 
589     UErrorCode localStatus = status;
590     int32_t len = 0;
591     const char16_t *ptr =
592         ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &len, &localStatus);
593     if (U_FAILURE(localStatus) && width != UNUM_UNIT_WIDTH_SHORT) {
594         // Fall back to short, which contains more compound data
595         key.clear();
596         key.append("unitsShort/compound/", status);
597         key.append(compoundKey, status);
598         ptr = ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &len, &status);
599     } else {
600         status = localStatus;
601     }
602     if (U_FAILURE(status)) {
603         return {};
604     }
605     return UnicodeString(ptr, len);
606 }
607 
608 /**
609  * Loads and applies deriveComponent rules from CLDR's grammaticalFeatures.xml.
610  *
611  * Consider a deriveComponent rule that looks like this:
612  *
613  *     <deriveComponent feature="case" structure="per" value0="compound" value1="nominative"/>
614  *
615  * Instantiating an instance as follows:
616  *
617  *     DerivedComponents d(loc, "case", "per");
618  *
619  * Applying the rule in the XML element above, `d.value0("foo")` will be "foo",
620  * and `d.value1("foo")` will be "nominative".
621  *
622  * The values returned by value0(...) and value1(...) are valid only while the
623  * instance exists. In case of any kind of failure, value0(...) and value1(...)
624  * will return "".
625  */
626 class DerivedComponents {
627   public:
628     /**
629      * Constructor.
630      *
631      * The feature and structure parameters must be null-terminated. The string
632      * referenced by compoundValue must exist for longer than the
633      * DerivedComponents instance.
634      */
DerivedComponents(const Locale & locale,const char * feature,const char * structure)635     DerivedComponents(const Locale &locale, const char *feature, const char *structure) {
636         StackUResourceBundle derivationsBundle, stackBundle;
637         ures_openDirectFillIn(derivationsBundle.getAlias(), nullptr, "grammaticalFeatures", &status);
638         ures_getByKey(derivationsBundle.getAlias(), "grammaticalData", derivationsBundle.getAlias(),
639                       &status);
640         ures_getByKey(derivationsBundle.getAlias(), "derivations", derivationsBundle.getAlias(),
641                       &status);
642         if (U_FAILURE(status)) {
643             return;
644         }
645         UErrorCode localStatus = U_ZERO_ERROR;
646         // TODO(icu-units#28): use standard normal locale resolution algorithms
647         // rather than just grabbing language:
648         ures_getByKey(derivationsBundle.getAlias(), locale.getLanguage(), stackBundle.getAlias(),
649                       &localStatus);
650         // TODO(icu-units#28):
651         // - code currently assumes if the locale exists, the rules are there -
652         //   instead of falling back to root when the requested rule is missing.
653         // - investigate ures.h functions, see if one that uses res_findResource()
654         //   might be better (or use res_findResource directly), or maybe help
655         //   improve ures documentation to guide function selection?
656         if (localStatus == U_MISSING_RESOURCE_ERROR) {
657             ures_getByKey(derivationsBundle.getAlias(), "root", stackBundle.getAlias(), &status);
658         } else {
659             status = localStatus;
660         }
661         ures_getByKey(stackBundle.getAlias(), "component", stackBundle.getAlias(), &status);
662         ures_getByKey(stackBundle.getAlias(), feature, stackBundle.getAlias(), &status);
663         ures_getByKey(stackBundle.getAlias(), structure, stackBundle.getAlias(), &status);
664         UnicodeString val0 = ures_getUnicodeStringByIndex(stackBundle.getAlias(), 0, &status);
665         UnicodeString val1 = ures_getUnicodeStringByIndex(stackBundle.getAlias(), 1, &status);
666         if (U_SUCCESS(status)) {
667             if (val0.compare(UnicodeString(u"compound")) == 0) {
668                 compound0_ = true;
669             } else {
670                 compound0_ = false;
671                 value0_.appendInvariantChars(val0, status);
672             }
673             if (val1.compare(UnicodeString(u"compound")) == 0) {
674                 compound1_ = true;
675             } else {
676                 compound1_ = false;
677                 value1_.appendInvariantChars(val1, status);
678             }
679         }
680     }
681 
682     // Returns a StringPiece that is only valid as long as the instance exists.
value0(const StringPiece compoundValue) const683     StringPiece value0(const StringPiece compoundValue) const {
684         return compound0_ ? compoundValue : value0_.toStringPiece();
685     }
686 
687     // Returns a StringPiece that is only valid as long as the instance exists.
value1(const StringPiece compoundValue) const688     StringPiece value1(const StringPiece compoundValue) const {
689         return compound1_ ? compoundValue : value1_.toStringPiece();
690     }
691 
692     // Returns a char* that is only valid as long as the instance exists.
value0(const char * compoundValue) const693     const char *value0(const char *compoundValue) const {
694         return compound0_ ? compoundValue : value0_.data();
695     }
696 
697     // Returns a char* that is only valid as long as the instance exists.
value1(const char * compoundValue) const698     const char *value1(const char *compoundValue) const {
699         return compound1_ ? compoundValue : value1_.data();
700     }
701 
702   private:
703     UErrorCode status = U_ZERO_ERROR;
704 
705     // Holds strings referred to by value0 and value1;
706     bool compound0_ = false, compound1_ = false;
707     CharString value0_, value1_;
708 };
709 
710 // TODO(icu-units#28): test somehow? Associate with an ICU ticket for adding
711 // testsuite support for testing with synthetic data?
712 /**
713  * Loads and returns the value in rules that look like these:
714  *
715  * <deriveCompound feature="gender" structure="per" value="0"/>
716  * <deriveCompound feature="gender" structure="times" value="1"/>
717  *
718  * Currently a fake example, but spec compliant:
719  * <deriveCompound feature="gender" structure="power" value="feminine"/>
720  *
721  * NOTE: If U_FAILURE(status), returns an empty string.
722  */
723 UnicodeString
getDeriveCompoundRule(Locale locale,const char * feature,const char * structure,UErrorCode & status)724 getDeriveCompoundRule(Locale locale, const char *feature, const char *structure, UErrorCode &status) {
725     StackUResourceBundle derivationsBundle, stackBundle;
726     ures_openDirectFillIn(derivationsBundle.getAlias(), nullptr, "grammaticalFeatures", &status);
727     ures_getByKey(derivationsBundle.getAlias(), "grammaticalData", derivationsBundle.getAlias(),
728                   &status);
729     ures_getByKey(derivationsBundle.getAlias(), "derivations", derivationsBundle.getAlias(), &status);
730     // TODO: use standard normal locale resolution algorithms rather than just grabbing language:
731     ures_getByKey(derivationsBundle.getAlias(), locale.getLanguage(), stackBundle.getAlias(), &status);
732     // TODO:
733     // - code currently assumes if the locale exists, the rules are there -
734     //   instead of falling back to root when the requested rule is missing.
735     // - investigate ures.h functions, see if one that uses res_findResource()
736     //   might be better (or use res_findResource directly), or maybe help
737     //   improve ures documentation to guide function selection?
738     if (status == U_MISSING_RESOURCE_ERROR) {
739         status = U_ZERO_ERROR;
740         ures_getByKey(derivationsBundle.getAlias(), "root", stackBundle.getAlias(), &status);
741     }
742     ures_getByKey(stackBundle.getAlias(), "compound", stackBundle.getAlias(), &status);
743     ures_getByKey(stackBundle.getAlias(), feature, stackBundle.getAlias(), &status);
744     UnicodeString uVal = ures_getUnicodeStringByKey(stackBundle.getAlias(), structure, &status);
745     if (U_FAILURE(status)) {
746         return {};
747     }
748     U_ASSERT(!uVal.isBogus());
749     return uVal;
750 }
751 
752 // Returns the gender string for structures following these rules:
753 //
754 // <deriveCompound feature="gender" structure="per" value="0"/>
755 // <deriveCompound feature="gender" structure="times" value="1"/>
756 //
757 // Fake example:
758 // <deriveCompound feature="gender" structure="power" value="feminine"/>
759 //
760 // data0 and data1 should be pattern arrays (UnicodeString[ARRAY_SIZE]) that
761 // correspond to value="0" and value="1".
762 //
763 // Pass a nullptr to data1 if the structure has no concept of value="1" (e.g.
764 // "prefix" doesn't).
getDerivedGender(Locale locale,const char * structure,UnicodeString * data0,UnicodeString * data1,UErrorCode & status)765 UnicodeString getDerivedGender(Locale locale,
766                                const char *structure,
767                                UnicodeString *data0,
768                                UnicodeString *data1,
769                                UErrorCode &status) {
770     UnicodeString val = getDeriveCompoundRule(locale, "gender", structure, status);
771     if (val.length() == 1) {
772         switch (val[0]) {
773         case u'0':
774             return data0[GENDER_INDEX];
775         case u'1':
776             if (data1 == nullptr) {
777                 return {};
778             }
779             return data1[GENDER_INDEX];
780         }
781     }
782     return val;
783 }
784 
785 ////////////////////////
786 /// END DATA LOADING ///
787 ////////////////////////
788 
789 // TODO: promote this somewhere? It's based on patternprops.cpp' trimWhitespace
trimSpaceChars(const char16_t * s,int32_t & length)790 const char16_t *trimSpaceChars(const char16_t *s, int32_t &length) {
791     if (length <= 0 || (!u_isJavaSpaceChar(s[0]) && !u_isJavaSpaceChar(s[length - 1]))) {
792         return s;
793     }
794     int32_t start = 0;
795     int32_t limit = length;
796     while (start < limit && u_isJavaSpaceChar(s[start])) {
797         ++start;
798     }
799     if (start < limit) {
800         // There is non-white space at start; we will not move limit below that,
801         // so we need not test start<limit in the loop.
802         while (u_isJavaSpaceChar(s[limit - 1])) {
803             --limit;
804         }
805     }
806     length = limit - start;
807     return s + start;
808 }
809 
810 /**
811  * Calculates the gender of an arbitrary unit: this is the *second*
812  * implementation of an algorithm to do this:
813  *
814  * Gender is also calculated in "processPatternTimes": that code path is "bottom
815  * up", loading the gender for every component of a compound unit (at the same
816  * time as loading the Long Names formatting patterns), even if the gender is
817  * unneeded, then combining the single units' genders into the compound unit's
818  * gender, according to the rules. This algorithm does a lazier "top-down"
819  * evaluation, starting with the compound unit, calculating which single unit's
820  * gender is needed by breaking it down according to the rules, and then loading
821  * only the gender of the one single unit who's gender is needed.
822  *
823  * For future refactorings:
824  * 1. we could drop processPatternTimes' gender calculation and just call this
825  *    function: for UNUM_UNIT_WIDTH_FULL_NAME, the unit gender is in the very
826  *    same table as the formatting patterns, so loading it then may be
827  *    efficient. For other unit widths however, it needs to be explicitly looked
828  *    up anyway.
829  * 2. alternatively, if CLDR is providing all the genders we need such that we
830  *    don't need to calculate them in ICU anymore, we could drop this function
831  *    and keep only processPatternTimes' calculation. (And optimise it a bit?)
832  *
833  * @param locale The desired locale.
834  * @param unit The measure unit to calculate the gender for.
835  * @return The gender string for the unit, or an empty string if unknown or
836  *     ungendered.
837  */
calculateGenderForUnit(const Locale & locale,const MeasureUnit & unit,UErrorCode & status)838 UnicodeString calculateGenderForUnit(const Locale &locale, const MeasureUnit &unit, UErrorCode &status) {
839     MeasureUnitImpl impl;
840     const MeasureUnitImpl& mui = MeasureUnitImpl::forMeasureUnit(unit, impl, status);
841     int32_t singleUnitIndex = 0;
842     if (mui.complexity == UMEASURE_UNIT_COMPOUND) {
843         int32_t startSlice = 0;
844         // inclusive
845         int32_t endSlice = mui.singleUnits.length()-1;
846         U_ASSERT(endSlice > 0); // Else it would not be COMPOUND
847         if (mui.singleUnits[endSlice]->dimensionality < 0) {
848             // We have a -per- construct
849             UnicodeString perRule = getDeriveCompoundRule(locale, "gender", "per", status);
850             if (perRule.length() != 1) {
851                 // Fixed gender for -per- units
852                 return perRule;
853             }
854             if (perRule[0] == u'1') {
855                 // Find the start of the denominator. We already know there is one.
856                 while (mui.singleUnits[startSlice]->dimensionality >= 0) {
857                     startSlice++;
858                 }
859             } else {
860                 // Find the end of the numerator
861                 while (endSlice >= 0 && mui.singleUnits[endSlice]->dimensionality < 0) {
862                     endSlice--;
863                 }
864                 if (endSlice < 0) {
865                     // We have only a denominator, e.g. "per-second".
866                     // TODO(icu-units#28): find out what gender to use in the
867                     // absence of a first value - mentioned in CLDR-14253.
868                     return {};
869                 }
870             }
871         }
872         if (endSlice > startSlice) {
873             // We have a -times- construct
874             UnicodeString timesRule = getDeriveCompoundRule(locale, "gender", "times", status);
875             if (timesRule.length() != 1) {
876                 // Fixed gender for -times- units
877                 return timesRule;
878             }
879             if (timesRule[0] == u'0') {
880                 endSlice = startSlice;
881             } else {
882                 // We assume timesRule[0] == u'1'
883                 startSlice = endSlice;
884             }
885         }
886         U_ASSERT(startSlice == endSlice);
887         singleUnitIndex = startSlice;
888     } else if (mui.complexity == UMEASURE_UNIT_MIXED) {
889         status = U_INTERNAL_PROGRAM_ERROR;
890         return {};
891     } else {
892         U_ASSERT(mui.complexity == UMEASURE_UNIT_SINGLE);
893         U_ASSERT(mui.singleUnits.length() == 1);
894     }
895 
896     // Now we know which singleUnit's gender we want
897     const SingleUnitImpl *singleUnit = mui.singleUnits[singleUnitIndex];
898     // Check for any power-prefix gender override:
899     if (std::abs(singleUnit->dimensionality) != 1) {
900         UnicodeString powerRule = getDeriveCompoundRule(locale, "gender", "power", status);
901         if (powerRule.length() != 1) {
902             // Fixed gender for -powN- units
903             return powerRule;
904         }
905         // powerRule[0] == u'0'; u'1' not currently in spec.
906     }
907     // Check for any SI and binary prefix gender override:
908     if (std::abs(singleUnit->dimensionality) != 1) {
909         UnicodeString prefixRule = getDeriveCompoundRule(locale, "gender", "prefix", status);
910         if (prefixRule.length() != 1) {
911             // Fixed gender for -powN- units
912             return prefixRule;
913         }
914         // prefixRule[0] == u'0'; u'1' not currently in spec.
915     }
916     // Now we've boiled it down to the gender of one simple unit identifier:
917     return getGenderForBuiltin(locale, MeasureUnit::forIdentifier(singleUnit->getSimpleUnitID(), status),
918                                status);
919 }
920 
maybeCalculateGender(const Locale & locale,const MeasureUnit & unitRef,UnicodeString * outArray,UErrorCode & status)921 void maybeCalculateGender(const Locale &locale,
922                           const MeasureUnit &unitRef,
923                           UnicodeString *outArray,
924                           UErrorCode &status) {
925     if (outArray[GENDER_INDEX].isBogus()) {
926         UnicodeString meterGender = getGenderForBuiltin(locale, MeasureUnit::getMeter(), status);
927         if (meterGender.isEmpty()) {
928             // No gender for meter: assume ungendered language
929             return;
930         }
931         // We have a gendered language, but are lacking gender for unitRef.
932         outArray[GENDER_INDEX] = calculateGenderForUnit(locale, unitRef, status);
933     }
934 }
935 
936 } // namespace
937 
forMeasureUnit(const Locale & loc,const MeasureUnit & unitRef,const UNumberUnitWidth & width,const char * unitDisplayCase,const PluralRules * rules,const MicroPropsGenerator * parent,LongNameHandler * fillIn,UErrorCode & status)938 void LongNameHandler::forMeasureUnit(const Locale &loc,
939                                      const MeasureUnit &unitRef,
940                                      const UNumberUnitWidth &width,
941                                      const char *unitDisplayCase,
942                                      const PluralRules *rules,
943                                      const MicroPropsGenerator *parent,
944                                      LongNameHandler *fillIn,
945                                      UErrorCode &status) {
946     // From https://unicode.org/reports/tr35/tr35-general.html#compound-units -
947     // Points 1 and 2 are mostly handled by MeasureUnit:
948     //
949     // 1. If the unitId is empty or invalid, fail
950     // 2. Put the unitId into normalized order
951     U_ASSERT(fillIn != nullptr);
952 
953     if (uprv_strcmp(unitRef.getType(), "") != 0) {
954         // Handling built-in units:
955         //
956         // 3. Set result to be getValue(unitId with length, pluralCategory, caseVariant)
957         //    - If result is not empty, return it
958         UnicodeString simpleFormats[ARRAY_LENGTH];
959         getMeasureData(loc, unitRef, width, unitDisplayCase, simpleFormats, status);
960         maybeCalculateGender(loc, unitRef, simpleFormats, status);
961         if (U_FAILURE(status)) {
962             return;
963         }
964         fillIn->rules = rules;
965         fillIn->parent = parent;
966         fillIn->simpleFormatsToModifiers(simpleFormats,
967                                          {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status);
968         if (!simpleFormats[GENDER_INDEX].isBogus()) {
969             fillIn->gender = getGenderString(simpleFormats[GENDER_INDEX], status);
970         }
971         return;
972 
973         // TODO(icu-units#145): figure out why this causes a failure in
974         // format/MeasureFormatTest/TestIndividualPluralFallback and other
975         // tests, when it should have been an alternative for the lines above:
976 
977         // forArbitraryUnit(loc, unitRef, width, unitDisplayCase, fillIn, status);
978         // fillIn->rules = rules;
979         // fillIn->parent = parent;
980         // return;
981     } else {
982         // Check if it is a MeasureUnit this constructor handles: this
983         // constructor does not handle mixed units
984         U_ASSERT(unitRef.getComplexity(status) != UMEASURE_UNIT_MIXED);
985         forArbitraryUnit(loc, unitRef, width, unitDisplayCase, fillIn, status);
986         fillIn->rules = rules;
987         fillIn->parent = parent;
988         return;
989     }
990 }
991 
forArbitraryUnit(const Locale & loc,const MeasureUnit & unitRef,const UNumberUnitWidth & width,const char * unitDisplayCase,LongNameHandler * fillIn,UErrorCode & status)992 void LongNameHandler::forArbitraryUnit(const Locale &loc,
993                                        const MeasureUnit &unitRef,
994                                        const UNumberUnitWidth &width,
995                                        const char *unitDisplayCase,
996                                        LongNameHandler *fillIn,
997                                        UErrorCode &status) {
998     if (U_FAILURE(status)) {
999         return;
1000     }
1001     if (fillIn == nullptr) {
1002         status = U_INTERNAL_PROGRAM_ERROR;
1003         return;
1004     }
1005 
1006     // Numbered list items are from the algorithms at
1007     // https://unicode.org/reports/tr35/tr35-general.html#compound-units:
1008     //
1009     // 4. Divide the unitId into numerator (the part before the "-per-") and
1010     //    denominator (the part after the "-per-). If both are empty, fail
1011     MeasureUnitImpl unit;
1012     MeasureUnitImpl perUnit;
1013     {
1014         MeasureUnitImpl fullUnit = MeasureUnitImpl::forMeasureUnitMaybeCopy(unitRef, status);
1015         if (U_FAILURE(status)) {
1016             return;
1017         }
1018         for (int32_t i = 0; i < fullUnit.singleUnits.length(); i++) {
1019             SingleUnitImpl *subUnit = fullUnit.singleUnits[i];
1020             if (subUnit->dimensionality > 0) {
1021                 unit.appendSingleUnit(*subUnit, status);
1022             } else {
1023                 subUnit->dimensionality *= -1;
1024                 perUnit.appendSingleUnit(*subUnit, status);
1025             }
1026         }
1027     }
1028 
1029     // TODO(icu-units#28): check placeholder logic, see if it needs to be
1030     // present here instead of only in processPatternTimes:
1031     //
1032     // 5. Set both globalPlaceholder and globalPlaceholderPosition to be empty
1033 
1034     DerivedComponents derivedPerCases(loc, "case", "per");
1035 
1036     // 6. numeratorUnitString
1037     UnicodeString numeratorUnitData[ARRAY_LENGTH];
1038     processPatternTimes(std::move(unit), loc, width, derivedPerCases.value0(unitDisplayCase),
1039                         numeratorUnitData, status);
1040 
1041     // 7. denominatorUnitString
1042     UnicodeString denominatorUnitData[ARRAY_LENGTH];
1043     processPatternTimes(std::move(perUnit), loc, width, derivedPerCases.value1(unitDisplayCase),
1044                         denominatorUnitData, status);
1045 
1046     // TODO(icu-units#139):
1047     // - implement DerivedComponents for "plural/times" and "plural/power":
1048     //   French has different rules, we'll be producing the wrong results
1049     //   currently. (Prove via tests!)
1050     // - implement DerivedComponents for "plural/per", "plural/prefix",
1051     //   "case/times", "case/power", and "case/prefix" - although they're
1052     //   currently hardcoded. Languages with different rules are surely on the
1053     //   way.
1054     //
1055     // Currently we only use "case/per", "plural/times", "case/times", and
1056     // "case/power".
1057     //
1058     // This may have impact on multiSimpleFormatsToModifiers(...) below too?
1059     // These rules are currently (ICU 69) all the same and hard-coded below.
1060     UnicodeString perUnitPattern;
1061     if (!denominatorUnitData[PER_INDEX].isBogus()) {
1062         // If we have no denominator, we obtain the empty string:
1063         perUnitPattern = denominatorUnitData[PER_INDEX];
1064     } else {
1065         // 8. Set perPattern to be getValue([per], locale, length)
1066         UnicodeString rawPerUnitFormat = getCompoundValue("per", loc, width, status);
1067         // rawPerUnitFormat is something like "{0} per {1}"; we need to substitute in the secondary unit.
1068         SimpleFormatter perPatternFormatter(rawPerUnitFormat, 2, 2, status);
1069         if (U_FAILURE(status)) {
1070             return;
1071         }
1072         // Plural and placeholder handling for 7. denominatorUnitString:
1073         // TODO(icu-units#139): hardcoded:
1074         // <deriveComponent feature="plural" structure="per" value0="compound" value1="one"/>
1075         UnicodeString denominatorFormat =
1076             getWithPlural(denominatorUnitData, StandardPlural::Form::ONE, status);
1077         // Some "one" pattern may not contain "{0}". For example in "ar" or "ne" locale.
1078         SimpleFormatter denominatorFormatter(denominatorFormat, 0, 1, status);
1079         if (U_FAILURE(status)) {
1080             return;
1081         }
1082         UnicodeString denominatorPattern = denominatorFormatter.getTextWithNoArguments();
1083         int32_t trimmedLen = denominatorPattern.length();
1084         const char16_t *trimmed = trimSpaceChars(denominatorPattern.getBuffer(), trimmedLen);
1085         UnicodeString denominatorString(false, trimmed, trimmedLen);
1086         // 9. If the denominatorString is empty, set result to
1087         //    [numeratorString], otherwise set result to format(perPattern,
1088         //    numeratorString, denominatorString)
1089         //
1090         // TODO(icu-units#28): Why does UnicodeString need to be explicit in the
1091         // following line?
1092         perPatternFormatter.format(UnicodeString(u"{0}"), denominatorString, perUnitPattern, status);
1093         if (U_FAILURE(status)) {
1094             return;
1095         }
1096     }
1097     if (perUnitPattern.length() == 0) {
1098         fillIn->simpleFormatsToModifiers(numeratorUnitData,
1099                                          {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status);
1100     } else {
1101         fillIn->multiSimpleFormatsToModifiers(numeratorUnitData, perUnitPattern,
1102                                               {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status);
1103     }
1104 
1105     // Gender
1106     //
1107     // TODO(icu-units#28): find out what gender to use in the absence of a first
1108     // value - e.g. what's the gender of "per-second"? Mentioned in CLDR-14253.
1109     //
1110     // gender/per deriveCompound rules don't say:
1111     // <deriveCompound feature="gender" structure="per" value="0"/> <!-- gender(gram-per-meter) ←  gender(gram) -->
1112     fillIn->gender = getGenderString(
1113         getDerivedGender(loc, "per", numeratorUnitData, denominatorUnitData, status), status);
1114 }
1115 
processPatternTimes(MeasureUnitImpl && productUnit,Locale loc,const UNumberUnitWidth & width,const char * caseVariant,UnicodeString * outArray,UErrorCode & status)1116 void LongNameHandler::processPatternTimes(MeasureUnitImpl &&productUnit,
1117                                           Locale loc,
1118                                           const UNumberUnitWidth &width,
1119                                           const char *caseVariant,
1120                                           UnicodeString *outArray,
1121                                           UErrorCode &status) {
1122     if (U_FAILURE(status)) {
1123         return;
1124     }
1125     if (productUnit.complexity == UMEASURE_UNIT_MIXED) {
1126         // These are handled by MixedUnitLongNameHandler
1127         status = U_UNSUPPORTED_ERROR;
1128         return;
1129     }
1130 
1131 #if U_DEBUG
1132     for (int32_t pluralIndex = 0; pluralIndex < ARRAY_LENGTH; pluralIndex++) {
1133         U_ASSERT(outArray[pluralIndex].length() == 0);
1134         U_ASSERT(!outArray[pluralIndex].isBogus());
1135     }
1136 #endif
1137 
1138     if (productUnit.identifier.isEmpty()) {
1139         // TODO(icu-units#28): consider when serialize should be called.
1140         // identifier might also be empty for MeasureUnit().
1141         productUnit.serialize(status);
1142     }
1143     if (U_FAILURE(status)) {
1144         return;
1145     }
1146     if (productUnit.identifier.length() == 0) {
1147         // MeasureUnit(): no units: return empty strings.
1148         return;
1149     }
1150 
1151     MeasureUnit builtinUnit;
1152     if (MeasureUnit::findBySubType(productUnit.identifier.toStringPiece(), &builtinUnit)) {
1153         // TODO(icu-units#145): spec doesn't cover builtin-per-builtin, it
1154         // breaks them all down. Do we want to drop this?
1155         // - findBySubType isn't super efficient, if we skip it and go to basic
1156         //   singles, we don't have to construct MeasureUnit's anymore.
1157         // - Check all the existing unit tests that fail without this: is it due
1158         //   to incorrect fallback via getMeasureData?
1159         // - Do those unit tests cover this code path representatively?
1160         if (builtinUnit != MeasureUnit()) {
1161             getMeasureData(loc, builtinUnit, width, caseVariant, outArray, status);
1162             maybeCalculateGender(loc, builtinUnit, outArray, status);
1163         }
1164         return;
1165     }
1166 
1167     // 2. Set timesPattern to be getValue(times, locale, length)
1168     UnicodeString timesPattern = getCompoundValue("times", loc, width, status);
1169     SimpleFormatter timesPatternFormatter(timesPattern, 2, 2, status);
1170     if (U_FAILURE(status)) {
1171         return;
1172     }
1173 
1174     PlaceholderPosition globalPlaceholder[ARRAY_LENGTH];
1175     char16_t globalJoinerChar = 0;
1176     // Numbered list items are from the algorithms at
1177     // https://unicode.org/reports/tr35/tr35-general.html#compound-units:
1178     //
1179     // pattern(...) point 5:
1180     // - Set both globalPlaceholder and globalPlaceholderPosition to be empty
1181     //
1182     // 3. Set result to be empty
1183     for (int32_t pluralIndex = 0; pluralIndex < ARRAY_LENGTH; pluralIndex++) {
1184         // Initial state: empty string pattern, via all falling back to OTHER:
1185         if (pluralIndex == StandardPlural::Form::OTHER) {
1186             outArray[pluralIndex].remove();
1187         } else {
1188             outArray[pluralIndex].setToBogus();
1189         }
1190         globalPlaceholder[pluralIndex] = PH_EMPTY;
1191     }
1192 
1193     // Empty string represents "compound" (propagate the plural form).
1194     const char *pluralCategory = "";
1195     DerivedComponents derivedTimesPlurals(loc, "plural", "times");
1196     DerivedComponents derivedTimesCases(loc, "case", "times");
1197     DerivedComponents derivedPowerCases(loc, "case", "power");
1198 
1199     // 4. For each single_unit in product_unit
1200     for (int32_t singleUnitIndex = 0; singleUnitIndex < productUnit.singleUnits.length();
1201          singleUnitIndex++) {
1202         SingleUnitImpl *singleUnit = productUnit.singleUnits[singleUnitIndex];
1203         const char *singlePluralCategory;
1204         const char *singleCaseVariant;
1205         // TODO(icu-units#28): ensure we have unit tests that change/fail if we
1206         // assign incorrect case variants here:
1207         if (singleUnitIndex < productUnit.singleUnits.length() - 1) {
1208             // 4.1. If hasMultiple
1209             singlePluralCategory = derivedTimesPlurals.value0(pluralCategory);
1210             singleCaseVariant = derivedTimesCases.value0(caseVariant);
1211             pluralCategory = derivedTimesPlurals.value1(pluralCategory);
1212             caseVariant = derivedTimesCases.value1(caseVariant);
1213         } else {
1214             singlePluralCategory = derivedTimesPlurals.value1(pluralCategory);
1215             singleCaseVariant = derivedTimesCases.value1(caseVariant);
1216         }
1217 
1218         // 4.2. Get the gender of that single_unit
1219         MeasureUnit simpleUnit;
1220         if (!MeasureUnit::findBySubType(singleUnit->getSimpleUnitID(), &simpleUnit)) {
1221             // Ideally all simple units should be known, but they're not:
1222             // 100-kilometer is internally treated as a simple unit, but it is
1223             // not a built-in unit and does not have formatting data in CLDR 39.
1224             //
1225             // TODO(icu-units#28): test (desirable) invariants in unit tests.
1226             status = U_UNSUPPORTED_ERROR;
1227             return;
1228         }
1229         const char *gender = getGenderString(getGenderForBuiltin(loc, simpleUnit, status), status);
1230 
1231         // 4.3. If singleUnit starts with a dimensionality_prefix, such as 'square-'
1232         U_ASSERT(singleUnit->dimensionality > 0);
1233         int32_t dimensionality = singleUnit->dimensionality;
1234         UnicodeString dimensionalityPrefixPatterns[ARRAY_LENGTH];
1235         if (dimensionality != 1) {
1236             // 4.3.1. set dimensionalityPrefixPattern to be
1237             //   getValue(that dimensionality_prefix, locale, length, singlePluralCategory, singleCaseVariant, gender),
1238             //   such as "{0} kwadratowym"
1239             CharString dimensionalityKey("compound/power", status);
1240             dimensionalityKey.appendNumber(dimensionality, status);
1241             getInflectedMeasureData(dimensionalityKey.toStringPiece(), loc, width, gender,
1242                                     singleCaseVariant, dimensionalityPrefixPatterns, status);
1243             if (U_FAILURE(status)) {
1244                 // At the time of writing, only pow2 and pow3 are supported.
1245                 // Attempting to format other powers results in a
1246                 // U_RESOURCE_TYPE_MISMATCH. We convert the error if we
1247                 // understand it:
1248                 if (status == U_RESOURCE_TYPE_MISMATCH && dimensionality > 3) {
1249                     status = U_UNSUPPORTED_ERROR;
1250                 }
1251                 return;
1252             }
1253 
1254             // TODO(icu-units#139):
1255             // 4.3.2. set singlePluralCategory to be power0(singlePluralCategory)
1256 
1257             // 4.3.3. set singleCaseVariant to be power0(singleCaseVariant)
1258             singleCaseVariant = derivedPowerCases.value0(singleCaseVariant);
1259             // 4.3.4. remove the dimensionality_prefix from singleUnit
1260             singleUnit->dimensionality = 1;
1261         }
1262 
1263         // 4.4. if singleUnit starts with an si_prefix, such as 'centi'
1264         UMeasurePrefix prefix = singleUnit->unitPrefix;
1265         UnicodeString prefixPattern;
1266         if (prefix != UMEASURE_PREFIX_ONE) {
1267             // 4.4.1. set siPrefixPattern to be getValue(that si_prefix, locale,
1268             //        length), such as "centy{0}"
1269             CharString prefixKey;
1270             // prefixKey looks like "1024p3" or "10p-2":
1271             prefixKey.appendNumber(umeas_getPrefixBase(prefix), status);
1272             prefixKey.append('p', status);
1273             prefixKey.appendNumber(umeas_getPrefixPower(prefix), status);
1274             // Contains a pattern like "centy{0}".
1275             prefixPattern = getCompoundValue(prefixKey.toStringPiece(), loc, width, status);
1276 
1277             // 4.4.2. set singlePluralCategory to be prefix0(singlePluralCategory)
1278             //
1279             // TODO(icu-units#139): that refers to these rules:
1280             // <deriveComponent feature="plural" structure="prefix" value0="one" value1="compound"/>
1281             // though I'm not sure what other value they might end up having.
1282             //
1283             // 4.4.3. set singleCaseVariant to be prefix0(singleCaseVariant)
1284             //
1285             // TODO(icu-units#139): that refers to:
1286             // <deriveComponent feature="case" structure="prefix" value0="nominative"
1287             // value1="compound"/> but the prefix (value0) doesn't have case, the rest simply
1288             // propagates.
1289 
1290             // 4.4.4. remove the si_prefix from singleUnit
1291             singleUnit->unitPrefix = UMEASURE_PREFIX_ONE;
1292         }
1293 
1294         // 4.5. Set corePattern to be the getValue(singleUnit, locale, length,
1295         //      singlePluralCategory, singleCaseVariant), such as "{0} metrem"
1296         UnicodeString singleUnitArray[ARRAY_LENGTH];
1297         // At this point we are left with a Simple Unit:
1298         U_ASSERT(uprv_strcmp(singleUnit->build(status).getIdentifier(), singleUnit->getSimpleUnitID()) ==
1299                  0);
1300         getMeasureData(loc, singleUnit->build(status), width, singleCaseVariant, singleUnitArray,
1301                        status);
1302         if (U_FAILURE(status)) {
1303             // Shouldn't happen if we have data for all single units
1304             return;
1305         }
1306 
1307         // Calculate output gender
1308         if (!singleUnitArray[GENDER_INDEX].isBogus()) {
1309             U_ASSERT(!singleUnitArray[GENDER_INDEX].isEmpty());
1310             UnicodeString uVal;
1311 
1312             if (prefix != UMEASURE_PREFIX_ONE) {
1313                 singleUnitArray[GENDER_INDEX] =
1314                     getDerivedGender(loc, "prefix", singleUnitArray, nullptr, status);
1315             }
1316 
1317             if (dimensionality != 1) {
1318                 singleUnitArray[GENDER_INDEX] =
1319                     getDerivedGender(loc, "power", singleUnitArray, nullptr, status);
1320             }
1321 
1322             UnicodeString timesGenderRule = getDeriveCompoundRule(loc, "gender", "times", status);
1323             if (timesGenderRule.length() == 1) {
1324                 switch (timesGenderRule[0]) {
1325                 case u'0':
1326                     if (singleUnitIndex == 0) {
1327                         U_ASSERT(outArray[GENDER_INDEX].isBogus());
1328                         outArray[GENDER_INDEX] = singleUnitArray[GENDER_INDEX];
1329                     }
1330                     break;
1331                 case u'1':
1332                     if (singleUnitIndex == productUnit.singleUnits.length() - 1) {
1333                         U_ASSERT(outArray[GENDER_INDEX].isBogus());
1334                         outArray[GENDER_INDEX] = singleUnitArray[GENDER_INDEX];
1335                     }
1336                 }
1337             } else {
1338                 if (outArray[GENDER_INDEX].isBogus()) {
1339                     outArray[GENDER_INDEX] = timesGenderRule;
1340                 }
1341             }
1342         }
1343 
1344         // Calculate resulting patterns for each plural form
1345         for (int32_t pluralIndex = 0; pluralIndex < StandardPlural::Form::COUNT; pluralIndex++) {
1346             StandardPlural::Form plural = static_cast<StandardPlural::Form>(pluralIndex);
1347 
1348             // singleUnitArray[pluralIndex] looks something like "{0} Meter"
1349             if (outArray[pluralIndex].isBogus()) {
1350                 if (singleUnitArray[pluralIndex].isBogus()) {
1351                     // Let the usual plural fallback mechanism take care of this
1352                     // plural form
1353                     continue;
1354                 } else {
1355                     // Since our singleUnit can have a plural form that outArray
1356                     // doesn't yet have (relying on fallback to OTHER), we start
1357                     // by grabbing it with the normal plural fallback mechanism
1358                     outArray[pluralIndex] = getWithPlural(outArray, plural, status);
1359                     if (U_FAILURE(status)) {
1360                         return;
1361                     }
1362                 }
1363             }
1364 
1365             if (uprv_strcmp(singlePluralCategory, "") != 0) {
1366                 plural = static_cast<StandardPlural::Form>(getIndex(singlePluralCategory, status));
1367             }
1368 
1369             // 4.6. Extract(corePattern, coreUnit, placeholder, placeholderPosition) from that pattern.
1370             UnicodeString coreUnit;
1371             PlaceholderPosition placeholderPosition;
1372             char16_t joinerChar;
1373             extractCorePattern(getWithPlural(singleUnitArray, plural, status), coreUnit,
1374                                placeholderPosition, joinerChar);
1375 
1376             // 4.7 If the position is middle, then fail
1377             if (placeholderPosition == PH_MIDDLE) {
1378                 status = U_UNSUPPORTED_ERROR;
1379                 return;
1380             }
1381 
1382             // 4.8. If globalPlaceholder is empty
1383             if (globalPlaceholder[pluralIndex] == PH_EMPTY) {
1384                 globalPlaceholder[pluralIndex] = placeholderPosition;
1385                 globalJoinerChar = joinerChar;
1386             } else {
1387                 // Expect all units involved to have the same placeholder position
1388                 U_ASSERT(globalPlaceholder[pluralIndex] == placeholderPosition);
1389                 // TODO(icu-units#28): Do we want to add a unit test that checks
1390                 // for consistent joiner chars? Probably not, given how
1391                 // inconsistent they are. File a CLDR ticket with examples?
1392             }
1393             // Now coreUnit would be just "Meter"
1394 
1395             // 4.9. If siPrefixPattern is not empty
1396             if (prefix != UMEASURE_PREFIX_ONE) {
1397                 SimpleFormatter prefixCompiled(prefixPattern, 1, 1, status);
1398                 if (U_FAILURE(status)) {
1399                     return;
1400                 }
1401 
1402                 // 4.9.1. Set coreUnit to be the combineLowercasing(locale, length, siPrefixPattern,
1403                 //        coreUnit)
1404                 UnicodeString tmp;
1405                 // combineLowercasing(locale, length, prefixPattern, coreUnit)
1406                 //
1407                 // TODO(icu-units#28): run this only if prefixPattern does not
1408                 // contain space characters - do languages "as", "bn", "hi",
1409                 // "kk", etc have concepts of upper and lower case?:
1410                 if (width == UNUM_UNIT_WIDTH_FULL_NAME) {
1411                     coreUnit.toLower(loc);
1412                 }
1413                 prefixCompiled.format(coreUnit, tmp, status);
1414                 if (U_FAILURE(status)) {
1415                     return;
1416                 }
1417                 coreUnit = tmp;
1418             }
1419 
1420             // 4.10. If dimensionalityPrefixPattern is not empty
1421             if (dimensionality != 1) {
1422                 SimpleFormatter dimensionalityCompiled(
1423                     getWithPlural(dimensionalityPrefixPatterns, plural, status), 1, 1, status);
1424                 if (U_FAILURE(status)) {
1425                     return;
1426                 }
1427 
1428                 // 4.10.1. Set coreUnit to be the combineLowercasing(locale, length,
1429                 //         dimensionalityPrefixPattern, coreUnit)
1430                 UnicodeString tmp;
1431                 // combineLowercasing(locale, length, prefixPattern, coreUnit)
1432                 //
1433                 // TODO(icu-units#28): run this only if prefixPattern does not
1434                 // contain space characters - do languages "as", "bn", "hi",
1435                 // "kk", etc have concepts of upper and lower case?:
1436                 if (width == UNUM_UNIT_WIDTH_FULL_NAME) {
1437                     coreUnit.toLower(loc);
1438                 }
1439                 dimensionalityCompiled.format(coreUnit, tmp, status);
1440                 if (U_FAILURE(status)) {
1441                     return;
1442                 }
1443                 coreUnit = tmp;
1444             }
1445 
1446             if (outArray[pluralIndex].length() == 0) {
1447                 // 4.11. If the result is empty, set result to be coreUnit
1448                 outArray[pluralIndex] = coreUnit;
1449             } else {
1450                 // 4.12. Otherwise set result to be format(timesPattern, result, coreUnit)
1451                 UnicodeString tmp;
1452                 timesPatternFormatter.format(outArray[pluralIndex], coreUnit, tmp, status);
1453                 outArray[pluralIndex] = tmp;
1454             }
1455         }
1456     }
1457     for (int32_t pluralIndex = 0; pluralIndex < StandardPlural::Form::COUNT; pluralIndex++) {
1458         if (globalPlaceholder[pluralIndex] == PH_BEGINNING) {
1459             UnicodeString tmp;
1460             tmp.append(u"{0}", 3);
1461             if (globalJoinerChar != 0) {
1462                 tmp.append(globalJoinerChar);
1463             }
1464             tmp.append(outArray[pluralIndex]);
1465             outArray[pluralIndex] = tmp;
1466         } else if (globalPlaceholder[pluralIndex] == PH_END) {
1467             if (globalJoinerChar != 0) {
1468                 outArray[pluralIndex].append(globalJoinerChar);
1469             }
1470             outArray[pluralIndex].append(u"{0}", 3);
1471         }
1472     }
1473 }
1474 
getUnitDisplayName(const Locale & loc,const MeasureUnit & unit,UNumberUnitWidth width,UErrorCode & status)1475 UnicodeString LongNameHandler::getUnitDisplayName(
1476         const Locale& loc,
1477         const MeasureUnit& unit,
1478         UNumberUnitWidth width,
1479         UErrorCode& status) {
1480     if (U_FAILURE(status)) {
1481         return ICU_Utility::makeBogusString();
1482     }
1483     UnicodeString simpleFormats[ARRAY_LENGTH];
1484     getMeasureData(loc, unit, width, "", simpleFormats, status);
1485     return simpleFormats[DNAM_INDEX];
1486 }
1487 
getUnitPattern(const Locale & loc,const MeasureUnit & unit,UNumberUnitWidth width,StandardPlural::Form pluralForm,UErrorCode & status)1488 UnicodeString LongNameHandler::getUnitPattern(
1489         const Locale& loc,
1490         const MeasureUnit& unit,
1491         UNumberUnitWidth width,
1492         StandardPlural::Form pluralForm,
1493         UErrorCode& status) {
1494     if (U_FAILURE(status)) {
1495         return ICU_Utility::makeBogusString();
1496     }
1497     UnicodeString simpleFormats[ARRAY_LENGTH];
1498     getMeasureData(loc, unit, width, "", simpleFormats, status);
1499     // The above already handles fallback from other widths to short
1500     if (U_FAILURE(status)) {
1501         return ICU_Utility::makeBogusString();
1502     }
1503     // Now handle fallback from other plural forms to OTHER
1504     return (!(simpleFormats[pluralForm]).isBogus())? simpleFormats[pluralForm]:
1505             simpleFormats[StandardPlural::Form::OTHER];
1506 }
1507 
forCurrencyLongNames(const Locale & loc,const CurrencyUnit & currency,const PluralRules * rules,const MicroPropsGenerator * parent,UErrorCode & status)1508 LongNameHandler* LongNameHandler::forCurrencyLongNames(const Locale &loc, const CurrencyUnit &currency,
1509                                                       const PluralRules *rules,
1510                                                       const MicroPropsGenerator *parent,
1511                                                       UErrorCode &status) {
1512     LocalPointer<LongNameHandler> result(new LongNameHandler(rules, parent), status);
1513     if (U_FAILURE(status)) {
1514         return nullptr;
1515     }
1516     UnicodeString simpleFormats[ARRAY_LENGTH];
1517     getCurrencyLongNameData(loc, currency, simpleFormats, status);
1518     if (U_FAILURE(status)) { return nullptr; }
1519     result->simpleFormatsToModifiers(simpleFormats, {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD}, status);
1520     // TODO(icu-units#28): currency gender?
1521     return result.orphan();
1522 }
1523 
simpleFormatsToModifiers(const UnicodeString * simpleFormats,Field field,UErrorCode & status)1524 void LongNameHandler::simpleFormatsToModifiers(const UnicodeString *simpleFormats, Field field,
1525                                                UErrorCode &status) {
1526     for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) {
1527         StandardPlural::Form plural = static_cast<StandardPlural::Form>(i);
1528         UnicodeString simpleFormat = getWithPlural(simpleFormats, plural, status);
1529         if (U_FAILURE(status)) { return; }
1530         SimpleFormatter compiledFormatter(simpleFormat, 0, 1, status);
1531         if (U_FAILURE(status)) { return; }
1532         fModifiers[i] = SimpleModifier(compiledFormatter, field, false, {this, SIGNUM_POS_ZERO, plural});
1533     }
1534 }
1535 
multiSimpleFormatsToModifiers(const UnicodeString * leadFormats,UnicodeString trailFormat,Field field,UErrorCode & status)1536 void LongNameHandler::multiSimpleFormatsToModifiers(const UnicodeString *leadFormats, UnicodeString trailFormat,
1537                                                     Field field, UErrorCode &status) {
1538     SimpleFormatter trailCompiled(trailFormat, 1, 1, status);
1539     if (U_FAILURE(status)) { return; }
1540     for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) {
1541         StandardPlural::Form plural = static_cast<StandardPlural::Form>(i);
1542         UnicodeString leadFormat = getWithPlural(leadFormats, plural, status);
1543         if (U_FAILURE(status)) { return; }
1544         UnicodeString compoundFormat;
1545         if (leadFormat.length() == 0) {
1546             compoundFormat = trailFormat;
1547         } else {
1548             trailCompiled.format(leadFormat, compoundFormat, status);
1549             if (U_FAILURE(status)) { return; }
1550         }
1551         SimpleFormatter compoundCompiled(compoundFormat, 0, 1, status);
1552         if (U_FAILURE(status)) { return; }
1553         fModifiers[i] = SimpleModifier(compoundCompiled, field, false, {this, SIGNUM_POS_ZERO, plural});
1554     }
1555 }
1556 
processQuantity(DecimalQuantity & quantity,MicroProps & micros,UErrorCode & status) const1557 void LongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps &micros,
1558                                       UErrorCode &status) const {
1559     if (parent != nullptr) {
1560         parent->processQuantity(quantity, micros, status);
1561     }
1562     StandardPlural::Form pluralForm = utils::getPluralSafe(micros.rounder, rules, quantity, status);
1563     micros.modOuter = &fModifiers[pluralForm];
1564     micros.gender = gender;
1565 }
1566 
getModifier(Signum,StandardPlural::Form plural) const1567 const Modifier* LongNameHandler::getModifier(Signum /*signum*/, StandardPlural::Form plural) const {
1568     return &fModifiers[plural];
1569 }
1570 
forMeasureUnit(const Locale & loc,const MeasureUnit & mixedUnit,const UNumberUnitWidth & width,const char * unitDisplayCase,const PluralRules * rules,const MicroPropsGenerator * parent,MixedUnitLongNameHandler * fillIn,UErrorCode & status)1571 void MixedUnitLongNameHandler::forMeasureUnit(const Locale &loc,
1572                                               const MeasureUnit &mixedUnit,
1573                                               const UNumberUnitWidth &width,
1574                                               const char *unitDisplayCase,
1575                                               const PluralRules *rules,
1576                                               const MicroPropsGenerator *parent,
1577                                               MixedUnitLongNameHandler *fillIn,
1578                                               UErrorCode &status) {
1579     U_ASSERT(mixedUnit.getComplexity(status) == UMEASURE_UNIT_MIXED);
1580     U_ASSERT(fillIn != nullptr);
1581     if (U_FAILURE(status)) {
1582         return;
1583     }
1584 
1585     MeasureUnitImpl temp;
1586     const MeasureUnitImpl &impl = MeasureUnitImpl::forMeasureUnit(mixedUnit, temp, status);
1587     // Defensive, for production code:
1588     if (impl.complexity != UMEASURE_UNIT_MIXED) {
1589         // Should be using the normal LongNameHandler
1590         status = U_UNSUPPORTED_ERROR;
1591         return;
1592     }
1593 
1594     fillIn->fMixedUnitCount = impl.singleUnits.length();
1595     fillIn->fMixedUnitData.adoptInstead(new UnicodeString[fillIn->fMixedUnitCount * ARRAY_LENGTH]);
1596     for (int32_t i = 0; i < fillIn->fMixedUnitCount; i++) {
1597         // Grab data for each of the components.
1598         UnicodeString *unitData = &fillIn->fMixedUnitData[i * ARRAY_LENGTH];
1599         // TODO(CLDR-14582): check from the CLDR-14582 ticket whether this
1600         // propagation of unitDisplayCase is correct:
1601         getMeasureData(loc, impl.singleUnits[i]->build(status), width, unitDisplayCase, unitData,
1602                        status);
1603         // TODO(ICU-21494): if we add support for gender for mixed units, we may
1604         // need maybeCalculateGender() here.
1605     }
1606 
1607     // TODO(icu-units#120): Make sure ICU doesn't output zero-valued
1608     // high-magnitude fields
1609     // * for mixed units count N, produce N listFormatters, one for each subset
1610     //   that might be formatted.
1611     UListFormatterWidth listWidth = ULISTFMT_WIDTH_SHORT;
1612     if (width == UNUM_UNIT_WIDTH_NARROW) {
1613         listWidth = ULISTFMT_WIDTH_NARROW;
1614     } else if (width == UNUM_UNIT_WIDTH_FULL_NAME) {
1615         // This might be the same as SHORT in most languages:
1616         listWidth = ULISTFMT_WIDTH_WIDE;
1617     }
1618     fillIn->fListFormatter.adoptInsteadAndCheckErrorCode(
1619         ListFormatter::createInstance(loc, ULISTFMT_TYPE_UNITS, listWidth, status), status);
1620     // TODO(ICU-21494): grab gender of each unit, calculate the gender
1621     // associated with this list formatter, save it for later.
1622     fillIn->rules = rules;
1623     fillIn->parent = parent;
1624 
1625     // We need a localised NumberFormatter for the numbers of the bigger units
1626     // (providing Arabic numerals, for example).
1627     fillIn->fNumberFormatter = NumberFormatter::withLocale(loc);
1628 }
1629 
processQuantity(DecimalQuantity & quantity,MicroProps & micros,UErrorCode & status) const1630 void MixedUnitLongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps &micros,
1631                                                UErrorCode &status) const {
1632     U_ASSERT(fMixedUnitCount > 1);
1633     if (parent != nullptr) {
1634         parent->processQuantity(quantity, micros, status);
1635     }
1636     micros.modOuter = getMixedUnitModifier(quantity, micros, status);
1637 }
1638 
getMixedUnitModifier(DecimalQuantity & quantity,MicroProps & micros,UErrorCode & status) const1639 const Modifier *MixedUnitLongNameHandler::getMixedUnitModifier(DecimalQuantity &quantity,
1640                                                                MicroProps &micros,
1641                                                                UErrorCode &status) const {
1642     if (micros.mixedMeasuresCount == 0) {
1643         U_ASSERT(micros.mixedMeasuresCount > 0); // Mixed unit: we must have more than one unit value
1644         status = U_UNSUPPORTED_ERROR;
1645         return &micros.helpers.emptyWeakModifier;
1646     }
1647 
1648     // Algorithm:
1649     //
1650     // For the mixed-units measurement of: "3 yard, 1 foot, 2.6 inch", we should
1651     // find "3 yard" and "1 foot" in micros.mixedMeasures.
1652     //
1653     // Obtain long-names with plural forms corresponding to measure values:
1654     //   * {0} yards, {0} foot, {0} inches
1655     //
1656     // Format the integer values appropriately and modify with the format
1657     // strings:
1658     //   - 3 yards, 1 foot
1659     //
1660     // Use ListFormatter to combine, with one placeholder:
1661     //   - 3 yards, 1 foot and {0} inches
1662     //
1663     // Return a SimpleModifier for this pattern, letting the rest of the
1664     // pipeline take care of the remaining inches.
1665 
1666     LocalArray<UnicodeString> outputMeasuresList(new UnicodeString[fMixedUnitCount], status);
1667     if (U_FAILURE(status)) {
1668         return &micros.helpers.emptyWeakModifier;
1669     }
1670 
1671     StandardPlural::Form quantityPlural = StandardPlural::Form::OTHER;
1672     for (int32_t i = 0; i < micros.mixedMeasuresCount; i++) {
1673         DecimalQuantity fdec;
1674 
1675         // If numbers are negative, only the first number needs to have its
1676         // negative sign formatted.
1677         int64_t number = i > 0 ? std::abs(micros.mixedMeasures[i]) : micros.mixedMeasures[i];
1678 
1679         if (micros.indexOfQuantity == i) { // Insert placeholder for `quantity`
1680             // If quantity is not the first value and quantity is negative
1681             if (micros.indexOfQuantity > 0 && quantity.isNegative()) {
1682                 quantity.negate();
1683             }
1684 
1685             StandardPlural::Form quantityPlural =
1686                 utils::getPluralSafe(micros.rounder, rules, quantity, status);
1687             UnicodeString quantityFormatWithPlural =
1688                 getWithPlural(&fMixedUnitData[i * ARRAY_LENGTH], quantityPlural, status);
1689             SimpleFormatter quantityFormatter(quantityFormatWithPlural, 0, 1, status);
1690             quantityFormatter.format(UnicodeString(u"{0}"), outputMeasuresList[i], status);
1691         } else {
1692             fdec.setToLong(number);
1693             StandardPlural::Form pluralForm = utils::getStandardPlural(rules, fdec);
1694             UnicodeString simpleFormat =
1695                 getWithPlural(&fMixedUnitData[i * ARRAY_LENGTH], pluralForm, status);
1696             SimpleFormatter compiledFormatter(simpleFormat, 0, 1, status);
1697             UnicodeString num;
1698             auto appendable = UnicodeStringAppendable(num);
1699 
1700             fNumberFormatter.formatDecimalQuantity(fdec, status).appendTo(appendable, status);
1701             compiledFormatter.format(num, outputMeasuresList[i], status);
1702         }
1703     }
1704 
1705     // TODO(ICU-21494): implement gender for lists of mixed units. Presumably we
1706     // can set micros.gender to the gender associated with the list formatter in
1707     // use below (once we have correct support for that). And then document this
1708     // appropriately? "getMixedUnitModifier" doesn't sound like it would do
1709     // something like this.
1710 
1711     // Combine list into a "premixed" pattern
1712     UnicodeString premixedFormatPattern;
1713     fListFormatter->format(outputMeasuresList.getAlias(), fMixedUnitCount, premixedFormatPattern,
1714                            status);
1715     SimpleFormatter premixedCompiled(premixedFormatPattern, 0, 1, status);
1716     if (U_FAILURE(status)) {
1717         return &micros.helpers.emptyWeakModifier;
1718     }
1719 
1720     micros.helpers.mixedUnitModifier =
1721         SimpleModifier(premixedCompiled, kUndefinedField, false, {this, SIGNUM_POS_ZERO, quantityPlural});
1722     return &micros.helpers.mixedUnitModifier;
1723 }
1724 
getModifier(Signum,StandardPlural::Form) const1725 const Modifier *MixedUnitLongNameHandler::getModifier(Signum /*signum*/,
1726                                                       StandardPlural::Form /*plural*/) const {
1727     // TODO(icu-units#28): investigate this method when investigating where
1728     // ModifierStore::getModifier() gets used. To be sure it remains
1729     // unreachable:
1730     UPRV_UNREACHABLE_EXIT;
1731     return nullptr;
1732 }
1733 
forMeasureUnits(const Locale & loc,const MaybeStackVector<MeasureUnit> & units,const UNumberUnitWidth & width,const char * unitDisplayCase,const PluralRules * rules,const MicroPropsGenerator * parent,UErrorCode & status)1734 LongNameMultiplexer *LongNameMultiplexer::forMeasureUnits(const Locale &loc,
1735                                                           const MaybeStackVector<MeasureUnit> &units,
1736                                                           const UNumberUnitWidth &width,
1737                                                           const char *unitDisplayCase,
1738                                                           const PluralRules *rules,
1739                                                           const MicroPropsGenerator *parent,
1740                                                           UErrorCode &status) {
1741     LocalPointer<LongNameMultiplexer> result(new LongNameMultiplexer(parent), status);
1742     if (U_FAILURE(status)) {
1743         return nullptr;
1744     }
1745     U_ASSERT(units.length() > 0);
1746     if (result->fHandlers.resize(units.length()) == nullptr) {
1747         status = U_MEMORY_ALLOCATION_ERROR;
1748         return nullptr;
1749     }
1750     result->fMeasureUnits.adoptInstead(new MeasureUnit[units.length()]);
1751     for (int32_t i = 0, length = units.length(); i < length; i++) {
1752         const MeasureUnit &unit = *units[i];
1753         result->fMeasureUnits[i] = unit;
1754         if (unit.getComplexity(status) == UMEASURE_UNIT_MIXED) {
1755             MixedUnitLongNameHandler *mlnh = result->fMixedUnitHandlers.createAndCheckErrorCode(status);
1756             MixedUnitLongNameHandler::forMeasureUnit(loc, unit, width, unitDisplayCase, rules, nullptr,
1757                                                      mlnh, status);
1758             result->fHandlers[i] = mlnh;
1759         } else {
1760             LongNameHandler *lnh = result->fLongNameHandlers.createAndCheckErrorCode(status);
1761             LongNameHandler::forMeasureUnit(loc, unit, width, unitDisplayCase, rules, nullptr, lnh, status);
1762             result->fHandlers[i] = lnh;
1763         }
1764         if (U_FAILURE(status)) {
1765             return nullptr;
1766         }
1767     }
1768     return result.orphan();
1769 }
1770 
processQuantity(DecimalQuantity & quantity,MicroProps & micros,UErrorCode & status) const1771 void LongNameMultiplexer::processQuantity(DecimalQuantity &quantity, MicroProps &micros,
1772                                           UErrorCode &status) const {
1773     // We call parent->processQuantity() from the Multiplexer, instead of
1774     // letting LongNameHandler handle it: we don't know which LongNameHandler to
1775     // call until we've called the parent!
1776     fParent->processQuantity(quantity, micros, status);
1777 
1778     // Call the correct LongNameHandler based on outputUnit
1779     for (int i = 0; i < fHandlers.getCapacity(); i++) {
1780         if (fMeasureUnits[i] == micros.outputUnit) {
1781             fHandlers[i]->processQuantity(quantity, micros, status);
1782             return;
1783         }
1784     }
1785     if (U_FAILURE(status)) {
1786         return;
1787     }
1788     // We shouldn't receive any outputUnit for which we haven't already got a
1789     // LongNameHandler:
1790     status = U_INTERNAL_PROGRAM_ERROR;
1791 }
1792 
1793 #endif /* #if !UCONFIG_NO_FORMATTING */
1794