• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2020 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 // Extra functions for MeasureUnit not needed for all clients.
5 // Separate .o file so that it can be removed for modularity.
6 
7 #include "unicode/utypes.h"
8 
9 #if !UCONFIG_NO_FORMATTING
10 
11 // Allow implicit conversion from char16_t* to UnicodeString for this file:
12 // Helpful in toString methods and elsewhere.
13 #define UNISTR_FROM_STRING_EXPLICIT
14 
15 #include "charstr.h"
16 #include "cmemory.h"
17 #include "cstring.h"
18 #include "measunit_impl.h"
19 #include "resource.h"
20 #include "uarrsort.h"
21 #include "uassert.h"
22 #include "ucln_in.h"
23 #include "umutex.h"
24 #include "unicode/bytestrie.h"
25 #include "unicode/bytestriebuilder.h"
26 #include "unicode/localpointer.h"
27 #include "unicode/stringpiece.h"
28 #include "unicode/stringtriebuilder.h"
29 #include "unicode/ures.h"
30 #include "unicode/ustringtrie.h"
31 #include "uresimp.h"
32 #include "util.h"
33 #include <cstdlib>
34 
35 U_NAMESPACE_BEGIN
36 
37 
38 namespace {
39 
40 // TODO: Propose a new error code for this?
41 constexpr UErrorCode kUnitIdentifierSyntaxError = U_ILLEGAL_ARGUMENT_ERROR;
42 
43 // Trie value offset for SI or binary prefixes. This is big enough to ensure we only
44 // insert positive integers into the trie.
45 constexpr int32_t kPrefixOffset = 64;
46 static_assert(kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MIN_BIN > 0,
47               "kPrefixOffset is too small for minimum UMeasurePrefix value");
48 static_assert(kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MIN_SI > 0,
49               "kPrefixOffset is too small for minimum UMeasurePrefix value");
50 
51 // Trie value offset for compound parts, e.g. "-per-", "-", "-and-".
52 constexpr int32_t kCompoundPartOffset = 128;
53 static_assert(kCompoundPartOffset > kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MAX_BIN,
54               "Ambiguous token values: prefix tokens are overlapping with CompoundPart tokens");
55 static_assert(kCompoundPartOffset > kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MAX_SI,
56               "Ambiguous token values: prefix tokens are overlapping with CompoundPart tokens");
57 
58 enum CompoundPart {
59     // Represents "-per-"
60     COMPOUND_PART_PER = kCompoundPartOffset,
61     // Represents "-"
62     COMPOUND_PART_TIMES,
63     // Represents "-and-"
64     COMPOUND_PART_AND,
65 };
66 
// First (and currently only) trie value used for a leading "per-".
constexpr int32_t kInitialCompoundPartOffset = 192;

// Compound parts that are allowed at the very beginning of an identifier.
enum InitialCompoundPart {
    INITIAL_COMPOUND_PART_PER = kInitialCompoundPartOffset,  // "per-"
};
75 
// Power tokens ("square-", "cubic-", "pow2-" ... "pow15-") are stored in the
// trie as (kPowerPartOffset + exponent).
constexpr int32_t kPowerPartOffset = 256;

enum PowerPart {
    POWER_PART_P2 = kPowerPartOffset + 2,
    POWER_PART_P3 = kPowerPartOffset + 3,
    POWER_PART_P4 = kPowerPartOffset + 4,
    POWER_PART_P5 = kPowerPartOffset + 5,
    POWER_PART_P6 = kPowerPartOffset + 6,
    POWER_PART_P7 = kPowerPartOffset + 7,
    POWER_PART_P8 = kPowerPartOffset + 8,
    POWER_PART_P9 = kPowerPartOffset + 9,
    POWER_PART_P10 = kPowerPartOffset + 10,
    POWER_PART_P11 = kPowerPartOffset + 11,
    POWER_PART_P12 = kPowerPartOffset + 12,
    POWER_PART_P13 = kPowerPartOffset + 13,
    POWER_PART_P14 = kPowerPartOffset + 14,
    POWER_PART_P15 = kPowerPartOffset + 15,
};
95 
// Simple units (e.g. "gram", "nautical-mile", "fluid-ounce-imperial") are
// stored in the trie as (kSimpleUnitOffset + index of the simple unit).
constexpr int32_t kSimpleUnitOffset = 512;
99 
100 const struct UnitPrefixStrings {
101     const char* const string;
102     UMeasurePrefix value;
103 } gUnitPrefixStrings[] = {
104     // SI prefixes
105     { "quetta", UMEASURE_PREFIX_QUETTA },
106     { "ronna", UMEASURE_PREFIX_RONNA },
107     { "yotta", UMEASURE_PREFIX_YOTTA },
108     { "zetta", UMEASURE_PREFIX_ZETTA },
109     { "exa", UMEASURE_PREFIX_EXA },
110     { "peta", UMEASURE_PREFIX_PETA },
111     { "tera", UMEASURE_PREFIX_TERA },
112     { "giga", UMEASURE_PREFIX_GIGA },
113     { "mega", UMEASURE_PREFIX_MEGA },
114     { "kilo", UMEASURE_PREFIX_KILO },
115     { "hecto", UMEASURE_PREFIX_HECTO },
116     { "deka", UMEASURE_PREFIX_DEKA },
117     { "deci", UMEASURE_PREFIX_DECI },
118     { "centi", UMEASURE_PREFIX_CENTI },
119     { "milli", UMEASURE_PREFIX_MILLI },
120     { "micro", UMEASURE_PREFIX_MICRO },
121     { "nano", UMEASURE_PREFIX_NANO },
122     { "pico", UMEASURE_PREFIX_PICO },
123     { "femto", UMEASURE_PREFIX_FEMTO },
124     { "atto", UMEASURE_PREFIX_ATTO },
125     { "zepto", UMEASURE_PREFIX_ZEPTO },
126     { "yocto", UMEASURE_PREFIX_YOCTO },
127     { "ronto", UMEASURE_PREFIX_RONTO },
128     { "quecto", UMEASURE_PREFIX_QUECTO },
129     // Binary prefixes
130     { "yobi", UMEASURE_PREFIX_YOBI },
131     { "zebi", UMEASURE_PREFIX_ZEBI },
132     { "exbi", UMEASURE_PREFIX_EXBI },
133     { "pebi", UMEASURE_PREFIX_PEBI },
134     { "tebi", UMEASURE_PREFIX_TEBI },
135     { "gibi", UMEASURE_PREFIX_GIBI },
136     { "mebi", UMEASURE_PREFIX_MEBI },
137     { "kibi", UMEASURE_PREFIX_KIBI },
138 };
139 
140 /**
141  * A ResourceSink that collects simple unit identifiers from the keys of the
142  * convertUnits table into an array, and adds these values to a TrieBuilder,
143  * with associated values being their index into this array plus a specified
144  * offset.
145  *
146  * Example code:
147  *
148  *     UErrorCode status = U_ZERO_ERROR;
149  *     BytesTrieBuilder b(status);
150  *     int32_t ARR_SIZE = 200;
151  *     const char *unitIdentifiers[ARR_SIZE];
152  *     int32_t *unitCategories[ARR_SIZE];
153  *     SimpleUnitIdentifiersSink identifierSink(gSerializedUnitCategoriesTrie, unitIdentifiers,
154  *                                              unitCategories, ARR_SIZE, b, kTrieValueOffset);
155  *     LocalUResourceBundlePointer unitsBundle(ures_openDirect(nullptr, "units", &status));
156  *     ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", identifierSink, status);
157  */
158 class SimpleUnitIdentifiersSink : public icu::ResourceSink {
159   public:
160     /**
161      * Constructor.
162      * @param quantitiesTrieData The data for constructing a quantitiesTrie,
163      *     which maps from a simple unit identifier to an index into the
164      *     gCategories array.
165      * @param out Array of char* to which pointers to the simple unit
166      *     identifiers will be saved. (Does not take ownership.)
167      * @param outCategories Array of int32_t to which category indexes will be
168      *     saved: this corresponds to simple unit IDs saved to `out`, mapping
169      *     from the ID to the value produced by the quantitiesTrie (which is an
170      *     index into the gCategories array).
171      * @param outSize The size of `out` and `outCategories`.
172      * @param trieBuilder The trie builder to which the simple unit identifier
173      *     should be added. The trie builder must outlive this resource sink.
174      * @param trieValueOffset This is added to the index of the identifier in
175      *     the `out` array, before adding to `trieBuilder` as the value
176      *     associated with the identifier.
177      */
SimpleUnitIdentifiersSink(StringPiece quantitiesTrieData,const char ** out,int32_t * outCategories,int32_t outSize,BytesTrieBuilder & trieBuilder,int32_t trieValueOffset)178     explicit SimpleUnitIdentifiersSink(StringPiece quantitiesTrieData, const char **out,
179                                        int32_t *outCategories, int32_t outSize,
180                                        BytesTrieBuilder &trieBuilder, int32_t trieValueOffset)
181         : outArray(out), outCategories(outCategories), outSize(outSize), trieBuilder(trieBuilder),
182           trieValueOffset(trieValueOffset), quantitiesTrieData(quantitiesTrieData), outIndex(0) {}
183 
184     /**
185      * Adds the table keys found in value to the output vector.
186      * @param key The key of the resource passed to `value`: the second
187      *     parameter of the ures_getAllItemsWithFallback() call.
188      * @param value Should be a ResourceTable value, if
189      *     ures_getAllItemsWithFallback() was called correctly for this sink.
190      * @param noFallback Ignored.
191      * @param status The standard ICU error code output parameter.
192      */
put(const char *,ResourceValue & value,UBool,UErrorCode & status)193     void put(const char * /*key*/, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
194         ResourceTable table = value.getTable(status);
195         if (U_FAILURE(status)) return;
196 
197         if (outIndex + table.getSize() > outSize) {
198             status = U_INDEX_OUTOFBOUNDS_ERROR;
199             return;
200         }
201 
202         BytesTrie quantitiesTrie(quantitiesTrieData.data());
203 
204         // Collect keys from the table resource.
205         const char *simpleUnitID;
206         for (int32_t i = 0; table.getKeyAndValue(i, simpleUnitID, value); ++i) {
207             U_ASSERT(i < table.getSize());
208             U_ASSERT(outIndex < outSize);
209             if (uprv_strcmp(simpleUnitID, "kilogram") == 0) {
210                 // For parsing, we use "gram", the prefixless metric mass unit. We
211                 // thus ignore the SI Base Unit of Mass: it exists due to being the
212                 // mass conversion target unit, but not needed for MeasureUnit
213                 // parsing.
214                 continue;
215             }
216             outArray[outIndex] = simpleUnitID;
217             trieBuilder.add(simpleUnitID, trieValueOffset + outIndex, status);
218 
219             // Find the base target unit for this simple unit
220             ResourceTable table = value.getTable(status);
221             if (U_FAILURE(status)) { return; }
222             if (!table.findValue("target", value)) {
223                 status = U_INVALID_FORMAT_ERROR;
224                 break;
225             }
226             int32_t len;
227             const char16_t* uTarget = value.getString(len, status);
228             CharString target;
229             target.appendInvariantChars(uTarget, len, status);
230             if (U_FAILURE(status)) { return; }
231             quantitiesTrie.reset();
232             UStringTrieResult result = quantitiesTrie.next(target.data(), target.length());
233             if (!USTRINGTRIE_HAS_VALUE(result)) {
234                 status = U_INVALID_FORMAT_ERROR;
235                 break;
236             }
237             outCategories[outIndex] = quantitiesTrie.getValue();
238 
239             outIndex++;
240         }
241     }
242 
243   private:
244     const char **outArray;
245     int32_t *outCategories;
246     int32_t outSize;
247     BytesTrieBuilder &trieBuilder;
248     int32_t trieValueOffset;
249 
250     StringPiece quantitiesTrieData;
251 
252     int32_t outIndex;
253 };
254 
255 /**
256  * A ResourceSink that collects information from `unitQuantities` in the `units`
257  * resource to provide key->value lookups from base unit to category, as well as
258  * preserving ordering information for these categories. See `units.txt`.
259  *
260  * For example: "kilogram" -> "mass", "meter-per-second" -> "speed".
261  *
262  * In C++ unitQuantity values are collected in order into a char16_t* array, while
263  * unitQuantity keys are added added to a TrieBuilder, with associated values
264  * being the index into the aforementioned char16_t* array.
265  */
266 class CategoriesSink : public icu::ResourceSink {
267   public:
268     /**
269      * Constructor.
270      * @param out Array of char16_t* to which unitQuantity values will be saved.
271      *     The pointers returned  not owned: they point directly at the resource
272      *     strings in static memory.
273      * @param outSize The size of the `out` array.
274      * @param trieBuilder The trie builder to which the keys (base units) of
275      *     each unitQuantity will be added, each with value being the offset
276      *     into `out`.
277      */
CategoriesSink(const char16_t ** out,int32_t & outSize,BytesTrieBuilder & trieBuilder)278     explicit CategoriesSink(const char16_t **out, int32_t &outSize, BytesTrieBuilder &trieBuilder)
279         : outQuantitiesArray(out), outSize(outSize), trieBuilder(trieBuilder), outIndex(0) {}
280 
put(const char *,ResourceValue & value,UBool,UErrorCode & status)281     void put(const char * /*key*/, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
282         ResourceArray array = value.getArray(status);
283         if (U_FAILURE(status)) {
284             return;
285         }
286 
287         if (outIndex + array.getSize() > outSize) {
288             status = U_INDEX_OUTOFBOUNDS_ERROR;
289             return;
290         }
291 
292         for (int32_t i = 0; array.getValue(i, value); ++i) {
293             U_ASSERT(outIndex < outSize);
294             ResourceTable table = value.getTable(status);
295             if (U_FAILURE(status)) {
296                 return;
297             }
298             if (table.getSize() != 1) {
299                 status = U_INVALID_FORMAT_ERROR;
300                 return;
301             }
302             const char *key;
303             table.getKeyAndValue(0, key, value);
304             int32_t uTmpLen;
305             outQuantitiesArray[outIndex] = value.getString(uTmpLen, status);
306             trieBuilder.add(key, outIndex, status);
307             outIndex++;
308         }
309     }
310 
311   private:
312     const char16_t **outQuantitiesArray;
313     int32_t &outSize;
314     BytesTrieBuilder &trieBuilder;
315 
316     int32_t outIndex;
317 };
318 
319 icu::UInitOnce gUnitExtrasInitOnce {};
320 
321 // Array of simple unit IDs.
322 //
323 // The array memory itself is owned by this pointer, but the individual char* in
324 // that array point at static memory. (Note that these char* are also returned
325 // by SingleUnitImpl::getSimpleUnitID().)
326 const char **gSimpleUnits = nullptr;
327 
328 // Maps from the value associated with each simple unit ID to an index into the
329 // gCategories array.
330 int32_t *gSimpleUnitCategories = nullptr;
331 
332 char *gSerializedUnitExtrasStemTrie = nullptr;
333 
334 // Array of char16_t* pointing at the unit categories (aka "quantities", aka
335 // "types"), as found in the `unitQuantities` resource. The array memory itself
336 // is owned by this pointer, but the individual char16_t* in that array point at
337 // static memory.
338 const char16_t **gCategories = nullptr;
339 // Number of items in `gCategories`.
340 int32_t gCategoriesCount = 0;
341 // Serialized BytesTrie for mapping from base units to indices into gCategories.
342 char *gSerializedUnitCategoriesTrie = nullptr;
343 
cleanupUnitExtras()344 UBool U_CALLCONV cleanupUnitExtras() {
345     uprv_free(gSerializedUnitCategoriesTrie);
346     gSerializedUnitCategoriesTrie = nullptr;
347     uprv_free(gCategories);
348     gCategories = nullptr;
349     uprv_free(gSerializedUnitExtrasStemTrie);
350     gSerializedUnitExtrasStemTrie = nullptr;
351     uprv_free(gSimpleUnitCategories);
352     gSimpleUnitCategories = nullptr;
353     uprv_free(gSimpleUnits);
354     gSimpleUnits = nullptr;
355     gUnitExtrasInitOnce.reset();
356     return true;
357 }
358 
initUnitExtras(UErrorCode & status)359 void U_CALLCONV initUnitExtras(UErrorCode& status) {
360     ucln_i18n_registerCleanup(UCLN_I18N_UNIT_EXTRAS, cleanupUnitExtras);
361     LocalUResourceBundlePointer unitsBundle(ures_openDirect(nullptr, "units", &status));
362 
363     // Collect unitQuantities information into gSerializedUnitCategoriesTrie and gCategories.
364     const char *CATEGORY_TABLE_NAME = "unitQuantities";
365     LocalUResourceBundlePointer unitQuantities(
366         ures_getByKey(unitsBundle.getAlias(), CATEGORY_TABLE_NAME, nullptr, &status));
367     if (U_FAILURE(status)) { return; }
368     gCategoriesCount = unitQuantities.getAlias()->fSize;
369     size_t quantitiesMallocSize = sizeof(char16_t *) * gCategoriesCount;
370     gCategories = static_cast<const char16_t **>(uprv_malloc(quantitiesMallocSize));
371     if (gCategories == nullptr) {
372         status = U_MEMORY_ALLOCATION_ERROR;
373         return;
374     }
375     uprv_memset(gCategories, 0, quantitiesMallocSize);
376     BytesTrieBuilder quantitiesBuilder(status);
377     CategoriesSink categoriesSink(gCategories, gCategoriesCount, quantitiesBuilder);
378     ures_getAllItemsWithFallback(unitsBundle.getAlias(), CATEGORY_TABLE_NAME, categoriesSink, status);
379     StringPiece resultQuantities = quantitiesBuilder.buildStringPiece(USTRINGTRIE_BUILD_FAST, status);
380     if (U_FAILURE(status)) { return; }
381     // Copy the result into the global constant pointer
382     size_t numBytesQuantities = resultQuantities.length();
383     gSerializedUnitCategoriesTrie = static_cast<char *>(uprv_malloc(numBytesQuantities));
384     if (gSerializedUnitCategoriesTrie == nullptr) {
385         status = U_MEMORY_ALLOCATION_ERROR;
386         return;
387     }
388     uprv_memcpy(gSerializedUnitCategoriesTrie, resultQuantities.data(), numBytesQuantities);
389 
390     // Build the BytesTrie that Parser needs for parsing unit identifiers.
391 
392     BytesTrieBuilder b(status);
393     if (U_FAILURE(status)) { return; }
394 
395     // Add SI and binary prefixes
396     for (const auto& unitPrefixInfo : gUnitPrefixStrings) {
397         b.add(unitPrefixInfo.string, unitPrefixInfo.value + kPrefixOffset, status);
398     }
399     if (U_FAILURE(status)) { return; }
400 
401     // Add syntax parts (compound, power prefixes)
402     b.add("-per-", COMPOUND_PART_PER, status);
403     b.add("-", COMPOUND_PART_TIMES, status);
404     b.add("-and-", COMPOUND_PART_AND, status);
405     b.add("per-", INITIAL_COMPOUND_PART_PER, status);
406     b.add("square-", POWER_PART_P2, status);
407     b.add("cubic-", POWER_PART_P3, status);
408     b.add("pow2-", POWER_PART_P2, status);
409     b.add("pow3-", POWER_PART_P3, status);
410     b.add("pow4-", POWER_PART_P4, status);
411     b.add("pow5-", POWER_PART_P5, status);
412     b.add("pow6-", POWER_PART_P6, status);
413     b.add("pow7-", POWER_PART_P7, status);
414     b.add("pow8-", POWER_PART_P8, status);
415     b.add("pow9-", POWER_PART_P9, status);
416     b.add("pow10-", POWER_PART_P10, status);
417     b.add("pow11-", POWER_PART_P11, status);
418     b.add("pow12-", POWER_PART_P12, status);
419     b.add("pow13-", POWER_PART_P13, status);
420     b.add("pow14-", POWER_PART_P14, status);
421     b.add("pow15-", POWER_PART_P15, status);
422     if (U_FAILURE(status)) { return; }
423 
424     // Add sanctioned simple units by offset: simple units all have entries in
425     // units/convertUnits resources.
426     LocalUResourceBundlePointer convertUnits(
427         ures_getByKey(unitsBundle.getAlias(), "convertUnits", nullptr, &status));
428     if (U_FAILURE(status)) { return; }
429 
430     // Allocate enough space: with identifierSink below skipping kilogram, we're
431     // probably allocating one more than needed.
432     int32_t simpleUnitsCount = convertUnits.getAlias()->fSize;
433     int32_t arrayMallocSize = sizeof(char *) * simpleUnitsCount;
434     gSimpleUnits = static_cast<const char **>(uprv_malloc(arrayMallocSize));
435     if (gSimpleUnits == nullptr) {
436         status = U_MEMORY_ALLOCATION_ERROR;
437         return;
438     }
439     uprv_memset(gSimpleUnits, 0, arrayMallocSize);
440     arrayMallocSize = sizeof(int32_t) * simpleUnitsCount;
441     gSimpleUnitCategories = static_cast<int32_t *>(uprv_malloc(arrayMallocSize));
442     if (gSimpleUnitCategories == nullptr) {
443         status = U_MEMORY_ALLOCATION_ERROR;
444         return;
445     }
446     uprv_memset(gSimpleUnitCategories, 0, arrayMallocSize);
447 
448     // Populate gSimpleUnits and build the associated trie.
449     SimpleUnitIdentifiersSink identifierSink(resultQuantities, gSimpleUnits, gSimpleUnitCategories,
450                                              simpleUnitsCount, b, kSimpleUnitOffset);
451     ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", identifierSink, status);
452 
453     // Build the CharsTrie
454     // TODO: Use SLOW or FAST here?
455     StringPiece result = b.buildStringPiece(USTRINGTRIE_BUILD_FAST, status);
456     if (U_FAILURE(status)) { return; }
457 
458     // Copy the result into the global constant pointer
459     size_t numBytes = result.length();
460     gSerializedUnitExtrasStemTrie = static_cast<char *>(uprv_malloc(numBytes));
461     if (gSerializedUnitExtrasStemTrie == nullptr) {
462         status = U_MEMORY_ALLOCATION_ERROR;
463         return;
464     }
465     uprv_memcpy(gSerializedUnitExtrasStemTrie, result.data(), numBytes);
466 }
467 
468 class Token {
469 public:
Token(int32_t match)470     Token(int32_t match) : fMatch(match) {}
471 
472     enum Type {
473         TYPE_UNDEFINED,
474         TYPE_PREFIX,
475         // Token type for "-per-", "-", and "-and-".
476         TYPE_COMPOUND_PART,
477         // Token type for "per-".
478         TYPE_INITIAL_COMPOUND_PART,
479         TYPE_POWER_PART,
480         TYPE_SIMPLE_UNIT,
481     };
482 
483     // Calling getType() is invalid, resulting in an assertion failure, if Token
484     // value isn't positive.
getType() const485     Type getType() const {
486         U_ASSERT(fMatch > 0);
487         if (fMatch < kCompoundPartOffset) {
488             return TYPE_PREFIX;
489         }
490         if (fMatch < kInitialCompoundPartOffset) {
491             return TYPE_COMPOUND_PART;
492         }
493         if (fMatch < kPowerPartOffset) {
494             return TYPE_INITIAL_COMPOUND_PART;
495         }
496         if (fMatch < kSimpleUnitOffset) {
497             return TYPE_POWER_PART;
498         }
499         return TYPE_SIMPLE_UNIT;
500     }
501 
getUnitPrefix() const502     UMeasurePrefix getUnitPrefix() const {
503         U_ASSERT(getType() == TYPE_PREFIX);
504         return static_cast<UMeasurePrefix>(fMatch - kPrefixOffset);
505     }
506 
507     // Valid only for tokens with type TYPE_COMPOUND_PART.
getMatch() const508     int32_t getMatch() const {
509         U_ASSERT(getType() == TYPE_COMPOUND_PART);
510         return fMatch;
511     }
512 
getInitialCompoundPart() const513     int32_t getInitialCompoundPart() const {
514         // Even if there is only one InitialCompoundPart value, we have this
515         // function for the simplicity of code consistency.
516         U_ASSERT(getType() == TYPE_INITIAL_COMPOUND_PART);
517         // Defensive: if this assert fails, code using this function also needs
518         // to change.
519         U_ASSERT(fMatch == INITIAL_COMPOUND_PART_PER);
520         return fMatch;
521     }
522 
getPower() const523     int8_t getPower() const {
524         U_ASSERT(getType() == TYPE_POWER_PART);
525         return static_cast<int8_t>(fMatch - kPowerPartOffset);
526     }
527 
getSimpleUnitIndex() const528     int32_t getSimpleUnitIndex() const {
529         U_ASSERT(getType() == TYPE_SIMPLE_UNIT);
530         return fMatch - kSimpleUnitOffset;
531     }
532 
533 private:
534     int32_t fMatch;
535 };
536 
537 class Parser {
538 public:
539     /**
540      * Factory function for parsing the given identifier.
541      *
542      * @param source The identifier to parse. This function does not make a copy
543      * of source: the underlying string that source points at, must outlive the
544      * parser.
545      * @param status ICU error code.
546      */
from(StringPiece source,UErrorCode & status)547     static Parser from(StringPiece source, UErrorCode& status) {
548         if (U_FAILURE(status)) {
549             return {};
550         }
551         umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status);
552         if (U_FAILURE(status)) {
553             return {};
554         }
555         return {source};
556     }
557 
parse(UErrorCode & status)558     MeasureUnitImpl parse(UErrorCode& status) {
559         MeasureUnitImpl result;
560 
561         if (U_FAILURE(status)) {
562             return result;
563         }
564         if (fSource.empty()) {
565             // The dimenionless unit: nothing to parse. leave result as is.
566             return result;
567         }
568 
569         while (hasNext()) {
570             bool sawAnd = false;
571 
572             SingleUnitImpl singleUnit = nextSingleUnit(sawAnd, status);
573             if (U_FAILURE(status)) {
574                 return result;
575             }
576 
577             bool added = result.appendSingleUnit(singleUnit, status);
578             if (U_FAILURE(status)) {
579                 return result;
580             }
581 
582             if (sawAnd && !added) {
583                 // Two similar units are not allowed in a mixed unit.
584                 status = kUnitIdentifierSyntaxError;
585                 return result;
586             }
587 
588             if (result.singleUnits.length() >= 2) {
589                 // nextSingleUnit fails appropriately for "per" and "and" in the
590                 // same identifier. It doesn't fail for other compound units
591                 // (COMPOUND_PART_TIMES). Consequently we take care of that
592                 // here.
593                 UMeasureUnitComplexity complexity =
594                     sawAnd ? UMEASURE_UNIT_MIXED : UMEASURE_UNIT_COMPOUND;
595                 if (result.singleUnits.length() == 2) {
596                     // After appending two singleUnits, the complexity will be `UMEASURE_UNIT_COMPOUND`
597                     U_ASSERT(result.complexity == UMEASURE_UNIT_COMPOUND);
598                     result.complexity = complexity;
599                 } else if (result.complexity != complexity) {
600                     // Can't have mixed compound units
601                     status = kUnitIdentifierSyntaxError;
602                     return result;
603                 }
604             }
605         }
606 
607         return result;
608     }
609 
610 private:
611     // Tracks parser progress: the offset into fSource.
612     int32_t fIndex = 0;
613 
614     // Since we're not owning this memory, whatever is passed to the constructor
615     // should live longer than this Parser - and the parser shouldn't return any
616     // references to that string.
617     StringPiece fSource;
618     BytesTrie fTrie;
619 
620     // Set to true when we've seen a "-per-" or a "per-", after which all units
621     // are in the denominator. Until we find an "-and-", at which point the
622     // identifier is invalid pending TODO(CLDR-13701).
623     bool fAfterPer = false;
624 
Parser()625     Parser() : fSource(""), fTrie(u"") {}
626 
Parser(StringPiece source)627     Parser(StringPiece source)
628         : fSource(source), fTrie(gSerializedUnitExtrasStemTrie) {}
629 
hasNext() const630     inline bool hasNext() const {
631         return fIndex < fSource.length();
632     }
633 
634     // Returns the next Token parsed from fSource, advancing fIndex to the end
635     // of that token in fSource. In case of U_FAILURE(status), the token
636     // returned will cause an abort if getType() is called on it.
nextToken(UErrorCode & status)637     Token nextToken(UErrorCode& status) {
638         fTrie.reset();
639         int32_t match = -1;
640         // Saves the position in the fSource string for the end of the most
641         // recent matching token.
642         int32_t previ = -1;
643         // Find the longest token that matches a value in the trie:
644         while (fIndex < fSource.length()) {
645             auto result = fTrie.next(fSource.data()[fIndex++]);
646             if (result == USTRINGTRIE_NO_MATCH) {
647                 break;
648             } else if (result == USTRINGTRIE_NO_VALUE) {
649                 continue;
650             }
651             U_ASSERT(USTRINGTRIE_HAS_VALUE(result));
652             match = fTrie.getValue();
653             previ = fIndex;
654             if (result == USTRINGTRIE_FINAL_VALUE) {
655                 break;
656             }
657             U_ASSERT(result == USTRINGTRIE_INTERMEDIATE_VALUE);
658             // continue;
659         }
660 
661         if (match < 0) {
662             status = kUnitIdentifierSyntaxError;
663         } else {
664             fIndex = previ;
665         }
666         return {match};
667     }
668 
669     /**
670      * Returns the next "single unit" via result.
671      *
672      * If a "-per-" was parsed, the result will have appropriate negative
673      * dimensionality.
674      *
675      * Returns an error if we parse both compound units and "-and-", since mixed
676      * compound units are not yet supported - TODO(CLDR-13701).
677      *
678      * @param result Will be overwritten by the result, if status shows success.
679      * @param sawAnd If an "-and-" was parsed prior to finding the "single
680      * unit", sawAnd is set to true. If not, it is left as is.
681      * @param status ICU error code.
682      */
nextSingleUnit(bool & sawAnd,UErrorCode & status)683     SingleUnitImpl nextSingleUnit(bool &sawAnd, UErrorCode &status) {
684         SingleUnitImpl result;
685         if (U_FAILURE(status)) {
686             return result;
687         }
688 
689         // state:
690         // 0 = no tokens seen yet (will accept power, SI or binary prefix, or simple unit)
691         // 1 = power token seen (will not accept another power token)
692         // 2 = SI or binary prefix token seen (will not accept a power, or SI or binary prefix token)
693         int32_t state = 0;
694 
695         bool atStart = fIndex == 0;
696         Token token = nextToken(status);
697         if (U_FAILURE(status)) {
698             return result;
699         }
700 
701         if (atStart) {
702             // Identifiers optionally start with "per-".
703             if (token.getType() == Token::TYPE_INITIAL_COMPOUND_PART) {
704                 U_ASSERT(token.getInitialCompoundPart() == INITIAL_COMPOUND_PART_PER);
705                 fAfterPer = true;
706                 result.dimensionality = -1;
707 
708                 token = nextToken(status);
709                 if (U_FAILURE(status)) {
710                     return result;
711                 }
712             }
713         } else {
714             // All other SingleUnit's are separated from previous SingleUnit's
715             // via a compound part:
716             if (token.getType() != Token::TYPE_COMPOUND_PART) {
717                 status = kUnitIdentifierSyntaxError;
718                 return result;
719             }
720 
721             switch (token.getMatch()) {
722             case COMPOUND_PART_PER:
723                 if (sawAnd) {
724                     // Mixed compound units not yet supported,
725                     // TODO(CLDR-13701).
726                     status = kUnitIdentifierSyntaxError;
727                     return result;
728                 }
729                 fAfterPer = true;
730                 result.dimensionality = -1;
731                 break;
732 
733             case COMPOUND_PART_TIMES:
734                 if (fAfterPer) {
735                     result.dimensionality = -1;
736                 }
737                 break;
738 
739             case COMPOUND_PART_AND:
740                 if (fAfterPer) {
741                     // Can't start with "-and-", and mixed compound units
742                     // not yet supported, TODO(CLDR-13701).
743                     status = kUnitIdentifierSyntaxError;
744                     return result;
745                 }
746                 sawAnd = true;
747                 break;
748             }
749 
750             token = nextToken(status);
751             if (U_FAILURE(status)) {
752                 return result;
753             }
754         }
755 
756         // Read tokens until we have a complete SingleUnit or we reach the end.
757         while (true) {
758             switch (token.getType()) {
759                 case Token::TYPE_POWER_PART:
760                     if (state > 0) {
761                         status = kUnitIdentifierSyntaxError;
762                         return result;
763                     }
764                     result.dimensionality *= token.getPower();
765                     state = 1;
766                     break;
767 
768                 case Token::TYPE_PREFIX:
769                     if (state > 1) {
770                         status = kUnitIdentifierSyntaxError;
771                         return result;
772                     }
773                     result.unitPrefix = token.getUnitPrefix();
774                     state = 2;
775                     break;
776 
777                 case Token::TYPE_SIMPLE_UNIT:
778                     result.index = token.getSimpleUnitIndex();
779                     return result;
780 
781                 default:
782                     status = kUnitIdentifierSyntaxError;
783                     return result;
784             }
785 
786             if (!hasNext()) {
787                 // We ran out of tokens before finding a complete single unit.
788                 status = kUnitIdentifierSyntaxError;
789                 return result;
790             }
791             token = nextToken(status);
792             if (U_FAILURE(status)) {
793                 return result;
794             }
795         }
796 
797         return result;
798     }
799 };
800 
801 // Sorting function wrapping SingleUnitImpl::compareTo for use with uprv_sortArray.
802 int32_t U_CALLCONV
compareSingleUnits(const void *,const void * left,const void * right)803 compareSingleUnits(const void* /*context*/, const void* left, const void* right) {
804     const auto* realLeft = static_cast<const SingleUnitImpl* const*>(left);
805     const auto* realRight = static_cast<const SingleUnitImpl* const*>(right);
806     return (*realLeft)->compareTo(**realRight);
807 }
808 
809 // Returns an index into the gCategories array, for the "unitQuantity" (aka
810 // "type" or "category") associated with the given base unit identifier. Returns
811 // -1 on failure, together with U_UNSUPPORTED_ERROR.
getUnitCategoryIndex(BytesTrie & trie,StringPiece baseUnitIdentifier,UErrorCode & status)812 int32_t getUnitCategoryIndex(BytesTrie &trie, StringPiece baseUnitIdentifier, UErrorCode &status) {
813     UStringTrieResult result = trie.reset().next(baseUnitIdentifier.data(), baseUnitIdentifier.length());
814     if (!USTRINGTRIE_HAS_VALUE(result)) {
815         status = U_UNSUPPORTED_ERROR;
816         return -1;
817     }
818 
819     return trie.getValue();
820 }
821 
822 } // namespace
823 
824 U_CAPI int32_t U_EXPORT2
umeas_getPrefixPower(UMeasurePrefix unitPrefix)825 umeas_getPrefixPower(UMeasurePrefix unitPrefix) {
826     if (unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_BIN &&
827         unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_BIN) {
828         return unitPrefix - UMEASURE_PREFIX_INTERNAL_ONE_BIN;
829     }
830     U_ASSERT(unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_SI &&
831              unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_SI);
832     return unitPrefix - UMEASURE_PREFIX_ONE;
833 }
834 
835 U_CAPI int32_t U_EXPORT2
umeas_getPrefixBase(UMeasurePrefix unitPrefix)836 umeas_getPrefixBase(UMeasurePrefix unitPrefix) {
837     if (unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_BIN &&
838         unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_BIN) {
839         return 1024;
840     }
841     U_ASSERT(unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_SI &&
842              unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_SI);
843     return 10;
844 }
845 
getUnitQuantity(const MeasureUnitImpl & baseMeasureUnitImpl,UErrorCode & status)846 CharString U_I18N_API getUnitQuantity(const MeasureUnitImpl &baseMeasureUnitImpl, UErrorCode &status) {
847     CharString result;
848     MeasureUnitImpl baseUnitImpl = baseMeasureUnitImpl.copy(status);
849     UErrorCode localStatus = U_ZERO_ERROR;
850     umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status);
851     if (U_FAILURE(status)) {
852         return result;
853     }
854     BytesTrie trie(gSerializedUnitCategoriesTrie);
855 
856     baseUnitImpl.serialize(status);
857     StringPiece identifier = baseUnitImpl.identifier.data();
858     int32_t idx = getUnitCategoryIndex(trie, identifier, localStatus);
859     if (U_FAILURE(status)) {
860         return result;
861     }
862 
863     // In case the base unit identifier did not match any entry.
864     if (U_FAILURE(localStatus)) {
865         localStatus = U_ZERO_ERROR;
866         baseUnitImpl.takeReciprocal(status);
867         baseUnitImpl.serialize(status);
868         identifier.set(baseUnitImpl.identifier.data());
869         idx = getUnitCategoryIndex(trie, identifier, localStatus);
870 
871         if (U_FAILURE(status)) {
872             return result;
873         }
874     }
875 
876     // In case the reciprocal of the base unit identifier did not match any entry.
877     MeasureUnitImpl simplifiedUnit = baseMeasureUnitImpl.copyAndSimplify(status);
878     if (U_FAILURE(status)) {
879         return result;
880     }
881     if (U_FAILURE(localStatus)) {
882         localStatus = U_ZERO_ERROR;
883         simplifiedUnit.serialize(status);
884         identifier.set(simplifiedUnit.identifier.data());
885         idx = getUnitCategoryIndex(trie, identifier, localStatus);
886 
887         if (U_FAILURE(status)) {
888             return result;
889         }
890     }
891 
892     // In case the simplified base unit identifier did not match any entry.
893     if (U_FAILURE(localStatus)) {
894         localStatus = U_ZERO_ERROR;
895         simplifiedUnit.takeReciprocal(status);
896         simplifiedUnit.serialize(status);
897         identifier.set(simplifiedUnit.identifier.data());
898         idx = getUnitCategoryIndex(trie, identifier, localStatus);
899 
900         if (U_FAILURE(status)) {
901             return result;
902         }
903     }
904 
905     // If there is no match at all, throw an exception.
906     if (U_FAILURE(localStatus)) {
907         status = U_INVALID_FORMAT_ERROR;
908         return result;
909     }
910 
911     if (idx < 0 || idx >= gCategoriesCount) {
912         status = U_INVALID_FORMAT_ERROR;
913         return result;
914     }
915 
916     result.appendInvariantChars(gCategories[idx], u_strlen(gCategories[idx]), status);
917     return result;
918 }
919 
920 // In ICU4J, this is MeasureUnit.getSingleUnitImpl().
forMeasureUnit(const MeasureUnit & measureUnit,UErrorCode & status)921 SingleUnitImpl SingleUnitImpl::forMeasureUnit(const MeasureUnit& measureUnit, UErrorCode& status) {
922     MeasureUnitImpl temp;
923     const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(measureUnit, temp, status);
924     if (U_FAILURE(status)) {
925         return {};
926     }
927     if (impl.singleUnits.length() == 0) {
928         return {};
929     }
930     if (impl.singleUnits.length() == 1) {
931         return *impl.singleUnits[0];
932     }
933     status = U_ILLEGAL_ARGUMENT_ERROR;
934     return {};
935 }
936 
build(UErrorCode & status) const937 MeasureUnit SingleUnitImpl::build(UErrorCode& status) const {
938     MeasureUnitImpl temp;
939     temp.appendSingleUnit(*this, status);
940     // TODO(icu-units#28): the MeasureUnitImpl::build() method uses
941     // findBySubtype, which is relatively slow.
942     // - At the time of loading the simple unit IDs, we could also save a
943     //   mapping to the builtin MeasureUnit type and subtype they correspond to.
944     // - This method could then check dimensionality and index, and if both are
945     //   1, directly return MeasureUnit instances very quickly.
946     return std::move(temp).build(status);
947 }
948 
getSimpleUnitID() const949 const char *SingleUnitImpl::getSimpleUnitID() const {
950     return gSimpleUnits[index];
951 }
952 
appendNeutralIdentifier(CharString & result,UErrorCode & status) const953 void SingleUnitImpl::appendNeutralIdentifier(CharString &result, UErrorCode &status) const UPRV_NO_SANITIZE_UNDEFINED {
954     int32_t absPower = std::abs(this->dimensionality);
955 
956     U_ASSERT(absPower > 0); // "this function does not support the dimensionless single units";
957 
958     if (absPower == 1) {
959         // no-op
960     } else if (absPower == 2) {
961         result.append(StringPiece("square-"), status);
962     } else if (absPower == 3) {
963         result.append(StringPiece("cubic-"), status);
964     } else if (absPower <= 15) {
965         result.append(StringPiece("pow"), status);
966         result.appendNumber(absPower, status);
967         result.append(StringPiece("-"), status);
968     } else {
969         status = U_ILLEGAL_ARGUMENT_ERROR; // Unit Identifier Syntax Error
970         return;
971     }
972 
973     if (U_FAILURE(status)) {
974         return;
975     }
976 
977     if (this->unitPrefix != UMEASURE_PREFIX_ONE) {
978         bool found = false;
979         for (const auto &unitPrefixInfo : gUnitPrefixStrings) {
980             // TODO: consider using binary search? If we do this, add a unit
981             // test to ensure gUnitPrefixStrings is sorted?
982             if (unitPrefixInfo.value == this->unitPrefix) {
983                 result.append(unitPrefixInfo.string, status);
984                 found = true;
985                 break;
986             }
987         }
988         if (!found) {
989             status = U_UNSUPPORTED_ERROR;
990             return;
991         }
992     }
993 
994     result.append(StringPiece(this->getSimpleUnitID()), status);
995 }
996 
getUnitCategoryIndex() const997 int32_t SingleUnitImpl::getUnitCategoryIndex() const {
998     return gSimpleUnitCategories[index];
999 }
1000 
MeasureUnitImpl(const SingleUnitImpl & singleUnit,UErrorCode & status)1001 MeasureUnitImpl::MeasureUnitImpl(const SingleUnitImpl &singleUnit, UErrorCode &status) {
1002     this->appendSingleUnit(singleUnit, status);
1003 }
1004 
forIdentifier(StringPiece identifier,UErrorCode & status)1005 MeasureUnitImpl MeasureUnitImpl::forIdentifier(StringPiece identifier, UErrorCode& status) {
1006     return Parser::from(identifier, status).parse(status);
1007 }
1008 
forMeasureUnit(const MeasureUnit & measureUnit,MeasureUnitImpl & memory,UErrorCode & status)1009 const MeasureUnitImpl& MeasureUnitImpl::forMeasureUnit(
1010         const MeasureUnit& measureUnit, MeasureUnitImpl& memory, UErrorCode& status) {
1011     if (measureUnit.fImpl) {
1012         return *measureUnit.fImpl;
1013     } else {
1014         memory = Parser::from(measureUnit.getIdentifier(), status).parse(status);
1015         return memory;
1016     }
1017 }
1018 
forMeasureUnitMaybeCopy(const MeasureUnit & measureUnit,UErrorCode & status)1019 MeasureUnitImpl MeasureUnitImpl::forMeasureUnitMaybeCopy(
1020         const MeasureUnit& measureUnit, UErrorCode& status) {
1021     if (measureUnit.fImpl) {
1022         return measureUnit.fImpl->copy(status);
1023     } else {
1024         return Parser::from(measureUnit.getIdentifier(), status).parse(status);
1025     }
1026 }
1027 
takeReciprocal(UErrorCode &)1028 void MeasureUnitImpl::takeReciprocal(UErrorCode& /*status*/) {
1029     identifier.clear();
1030     for (int32_t i = 0; i < singleUnits.length(); i++) {
1031         singleUnits[i]->dimensionality *= -1;
1032     }
1033 }
1034 
copyAndSimplify(UErrorCode & status) const1035 MeasureUnitImpl MeasureUnitImpl::copyAndSimplify(UErrorCode &status) const {
1036     MeasureUnitImpl result;
1037     for (int32_t i = 0; i < singleUnits.length(); i++) {
1038         const SingleUnitImpl &singleUnit = *this->singleUnits[i];
1039 
1040         // The following `for` loop will cause time complexity to be O(n^2).
1041         // However, n is very small (number of units, generally, at maximum equal to 10)
1042         bool unitExist = false;
1043         for (int32_t j = 0; j < result.singleUnits.length(); j++) {
1044             if (uprv_strcmp(result.singleUnits[j]->getSimpleUnitID(), singleUnit.getSimpleUnitID()) ==
1045                     0 &&
1046                 result.singleUnits[j]->unitPrefix == singleUnit.unitPrefix) {
1047                 unitExist = true;
1048                 result.singleUnits[j]->dimensionality =
1049                     result.singleUnits[j]->dimensionality + singleUnit.dimensionality;
1050                 break;
1051             }
1052         }
1053 
1054         if (!unitExist) {
1055             result.appendSingleUnit(singleUnit, status);
1056         }
1057     }
1058 
1059     return result;
1060 }
1061 
appendSingleUnit(const SingleUnitImpl & singleUnit,UErrorCode & status)1062 bool MeasureUnitImpl::appendSingleUnit(const SingleUnitImpl &singleUnit, UErrorCode &status) {
1063     identifier.clear();
1064 
1065     if (singleUnit.isDimensionless()) {
1066         // Do not append dimensionless units.
1067         return false;
1068     }
1069 
1070     // Find a similar unit that already exists, to attempt to coalesce
1071     SingleUnitImpl *oldUnit = nullptr;
1072     for (int32_t i = 0; i < this->singleUnits.length(); i++) {
1073         auto *candidate = this->singleUnits[i];
1074         if (candidate->isCompatibleWith(singleUnit)) {
1075             oldUnit = candidate;
1076         }
1077     }
1078 
1079     if (oldUnit) {
1080         // Both dimensionalities will be positive, or both will be negative, by
1081         // virtue of isCompatibleWith().
1082         oldUnit->dimensionality += singleUnit.dimensionality;
1083 
1084         return false;
1085     }
1086 
1087     // Add a copy of singleUnit
1088     // NOTE: MaybeStackVector::emplaceBackAndCheckErrorCode creates new copy of  singleUnit.
1089     this->singleUnits.emplaceBackAndCheckErrorCode(status, singleUnit);
1090     if (U_FAILURE(status)) {
1091         return false;
1092     }
1093 
1094     // If the MeasureUnitImpl is `UMEASURE_UNIT_SINGLE` and after the appending a unit, the `singleUnits`
1095     // contains more than one. thus means the complexity should be `UMEASURE_UNIT_COMPOUND`
1096     if (this->singleUnits.length() > 1 &&
1097         this->complexity == UMeasureUnitComplexity::UMEASURE_UNIT_SINGLE) {
1098         this->complexity = UMeasureUnitComplexity::UMEASURE_UNIT_COMPOUND;
1099     }
1100 
1101     return true;
1102 }
1103 
1104 MaybeStackVector<MeasureUnitImplWithIndex>
extractIndividualUnitsWithIndices(UErrorCode & status) const1105 MeasureUnitImpl::extractIndividualUnitsWithIndices(UErrorCode &status) const {
1106     MaybeStackVector<MeasureUnitImplWithIndex> result;
1107 
1108     if (this->complexity != UMeasureUnitComplexity::UMEASURE_UNIT_MIXED) {
1109         result.emplaceBackAndCheckErrorCode(status, 0, *this, status);
1110         return result;
1111     }
1112 
1113     for (int32_t i = 0; i < singleUnits.length(); ++i) {
1114         result.emplaceBackAndCheckErrorCode(status, i, *singleUnits[i], status);
1115         if (U_FAILURE(status)) {
1116             return result;
1117         }
1118     }
1119 
1120     return result;
1121 }
1122 
1123 /**
1124  * Normalize a MeasureUnitImpl and generate the identifier string in place.
1125  */
serialize(UErrorCode & status)1126 void MeasureUnitImpl::serialize(UErrorCode &status) {
1127     if (U_FAILURE(status)) {
1128         return;
1129     }
1130 
1131     if (this->singleUnits.length() == 0) {
1132         // Dimensionless, constructed by the default constructor.
1133         return;
1134     }
1135 
1136     if (this->complexity == UMEASURE_UNIT_COMPOUND) {
1137         // Note: don't sort a MIXED unit
1138         uprv_sortArray(this->singleUnits.getAlias(), this->singleUnits.length(),
1139                        sizeof(this->singleUnits[0]), compareSingleUnits, nullptr, false, &status);
1140         if (U_FAILURE(status)) {
1141             return;
1142         }
1143     }
1144 
1145     CharString result;
1146     bool beforePer = true;
1147     bool firstTimeNegativeDimension = false;
1148     for (int32_t i = 0; i < this->singleUnits.length(); i++) {
1149         if (beforePer && (*this->singleUnits[i]).dimensionality < 0) {
1150             beforePer = false;
1151             firstTimeNegativeDimension = true;
1152         } else if ((*this->singleUnits[i]).dimensionality < 0) {
1153             firstTimeNegativeDimension = false;
1154         }
1155 
1156         if (U_FAILURE(status)) {
1157             return;
1158         }
1159 
1160         if (this->complexity == UMeasureUnitComplexity::UMEASURE_UNIT_MIXED) {
1161             if (result.length() != 0) {
1162                 result.append(StringPiece("-and-"), status);
1163             }
1164         } else {
1165             if (firstTimeNegativeDimension) {
1166                 if (result.length() == 0) {
1167                     result.append(StringPiece("per-"), status);
1168                 } else {
1169                     result.append(StringPiece("-per-"), status);
1170                 }
1171             } else {
1172                 if (result.length() != 0) {
1173                     result.append(StringPiece("-"), status);
1174                 }
1175             }
1176         }
1177 
1178         this->singleUnits[i]->appendNeutralIdentifier(result, status);
1179     }
1180 
1181     this->identifier = CharString(result, status);
1182 }
1183 
build(UErrorCode & status)1184 MeasureUnit MeasureUnitImpl::build(UErrorCode& status) && {
1185     this->serialize(status);
1186     return MeasureUnit(std::move(*this));
1187 }
1188 
forIdentifier(StringPiece identifier,UErrorCode & status)1189 MeasureUnit MeasureUnit::forIdentifier(StringPiece identifier, UErrorCode& status) {
1190     return Parser::from(identifier, status).parse(status).build(status);
1191 }
1192 
getComplexity(UErrorCode & status) const1193 UMeasureUnitComplexity MeasureUnit::getComplexity(UErrorCode& status) const {
1194     MeasureUnitImpl temp;
1195     return MeasureUnitImpl::forMeasureUnit(*this, temp, status).complexity;
1196 }
1197 
getPrefix(UErrorCode & status) const1198 UMeasurePrefix MeasureUnit::getPrefix(UErrorCode& status) const {
1199     return SingleUnitImpl::forMeasureUnit(*this, status).unitPrefix;
1200 }
1201 
withPrefix(UMeasurePrefix prefix,UErrorCode & status) const1202 MeasureUnit MeasureUnit::withPrefix(UMeasurePrefix prefix, UErrorCode& status) const UPRV_NO_SANITIZE_UNDEFINED {
1203     SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status);
1204     singleUnit.unitPrefix = prefix;
1205     return singleUnit.build(status);
1206 }
1207 
getDimensionality(UErrorCode & status) const1208 int32_t MeasureUnit::getDimensionality(UErrorCode& status) const {
1209     SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status);
1210     if (U_FAILURE(status)) { return 0; }
1211     if (singleUnit.isDimensionless()) {
1212         return 0;
1213     }
1214     return singleUnit.dimensionality;
1215 }
1216 
withDimensionality(int32_t dimensionality,UErrorCode & status) const1217 MeasureUnit MeasureUnit::withDimensionality(int32_t dimensionality, UErrorCode& status) const {
1218     SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status);
1219     singleUnit.dimensionality = dimensionality;
1220     return singleUnit.build(status);
1221 }
1222 
reciprocal(UErrorCode & status) const1223 MeasureUnit MeasureUnit::reciprocal(UErrorCode& status) const {
1224     MeasureUnitImpl impl = MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status);
1225     impl.takeReciprocal(status);
1226     return std::move(impl).build(status);
1227 }
1228 
// Returns the product of this unit and `other`, formed by appending each of
// `other`'s single units to a copy of this unit.
//
// MIXED units are rejected on either side: status is set to
// U_ILLEGAL_ARGUMENT_ERROR and a default-constructed MeasureUnit is returned.
MeasureUnit MeasureUnit::product(const MeasureUnit& other, UErrorCode& status) const {
    MeasureUnitImpl combined = MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status);
    MeasureUnitImpl scratch;
    const MeasureUnitImpl& rhs = MeasureUnitImpl::forMeasureUnit(other, scratch, status);

    const bool involvesMixed = combined.complexity == UMEASURE_UNIT_MIXED ||
                               rhs.complexity == UMEASURE_UNIT_MIXED;
    if (involvesMixed) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return {};
    }

    const int32_t rhsCount = rhs.singleUnits.length();
    for (int32_t i = 0; i < rhsCount; ++i) {
        combined.appendSingleUnit(*rhs.singleUnits[i], status);
    }

    // More than one single unit means the result is a compound unit.
    if (combined.singleUnits.length() > 1) {
        combined.complexity = UMEASURE_UNIT_COMPOUND;
    }
    return std::move(combined).build(status);
}
1245 
splitToSingleUnitsImpl(int32_t & outCount,UErrorCode & status) const1246 LocalArray<MeasureUnit> MeasureUnit::splitToSingleUnitsImpl(int32_t& outCount, UErrorCode& status) const {
1247     MeasureUnitImpl temp;
1248     const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(*this, temp, status);
1249     outCount = impl.singleUnits.length();
1250     MeasureUnit* arr = new MeasureUnit[outCount];
1251     if (arr == nullptr) {
1252         status = U_MEMORY_ALLOCATION_ERROR;
1253         return LocalArray<MeasureUnit>();
1254     }
1255     for (int32_t i = 0; i < outCount; i++) {
1256         arr[i] = impl.singleUnits[i]->build(status);
1257     }
1258     return LocalArray<MeasureUnit>(arr, status);
1259 }
1260 
1261 
1262 U_NAMESPACE_END
1263 
#endif /* !UCONFIG_NO_FORMATTING */
1265