• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2020 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 // Extra functions for MeasureUnit not needed for all clients.
5 // Separate .o file so that it can be removed for modularity.
6 
7 #include "unicode/utypes.h"
8 
9 #if !UCONFIG_NO_FORMATTING
10 
11 // Allow implicit conversion from char16_t* to UnicodeString for this file:
12 // Helpful in toString methods and elsewhere.
13 #define UNISTR_FROM_STRING_EXPLICIT
14 
15 #include <cstdlib>
16 #include "cstring.h"
17 #include "measunit_impl.h"
18 #include "uarrsort.h"
19 #include "uassert.h"
20 #include "ucln_in.h"
21 #include "umutex.h"
22 #include "unicode/errorcode.h"
23 #include "unicode/localpointer.h"
24 #include "unicode/measunit.h"
25 #include "unicode/ucharstrie.h"
26 #include "unicode/ucharstriebuilder.h"
27 
28 #include "cstr.h"
29 
30 U_NAMESPACE_BEGIN
31 
32 
33 namespace {
34 
35 // TODO: Propose a new error code for this?
36 constexpr UErrorCode kUnitIdentifierSyntaxError = U_ILLEGAL_ARGUMENT_ERROR;
37 
// Trie value layout: every string stored in the stem trie maps to a positive
// value, and each kind of entry gets its own base offset.

// Base trie value for SI prefixes: the trie stores the prefix value plus this
// offset. It is big enough to ensure only positive integers are inserted.
constexpr int32_t kSIPrefixOffset = 64;

// Base trie value for compound parts, e.g. "-per-", "-", "-and-".
constexpr int32_t kCompoundPartOffset = 128;

enum CompoundPart {
    COMPOUND_PART_PER = kCompoundPartOffset,        // "-per-"
    COMPOUND_PART_TIMES = kCompoundPartOffset + 1,  // "-"
    COMPOUND_PART_AND = kCompoundPartOffset + 2,    // "-and-"
};

// Base trie value for "per-".
constexpr int32_t kInitialCompoundPartOffset = 192;

enum InitialCompoundPart {
    // "per-": the only compound part that can appear at the start of an
    // identifier.
    INITIAL_COMPOUND_PART_PER = kInitialCompoundPartOffset,
};

// Base trie value for powers like "square-", "cubic-", "p2-" etc. The
// enumerator for power N has the value kPowerPartOffset + N.
constexpr int32_t kPowerPartOffset = 256;

enum PowerPart {
    POWER_PART_P2 = kPowerPartOffset + 2,
    POWER_PART_P3 = kPowerPartOffset + 3,
    POWER_PART_P4 = kPowerPartOffset + 4,
    POWER_PART_P5 = kPowerPartOffset + 5,
    POWER_PART_P6 = kPowerPartOffset + 6,
    POWER_PART_P7 = kPowerPartOffset + 7,
    POWER_PART_P8 = kPowerPartOffset + 8,
    POWER_PART_P9 = kPowerPartOffset + 9,
    POWER_PART_P10 = kPowerPartOffset + 10,
    POWER_PART_P11 = kPowerPartOffset + 11,
    POWER_PART_P12 = kPowerPartOffset + 12,
    POWER_PART_P13 = kPowerPartOffset + 13,
    POWER_PART_P14 = kPowerPartOffset + 14,
    POWER_PART_P15 = kPowerPartOffset + 15,
};

// Base trie value for simple units, e.g. "gram", "nautical-mile",
// "fluid-ounce-imperial".
constexpr int32_t kSimpleUnitOffset = 512;
86 
87 const struct SIPrefixStrings {
88     const char* const string;
89     UMeasureSIPrefix value;
90 } gSIPrefixStrings[] = {
91     { "yotta", UMEASURE_SI_PREFIX_YOTTA },
92     { "zetta", UMEASURE_SI_PREFIX_ZETTA },
93     { "exa", UMEASURE_SI_PREFIX_EXA },
94     { "peta", UMEASURE_SI_PREFIX_PETA },
95     { "tera", UMEASURE_SI_PREFIX_TERA },
96     { "giga", UMEASURE_SI_PREFIX_GIGA },
97     { "mega", UMEASURE_SI_PREFIX_MEGA },
98     { "kilo", UMEASURE_SI_PREFIX_KILO },
99     { "hecto", UMEASURE_SI_PREFIX_HECTO },
100     { "deka", UMEASURE_SI_PREFIX_DEKA },
101     { "deci", UMEASURE_SI_PREFIX_DECI },
102     { "centi", UMEASURE_SI_PREFIX_CENTI },
103     { "milli", UMEASURE_SI_PREFIX_MILLI },
104     { "micro", UMEASURE_SI_PREFIX_MICRO },
105     { "nano", UMEASURE_SI_PREFIX_NANO },
106     { "pico", UMEASURE_SI_PREFIX_PICO },
107     { "femto", UMEASURE_SI_PREFIX_FEMTO },
108     { "atto", UMEASURE_SI_PREFIX_ATTO },
109     { "zepto", UMEASURE_SI_PREFIX_ZEPTO },
110     { "yocto", UMEASURE_SI_PREFIX_YOCTO },
111 };
112 
// Identifiers of the sanctioned simple units.
//
// The position of an entry is significant: the stem trie stores
// kSimpleUnitOffset + position for each unit, and serialization indexes this
// table with SingleUnitImpl::index. Do not reorder existing entries.
//
// The number at the start of each row is the index of the row's first entry.
//
// TODO(ICU-21059): Get this list from data
const char16_t* const gSimpleUnits[] = {
    /*   0 */ u"candela", u"carat", u"gram", u"ounce", u"ounce-troy", u"pound",
    /*   6 */ u"kilogram", u"stone", u"ton", u"metric-ton", u"earth-mass", u"solar-mass",
    /*  12 */ u"point", u"inch", u"foot", u"yard", u"meter", u"fathom",
    /*  18 */ u"furlong", u"mile", u"nautical-mile", u"mile-scandinavian", u"100-kilometer", u"earth-radius",
    /*  24 */ u"solar-radius", u"astronomical-unit", u"light-year", u"parsec", u"second", u"minute",
    /*  30 */ u"hour", u"day", u"day-person", u"week", u"week-person", u"month",
    /*  36 */ u"month-person", u"year", u"year-person", u"decade", u"century", u"ampere",
    /*  42 */ u"fahrenheit", u"kelvin", u"celsius", u"arc-second", u"arc-minute", u"degree",
    /*  48 */ u"radian", u"revolution", u"item", u"mole", u"permillion", u"permyriad",
    /*  54 */ u"permille", u"percent", u"karat", u"portion", u"bit", u"byte",
    /*  60 */ u"dot", u"pixel", u"em", u"hertz", u"newton", u"pound-force",
    /*  66 */ u"pascal", u"bar", u"atmosphere", u"ofhg", u"electronvolt", u"dalton",
    /*  72 */ u"joule", u"calorie", u"british-thermal-unit", u"foodcalorie", u"therm-us", u"watt",
    /*  78 */ u"horsepower", u"solar-luminosity", u"volt", u"ohm", u"dunam", u"acre",
    /*  84 */ u"hectare", u"teaspoon", u"tablespoon", u"fluid-ounce-imperial", u"fluid-ounce", u"cup",
    /*  90 */ u"cup-metric", u"pint", u"pint-metric", u"quart", u"liter", u"gallon",
    /*  96 */ u"gallon-imperial", u"bushel", u"barrel", u"knot", u"g-force", u"lux",
};
218 
219 icu::UInitOnce gUnitExtrasInitOnce = U_INITONCE_INITIALIZER;
220 
221 char16_t* kSerializedUnitExtrasStemTrie = nullptr;
222 
cleanupUnitExtras()223 UBool U_CALLCONV cleanupUnitExtras() {
224     uprv_free(kSerializedUnitExtrasStemTrie);
225     kSerializedUnitExtrasStemTrie = nullptr;
226     gUnitExtrasInitOnce.reset();
227     return TRUE;
228 }
229 
initUnitExtras(UErrorCode & status)230 void U_CALLCONV initUnitExtras(UErrorCode& status) {
231     ucln_i18n_registerCleanup(UCLN_I18N_UNIT_EXTRAS, cleanupUnitExtras);
232 
233     UCharsTrieBuilder b(status);
234     if (U_FAILURE(status)) { return; }
235 
236     // Add SI prefixes
237     for (const auto& siPrefixInfo : gSIPrefixStrings) {
238         UnicodeString uSIPrefix(siPrefixInfo.string, -1, US_INV);
239         b.add(uSIPrefix, siPrefixInfo.value + kSIPrefixOffset, status);
240     }
241     if (U_FAILURE(status)) { return; }
242 
243     // Add syntax parts (compound, power prefixes)
244     b.add(u"-per-", COMPOUND_PART_PER, status);
245     b.add(u"-", COMPOUND_PART_TIMES, status);
246     b.add(u"-and-", COMPOUND_PART_AND, status);
247     b.add(u"per-", INITIAL_COMPOUND_PART_PER, status);
248     b.add(u"square-", POWER_PART_P2, status);
249     b.add(u"cubic-", POWER_PART_P3, status);
250     b.add(u"p2-", POWER_PART_P2, status);
251     b.add(u"p3-", POWER_PART_P3, status);
252     b.add(u"p4-", POWER_PART_P4, status);
253     b.add(u"p5-", POWER_PART_P5, status);
254     b.add(u"p6-", POWER_PART_P6, status);
255     b.add(u"p7-", POWER_PART_P7, status);
256     b.add(u"p8-", POWER_PART_P8, status);
257     b.add(u"p9-", POWER_PART_P9, status);
258     b.add(u"p10-", POWER_PART_P10, status);
259     b.add(u"p11-", POWER_PART_P11, status);
260     b.add(u"p12-", POWER_PART_P12, status);
261     b.add(u"p13-", POWER_PART_P13, status);
262     b.add(u"p14-", POWER_PART_P14, status);
263     b.add(u"p15-", POWER_PART_P15, status);
264     if (U_FAILURE(status)) { return; }
265 
266     // Add sanctioned simple units by offset
267     int32_t simpleUnitOffset = kSimpleUnitOffset;
268     for (auto simpleUnit : gSimpleUnits) {
269         b.add(simpleUnit, simpleUnitOffset++, status);
270     }
271 
272     // Build the CharsTrie
273     // TODO: Use SLOW or FAST here?
274     UnicodeString result;
275     b.buildUnicodeString(USTRINGTRIE_BUILD_FAST, result, status);
276     if (U_FAILURE(status)) { return; }
277 
278     // Copy the result into the global constant pointer
279     size_t numBytes = result.length() * sizeof(char16_t);
280     kSerializedUnitExtrasStemTrie = static_cast<char16_t*>(uprv_malloc(numBytes));
281     uprv_memcpy(kSerializedUnitExtrasStemTrie, result.getBuffer(), numBytes);
282 }
283 
284 class Token {
285 public:
Token(int32_t match)286     Token(int32_t match) : fMatch(match) {}
287 
288     enum Type {
289         TYPE_UNDEFINED,
290         TYPE_SI_PREFIX,
291         // Token type for "-per-", "-", and "-and-".
292         TYPE_COMPOUND_PART,
293         // Token type for "per-".
294         TYPE_INITIAL_COMPOUND_PART,
295         TYPE_POWER_PART,
296         TYPE_SIMPLE_UNIT,
297     };
298 
299     // Calling getType() is invalid, resulting in an assertion failure, if Token
300     // value isn't positive.
getType() const301     Type getType() const {
302         U_ASSERT(fMatch > 0);
303         if (fMatch < kCompoundPartOffset) {
304             return TYPE_SI_PREFIX;
305         }
306         if (fMatch < kInitialCompoundPartOffset) {
307             return TYPE_COMPOUND_PART;
308         }
309         if (fMatch < kPowerPartOffset) {
310             return TYPE_INITIAL_COMPOUND_PART;
311         }
312         if (fMatch < kSimpleUnitOffset) {
313             return TYPE_POWER_PART;
314         }
315         return TYPE_SIMPLE_UNIT;
316     }
317 
getSIPrefix() const318     UMeasureSIPrefix getSIPrefix() const {
319         U_ASSERT(getType() == TYPE_SI_PREFIX);
320         return static_cast<UMeasureSIPrefix>(fMatch - kSIPrefixOffset);
321     }
322 
323     // Valid only for tokens with type TYPE_COMPOUND_PART.
getMatch() const324     int32_t getMatch() const {
325         U_ASSERT(getType() == TYPE_COMPOUND_PART);
326         return fMatch;
327     }
328 
getInitialCompoundPart() const329     int32_t getInitialCompoundPart() const {
330         // Even if there is only one InitialCompoundPart value, we have this
331         // function for the simplicity of code consistency.
332         U_ASSERT(getType() == TYPE_INITIAL_COMPOUND_PART);
333         // Defensive: if this assert fails, code using this function also needs
334         // to change.
335         U_ASSERT(fMatch == INITIAL_COMPOUND_PART_PER);
336         return fMatch;
337     }
338 
getPower() const339     int8_t getPower() const {
340         U_ASSERT(getType() == TYPE_POWER_PART);
341         return static_cast<int8_t>(fMatch - kPowerPartOffset);
342     }
343 
getSimpleUnitIndex() const344     int32_t getSimpleUnitIndex() const {
345         U_ASSERT(getType() == TYPE_SIMPLE_UNIT);
346         return fMatch - kSimpleUnitOffset;
347     }
348 
349 private:
350     int32_t fMatch;
351 };
352 
353 class Parser {
354 public:
355     /**
356      * Factory function for parsing the given identifier.
357      *
358      * @param source The identifier to parse. This function does not make a copy
359      * of source: the underlying string that source points at, must outlive the
360      * parser.
361      * @param status ICU error code.
362      */
from(StringPiece source,UErrorCode & status)363     static Parser from(StringPiece source, UErrorCode& status) {
364         if (U_FAILURE(status)) {
365             return Parser();
366         }
367         umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status);
368         if (U_FAILURE(status)) {
369             return Parser();
370         }
371         return Parser(source);
372     }
373 
parse(UErrorCode & status)374     MeasureUnitImpl parse(UErrorCode& status) {
375         MeasureUnitImpl result;
376         parseImpl(result, status);
377         return result;
378     }
379 
380 private:
381     // Tracks parser progress: the offset into fSource.
382     int32_t fIndex = 0;
383 
384     // Since we're not owning this memory, whatever is passed to the constructor
385     // should live longer than this Parser - and the parser shouldn't return any
386     // references to that string.
387     StringPiece fSource;
388     UCharsTrie fTrie;
389 
390     // Set to true when we've seen a "-per-" or a "per-", after which all units
391     // are in the denominator. Until we find an "-and-", at which point the
392     // identifier is invalid pending TODO(CLDR-13700).
393     bool fAfterPer = false;
394 
Parser()395     Parser() : fSource(""), fTrie(u"") {}
396 
Parser(StringPiece source)397     Parser(StringPiece source)
398         : fSource(source), fTrie(kSerializedUnitExtrasStemTrie) {}
399 
hasNext() const400     inline bool hasNext() const {
401         return fIndex < fSource.length();
402     }
403 
404     // Returns the next Token parsed from fSource, advancing fIndex to the end
405     // of that token in fSource. In case of U_FAILURE(status), the token
406     // returned will cause an abort if getType() is called on it.
nextToken(UErrorCode & status)407     Token nextToken(UErrorCode& status) {
408         fTrie.reset();
409         int32_t match = -1;
410         // Saves the position in the fSource string for the end of the most
411         // recent matching token.
412         int32_t previ = -1;
413         // Find the longest token that matches a value in the trie:
414         while (fIndex < fSource.length()) {
415             auto result = fTrie.next(fSource.data()[fIndex++]);
416             if (result == USTRINGTRIE_NO_MATCH) {
417                 break;
418             } else if (result == USTRINGTRIE_NO_VALUE) {
419                 continue;
420             }
421             U_ASSERT(USTRINGTRIE_HAS_VALUE(result));
422             match = fTrie.getValue();
423             previ = fIndex;
424             if (result == USTRINGTRIE_FINAL_VALUE) {
425                 break;
426             }
427             U_ASSERT(result == USTRINGTRIE_INTERMEDIATE_VALUE);
428             // continue;
429         }
430 
431         if (match < 0) {
432             status = kUnitIdentifierSyntaxError;
433         } else {
434             fIndex = previ;
435         }
436         return Token(match);
437     }
438 
439     /**
440      * Returns the next "single unit" via result.
441      *
442      * If a "-per-" was parsed, the result will have appropriate negative
443      * dimensionality.
444      *
445      * Returns an error if we parse both compound units and "-and-", since mixed
446      * compound units are not yet supported - TODO(CLDR-13700).
447      *
448      * @param result Will be overwritten by the result, if status shows success.
449      * @param sawAnd If an "-and-" was parsed prior to finding the "single
450      * unit", sawAnd is set to true. If not, it is left as is.
451      * @param status ICU error code.
452      */
nextSingleUnit(SingleUnitImpl & result,bool & sawAnd,UErrorCode & status)453     void nextSingleUnit(SingleUnitImpl& result, bool& sawAnd, UErrorCode& status) {
454         if (U_FAILURE(status)) {
455             return;
456         }
457 
458         // state:
459         // 0 = no tokens seen yet (will accept power, SI prefix, or simple unit)
460         // 1 = power token seen (will not accept another power token)
461         // 2 = SI prefix token seen (will not accept a power or SI prefix token)
462         int32_t state = 0;
463 
464         bool atStart = fIndex == 0;
465         Token token = nextToken(status);
466         if (U_FAILURE(status)) { return; }
467 
468         if (atStart) {
469             // Identifiers optionally start with "per-".
470             if (token.getType() == Token::TYPE_INITIAL_COMPOUND_PART) {
471                 U_ASSERT(token.getInitialCompoundPart() == INITIAL_COMPOUND_PART_PER);
472                 fAfterPer = true;
473                 result.dimensionality = -1;
474 
475                 token = nextToken(status);
476                 if (U_FAILURE(status)) { return; }
477             }
478         } else {
479             // All other SingleUnit's are separated from previous SingleUnit's
480             // via a compound part:
481             if (token.getType() != Token::TYPE_COMPOUND_PART) {
482                 status = kUnitIdentifierSyntaxError;
483                 return;
484             }
485 
486             switch (token.getMatch()) {
487             case COMPOUND_PART_PER:
488                 if (sawAnd) {
489                     // Mixed compound units not yet supported,
490                     // TODO(CLDR-13700).
491                     status = kUnitIdentifierSyntaxError;
492                     return;
493                 }
494                 fAfterPer = true;
495                 result.dimensionality = -1;
496                 break;
497 
498             case COMPOUND_PART_TIMES:
499                 if (fAfterPer) {
500                     result.dimensionality = -1;
501                 }
502                 break;
503 
504             case COMPOUND_PART_AND:
505                 if (fAfterPer) {
506                     // Can't start with "-and-", and mixed compound units
507                     // not yet supported, TODO(CLDR-13700).
508                     status = kUnitIdentifierSyntaxError;
509                     return;
510                 }
511                 sawAnd = true;
512                 break;
513             }
514 
515             token = nextToken(status);
516             if (U_FAILURE(status)) { return; }
517         }
518 
519         // Read tokens until we have a complete SingleUnit or we reach the end.
520         while (true) {
521             switch (token.getType()) {
522                 case Token::TYPE_POWER_PART:
523                     if (state > 0) {
524                         status = kUnitIdentifierSyntaxError;
525                         return;
526                     }
527                     result.dimensionality *= token.getPower();
528                     state = 1;
529                     break;
530 
531                 case Token::TYPE_SI_PREFIX:
532                     if (state > 1) {
533                         status = kUnitIdentifierSyntaxError;
534                         return;
535                     }
536                     result.siPrefix = token.getSIPrefix();
537                     state = 2;
538                     break;
539 
540                 case Token::TYPE_SIMPLE_UNIT:
541                     result.index = token.getSimpleUnitIndex();
542                     return;
543 
544                 default:
545                     status = kUnitIdentifierSyntaxError;
546                     return;
547             }
548 
549             if (!hasNext()) {
550                 // We ran out of tokens before finding a complete single unit.
551                 status = kUnitIdentifierSyntaxError;
552                 return;
553             }
554             token = nextToken(status);
555             if (U_FAILURE(status)) {
556                 return;
557             }
558         }
559     }
560 
561     /// @param result is modified, not overridden. Caller must pass in a
562     /// default-constructed (empty) MeasureUnitImpl instance.
parseImpl(MeasureUnitImpl & result,UErrorCode & status)563     void parseImpl(MeasureUnitImpl& result, UErrorCode& status) {
564         if (U_FAILURE(status)) {
565             return;
566         }
567         if (fSource.empty()) {
568             // The dimenionless unit: nothing to parse. leave result as is.
569             return;
570         }
571         int32_t unitNum = 0;
572         while (hasNext()) {
573             bool sawAnd = false;
574             SingleUnitImpl singleUnit;
575             nextSingleUnit(singleUnit, sawAnd, status);
576             if (U_FAILURE(status)) {
577                 return;
578             }
579             U_ASSERT(!singleUnit.isDimensionless());
580             bool added = result.append(singleUnit, status);
581             if (sawAnd && !added) {
582                 // Two similar units are not allowed in a mixed unit
583                 status = kUnitIdentifierSyntaxError;
584                 return;
585             }
586             if ((++unitNum) >= 2) {
587                 // nextSingleUnit fails appropriately for "per" and "and" in the
588                 // same identifier. It doesn't fail for other compound units
589                 // (COMPOUND_PART_TIMES). Consequently we take care of that
590                 // here.
591                 UMeasureUnitComplexity complexity =
592                     sawAnd ? UMEASURE_UNIT_MIXED : UMEASURE_UNIT_COMPOUND;
593                 if (unitNum == 2) {
594                     U_ASSERT(result.complexity == UMEASURE_UNIT_SINGLE);
595                     result.complexity = complexity;
596                 } else if (result.complexity != complexity) {
597                     // Can't have mixed compound units
598                     status = kUnitIdentifierSyntaxError;
599                     return;
600                 }
601             }
602         }
603     }
604 };
605 
606 int32_t U_CALLCONV
compareSingleUnits(const void *,const void * left,const void * right)607 compareSingleUnits(const void* /*context*/, const void* left, const void* right) {
608     auto realLeft = static_cast<const SingleUnitImpl* const*>(left);
609     auto realRight = static_cast<const SingleUnitImpl* const*>(right);
610     return (*realLeft)->compareTo(**realRight);
611 }
612 
613 /**
614  * Generate the identifier string for a single unit in place.
615  *
616  * Does not support the dimensionless SingleUnitImpl: calling serializeSingle
617  * with the dimensionless unit results in an U_INTERNAL_PROGRAM_ERROR.
618  *
619  * @param first If singleUnit is part of a compound unit, and not its first
620  * single unit, set this to false. Otherwise: set to true.
621  */
serializeSingle(const SingleUnitImpl & singleUnit,bool first,CharString & output,UErrorCode & status)622 void serializeSingle(const SingleUnitImpl& singleUnit, bool first, CharString& output, UErrorCode& status) {
623     if (first && singleUnit.dimensionality < 0) {
624         // Essentially the "unary per". For compound units with a numerator, the
625         // caller takes care of the "binary per".
626         output.append("per-", status);
627     }
628 
629     if (singleUnit.isDimensionless()) {
630         status = U_INTERNAL_PROGRAM_ERROR;
631         return;
632     }
633     int8_t posPower = std::abs(singleUnit.dimensionality);
634     if (posPower == 0) {
635         status = U_INTERNAL_PROGRAM_ERROR;
636     } else if (posPower == 1) {
637         // no-op
638     } else if (posPower == 2) {
639         output.append("square-", status);
640     } else if (posPower == 3) {
641         output.append("cubic-", status);
642     } else if (posPower < 10) {
643         output.append('p', status);
644         output.append(posPower + '0', status);
645         output.append('-', status);
646     } else if (posPower <= 15) {
647         output.append("p1", status);
648         output.append('0' + (posPower % 10), status);
649         output.append('-', status);
650     } else {
651         status = kUnitIdentifierSyntaxError;
652     }
653     if (U_FAILURE(status)) {
654         return;
655     }
656 
657     if (singleUnit.siPrefix != UMEASURE_SI_PREFIX_ONE) {
658         for (const auto& siPrefixInfo : gSIPrefixStrings) {
659             if (siPrefixInfo.value == singleUnit.siPrefix) {
660                 output.append(siPrefixInfo.string, status);
661                 break;
662             }
663         }
664     }
665     if (U_FAILURE(status)) {
666         return;
667     }
668 
669     output.appendInvariantChars(gSimpleUnits[singleUnit.index], status);
670 }
671 
672 /**
673  * Normalize a MeasureUnitImpl and generate the identifier string in place.
674  */
serialize(MeasureUnitImpl & impl,UErrorCode & status)675 void serialize(MeasureUnitImpl& impl, UErrorCode& status) {
676     if (U_FAILURE(status)) {
677         return;
678     }
679     U_ASSERT(impl.identifier.isEmpty());
680     if (impl.units.length() == 0) {
681         // Dimensionless, constructed by the default constructor: no appending
682         // to impl.identifier, we wish it to contain the zero-length string.
683         return;
684     }
685     if (impl.complexity == UMEASURE_UNIT_COMPOUND) {
686         // Note: don't sort a MIXED unit
687         uprv_sortArray(
688             impl.units.getAlias(),
689             impl.units.length(),
690             sizeof(impl.units[0]),
691             compareSingleUnits,
692             nullptr,
693             false,
694             &status);
695         if (U_FAILURE(status)) {
696             return;
697         }
698     }
699     serializeSingle(*impl.units[0], true, impl.identifier, status);
700     if (impl.units.length() == 1) {
701         return;
702     }
703     for (int32_t i = 1; i < impl.units.length(); i++) {
704         const SingleUnitImpl& prev = *impl.units[i-1];
705         const SingleUnitImpl& curr = *impl.units[i];
706         if (impl.complexity == UMEASURE_UNIT_MIXED) {
707             impl.identifier.append("-and-", status);
708             serializeSingle(curr, true, impl.identifier, status);
709         } else {
710             if (prev.dimensionality > 0 && curr.dimensionality < 0) {
711                 impl.identifier.append("-per-", status);
712             } else {
713                 impl.identifier.append('-', status);
714             }
715             serializeSingle(curr, false, impl.identifier, status);
716         }
717     }
718 
719 }
720 
721 /**
722  * Appends a SingleUnitImpl to a MeasureUnitImpl.
723  *
724  * @return true if a new item was added. If unit is the dimensionless unit, it
725  * is never added: the return value will always be false.
726  */
appendImpl(MeasureUnitImpl & impl,const SingleUnitImpl & unit,UErrorCode & status)727 bool appendImpl(MeasureUnitImpl& impl, const SingleUnitImpl& unit, UErrorCode& status) {
728     if (unit.isDimensionless()) {
729         // We don't append dimensionless units.
730         return false;
731     }
732     // Find a similar unit that already exists, to attempt to coalesce
733     SingleUnitImpl* oldUnit = nullptr;
734     for (int32_t i = 0; i < impl.units.length(); i++) {
735         auto* candidate = impl.units[i];
736         if (candidate->isCompatibleWith(unit)) {
737             oldUnit = candidate;
738         }
739     }
740     if (oldUnit) {
741         // Both dimensionalities will be positive, or both will be negative, by
742         // virtue of isCompatibleWith().
743         oldUnit->dimensionality += unit.dimensionality;
744     } else {
745         SingleUnitImpl* destination = impl.units.emplaceBack();
746         if (!destination) {
747             status = U_MEMORY_ALLOCATION_ERROR;
748             return false;
749         }
750         *destination = unit;
751     }
752     return (oldUnit == nullptr);
753 }
754 
755 } // namespace
756 
757 
forMeasureUnit(const MeasureUnit & measureUnit,UErrorCode & status)758 SingleUnitImpl SingleUnitImpl::forMeasureUnit(const MeasureUnit& measureUnit, UErrorCode& status) {
759     MeasureUnitImpl temp;
760     const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(measureUnit, temp, status);
761     if (U_FAILURE(status)) {
762         return {};
763     }
764     if (impl.units.length() == 0) {
765         return {};
766     }
767     if (impl.units.length() == 1) {
768         return *impl.units[0];
769     }
770     status = U_ILLEGAL_ARGUMENT_ERROR;
771     return {};
772 }
773 
build(UErrorCode & status) const774 MeasureUnit SingleUnitImpl::build(UErrorCode& status) const {
775     MeasureUnitImpl temp;
776     temp.append(*this, status);
777     return std::move(temp).build(status);
778 }
779 
780 
forIdentifier(StringPiece identifier,UErrorCode & status)781 MeasureUnitImpl MeasureUnitImpl::forIdentifier(StringPiece identifier, UErrorCode& status) {
782     return Parser::from(identifier, status).parse(status);
783 }
784 
forMeasureUnit(const MeasureUnit & measureUnit,MeasureUnitImpl & memory,UErrorCode & status)785 const MeasureUnitImpl& MeasureUnitImpl::forMeasureUnit(
786         const MeasureUnit& measureUnit, MeasureUnitImpl& memory, UErrorCode& status) {
787     if (measureUnit.fImpl) {
788         return *measureUnit.fImpl;
789     } else {
790         memory = Parser::from(measureUnit.getIdentifier(), status).parse(status);
791         return memory;
792     }
793 }
794 
forMeasureUnitMaybeCopy(const MeasureUnit & measureUnit,UErrorCode & status)795 MeasureUnitImpl MeasureUnitImpl::forMeasureUnitMaybeCopy(
796         const MeasureUnit& measureUnit, UErrorCode& status) {
797     if (measureUnit.fImpl) {
798         return measureUnit.fImpl->copy(status);
799     } else {
800         return Parser::from(measureUnit.getIdentifier(), status).parse(status);
801     }
802 }
803 
takeReciprocal(UErrorCode &)804 void MeasureUnitImpl::takeReciprocal(UErrorCode& /*status*/) {
805     identifier.clear();
806     for (int32_t i = 0; i < units.length(); i++) {
807         units[i]->dimensionality *= -1;
808     }
809 }
810 
append(const SingleUnitImpl & singleUnit,UErrorCode & status)811 bool MeasureUnitImpl::append(const SingleUnitImpl& singleUnit, UErrorCode& status) {
812     identifier.clear();
813     return appendImpl(*this, singleUnit, status);
814 }
815 
build(UErrorCode & status)816 MeasureUnit MeasureUnitImpl::build(UErrorCode& status) && {
817     serialize(*this, status);
818     return MeasureUnit(std::move(*this));
819 }
820 
821 
forIdentifier(StringPiece identifier,UErrorCode & status)822 MeasureUnit MeasureUnit::forIdentifier(StringPiece identifier, UErrorCode& status) {
823     return Parser::from(identifier, status).parse(status).build(status);
824 }
825 
getComplexity(UErrorCode & status) const826 UMeasureUnitComplexity MeasureUnit::getComplexity(UErrorCode& status) const {
827     MeasureUnitImpl temp;
828     return MeasureUnitImpl::forMeasureUnit(*this, temp, status).complexity;
829 }
830 
getSIPrefix(UErrorCode & status) const831 UMeasureSIPrefix MeasureUnit::getSIPrefix(UErrorCode& status) const {
832     return SingleUnitImpl::forMeasureUnit(*this, status).siPrefix;
833 }
834 
withSIPrefix(UMeasureSIPrefix prefix,UErrorCode & status) const835 MeasureUnit MeasureUnit::withSIPrefix(UMeasureSIPrefix prefix, UErrorCode& status) const {
836     SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status);
837     singleUnit.siPrefix = prefix;
838     return singleUnit.build(status);
839 }
840 
getDimensionality(UErrorCode & status) const841 int32_t MeasureUnit::getDimensionality(UErrorCode& status) const {
842     SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status);
843     if (U_FAILURE(status)) { return 0; }
844     if (singleUnit.isDimensionless()) {
845         return 0;
846     }
847     return singleUnit.dimensionality;
848 }
849 
withDimensionality(int32_t dimensionality,UErrorCode & status) const850 MeasureUnit MeasureUnit::withDimensionality(int32_t dimensionality, UErrorCode& status) const {
851     SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status);
852     singleUnit.dimensionality = dimensionality;
853     return singleUnit.build(status);
854 }
855 
reciprocal(UErrorCode & status) const856 MeasureUnit MeasureUnit::reciprocal(UErrorCode& status) const {
857     MeasureUnitImpl impl = MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status);
858     impl.takeReciprocal(status);
859     return std::move(impl).build(status);
860 }
861 
// Returns the product of this unit and other: the single units of other are
// appended to (or coalesced into) a copy of this unit's single units.
//
// Mixed units cannot be multiplied: if either operand is a mixed unit, status
// is set to U_ILLEGAL_ARGUMENT_ERROR and a default MeasureUnit is returned.
MeasureUnit MeasureUnit::product(const MeasureUnit& other, UErrorCode& status) const {
    MeasureUnitImpl combined = MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status);
    MeasureUnitImpl scratch;
    const MeasureUnitImpl& rhs = MeasureUnitImpl::forMeasureUnit(other, scratch, status);

    const bool anyMixed =
        combined.complexity == UMEASURE_UNIT_MIXED || rhs.complexity == UMEASURE_UNIT_MIXED;
    if (anyMixed) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return {};
    }

    const int32_t rhsCount = rhs.units.length();
    for (int32_t i = 0; i < rhsCount; i++) {
        combined.append(*rhs.units[i], status);
    }
    // More than one single unit left after coalescing: a compound unit.
    if (combined.units.length() > 1) {
        combined.complexity = UMEASURE_UNIT_COMPOUND;
    }
    return std::move(combined).build(status);
}
878 
splitToSingleUnits(int32_t & outCount,UErrorCode & status) const879 LocalArray<MeasureUnit> MeasureUnit::splitToSingleUnits(int32_t& outCount, UErrorCode& status) const {
880     MeasureUnitImpl temp;
881     const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(*this, temp, status);
882     outCount = impl.units.length();
883     MeasureUnit* arr = new MeasureUnit[outCount];
884     for (int32_t i = 0; i < outCount; i++) {
885         arr[i] = impl.units[i]->build(status);
886     }
887     return LocalArray<MeasureUnit>(arr, status);
888 }
889 
890 
891 U_NAMESPACE_END
892 
893 #endif /* !UCONFIG_NO_FORMATTING */
894