1 // © 2020 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 // Extra functions for MeasureUnit not needed for all clients.
5 // Separate .o file so that it can be removed for modularity.
6
7 #include "unicode/utypes.h"
8
9 #if !UCONFIG_NO_FORMATTING
10
11 // Allow implicit conversion from char16_t* to UnicodeString for this file:
12 // Helpful in toString methods and elsewhere.
13 #define UNISTR_FROM_STRING_EXPLICIT
14
15 #include "charstr.h"
16 #include "cmemory.h"
17 #include "cstring.h"
18 #include "measunit_impl.h"
19 #include "resource.h"
20 #include "uarrsort.h"
21 #include "uassert.h"
22 #include "ucln_in.h"
23 #include "umutex.h"
24 #include "unicode/bytestrie.h"
25 #include "unicode/bytestriebuilder.h"
26 #include "unicode/localpointer.h"
27 #include "unicode/measunit.h"
28 #include "unicode/stringpiece.h"
29 #include "unicode/stringtriebuilder.h"
30 #include "unicode/ures.h"
31 #include "unicode/ustringtrie.h"
32 #include "uresimp.h"
33 #include <cstdlib>
34
35 U_NAMESPACE_BEGIN
36
37
38 namespace {
39
// Error code reported whenever a unit identifier cannot be parsed or
// serialized. Currently an alias for U_ILLEGAL_ARGUMENT_ERROR.
// TODO: Propose a new error code for this?
constexpr UErrorCode kUnitIdentifierSyntaxError = U_ILLEGAL_ARGUMENT_ERROR;

// The trie built by initUnitExtras() maps identifier fragments to int32_t
// values. The value space is partitioned by the offsets below, and
// Token::getType() classifies a value by comparing it against them in
// increasing order.

// Trie value offset for SI Prefixes. This is big enough to ensure we only
// insert positive integers into the trie.
constexpr int32_t kSIPrefixOffset = 64;

// Trie value offset for compound parts, e.g. "-per-", "-", "-and-".
constexpr int32_t kCompoundPartOffset = 128;

// Trie values for the separators that may appear between two single units.
enum CompoundPart {
    // Represents "-per-"
    COMPOUND_PART_PER = kCompoundPartOffset,
    // Represents "-"
    COMPOUND_PART_TIMES,
    // Represents "-and-"
    COMPOUND_PART_AND,
};

// Trie value offset for "per-".
constexpr int32_t kInitialCompoundPartOffset = 192;

// Trie values for compound parts that may appear at the very start of an
// identifier.
enum InitialCompoundPart {
    // Represents "per-", the only compound part that can appear at the start of
    // an identifier.
    INITIAL_COMPOUND_PART_PER = kInitialCompoundPartOffset,
};

// Trie value offset for powers like "square-", "cubic-", "pow2-" etc.
constexpr int32_t kPowerPartOffset = 256;

// Trie values for power prefixes. Each value is kPowerPartOffset plus the
// exponent, so Token::getPower() recovers the exponent by subtracting
// kPowerPartOffset.
enum PowerPart {
    POWER_PART_P2 = kPowerPartOffset + 2,
    POWER_PART_P3,
    POWER_PART_P4,
    POWER_PART_P5,
    POWER_PART_P6,
    POWER_PART_P7,
    POWER_PART_P8,
    POWER_PART_P9,
    POWER_PART_P10,
    POWER_PART_P11,
    POWER_PART_P12,
    POWER_PART_P13,
    POWER_PART_P14,
    POWER_PART_P15,
};

// Trie value offset for simple units, e.g. "gram", "nautical-mile",
// "fluid-ounce-imperial". The value stored for a simple unit is this offset
// plus the unit's index in gSimpleUnits.
constexpr int32_t kSimpleUnitOffset = 512;
91
// Table pairing each SI prefix spelling used in unit identifiers with its
// UMeasureSIPrefix value. initUnitExtras() adds every entry to the trie (value
// plus kSIPrefixOffset), and serializeSingle() scans the table linearly to map
// a prefix value back to its spelling.
const struct SIPrefixStrings {
    const char* const string;
    UMeasureSIPrefix value;
} gSIPrefixStrings[] = {
    { "yotta", UMEASURE_SI_PREFIX_YOTTA },
    { "zetta", UMEASURE_SI_PREFIX_ZETTA },
    { "exa", UMEASURE_SI_PREFIX_EXA },
    { "peta", UMEASURE_SI_PREFIX_PETA },
    { "tera", UMEASURE_SI_PREFIX_TERA },
    { "giga", UMEASURE_SI_PREFIX_GIGA },
    { "mega", UMEASURE_SI_PREFIX_MEGA },
    { "kilo", UMEASURE_SI_PREFIX_KILO },
    { "hecto", UMEASURE_SI_PREFIX_HECTO },
    { "deka", UMEASURE_SI_PREFIX_DEKA },
    { "deci", UMEASURE_SI_PREFIX_DECI },
    { "centi", UMEASURE_SI_PREFIX_CENTI },
    { "milli", UMEASURE_SI_PREFIX_MILLI },
    { "micro", UMEASURE_SI_PREFIX_MICRO },
    { "nano", UMEASURE_SI_PREFIX_NANO },
    { "pico", UMEASURE_SI_PREFIX_PICO },
    { "femto", UMEASURE_SI_PREFIX_FEMTO },
    { "atto", UMEASURE_SI_PREFIX_ATTO },
    { "zepto", UMEASURE_SI_PREFIX_ZEPTO },
    { "yocto", UMEASURE_SI_PREFIX_YOCTO },
};
117
118 /**
119 * A ResourceSink that collects simple unit identifiers from the keys of the
120 * convertUnits table into an array, and adds these values to a TrieBuilder,
121 * with associated values being their index into this array plus a specified
122 * offset, to a trie.
123 *
124 * Example code:
125 *
126 * UErrorCode status = U_ZERO_ERROR;
127 * BytesTrieBuilder b(status);
128 * const char *unitIdentifiers[200];
129 * SimpleUnitIdentifiersSink identifierSink(unitIdentifiers, 200, b, kTrieValueOffset);
130 * LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status));
131 * ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", identifierSink, status);
132 */
133 class SimpleUnitIdentifiersSink : public icu::ResourceSink {
134 public:
135 /**
136 * Constructor.
137 * @param out Array of char* to which the simple unit identifiers will be
138 * saved.
139 * @param outSize The size of `out`.
140 * @param trieBuilder The trie builder to which the simple unit identifier
141 * should be added. The trie builder must outlive this resource sink.
142 * @param trieValueOffset This is added to the index of the identifier in
143 * the `out` array, before adding to `trieBuilder` as the value
144 * associated with the identifier.
145 */
SimpleUnitIdentifiersSink(const char ** out,int32_t outSize,BytesTrieBuilder & trieBuilder,int32_t trieValueOffset)146 explicit SimpleUnitIdentifiersSink(const char **out, int32_t outSize, BytesTrieBuilder &trieBuilder,
147 int32_t trieValueOffset)
148 : outArray(out), outSize(outSize), trieBuilder(trieBuilder), trieValueOffset(trieValueOffset),
149 outIndex(0) {
150 }
151
152 /**
153 * Adds the table keys found in value to the output vector.
154 * @param key The key of the resource passed to `value`: the second
155 * parameter of the ures_getAllItemsWithFallback() call.
156 * @param value Should be a ResourceTable value, if
157 * ures_getAllItemsWithFallback() was called correctly for this sink.
158 * @param noFallback Ignored.
159 * @param status The standard ICU error code output parameter.
160 */
put(const char *,ResourceValue & value,UBool,UErrorCode & status)161 void put(const char * /*key*/, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) {
162 ResourceTable table = value.getTable(status);
163 if (U_FAILURE(status)) return;
164
165 if (outIndex + table.getSize() > outSize) {
166 status = U_INDEX_OUTOFBOUNDS_ERROR;
167 return;
168 }
169
170 // Collect keys from the table resource.
171 const char *key;
172 for (int32_t i = 0; table.getKeyAndValue(i, key, value); ++i) {
173 U_ASSERT(i < table.getSize());
174 U_ASSERT(outIndex < outSize);
175 if (uprv_strcmp(key, "kilogram") == 0) {
176 // For parsing, we use "gram", the prefixless metric mass unit. We
177 // thus ignore the SI Base Unit of Mass: it exists due to being the
178 // mass conversion target unit, but not needed for MeasureUnit
179 // parsing.
180 continue;
181 }
182 outArray[outIndex] = key;
183 trieBuilder.add(key, trieValueOffset + outIndex, status);
184 outIndex++;
185 }
186 }
187
188 private:
189 const char **outArray;
190 int32_t outSize;
191 BytesTrieBuilder &trieBuilder;
192 int32_t trieValueOffset;
193
194 int32_t outIndex;
195 };
196
// Guards the one-time run of initUnitExtras() (see Parser::from()); reset by
// cleanupUnitExtras().
icu::UInitOnce gUnitExtrasInitOnce = U_INITONCE_INITIALIZER;

// Array of simple unit IDs.
//
// The array memory itself is owned by this pointer, but the individual char* in
// that array point at static memory. (Note that these char* are also returned
// by SingleUnitImpl::getSimpleUnitID().)
const char **gSimpleUnits = nullptr;

// Serialized trie bytes produced by initUnitExtras(). The buffer is allocated
// with uprv_malloc() and released by cleanupUnitExtras(); each Parser
// constructs its BytesTrie over this buffer.
char *gSerializedUnitExtrasStemTrie = nullptr;
207
cleanupUnitExtras()208 UBool U_CALLCONV cleanupUnitExtras() {
209 uprv_free(gSerializedUnitExtrasStemTrie);
210 gSerializedUnitExtrasStemTrie = nullptr;
211 uprv_free(gSimpleUnits);
212 gSimpleUnits = nullptr;
213 gUnitExtrasInitOnce.reset();
214 return TRUE;
215 }
216
initUnitExtras(UErrorCode & status)217 void U_CALLCONV initUnitExtras(UErrorCode& status) {
218 ucln_i18n_registerCleanup(UCLN_I18N_UNIT_EXTRAS, cleanupUnitExtras);
219
220 BytesTrieBuilder b(status);
221 if (U_FAILURE(status)) { return; }
222
223 // Add SI prefixes
224 for (const auto& siPrefixInfo : gSIPrefixStrings) {
225 b.add(siPrefixInfo.string, siPrefixInfo.value + kSIPrefixOffset, status);
226 }
227 if (U_FAILURE(status)) { return; }
228
229 // Add syntax parts (compound, power prefixes)
230 b.add("-per-", COMPOUND_PART_PER, status);
231 b.add("-", COMPOUND_PART_TIMES, status);
232 b.add("-and-", COMPOUND_PART_AND, status);
233 b.add("per-", INITIAL_COMPOUND_PART_PER, status);
234 b.add("square-", POWER_PART_P2, status);
235 b.add("cubic-", POWER_PART_P3, status);
236 b.add("pow2-", POWER_PART_P2, status);
237 b.add("pow3-", POWER_PART_P3, status);
238 b.add("pow4-", POWER_PART_P4, status);
239 b.add("pow5-", POWER_PART_P5, status);
240 b.add("pow6-", POWER_PART_P6, status);
241 b.add("pow7-", POWER_PART_P7, status);
242 b.add("pow8-", POWER_PART_P8, status);
243 b.add("pow9-", POWER_PART_P9, status);
244 b.add("pow10-", POWER_PART_P10, status);
245 b.add("pow11-", POWER_PART_P11, status);
246 b.add("pow12-", POWER_PART_P12, status);
247 b.add("pow13-", POWER_PART_P13, status);
248 b.add("pow14-", POWER_PART_P14, status);
249 b.add("pow15-", POWER_PART_P15, status);
250 if (U_FAILURE(status)) { return; }
251
252 // Add sanctioned simple units by offset: simple units all have entries in
253 // units/convertUnits resources.
254 // TODO(ICU-21059): confirm whether this is clean enough, or whether we need to
255 // filter units' validity list instead.
256 LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status));
257 LocalUResourceBundlePointer convertUnits(
258 ures_getByKey(unitsBundle.getAlias(), "convertUnits", NULL, &status));
259 if (U_FAILURE(status)) { return; }
260
261 // Allocate enough space: with identifierSink below skipping kilogram, we're
262 // probably allocating one more than needed.
263 int32_t simpleUnitsCount = convertUnits.getAlias()->fSize;
264 int32_t arrayMallocSize = sizeof(char *) * simpleUnitsCount;
265 gSimpleUnits = static_cast<const char **>(uprv_malloc(arrayMallocSize));
266 if (gSimpleUnits == nullptr) {
267 status = U_MEMORY_ALLOCATION_ERROR;
268 return;
269 }
270 uprv_memset(gSimpleUnits, 0, arrayMallocSize);
271
272 // Populate gSimpleUnits and build the associated trie.
273 SimpleUnitIdentifiersSink identifierSink(gSimpleUnits, simpleUnitsCount, b, kSimpleUnitOffset);
274 ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", identifierSink, status);
275
276 // Build the CharsTrie
277 // TODO: Use SLOW or FAST here?
278 StringPiece result = b.buildStringPiece(USTRINGTRIE_BUILD_FAST, status);
279 if (U_FAILURE(status)) { return; }
280
281 // Copy the result into the global constant pointer
282 size_t numBytes = result.length();
283 gSerializedUnitExtrasStemTrie = static_cast<char *>(uprv_malloc(numBytes));
284 if (gSerializedUnitExtrasStemTrie == nullptr) {
285 status = U_MEMORY_ALLOCATION_ERROR;
286 return;
287 }
288 uprv_memcpy(gSerializedUnitExtrasStemTrie, result.data(), numBytes);
289 }
290
291 class Token {
292 public:
Token(int32_t match)293 Token(int32_t match) : fMatch(match) {}
294
295 enum Type {
296 TYPE_UNDEFINED,
297 TYPE_SI_PREFIX,
298 // Token type for "-per-", "-", and "-and-".
299 TYPE_COMPOUND_PART,
300 // Token type for "per-".
301 TYPE_INITIAL_COMPOUND_PART,
302 TYPE_POWER_PART,
303 TYPE_SIMPLE_UNIT,
304 };
305
306 // Calling getType() is invalid, resulting in an assertion failure, if Token
307 // value isn't positive.
getType() const308 Type getType() const {
309 U_ASSERT(fMatch > 0);
310 if (fMatch < kCompoundPartOffset) {
311 return TYPE_SI_PREFIX;
312 }
313 if (fMatch < kInitialCompoundPartOffset) {
314 return TYPE_COMPOUND_PART;
315 }
316 if (fMatch < kPowerPartOffset) {
317 return TYPE_INITIAL_COMPOUND_PART;
318 }
319 if (fMatch < kSimpleUnitOffset) {
320 return TYPE_POWER_PART;
321 }
322 return TYPE_SIMPLE_UNIT;
323 }
324
getSIPrefix() const325 UMeasureSIPrefix getSIPrefix() const {
326 U_ASSERT(getType() == TYPE_SI_PREFIX);
327 return static_cast<UMeasureSIPrefix>(fMatch - kSIPrefixOffset);
328 }
329
330 // Valid only for tokens with type TYPE_COMPOUND_PART.
getMatch() const331 int32_t getMatch() const {
332 U_ASSERT(getType() == TYPE_COMPOUND_PART);
333 return fMatch;
334 }
335
getInitialCompoundPart() const336 int32_t getInitialCompoundPart() const {
337 // Even if there is only one InitialCompoundPart value, we have this
338 // function for the simplicity of code consistency.
339 U_ASSERT(getType() == TYPE_INITIAL_COMPOUND_PART);
340 // Defensive: if this assert fails, code using this function also needs
341 // to change.
342 U_ASSERT(fMatch == INITIAL_COMPOUND_PART_PER);
343 return fMatch;
344 }
345
getPower() const346 int8_t getPower() const {
347 U_ASSERT(getType() == TYPE_POWER_PART);
348 return static_cast<int8_t>(fMatch - kPowerPartOffset);
349 }
350
getSimpleUnitIndex() const351 int32_t getSimpleUnitIndex() const {
352 U_ASSERT(getType() == TYPE_SIMPLE_UNIT);
353 return fMatch - kSimpleUnitOffset;
354 }
355
356 private:
357 int32_t fMatch;
358 };
359
360 class Parser {
361 public:
362 /**
363 * Factory function for parsing the given identifier.
364 *
365 * @param source The identifier to parse. This function does not make a copy
366 * of source: the underlying string that source points at, must outlive the
367 * parser.
368 * @param status ICU error code.
369 */
from(StringPiece source,UErrorCode & status)370 static Parser from(StringPiece source, UErrorCode& status) {
371 if (U_FAILURE(status)) {
372 return Parser();
373 }
374 umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status);
375 if (U_FAILURE(status)) {
376 return Parser();
377 }
378 return Parser(source);
379 }
380
parse(UErrorCode & status)381 MeasureUnitImpl parse(UErrorCode& status) {
382 MeasureUnitImpl result;
383 parseImpl(result, status);
384 return result;
385 }
386
387 private:
388 // Tracks parser progress: the offset into fSource.
389 int32_t fIndex = 0;
390
391 // Since we're not owning this memory, whatever is passed to the constructor
392 // should live longer than this Parser - and the parser shouldn't return any
393 // references to that string.
394 StringPiece fSource;
395 BytesTrie fTrie;
396
397 // Set to true when we've seen a "-per-" or a "per-", after which all units
398 // are in the denominator. Until we find an "-and-", at which point the
399 // identifier is invalid pending TODO(CLDR-13700).
400 bool fAfterPer = false;
401
Parser()402 Parser() : fSource(""), fTrie(u"") {}
403
Parser(StringPiece source)404 Parser(StringPiece source)
405 : fSource(source), fTrie(gSerializedUnitExtrasStemTrie) {}
406
hasNext() const407 inline bool hasNext() const {
408 return fIndex < fSource.length();
409 }
410
411 // Returns the next Token parsed from fSource, advancing fIndex to the end
412 // of that token in fSource. In case of U_FAILURE(status), the token
413 // returned will cause an abort if getType() is called on it.
nextToken(UErrorCode & status)414 Token nextToken(UErrorCode& status) {
415 fTrie.reset();
416 int32_t match = -1;
417 // Saves the position in the fSource string for the end of the most
418 // recent matching token.
419 int32_t previ = -1;
420 // Find the longest token that matches a value in the trie:
421 while (fIndex < fSource.length()) {
422 auto result = fTrie.next(fSource.data()[fIndex++]);
423 if (result == USTRINGTRIE_NO_MATCH) {
424 break;
425 } else if (result == USTRINGTRIE_NO_VALUE) {
426 continue;
427 }
428 U_ASSERT(USTRINGTRIE_HAS_VALUE(result));
429 match = fTrie.getValue();
430 previ = fIndex;
431 if (result == USTRINGTRIE_FINAL_VALUE) {
432 break;
433 }
434 U_ASSERT(result == USTRINGTRIE_INTERMEDIATE_VALUE);
435 // continue;
436 }
437
438 if (match < 0) {
439 status = kUnitIdentifierSyntaxError;
440 } else {
441 fIndex = previ;
442 }
443 return Token(match);
444 }
445
446 /**
447 * Returns the next "single unit" via result.
448 *
449 * If a "-per-" was parsed, the result will have appropriate negative
450 * dimensionality.
451 *
452 * Returns an error if we parse both compound units and "-and-", since mixed
453 * compound units are not yet supported - TODO(CLDR-13700).
454 *
455 * @param result Will be overwritten by the result, if status shows success.
456 * @param sawAnd If an "-and-" was parsed prior to finding the "single
457 * unit", sawAnd is set to true. If not, it is left as is.
458 * @param status ICU error code.
459 */
nextSingleUnit(SingleUnitImpl & result,bool & sawAnd,UErrorCode & status)460 void nextSingleUnit(SingleUnitImpl& result, bool& sawAnd, UErrorCode& status) {
461 if (U_FAILURE(status)) {
462 return;
463 }
464
465 // state:
466 // 0 = no tokens seen yet (will accept power, SI prefix, or simple unit)
467 // 1 = power token seen (will not accept another power token)
468 // 2 = SI prefix token seen (will not accept a power or SI prefix token)
469 int32_t state = 0;
470
471 bool atStart = fIndex == 0;
472 Token token = nextToken(status);
473 if (U_FAILURE(status)) { return; }
474
475 if (atStart) {
476 // Identifiers optionally start with "per-".
477 if (token.getType() == Token::TYPE_INITIAL_COMPOUND_PART) {
478 U_ASSERT(token.getInitialCompoundPart() == INITIAL_COMPOUND_PART_PER);
479 fAfterPer = true;
480 result.dimensionality = -1;
481
482 token = nextToken(status);
483 if (U_FAILURE(status)) { return; }
484 }
485 } else {
486 // All other SingleUnit's are separated from previous SingleUnit's
487 // via a compound part:
488 if (token.getType() != Token::TYPE_COMPOUND_PART) {
489 status = kUnitIdentifierSyntaxError;
490 return;
491 }
492
493 switch (token.getMatch()) {
494 case COMPOUND_PART_PER:
495 if (sawAnd) {
496 // Mixed compound units not yet supported,
497 // TODO(CLDR-13700).
498 status = kUnitIdentifierSyntaxError;
499 return;
500 }
501 fAfterPer = true;
502 result.dimensionality = -1;
503 break;
504
505 case COMPOUND_PART_TIMES:
506 if (fAfterPer) {
507 result.dimensionality = -1;
508 }
509 break;
510
511 case COMPOUND_PART_AND:
512 if (fAfterPer) {
513 // Can't start with "-and-", and mixed compound units
514 // not yet supported, TODO(CLDR-13700).
515 status = kUnitIdentifierSyntaxError;
516 return;
517 }
518 sawAnd = true;
519 break;
520 }
521
522 token = nextToken(status);
523 if (U_FAILURE(status)) { return; }
524 }
525
526 // Read tokens until we have a complete SingleUnit or we reach the end.
527 while (true) {
528 switch (token.getType()) {
529 case Token::TYPE_POWER_PART:
530 if (state > 0) {
531 status = kUnitIdentifierSyntaxError;
532 return;
533 }
534 result.dimensionality *= token.getPower();
535 state = 1;
536 break;
537
538 case Token::TYPE_SI_PREFIX:
539 if (state > 1) {
540 status = kUnitIdentifierSyntaxError;
541 return;
542 }
543 result.siPrefix = token.getSIPrefix();
544 state = 2;
545 break;
546
547 case Token::TYPE_SIMPLE_UNIT:
548 result.index = token.getSimpleUnitIndex();
549 return;
550
551 default:
552 status = kUnitIdentifierSyntaxError;
553 return;
554 }
555
556 if (!hasNext()) {
557 // We ran out of tokens before finding a complete single unit.
558 status = kUnitIdentifierSyntaxError;
559 return;
560 }
561 token = nextToken(status);
562 if (U_FAILURE(status)) {
563 return;
564 }
565 }
566 }
567
568 /// @param result is modified, not overridden. Caller must pass in a
569 /// default-constructed (empty) MeasureUnitImpl instance.
parseImpl(MeasureUnitImpl & result,UErrorCode & status)570 void parseImpl(MeasureUnitImpl& result, UErrorCode& status) {
571 if (U_FAILURE(status)) {
572 return;
573 }
574 if (fSource.empty()) {
575 // The dimenionless unit: nothing to parse. leave result as is.
576 return;
577 }
578 int32_t unitNum = 0;
579 while (hasNext()) {
580 bool sawAnd = false;
581 SingleUnitImpl singleUnit;
582 nextSingleUnit(singleUnit, sawAnd, status);
583 if (U_FAILURE(status)) {
584 return;
585 }
586 U_ASSERT(!singleUnit.isDimensionless());
587 bool added = result.append(singleUnit, status);
588 if (sawAnd && !added) {
589 // Two similar units are not allowed in a mixed unit
590 status = kUnitIdentifierSyntaxError;
591 return;
592 }
593 if ((++unitNum) >= 2) {
594 // nextSingleUnit fails appropriately for "per" and "and" in the
595 // same identifier. It doesn't fail for other compound units
596 // (COMPOUND_PART_TIMES). Consequently we take care of that
597 // here.
598 UMeasureUnitComplexity complexity =
599 sawAnd ? UMEASURE_UNIT_MIXED : UMEASURE_UNIT_COMPOUND;
600 if (unitNum == 2) {
601 U_ASSERT(result.complexity == UMEASURE_UNIT_SINGLE);
602 result.complexity = complexity;
603 } else if (result.complexity != complexity) {
604 // Can't have mixed compound units
605 status = kUnitIdentifierSyntaxError;
606 return;
607 }
608 }
609 }
610 }
611 };
612
613 int32_t U_CALLCONV
compareSingleUnits(const void *,const void * left,const void * right)614 compareSingleUnits(const void* /*context*/, const void* left, const void* right) {
615 auto realLeft = static_cast<const SingleUnitImpl* const*>(left);
616 auto realRight = static_cast<const SingleUnitImpl* const*>(right);
617 return (*realLeft)->compareTo(**realRight);
618 }
619
620 /**
621 * Generate the identifier string for a single unit in place.
622 *
623 * Does not support the dimensionless SingleUnitImpl: calling serializeSingle
624 * with the dimensionless unit results in an U_INTERNAL_PROGRAM_ERROR.
625 *
626 * @param first If singleUnit is part of a compound unit, and not its first
627 * single unit, set this to false. Otherwise: set to true.
628 */
serializeSingle(const SingleUnitImpl & singleUnit,bool first,CharString & output,UErrorCode & status)629 void serializeSingle(const SingleUnitImpl& singleUnit, bool first, CharString& output, UErrorCode& status) {
630 if (first && singleUnit.dimensionality < 0) {
631 // Essentially the "unary per". For compound units with a numerator, the
632 // caller takes care of the "binary per".
633 output.append("per-", status);
634 }
635
636 if (singleUnit.isDimensionless()) {
637 status = U_INTERNAL_PROGRAM_ERROR;
638 return;
639 }
640 int8_t posPower = std::abs(singleUnit.dimensionality);
641 if (posPower == 0) {
642 status = U_INTERNAL_PROGRAM_ERROR;
643 } else if (posPower == 1) {
644 // no-op
645 } else if (posPower == 2) {
646 output.append("square-", status);
647 } else if (posPower == 3) {
648 output.append("cubic-", status);
649 } else if (posPower < 10) {
650 output.append("pow", status);
651 output.append(posPower + '0', status);
652 output.append('-', status);
653 } else if (posPower <= 15) {
654 output.append("pow1", status);
655 output.append('0' + (posPower % 10), status);
656 output.append('-', status);
657 } else {
658 status = kUnitIdentifierSyntaxError;
659 }
660 if (U_FAILURE(status)) {
661 return;
662 }
663
664 if (singleUnit.siPrefix != UMEASURE_SI_PREFIX_ONE) {
665 for (const auto& siPrefixInfo : gSIPrefixStrings) {
666 if (siPrefixInfo.value == singleUnit.siPrefix) {
667 output.append(siPrefixInfo.string, status);
668 break;
669 }
670 }
671 }
672 if (U_FAILURE(status)) {
673 return;
674 }
675
676 output.append(singleUnit.getSimpleUnitID(), status);
677 }
678
679 /**
680 * Normalize a MeasureUnitImpl and generate the identifier string in place.
681 */
serialize(MeasureUnitImpl & impl,UErrorCode & status)682 void serialize(MeasureUnitImpl& impl, UErrorCode& status) {
683 if (U_FAILURE(status)) {
684 return;
685 }
686 U_ASSERT(impl.identifier.isEmpty());
687 if (impl.units.length() == 0) {
688 // Dimensionless, constructed by the default constructor: no appending
689 // to impl.identifier, we wish it to contain the zero-length string.
690 return;
691 }
692 if (impl.complexity == UMEASURE_UNIT_COMPOUND) {
693 // Note: don't sort a MIXED unit
694 uprv_sortArray(
695 impl.units.getAlias(),
696 impl.units.length(),
697 sizeof(impl.units[0]),
698 compareSingleUnits,
699 nullptr,
700 false,
701 &status);
702 if (U_FAILURE(status)) {
703 return;
704 }
705 }
706 serializeSingle(*impl.units[0], true, impl.identifier, status);
707 if (impl.units.length() == 1) {
708 return;
709 }
710 for (int32_t i = 1; i < impl.units.length(); i++) {
711 const SingleUnitImpl& prev = *impl.units[i-1];
712 const SingleUnitImpl& curr = *impl.units[i];
713 if (impl.complexity == UMEASURE_UNIT_MIXED) {
714 impl.identifier.append("-and-", status);
715 serializeSingle(curr, true, impl.identifier, status);
716 } else {
717 if (prev.dimensionality > 0 && curr.dimensionality < 0) {
718 impl.identifier.append("-per-", status);
719 } else {
720 impl.identifier.append('-', status);
721 }
722 serializeSingle(curr, false, impl.identifier, status);
723 }
724 }
725
726 }
727
728 /**
729 * Appends a SingleUnitImpl to a MeasureUnitImpl.
730 *
731 * @return true if a new item was added. If unit is the dimensionless unit, it
732 * is never added: the return value will always be false.
733 */
appendImpl(MeasureUnitImpl & impl,const SingleUnitImpl & unit,UErrorCode & status)734 bool appendImpl(MeasureUnitImpl& impl, const SingleUnitImpl& unit, UErrorCode& status) {
735 if (unit.isDimensionless()) {
736 // We don't append dimensionless units.
737 return false;
738 }
739 // Find a similar unit that already exists, to attempt to coalesce
740 SingleUnitImpl* oldUnit = nullptr;
741 for (int32_t i = 0; i < impl.units.length(); i++) {
742 auto* candidate = impl.units[i];
743 if (candidate->isCompatibleWith(unit)) {
744 oldUnit = candidate;
745 }
746 }
747 if (oldUnit) {
748 // Both dimensionalities will be positive, or both will be negative, by
749 // virtue of isCompatibleWith().
750 oldUnit->dimensionality += unit.dimensionality;
751 } else {
752 SingleUnitImpl* destination = impl.units.emplaceBack();
753 if (!destination) {
754 status = U_MEMORY_ALLOCATION_ERROR;
755 return false;
756 }
757 *destination = unit;
758 }
759 return (oldUnit == nullptr);
760 }
761
762 } // namespace
763
764
forMeasureUnit(const MeasureUnit & measureUnit,UErrorCode & status)765 SingleUnitImpl SingleUnitImpl::forMeasureUnit(const MeasureUnit& measureUnit, UErrorCode& status) {
766 MeasureUnitImpl temp;
767 const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(measureUnit, temp, status);
768 if (U_FAILURE(status)) {
769 return {};
770 }
771 if (impl.units.length() == 0) {
772 return {};
773 }
774 if (impl.units.length() == 1) {
775 return *impl.units[0];
776 }
777 status = U_ILLEGAL_ARGUMENT_ERROR;
778 return {};
779 }
780
build(UErrorCode & status) const781 MeasureUnit SingleUnitImpl::build(UErrorCode& status) const {
782 MeasureUnitImpl temp;
783 temp.append(*this, status);
784 return std::move(temp).build(status);
785 }
786
// Returns the identifier of this unit's simple unit by indexing gSimpleUnits
// with `index`. No bounds check is performed here, and gSimpleUnits is only
// populated by initUnitExtras().
const char *SingleUnitImpl::getSimpleUnitID() const {
    return gSimpleUnits[index];
}
790
// Copying constructor with error reporting: assigns the result of
// other.copy(status) to the new object.
MeasureUnitImpl::MeasureUnitImpl(const MeasureUnitImpl &other, UErrorCode &status) {
    *this = other.copy(status);
}
794
// Constructs a MeasureUnitImpl and appends singleUnit to it. append() does not
// add a dimensionless singleUnit, so the result is empty in that case.
MeasureUnitImpl::MeasureUnitImpl(const SingleUnitImpl &singleUnit, UErrorCode &status) {
    this->append(singleUnit, status);
}
798
forIdentifier(StringPiece identifier,UErrorCode & status)799 MeasureUnitImpl MeasureUnitImpl::forIdentifier(StringPiece identifier, UErrorCode& status) {
800 return Parser::from(identifier, status).parse(status);
801 }
802
forMeasureUnit(const MeasureUnit & measureUnit,MeasureUnitImpl & memory,UErrorCode & status)803 const MeasureUnitImpl& MeasureUnitImpl::forMeasureUnit(
804 const MeasureUnit& measureUnit, MeasureUnitImpl& memory, UErrorCode& status) {
805 if (measureUnit.fImpl) {
806 return *measureUnit.fImpl;
807 } else {
808 memory = Parser::from(measureUnit.getIdentifier(), status).parse(status);
809 return memory;
810 }
811 }
812
forMeasureUnitMaybeCopy(const MeasureUnit & measureUnit,UErrorCode & status)813 MeasureUnitImpl MeasureUnitImpl::forMeasureUnitMaybeCopy(
814 const MeasureUnit& measureUnit, UErrorCode& status) {
815 if (measureUnit.fImpl) {
816 return measureUnit.fImpl->copy(status);
817 } else {
818 return Parser::from(measureUnit.getIdentifier(), status).parse(status);
819 }
820 }
821
takeReciprocal(UErrorCode &)822 void MeasureUnitImpl::takeReciprocal(UErrorCode& /*status*/) {
823 identifier.clear();
824 for (int32_t i = 0; i < units.length(); i++) {
825 units[i]->dimensionality *= -1;
826 }
827 }
828
// Adds singleUnit to this unit (see appendImpl() for the merging rules) after
// clearing the cached identifier, which would otherwise be stale.
// Returns true if a new entry was added, false if singleUnit was merged into an
// existing entry, was dimensionless, or could not be allocated.
bool MeasureUnitImpl::append(const SingleUnitImpl& singleUnit, UErrorCode& status) {
    identifier.clear();
    return appendImpl(*this, singleUnit, status);
}
833
extractIndividualUnits(UErrorCode & status) const834 MaybeStackVector<MeasureUnitImpl> MeasureUnitImpl::extractIndividualUnits(UErrorCode &status) const {
835 MaybeStackVector<MeasureUnitImpl> result;
836
837 if (this->complexity != UMeasureUnitComplexity::UMEASURE_UNIT_MIXED) {
838 result.emplaceBackAndCheckErrorCode(status, *this, status);
839 return result;
840 }
841
842 for (int32_t i = 0; i < units.length(); i++) {
843 result.emplaceBackAndCheckErrorCode(status, *units[i], status);
844 }
845
846 return result;
847 }
848
// Consumes this impl (rvalue-qualified): serializes the identifier via
// serialize(), then moves *this into a new MeasureUnit. The MeasureUnit is
// constructed even if serialize() reported a failure through status.
MeasureUnit MeasureUnitImpl::build(UErrorCode& status) && {
    serialize(*this, status);
    return MeasureUnit(std::move(*this));
}
853
forIdentifier(StringPiece identifier,UErrorCode & status)854 MeasureUnit MeasureUnit::forIdentifier(StringPiece identifier, UErrorCode& status) {
855 return Parser::from(identifier, status).parse(status).build(status);
856 }
857
getComplexity(UErrorCode & status) const858 UMeasureUnitComplexity MeasureUnit::getComplexity(UErrorCode& status) const {
859 MeasureUnitImpl temp;
860 return MeasureUnitImpl::forMeasureUnit(*this, temp, status).complexity;
861 }
862
getSIPrefix(UErrorCode & status) const863 UMeasureSIPrefix MeasureUnit::getSIPrefix(UErrorCode& status) const {
864 return SingleUnitImpl::forMeasureUnit(*this, status).siPrefix;
865 }
866
// Returns a copy of this single unit with its SI prefix replaced by `prefix`.
// Errors from SingleUnitImpl::forMeasureUnit() (e.g. this unit is not a single
// unit) are reported through status.
MeasureUnit MeasureUnit::withSIPrefix(UMeasureSIPrefix prefix, UErrorCode& status) const {
    SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status);
    singleUnit.siPrefix = prefix;
    return singleUnit.build(status);
}
872
getDimensionality(UErrorCode & status) const873 int32_t MeasureUnit::getDimensionality(UErrorCode& status) const {
874 SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status);
875 if (U_FAILURE(status)) { return 0; }
876 if (singleUnit.isDimensionless()) {
877 return 0;
878 }
879 return singleUnit.dimensionality;
880 }
881
// Returns a copy of this single unit with its dimensionality (power) replaced
// by `dimensionality`. The value is not validated here; serializeSingle()
// rejects powers it cannot express when the result is built.
MeasureUnit MeasureUnit::withDimensionality(int32_t dimensionality, UErrorCode& status) const {
    SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status);
    singleUnit.dimensionality = dimensionality;
    return singleUnit.build(status);
}
887
// Returns the reciprocal of this unit: works on an independent copy of the
// impl, negates every dimensionality via takeReciprocal(), and rebuilds the
// MeasureUnit (which re-serializes the identifier).
MeasureUnit MeasureUnit::reciprocal(UErrorCode& status) const {
    MeasureUnitImpl impl = MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status);
    impl.takeReciprocal(status);
    return std::move(impl).build(status);
}
893
// Returns the product of this unit and `other`. Compatible single units are
// merged by append(). Mixed units cannot be multiplied: if either operand is
// MIXED, status is set to U_ILLEGAL_ARGUMENT_ERROR and a default MeasureUnit
// is returned.
MeasureUnit MeasureUnit::product(const MeasureUnit& other, UErrorCode& status) const {
    // `combined` is an independent copy, so appending to it cannot disturb
    // `rhs` even when `other` is this very unit.
    MeasureUnitImpl combined = MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status);
    MeasureUnitImpl scratch;
    const MeasureUnitImpl& rhs = MeasureUnitImpl::forMeasureUnit(other, scratch, status);

    const bool anyMixed = combined.complexity == UMEASURE_UNIT_MIXED ||
                          rhs.complexity == UMEASURE_UNIT_MIXED;
    if (anyMixed) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return {};
    }

    const int32_t rhsCount = rhs.units.length();
    for (int32_t i = 0; i < rhsCount; ++i) {
        combined.append(*rhs.units[i], status);
    }
    // More than one distinct single unit makes the result a compound unit.
    if (combined.units.length() > 1) {
        combined.complexity = UMEASURE_UNIT_COMPOUND;
    }
    return std::move(combined).build(status);
}
910
splitToSingleUnitsImpl(int32_t & outCount,UErrorCode & status) const911 LocalArray<MeasureUnit> MeasureUnit::splitToSingleUnitsImpl(int32_t& outCount, UErrorCode& status) const {
912 MeasureUnitImpl temp;
913 const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(*this, temp, status);
914 outCount = impl.units.length();
915 MeasureUnit* arr = new MeasureUnit[outCount];
916 if (arr == nullptr) {
917 status = U_MEMORY_ALLOCATION_ERROR;
918 return LocalArray<MeasureUnit>();
919 }
920 for (int32_t i = 0; i < outCount; i++) {
921 arr[i] = impl.units[i]->build(status);
922 }
923 return LocalArray<MeasureUnit>(arr, status);
924 }
925
926
927 U_NAMESPACE_END
928
#endif /* !UCONFIG_NO_FORMATTING */
930