1 // © 2020 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 #ifndef __MEASUNIT_IMPL_H__ 5 #define __MEASUNIT_IMPL_H__ 6 7 #include "unicode/utypes.h" 8 9 #if !UCONFIG_NO_FORMATTING 10 11 #include "unicode/measunit.h" 12 #include "cmemory.h" 13 #include "charstr.h" 14 15 U_NAMESPACE_BEGIN 16 17 namespace number { 18 namespace impl { 19 class LongNameHandler; 20 } 21 } // namespace number 22 23 static const char16_t kDefaultCurrency[] = u"XXX"; 24 static const char kDefaultCurrency8[] = "XXX"; 25 26 /** 27 * Looks up the "unitQuantity" (aka "type" or "category") of a base unit 28 * identifier. The category is returned via `result`, which must initially be 29 * empty. 30 * 31 * This only supports base units: other units must be resolved to base units 32 * before passing to this function, otherwise U_UNSUPPORTED_ERROR status may be 33 * returned. 34 * 35 * Categories are found in `unitQuantities` in the `units` resource (see 36 * `units.txt`). 37 */ 38 // TODO: make this function accepts any `MeasureUnit` as Java and move it to the `UnitsData` class. 39 CharString U_I18N_API getUnitQuantity(const MeasureUnitImpl &baseMeasureUnitImpl, UErrorCode &status); 40 41 /** 42 * A struct representing a single unit (optional SI or binary prefix, and dimensionality). 43 */ 44 struct U_I18N_API SingleUnitImpl : public UMemory { 45 /** 46 * Gets a single unit from the MeasureUnit. If there are multiple single units, sets an error 47 * code and returns the base dimensionless unit. Parses if necessary. 48 */ 49 static SingleUnitImpl forMeasureUnit(const MeasureUnit& measureUnit, UErrorCode& status); 50 51 /** Transform this SingleUnitImpl into a MeasureUnit, simplifying if possible. */ 52 MeasureUnit build(UErrorCode& status) const; 53 54 /** 55 * Returns the "simple unit ID", without SI or dimensionality prefix: this 56 * instance may represent a square-kilometer, but only "meter" will be 57 * returned. 58 * 59 * The returned pointer points at memory that exists for the duration of the 60 * program's running. 61 */ 62 const char *getSimpleUnitID() const; 63 64 /** 65 * Generates and append a neutral identifier string for a single unit which means we do not include 66 * the dimension signal. 67 */ 68 void appendNeutralIdentifier(CharString &result, UErrorCode &status) const; 69 70 /** 71 * Returns the index of this unit's "quantity" in unitQuantities (in 72 * measunit_extra.cpp). The value of this index determines sort order for 73 * normalization of unit identifiers. 74 */ 75 int32_t getUnitCategoryIndex() const; 76 77 /** 78 * Compare this SingleUnitImpl to another SingleUnitImpl for the sake of 79 * sorting and coalescing. 80 * 81 * Sort order of units is specified by UTS #35 82 * (https://unicode.org/reports/tr35/tr35-info.html#Unit_Identifier_Normalization). 83 * 84 * Takes the sign of dimensionality into account, but not the absolute 85 * value: per-meter is not considered the same as meter, but meter is 86 * considered the same as square-meter. 87 * 88 * The dimensionless unit generally does not get compared, but if it did, it 89 * would sort before other units by virtue of index being < 0 and 90 * dimensionality not being negative. 91 */ compareToSingleUnitImpl92 int32_t compareTo(const SingleUnitImpl& other) const { 93 if (dimensionality < 0 && other.dimensionality > 0) { 94 // Positive dimensions first 95 return 1; 96 } 97 if (dimensionality > 0 && other.dimensionality < 0) { 98 return -1; 99 } 100 101 // Sort by official quantity order 102 int32_t thisQuantity = this->getUnitCategoryIndex(); 103 int32_t otherQuantity = other.getUnitCategoryIndex(); 104 if (thisQuantity < otherQuantity) { 105 return -1; 106 } 107 if (thisQuantity > otherQuantity) { 108 return 1; 109 } 110 111 // If quantity order didn't help, then we go by index. 112 if (index < other.index) { 113 return -1; 114 } 115 if (index > other.index) { 116 return 1; 117 } 118 119 // When comparing binary prefixes vs SI prefixes, instead of comparing the actual values, we can 120 // multiply the binary prefix power by 3 and compare the powers. if they are equal, we can can 121 // compare the bases. 122 // NOTE: this methodology will fail if the binary prefix more than or equal 98. 123 int32_t unitBase = umeas_getPrefixBase(unitPrefix); 124 int32_t otherUnitBase = umeas_getPrefixBase(other.unitPrefix); 125 126 // Values for comparison purposes only. 127 int32_t unitPower = unitBase == 1024 /* Binary Prefix */ ? umeas_getPrefixPower(unitPrefix) * 3 128 : umeas_getPrefixPower(unitPrefix); 129 int32_t otherUnitPower = 130 otherUnitBase == 1024 /* Binary Prefix */ ? umeas_getPrefixPower(other.unitPrefix) * 3 131 : umeas_getPrefixPower(other.unitPrefix); 132 133 // NOTE: if the unitPower is less than the other, 134 // we return 1 not -1. Thus because we want th sorting order 135 // for the bigger prefix to be before the smaller. 136 // Example: megabyte should come before kilobyte. 137 if (unitPower < otherUnitPower) { 138 return 1; 139 } 140 if (unitPower > otherUnitPower) { 141 return -1; 142 } 143 144 if (unitBase < otherUnitBase) { 145 return 1; 146 } 147 if (unitBase > otherUnitBase) { 148 return -1; 149 } 150 151 return 0; 152 } 153 154 /** 155 * Return whether this SingleUnitImpl is compatible with another for the purpose of coalescing. 156 * 157 * Units with the same base unit and SI or binary prefix should match, except that they must also 158 * have the same dimensionality sign, such that we don't merge numerator and denominator. 159 */ isCompatibleWithSingleUnitImpl160 bool isCompatibleWith(const SingleUnitImpl& other) const { 161 return (compareTo(other) == 0); 162 } 163 164 /** 165 * Returns true if this unit is the "dimensionless base unit", as produced 166 * by the MeasureUnit() default constructor. (This does not include the 167 * likes of concentrations or angles.) 168 */ isDimensionlessSingleUnitImpl169 bool isDimensionless() const { 170 return index == -1; 171 } 172 173 /** 174 * Simple unit index, unique for every simple unit, -1 for the dimensionless 175 * unit. This is an index into a string list in measunit_extra.cpp, as 176 * loaded by SimpleUnitIdentifiersSink. 177 * 178 * The default value is -1, meaning the dimensionless unit: 179 * isDimensionless() will return true, until index is changed. 180 */ 181 int32_t index = -1; 182 183 /** 184 * SI or binary prefix. 185 * 186 * This is ignored for the dimensionless unit. 187 */ 188 UMeasurePrefix unitPrefix = UMEASURE_PREFIX_ONE; 189 190 /** 191 * Dimensionality. 192 * 193 * This is meaningless for the dimensionless unit. 194 */ 195 int32_t dimensionality = 1; 196 }; 197 198 // Forward declaration 199 struct MeasureUnitImplWithIndex; 200 201 // Export explicit template instantiations of MaybeStackArray, MemoryPool and 202 // MaybeStackVector. This is required when building DLLs for Windows. (See 203 // datefmt.h, collationiterator.h, erarules.h and others for similar examples.) 204 #if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN 205 template class U_I18N_API MaybeStackArray<SingleUnitImpl *, 8>; 206 template class U_I18N_API MemoryPool<SingleUnitImpl, 8>; 207 template class U_I18N_API MaybeStackVector<SingleUnitImpl, 8>; 208 #endif 209 210 /** 211 * Internal representation of measurement units. Capable of representing all complexities of units, 212 * including mixed and compound units. 213 */ 214 class U_I18N_API MeasureUnitImpl : public UMemory { 215 public: 216 MeasureUnitImpl() = default; 217 MeasureUnitImpl(MeasureUnitImpl &&other) = default; 218 // No copy constructor, use MeasureUnitImpl::copy() to make it explicit. 219 MeasureUnitImpl(const MeasureUnitImpl &other, UErrorCode &status) = delete; 220 MeasureUnitImpl(const SingleUnitImpl &singleUnit, UErrorCode &status); 221 222 MeasureUnitImpl &operator=(MeasureUnitImpl &&other) noexcept = default; 223 224 /** Extract the MeasureUnitImpl from a MeasureUnit. */ get(const MeasureUnit & measureUnit)225 static inline const MeasureUnitImpl *get(const MeasureUnit &measureUnit) { 226 return measureUnit.fImpl; 227 } 228 229 /** 230 * Parse a unit identifier into a MeasureUnitImpl. 231 * 232 * @param identifier The unit identifier string. 233 * @param status Set if the identifier string is not valid. 234 * @return A newly parsed value object. Behaviour of this unit is 235 * unspecified if an error is returned via status. 236 */ 237 static MeasureUnitImpl forIdentifier(StringPiece identifier, UErrorCode& status); 238 239 /** 240 * Extract the MeasureUnitImpl from a MeasureUnit, or parse if it is not present. 241 * 242 * @param measureUnit The source MeasureUnit. 243 * @param memory A place to write the new MeasureUnitImpl if parsing is required. 244 * @param status Set if an error occurs. 245 * @return A reference to either measureUnit.fImpl or memory. 246 */ 247 static const MeasureUnitImpl& forMeasureUnit( 248 const MeasureUnit& measureUnit, MeasureUnitImpl& memory, UErrorCode& status); 249 250 /** 251 * Extract the MeasureUnitImpl from a MeasureUnit, or parse if it is not present. 252 * 253 * @param measureUnit The source MeasureUnit. 254 * @param status Set if an error occurs. 255 * @return A value object, either newly parsed or copied from measureUnit. 256 */ 257 static MeasureUnitImpl forMeasureUnitMaybeCopy( 258 const MeasureUnit& measureUnit, UErrorCode& status); 259 260 /** 261 * Used for currency units. 262 */ forCurrencyCode(StringPiece currencyCode)263 static inline MeasureUnitImpl forCurrencyCode(StringPiece currencyCode) { 264 MeasureUnitImpl result; 265 UErrorCode localStatus = U_ZERO_ERROR; 266 result.identifier.append(currencyCode, localStatus); 267 // localStatus is not expected to fail since currencyCode should be 3 chars long 268 return result; 269 } 270 271 /** Transform this MeasureUnitImpl into a MeasureUnit, simplifying if possible. */ 272 MeasureUnit build(UErrorCode& status) &&; 273 274 /** 275 * Create a copy of this MeasureUnitImpl. Don't use copy constructor to make this explicit. 276 */ 277 MeasureUnitImpl copy(UErrorCode& status) const; 278 279 /** 280 * Extracts the list of all the individual units inside the `MeasureUnitImpl` with their indices. 281 * For example: 282 * - if the `MeasureUnitImpl` is `foot-per-hour` 283 * it will return a list of 1 {(0, `foot-per-hour`)} 284 * - if the `MeasureUnitImpl` is `foot-and-inch` 285 * it will return a list of 2 {(0, `foot`), (1, `inch`)} 286 */ 287 MaybeStackVector<MeasureUnitImplWithIndex> 288 extractIndividualUnitsWithIndices(UErrorCode &status) const; 289 290 /** Mutates this MeasureUnitImpl to take the reciprocal. */ 291 void takeReciprocal(UErrorCode& status); 292 293 /** 294 * Returns a simplified version of the unit. 295 * NOTE: the simplification happen when there are two units equals in their base unit and their 296 * prefixes. 297 * 298 * Example 1: "square-meter-per-meter" --> "meter" 299 * Example 2: "square-millimeter-per-meter" --> "square-millimeter-per-meter" 300 */ 301 MeasureUnitImpl copyAndSimplify(UErrorCode &status) const; 302 303 /** 304 * Mutates this MeasureUnitImpl to append a single unit. 305 * 306 * @return true if a new item was added. If unit is the dimensionless unit, 307 * it is never added: the return value will always be false. 308 */ 309 bool appendSingleUnit(const SingleUnitImpl& singleUnit, UErrorCode& status); 310 311 /** 312 * Normalizes a MeasureUnitImpl and generate the identifier string in place. 313 */ 314 void serialize(UErrorCode &status); 315 316 /** The complexity, either SINGLE, COMPOUND, or MIXED. */ 317 UMeasureUnitComplexity complexity = UMEASURE_UNIT_SINGLE; 318 319 /** 320 * The list of single units. These may be summed or multiplied, based on the 321 * value of the complexity field. 322 * 323 * The "dimensionless" unit (SingleUnitImpl default constructor) must not be 324 * added to this list. 325 */ 326 MaybeStackVector<SingleUnitImpl> singleUnits; 327 328 /** 329 * The full unit identifier. Owned by the MeasureUnitImpl. Empty if not computed. 330 */ 331 CharString identifier; 332 333 // For calling serialize 334 // TODO(icu-units#147): revisit serialization 335 friend class number::impl::LongNameHandler; 336 }; 337 338 struct U_I18N_API MeasureUnitImplWithIndex : public UMemory { 339 const int32_t index; 340 MeasureUnitImpl unitImpl; 341 // Makes a copy of unitImpl. MeasureUnitImplWithIndexMeasureUnitImplWithIndex342 MeasureUnitImplWithIndex(int32_t index, const MeasureUnitImpl &unitImpl, UErrorCode &status) 343 : index(index), unitImpl(unitImpl.copy(status)) { 344 } MeasureUnitImplWithIndexMeasureUnitImplWithIndex345 MeasureUnitImplWithIndex(int32_t index, const SingleUnitImpl &singleUnitImpl, UErrorCode &status) 346 : index(index), unitImpl(MeasureUnitImpl(singleUnitImpl, status)) { 347 } 348 }; 349 350 // Export explicit template instantiations of MaybeStackArray, MemoryPool and 351 // MaybeStackVector. This is required when building DLLs for Windows. (See 352 // datefmt.h, collationiterator.h, erarules.h and others for similar examples.) 353 #if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN 354 template class U_I18N_API MaybeStackArray<MeasureUnitImplWithIndex *, 8>; 355 template class U_I18N_API MemoryPool<MeasureUnitImplWithIndex, 8>; 356 template class U_I18N_API MaybeStackVector<MeasureUnitImplWithIndex, 8>; 357 358 // Export an explicit template instantiation of the LocalPointer that is used as a 359 // data member of MeasureUnitImpl. 360 // (When building DLLs for Windows this is required.) 361 #if defined(_MSC_VER) 362 // Ignore warning 4661 as LocalPointerBase does not use operator== or operator!= 363 #pragma warning(push) 364 #pragma warning(disable : 4661) 365 #endif 366 template class U_I18N_API LocalPointerBase<MeasureUnitImpl>; 367 template class U_I18N_API LocalPointer<MeasureUnitImpl>; 368 #if defined(_MSC_VER) 369 #pragma warning(pop) 370 #endif 371 #endif 372 373 U_NAMESPACE_END 374 375 #endif /* #if !UCONFIG_NO_FORMATTING */ 376 #endif //__MEASUNIT_IMPL_H__ 377