• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2020 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #ifndef __MEASUNIT_IMPL_H__
5 #define __MEASUNIT_IMPL_H__
6 
7 #include "unicode/utypes.h"
8 
9 #if !UCONFIG_NO_FORMATTING
10 
11 #include "unicode/measunit.h"
12 #include "cmemory.h"
13 #include "charstr.h"
14 
15 U_NAMESPACE_BEGIN
16 
// Forward declaration only: MeasureUnitImpl, declared later in this header,
// names number::impl::LongNameHandler in a friend declaration, which does not
// require the full class definition.
namespace number {
namespace impl {
class LongNameHandler;
} // namespace impl
} // namespace number
22 
/** Default currency code as a NUL-terminated UTF-16 string. */
static constexpr char16_t kDefaultCurrency[] = u"XXX";
/** The same default currency code as a NUL-terminated narrow string. */
static constexpr char kDefaultCurrency8[] = "XXX";
25 
/**
 * Looks up the "unitQuantity" (aka "type" or "category") of a base unit
 * identifier. The category is the returned `CharString`.
 *
 * This only supports base units: other units must be resolved to base units
 * before passing to this function, otherwise a U_UNSUPPORTED_ERROR may be
 * reported through `status`.
 *
 * Categories are found in `unitQuantities` in the `units` resource (see
 * `units.txt`).
 *
 * @param baseMeasureUnitImpl The base unit whose category is looked up.
 * @param status Set if an error occurs.
 * @return The category of the given base unit.
 */
// TODO: make this function accept any `MeasureUnit`, as Java does, and move it to the `UnitsData` class.
CharString U_I18N_API getUnitQuantity(const MeasureUnitImpl &baseMeasureUnitImpl, UErrorCode &status);
40 
41 /**
42  * A struct representing a single unit (optional SI or binary prefix, and dimensionality).
43  */
44 struct U_I18N_API SingleUnitImpl : public UMemory {
45     /**
46      * Gets a single unit from the MeasureUnit. If there are multiple single units, sets an error
47      * code and returns the base dimensionless unit. Parses if necessary.
48      */
49     static SingleUnitImpl forMeasureUnit(const MeasureUnit& measureUnit, UErrorCode& status);
50 
51     /** Transform this SingleUnitImpl into a MeasureUnit, simplifying if possible. */
52     MeasureUnit build(UErrorCode& status) const;
53 
54     /**
55      * Returns the "simple unit ID", without SI or dimensionality prefix: this
56      * instance may represent a square-kilometer, but only "meter" will be
57      * returned.
58      *
59      * The returned pointer points at memory that exists for the duration of the
60      * program's running.
61      */
62     const char *getSimpleUnitID() const;
63 
64     /**
65      * Generates and append a neutral identifier string for a single unit which means we do not include
66      * the dimension signal.
67      */
68     void appendNeutralIdentifier(CharString &result, UErrorCode &status) const;
69 
70     /**
71      * Returns the index of this unit's "quantity" in unitQuantities (in
72      * measunit_extra.cpp). The value of this index determines sort order for
73      * normalization of unit identifiers.
74      */
75     int32_t getUnitCategoryIndex() const;
76 
77     /**
78      * Compare this SingleUnitImpl to another SingleUnitImpl for the sake of
79      * sorting and coalescing.
80      *
81      * Sort order of units is specified by UTS #35
82      * (https://unicode.org/reports/tr35/tr35-info.html#Unit_Identifier_Normalization).
83      *
84      * Takes the sign of dimensionality into account, but not the absolute
85      * value: per-meter is not considered the same as meter, but meter is
86      * considered the same as square-meter.
87      *
88      * The dimensionless unit generally does not get compared, but if it did, it
89      * would sort before other units by virtue of index being < 0 and
90      * dimensionality not being negative.
91      */
compareToSingleUnitImpl92     int32_t compareTo(const SingleUnitImpl& other) const {
93         if (dimensionality < 0 && other.dimensionality > 0) {
94             // Positive dimensions first
95             return 1;
96         }
97         if (dimensionality > 0 && other.dimensionality < 0) {
98             return -1;
99         }
100 
101         // Sort by official quantity order
102         int32_t thisQuantity = this->getUnitCategoryIndex();
103         int32_t otherQuantity = other.getUnitCategoryIndex();
104         if (thisQuantity < otherQuantity) {
105             return -1;
106         }
107         if (thisQuantity > otherQuantity) {
108             return 1;
109         }
110 
111         // If quantity order didn't help, then we go by index.
112         if (index < other.index) {
113             return -1;
114         }
115         if (index > other.index) {
116             return 1;
117         }
118 
119         // When comparing binary prefixes vs SI prefixes, instead of comparing the actual values, we can
120         // multiply the binary prefix power by 3 and compare the powers. if they are equal, we can can
121         // compare the bases.
122         // NOTE: this methodology will fail if the binary prefix more than or equal 98.
123         int32_t unitBase = umeas_getPrefixBase(unitPrefix);
124         int32_t otherUnitBase = umeas_getPrefixBase(other.unitPrefix);
125 
126         // Values for comparison purposes only.
127         int32_t unitPower = unitBase == 1024 /* Binary Prefix */ ? umeas_getPrefixPower(unitPrefix) * 3
128                                                                  : umeas_getPrefixPower(unitPrefix);
129         int32_t otherUnitPower =
130             otherUnitBase == 1024 /* Binary Prefix */ ? umeas_getPrefixPower(other.unitPrefix) * 3
131                                                       : umeas_getPrefixPower(other.unitPrefix);
132 
133         // NOTE: if the unitPower is less than the other,
134         // we return 1 not -1. Thus because we want th sorting order
135         // for the bigger prefix to be before the smaller.
136         // Example: megabyte should come before kilobyte.
137         if (unitPower < otherUnitPower) {
138             return 1;
139         }
140         if (unitPower > otherUnitPower) {
141             return -1;
142         }
143 
144         if (unitBase < otherUnitBase) {
145             return 1;
146         }
147         if (unitBase > otherUnitBase) {
148             return -1;
149         }
150 
151         return 0;
152     }
153 
154     /**
155      * Return whether this SingleUnitImpl is compatible with another for the purpose of coalescing.
156      *
157      * Units with the same base unit and SI or binary prefix should match, except that they must also
158      * have the same dimensionality sign, such that we don't merge numerator and denominator.
159      */
isCompatibleWithSingleUnitImpl160     bool isCompatibleWith(const SingleUnitImpl& other) const {
161         return (compareTo(other) == 0);
162     }
163 
164     /**
165      * Returns true if this unit is the "dimensionless base unit", as produced
166      * by the MeasureUnit() default constructor. (This does not include the
167      * likes of concentrations or angles.)
168      */
isDimensionlessSingleUnitImpl169     bool isDimensionless() const {
170         return index == -1;
171     }
172 
173     /**
174      * Simple unit index, unique for every simple unit, -1 for the dimensionless
175      * unit. This is an index into a string list in measunit_extra.cpp, as
176      * loaded by SimpleUnitIdentifiersSink.
177      *
178      * The default value is -1, meaning the dimensionless unit:
179      * isDimensionless() will return true, until index is changed.
180      */
181     int32_t index = -1;
182 
183     /**
184      * SI or binary prefix.
185      *
186      * This is ignored for the dimensionless unit.
187      */
188     UMeasurePrefix unitPrefix = UMEASURE_PREFIX_ONE;
189 
190     /**
191      * Dimensionality.
192      *
193      * This is meaningless for the dimensionless unit.
194      */
195     int32_t dimensionality = 1;
196 };
197 
// Forward declaration: MeasureUnitImpl::extractIndividualUnitsWithIndices(),
// declared later in this header, returns a MaybeStackVector of this type
// before the struct itself is defined.
struct MeasureUnitImplWithIndex;

// Export explicit template instantiations of MaybeStackArray, MemoryPool and
// MaybeStackVector. This is required when building DLLs for Windows. (See
// datefmt.h, collationiterator.h, erarules.h and others for similar examples.)
#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN
template class U_I18N_API MaybeStackArray<SingleUnitImpl *, 8>;
template class U_I18N_API MemoryPool<SingleUnitImpl, 8>;
template class U_I18N_API MaybeStackVector<SingleUnitImpl, 8>;
#endif // U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN
209 
210 /**
211  * Internal representation of measurement units. Capable of representing all complexities of units,
212  * including mixed and compound units.
213  */
214 class U_I18N_API MeasureUnitImpl : public UMemory {
215   public:
216     MeasureUnitImpl() = default;
217     MeasureUnitImpl(MeasureUnitImpl &&other) = default;
218     // No copy constructor, use MeasureUnitImpl::copy() to make it explicit.
219     MeasureUnitImpl(const MeasureUnitImpl &other, UErrorCode &status) = delete;
220     MeasureUnitImpl(const SingleUnitImpl &singleUnit, UErrorCode &status);
221 
222     MeasureUnitImpl &operator=(MeasureUnitImpl &&other) noexcept = default;
223 
224     /** Extract the MeasureUnitImpl from a MeasureUnit. */
get(const MeasureUnit & measureUnit)225     static inline const MeasureUnitImpl *get(const MeasureUnit &measureUnit) {
226         return measureUnit.fImpl;
227     }
228 
229     /**
230      * Parse a unit identifier into a MeasureUnitImpl.
231      *
232      * @param identifier The unit identifier string.
233      * @param status Set if the identifier string is not valid.
234      * @return A newly parsed value object. Behaviour of this unit is
235      * unspecified if an error is returned via status.
236      */
237     static MeasureUnitImpl forIdentifier(StringPiece identifier, UErrorCode& status);
238 
239     /**
240      * Extract the MeasureUnitImpl from a MeasureUnit, or parse if it is not present.
241      *
242      * @param measureUnit The source MeasureUnit.
243      * @param memory A place to write the new MeasureUnitImpl if parsing is required.
244      * @param status Set if an error occurs.
245      * @return A reference to either measureUnit.fImpl or memory.
246      */
247     static const MeasureUnitImpl& forMeasureUnit(
248         const MeasureUnit& measureUnit, MeasureUnitImpl& memory, UErrorCode& status);
249 
250     /**
251      * Extract the MeasureUnitImpl from a MeasureUnit, or parse if it is not present.
252      *
253      * @param measureUnit The source MeasureUnit.
254      * @param status Set if an error occurs.
255      * @return A value object, either newly parsed or copied from measureUnit.
256      */
257     static MeasureUnitImpl forMeasureUnitMaybeCopy(
258         const MeasureUnit& measureUnit, UErrorCode& status);
259 
260     /**
261      * Used for currency units.
262      */
forCurrencyCode(StringPiece currencyCode)263     static inline MeasureUnitImpl forCurrencyCode(StringPiece currencyCode) {
264         MeasureUnitImpl result;
265         UErrorCode localStatus = U_ZERO_ERROR;
266         result.identifier.append(currencyCode, localStatus);
267         // localStatus is not expected to fail since currencyCode should be 3 chars long
268         return result;
269     }
270 
271     /** Transform this MeasureUnitImpl into a MeasureUnit, simplifying if possible. */
272     MeasureUnit build(UErrorCode& status) &&;
273 
274     /**
275      * Create a copy of this MeasureUnitImpl. Don't use copy constructor to make this explicit.
276      */
277     MeasureUnitImpl copy(UErrorCode& status) const;
278 
279     /**
280      * Extracts the list of all the individual units inside the `MeasureUnitImpl` with their indices.
281      *      For example:
282      *          -   if the `MeasureUnitImpl` is `foot-per-hour`
283      *                  it will return a list of 1 {(0, `foot-per-hour`)}
284      *          -   if the `MeasureUnitImpl` is `foot-and-inch`
285      *                  it will return a list of 2 {(0, `foot`), (1, `inch`)}
286      */
287     MaybeStackVector<MeasureUnitImplWithIndex>
288     extractIndividualUnitsWithIndices(UErrorCode &status) const;
289 
290     /** Mutates this MeasureUnitImpl to take the reciprocal. */
291     void takeReciprocal(UErrorCode& status);
292 
293     /**
294      * Returns a simplified version of the unit.
295      * NOTE: the simplification happen when there are two units equals in their base unit and their
296      * prefixes.
297      *
298      * Example 1: "square-meter-per-meter" --> "meter"
299      * Example 2: "square-millimeter-per-meter" --> "square-millimeter-per-meter"
300      */
301     MeasureUnitImpl copyAndSimplify(UErrorCode &status) const;
302 
303     /**
304      * Mutates this MeasureUnitImpl to append a single unit.
305      *
306      * @return true if a new item was added. If unit is the dimensionless unit,
307      * it is never added: the return value will always be false.
308      */
309     bool appendSingleUnit(const SingleUnitImpl& singleUnit, UErrorCode& status);
310 
311     /**
312      * Normalizes a MeasureUnitImpl and generate the identifier string in place.
313      */
314     void serialize(UErrorCode &status);
315 
316     /** The complexity, either SINGLE, COMPOUND, or MIXED. */
317     UMeasureUnitComplexity complexity = UMEASURE_UNIT_SINGLE;
318 
319     /**
320      * The list of single units. These may be summed or multiplied, based on the
321      * value of the complexity field.
322      *
323      * The "dimensionless" unit (SingleUnitImpl default constructor) must not be
324      * added to this list.
325      */
326     MaybeStackVector<SingleUnitImpl> singleUnits;
327 
328     /**
329      * The full unit identifier.  Owned by the MeasureUnitImpl.  Empty if not computed.
330      */
331     CharString identifier;
332 
333     // For calling serialize
334     // TODO(icu-units#147): revisit serialization
335     friend class number::impl::LongNameHandler;
336 };
337 
338 struct U_I18N_API MeasureUnitImplWithIndex : public UMemory {
339     const int32_t index;
340     MeasureUnitImpl unitImpl;
341     // Makes a copy of unitImpl.
MeasureUnitImplWithIndexMeasureUnitImplWithIndex342     MeasureUnitImplWithIndex(int32_t index, const MeasureUnitImpl &unitImpl, UErrorCode &status)
343         : index(index), unitImpl(unitImpl.copy(status)) {
344     }
MeasureUnitImplWithIndexMeasureUnitImplWithIndex345     MeasureUnitImplWithIndex(int32_t index, const SingleUnitImpl &singleUnitImpl, UErrorCode &status)
346         : index(index), unitImpl(MeasureUnitImpl(singleUnitImpl, status)) {
347     }
348 };
349 
// Export explicit template instantiations of MaybeStackArray, MemoryPool and
// MaybeStackVector. This is required when building DLLs for Windows. (See
// datefmt.h, collationiterator.h, erarules.h and others for similar examples.)
#if U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN
template class U_I18N_API MaybeStackArray<MeasureUnitImplWithIndex *, 8>;
template class U_I18N_API MemoryPool<MeasureUnitImplWithIndex, 8>;
template class U_I18N_API MaybeStackVector<MeasureUnitImplWithIndex, 8>;

// Export an explicit template instantiation of the LocalPointer that is used as a
// data member of MeasureUnitImpl.
// (When building DLLs for Windows this is required.)
#if defined(_MSC_VER)
// Ignore warning 4661 as LocalPointerBase does not use operator== or operator!=
#pragma warning(push)
#pragma warning(disable : 4661)
#endif // defined(_MSC_VER)
template class U_I18N_API LocalPointerBase<MeasureUnitImpl>;
template class U_I18N_API LocalPointer<MeasureUnitImpl>;
#if defined(_MSC_VER)
// Restore the warning state saved by the push above.
#pragma warning(pop)
#endif // defined(_MSC_VER)
#endif // U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN
372 
373 U_NAMESPACE_END
374 
375 #endif /* #if !UCONFIG_NO_FORMATTING */
376 #endif //__MEASUNIT_IMPL_H__
377