1 // © 2020 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include "unicode/utypes.h"
5
6 #if !UCONFIG_NO_FORMATTING
7
8 #include "cstring.h"
9 #include "number_decimalquantity.h"
10 #include "resource.h"
11 #include "uassert.h"
12 #include "unicode/unistr.h"
13 #include "unicode/ures.h"
14 #include "units_data.h"
15 #include "uresimp.h"
16 #include "util.h"
17 #include <utility>
18
19 U_NAMESPACE_BEGIN
20 namespace units {
21
22 namespace {
23
24 using icu::number::impl::DecimalQuantity;
25
trimSpaces(CharString & factor,UErrorCode & status)26 void trimSpaces(CharString& factor, UErrorCode& status){
27 CharString trimmed;
28 for (int i = 0 ; i < factor.length(); i++) {
29 if (factor[i] == ' ') continue;
30
31 trimmed.append(factor[i], status);
32 }
33
34 factor = std::move(trimmed);
35 }
36
37 /**
38 * A ResourceSink that collects conversion rate information.
39 *
40 * This class is for use by ures_getAllItemsWithFallback.
41 */
42 class ConversionRateDataSink : public ResourceSink {
43 public:
44 /**
45 * Constructor.
46 * @param out The vector to which ConversionRateInfo instances are to be
47 * added. This vector must outlive the use of the ResourceSink.
48 */
ConversionRateDataSink(MaybeStackVector<ConversionRateInfo> * out)49 explicit ConversionRateDataSink(MaybeStackVector<ConversionRateInfo> *out) : outVector(out) {}
50
51 /**
52 * Method for use by `ures_getAllItemsWithFallback`. Adds the unit
53 * conversion rates that are found in `value` to the output vector.
54 *
55 * @param source This string must be "convertUnits": the resource that this
56 * class supports reading.
57 * @param value The "convertUnits" resource, containing unit conversion rate
58 * information.
59 * @param noFallback Ignored.
60 * @param status The standard ICU error code output parameter.
61 */
put(const char * source,ResourceValue & value,UBool,UErrorCode & status)62 void put(const char *source, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) {
63 if (U_FAILURE(status)) { return; }
64 if (uprv_strcmp(source, "convertUnits") != 0) {
65 // This is very strict, however it is the cheapest way to be sure
66 // that with `value`, we're looking at the convertUnits table.
67 status = U_ILLEGAL_ARGUMENT_ERROR;
68 return;
69 }
70 ResourceTable conversionRateTable = value.getTable(status);
71 const char *srcUnit;
72 // We're reusing `value`, which seems to be a common pattern:
73 for (int32_t unit = 0; conversionRateTable.getKeyAndValue(unit, srcUnit, value); unit++) {
74 ResourceTable unitTable = value.getTable(status);
75 const char *key;
76 UnicodeString baseUnit = ICU_Utility::makeBogusString();
77 UnicodeString factor = ICU_Utility::makeBogusString();
78 UnicodeString offset = ICU_Utility::makeBogusString();
79 for (int32_t i = 0; unitTable.getKeyAndValue(i, key, value); i++) {
80 if (uprv_strcmp(key, "target") == 0) {
81 baseUnit = value.getUnicodeString(status);
82 } else if (uprv_strcmp(key, "factor") == 0) {
83 factor = value.getUnicodeString(status);
84 } else if (uprv_strcmp(key, "offset") == 0) {
85 offset = value.getUnicodeString(status);
86 }
87 }
88 if (U_FAILURE(status)) { return; }
89 if (baseUnit.isBogus() || factor.isBogus()) {
90 // We could not find a usable conversion rate: bad resource.
91 status = U_MISSING_RESOURCE_ERROR;
92 return;
93 }
94
95 // We don't have this ConversionRateInfo yet: add it.
96 ConversionRateInfo *cr = outVector->emplaceBack();
97 if (!cr) {
98 status = U_MEMORY_ALLOCATION_ERROR;
99 return;
100 } else {
101 cr->sourceUnit.append(srcUnit, status);
102 cr->baseUnit.appendInvariantChars(baseUnit, status);
103 cr->factor.appendInvariantChars(factor, status);
104 trimSpaces(cr->factor, status);
105 if (!offset.isBogus()) cr->offset.appendInvariantChars(offset, status);
106 }
107 }
108 return;
109 }
110
111 private:
112 MaybeStackVector<ConversionRateInfo> *outVector;
113 };
114
operator <(const UnitPreferenceMetadata & a,const UnitPreferenceMetadata & b)115 bool operator<(const UnitPreferenceMetadata &a, const UnitPreferenceMetadata &b) {
116 return a.compareTo(b) < 0;
117 }
118
119 /**
120 * A ResourceSink that collects unit preferences information.
121 *
122 * This class is for use by ures_getAllItemsWithFallback.
123 */
124 class UnitPreferencesSink : public ResourceSink {
125 public:
126 /**
127 * Constructor.
128 * @param outPrefs The vector to which UnitPreference instances are to be
129 * added. This vector must outlive the use of the ResourceSink.
130 * @param outMetadata The vector to which UnitPreferenceMetadata instances
131 * are to be added. This vector must outlive the use of the ResourceSink.
132 */
UnitPreferencesSink(MaybeStackVector<UnitPreference> * outPrefs,MaybeStackVector<UnitPreferenceMetadata> * outMetadata)133 explicit UnitPreferencesSink(MaybeStackVector<UnitPreference> *outPrefs,
134 MaybeStackVector<UnitPreferenceMetadata> *outMetadata)
135 : preferences(outPrefs), metadata(outMetadata) {}
136
137 /**
138 * Method for use by `ures_getAllItemsWithFallback`. Adds the unit
139 * preferences info that are found in `value` to the output vector.
140 *
141 * @param source This string must be "unitPreferenceData": the resource that
142 * this class supports reading.
143 * @param value The "unitPreferenceData" resource, containing unit
144 * preferences data.
145 * @param noFallback Ignored.
146 * @param status The standard ICU error code output parameter. Note: if an
147 * error is returned, outPrefs and outMetadata may be inconsistent.
148 */
put(const char * key,ResourceValue & value,UBool,UErrorCode & status)149 void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) {
150 if (U_FAILURE(status)) { return; }
151 if (uprv_strcmp(key, "unitPreferenceData") != 0) {
152 // This is very strict, however it is the cheapest way to be sure
153 // that with `value`, we're looking at the convertUnits table.
154 status = U_ILLEGAL_ARGUMENT_ERROR;
155 return;
156 }
157 // The unitPreferenceData structure (see data/misc/units.txt) contains a
158 // hierarchy of category/usage/region, within which are a set of
159 // preferences. Hence three for-loops and another loop for the
160 // preferences themselves:
161 ResourceTable unitPreferenceDataTable = value.getTable(status);
162 const char *category;
163 for (int32_t i = 0; unitPreferenceDataTable.getKeyAndValue(i, category, value); i++) {
164 ResourceTable categoryTable = value.getTable(status);
165 const char *usage;
166 for (int32_t j = 0; categoryTable.getKeyAndValue(j, usage, value); j++) {
167 ResourceTable regionTable = value.getTable(status);
168 const char *region;
169 for (int32_t k = 0; regionTable.getKeyAndValue(k, region, value); k++) {
170 // `value` now contains the set of preferences for
171 // category/usage/region.
172 ResourceArray unitPrefs = value.getArray(status);
173 if (U_FAILURE(status)) { return; }
174 int32_t prefLen = unitPrefs.getSize();
175
176 // Update metadata for this set of preferences.
177 UnitPreferenceMetadata *meta = metadata->emplaceBack(
178 category, usage, region, preferences->length(), prefLen, status);
179 if (!meta) {
180 status = U_MEMORY_ALLOCATION_ERROR;
181 return;
182 }
183 if (U_FAILURE(status)) { return; }
184 if (metadata->length() > 1) {
185 // Verify that unit preferences are sorted and
186 // without duplicates.
187 if (!(*(*metadata)[metadata->length() - 2] <
188 *(*metadata)[metadata->length() - 1])) {
189 status = U_INVALID_FORMAT_ERROR;
190 return;
191 }
192 }
193
194 // Collect the individual preferences.
195 for (int32_t i = 0; unitPrefs.getValue(i, value); i++) {
196 UnitPreference *up = preferences->emplaceBack();
197 if (!up) {
198 status = U_MEMORY_ALLOCATION_ERROR;
199 return;
200 }
201 ResourceTable unitPref = value.getTable(status);
202 if (U_FAILURE(status)) { return; }
203 for (int32_t i = 0; unitPref.getKeyAndValue(i, key, value); ++i) {
204 if (uprv_strcmp(key, "unit") == 0) {
205 int32_t length;
206 const UChar *u = value.getString(length, status);
207 up->unit.appendInvariantChars(u, length, status);
208 } else if (uprv_strcmp(key, "geq") == 0) {
209 int32_t length;
210 const UChar *g = value.getString(length, status);
211 CharString geq;
212 geq.appendInvariantChars(g, length, status);
213 DecimalQuantity dq;
214 dq.setToDecNumber(geq.data(), status);
215 up->geq = dq.toDouble();
216 } else if (uprv_strcmp(key, "skeleton") == 0) {
217 up->skeleton = value.getUnicodeString(status);
218 }
219 }
220 }
221 }
222 }
223 }
224 }
225
226 private:
227 MaybeStackVector<UnitPreference> *preferences;
228 MaybeStackVector<UnitPreferenceMetadata> *metadata;
229 };
230
binarySearch(const MaybeStackVector<UnitPreferenceMetadata> * metadata,const UnitPreferenceMetadata & desired,bool * foundCategory,bool * foundUsage,bool * foundRegion,UErrorCode & status)231 int32_t binarySearch(const MaybeStackVector<UnitPreferenceMetadata> *metadata,
232 const UnitPreferenceMetadata &desired, bool *foundCategory, bool *foundUsage,
233 bool *foundRegion, UErrorCode &status) {
234 if (U_FAILURE(status)) { return -1; }
235 int32_t start = 0;
236 int32_t end = metadata->length();
237 *foundCategory = false;
238 *foundUsage = false;
239 *foundRegion = false;
240 while (start < end) {
241 int32_t mid = (start + end) / 2;
242 int32_t cmp = (*metadata)[mid]->compareTo(desired, foundCategory, foundUsage, foundRegion);
243 if (cmp < 0) {
244 start = mid + 1;
245 } else if (cmp > 0) {
246 end = mid;
247 } else {
248 return mid;
249 }
250 }
251 return -1;
252 }
253
254 /**
255 * Finds the UnitPreferenceMetadata instance that matches the given category,
256 * usage and region: if missing, region falls back to "001", and usage
257 * repeatedly drops tailing components, eventually trying "default"
258 * ("land-agriculture-grain" -> "land-agriculture" -> "land" -> "default").
259 *
260 * @param metadata The full list of UnitPreferenceMetadata instances.
261 * @param category The category to search for. See getUnitCategory().
262 * @param usage The usage for which formatting preferences is needed. If the
263 * given usage is not known, automatic fallback occurs, see function description
264 * above.
265 * @param region The region for which preferences are needed. If there are no
266 * region-specific preferences, this function automatically falls back to the
267 * "001" region (global).
268 * @param status The standard ICU error code output parameter.
269 * * If an invalid category is given, status will be U_ILLEGAL_ARGUMENT_ERROR.
270 * * If fallback to "default" or "001" didn't resolve, status will be
271 * U_MISSING_RESOURCE.
272 * @return The index into the metadata vector which represents the appropriate
273 * preferences. If appropriate preferences are not found, -1 is returned.
274 */
getPreferenceMetadataIndex(const MaybeStackVector<UnitPreferenceMetadata> * metadata,StringPiece category,StringPiece usage,StringPiece region,UErrorCode & status)275 int32_t getPreferenceMetadataIndex(const MaybeStackVector<UnitPreferenceMetadata> *metadata,
276 StringPiece category, StringPiece usage, StringPiece region,
277 UErrorCode &status) {
278 if (U_FAILURE(status)) { return -1; }
279 bool foundCategory, foundUsage, foundRegion;
280 UnitPreferenceMetadata desired(category, usage, region, -1, -1, status);
281 int32_t idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status);
282 if (U_FAILURE(status)) { return -1; }
283 if (idx >= 0) { return idx; }
284 if (!foundCategory) {
285 status = U_ILLEGAL_ARGUMENT_ERROR;
286 return -1;
287 }
288 U_ASSERT(foundCategory);
289 while (!foundUsage) {
290 int32_t lastDashIdx = desired.usage.lastIndexOf('-');
291 if (lastDashIdx > 0) {
292 desired.usage.truncate(lastDashIdx);
293 } else if (uprv_strcmp(desired.usage.data(), "default") != 0) {
294 desired.usage.truncate(0).append("default", status);
295 } else {
296 // "default" is not supposed to be missing for any valid category.
297 status = U_MISSING_RESOURCE_ERROR;
298 return -1;
299 }
300 idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status);
301 if (U_FAILURE(status)) { return -1; }
302 }
303 U_ASSERT(foundCategory);
304 U_ASSERT(foundUsage);
305 if (!foundRegion) {
306 if (uprv_strcmp(desired.region.data(), "001") != 0) {
307 desired.region.truncate(0).append("001", status);
308 idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status);
309 }
310 if (!foundRegion) {
311 // "001" is not supposed to be missing for any valid usage.
312 status = U_MISSING_RESOURCE_ERROR;
313 return -1;
314 }
315 }
316 U_ASSERT(foundCategory);
317 U_ASSERT(foundUsage);
318 U_ASSERT(foundRegion);
319 U_ASSERT(idx >= 0);
320 return idx;
321 }
322
323 } // namespace
324
UnitPreferenceMetadata(StringPiece category,StringPiece usage,StringPiece region,int32_t prefsOffset,int32_t prefsCount,UErrorCode & status)325 UnitPreferenceMetadata::UnitPreferenceMetadata(StringPiece category, StringPiece usage,
326 StringPiece region, int32_t prefsOffset,
327 int32_t prefsCount, UErrorCode &status) {
328 this->category.append(category, status);
329 this->usage.append(usage, status);
330 this->region.append(region, status);
331 this->prefsOffset = prefsOffset;
332 this->prefsCount = prefsCount;
333 }
334
compareTo(const UnitPreferenceMetadata & other) const335 int32_t UnitPreferenceMetadata::compareTo(const UnitPreferenceMetadata &other) const {
336 int32_t cmp = uprv_strcmp(category.data(), other.category.data());
337 if (cmp == 0) {
338 cmp = uprv_strcmp(usage.data(), other.usage.data());
339 }
340 if (cmp == 0) {
341 cmp = uprv_strcmp(region.data(), other.region.data());
342 }
343 return cmp;
344 }
345
compareTo(const UnitPreferenceMetadata & other,bool * foundCategory,bool * foundUsage,bool * foundRegion) const346 int32_t UnitPreferenceMetadata::compareTo(const UnitPreferenceMetadata &other, bool *foundCategory,
347 bool *foundUsage, bool *foundRegion) const {
348 int32_t cmp = uprv_strcmp(category.data(), other.category.data());
349 if (cmp == 0) {
350 *foundCategory = true;
351 cmp = uprv_strcmp(usage.data(), other.usage.data());
352 }
353 if (cmp == 0) {
354 *foundUsage = true;
355 cmp = uprv_strcmp(region.data(), other.region.data());
356 }
357 if (cmp == 0) {
358 *foundRegion = true;
359 }
360 return cmp;
361 }
362
getUnitCategory(const char * baseUnitIdentifier,UErrorCode & status)363 CharString U_I18N_API getUnitCategory(const char *baseUnitIdentifier, UErrorCode &status) {
364 CharString result;
365 LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status));
366 LocalUResourceBundlePointer unitQuantities(
367 ures_getByKey(unitsBundle.getAlias(), "unitQuantities", NULL, &status));
368 int32_t categoryLength;
369 if (U_FAILURE(status)) { return result; }
370 const UChar *uCategory =
371 ures_getStringByKey(unitQuantities.getAlias(), baseUnitIdentifier, &categoryLength, &status);
372 if (U_FAILURE(status)) {
373 // TODO(CLDR-13787,hugovdm): special-casing the consumption-inverse
374 // case. Once CLDR-13787 is clarified, this should be generalised (or
375 // possibly removed):
376 if (uprv_strcmp(baseUnitIdentifier, "meter-per-cubic-meter") == 0) {
377 status = U_ZERO_ERROR;
378 result.append("consumption-inverse", status);
379 return result;
380 }
381 }
382 result.appendInvariantChars(uCategory, categoryLength, status);
383 return result;
384 }
385
386 // TODO: this may be unnecessary. Fold into ConversionRates class? Or move to anonymous namespace?
getAllConversionRates(MaybeStackVector<ConversionRateInfo> & result,UErrorCode & status)387 void U_I18N_API getAllConversionRates(MaybeStackVector<ConversionRateInfo> &result, UErrorCode &status) {
388 LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status));
389 ConversionRateDataSink sink(&result);
390 ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", sink, status);
391 }
392
extractConversionInfo(StringPiece source,UErrorCode & status) const393 const ConversionRateInfo *ConversionRates::extractConversionInfo(StringPiece source,
394 UErrorCode &status) const {
395 for (size_t i = 0, n = conversionInfo_.length(); i < n; ++i) {
396 if (conversionInfo_[i]->sourceUnit.toStringPiece() == source) return conversionInfo_[i];
397 }
398
399 status = U_INTERNAL_PROGRAM_ERROR;
400 return nullptr;
401 }
402
UnitPreferences(UErrorCode & status)403 U_I18N_API UnitPreferences::UnitPreferences(UErrorCode &status) {
404 LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status));
405 UnitPreferencesSink sink(&unitPrefs_, &metadata_);
406 ures_getAllItemsWithFallback(unitsBundle.getAlias(), "unitPreferenceData", sink, status);
407 }
408
409 // TODO: make outPreferences const?
410 //
411 // TODO: consider replacing `UnitPreference **&outPreferences` with slice class
412 // of some kind.
getPreferencesFor(StringPiece category,StringPiece usage,StringPiece region,const UnitPreference * const * & outPreferences,int32_t & preferenceCount,UErrorCode & status) const413 void U_I18N_API UnitPreferences::getPreferencesFor(StringPiece category, StringPiece usage,
414 StringPiece region,
415 const UnitPreference *const *&outPreferences,
416 int32_t &preferenceCount, UErrorCode &status) const {
417 int32_t idx = getPreferenceMetadataIndex(&metadata_, category, usage, region, status);
418 if (U_FAILURE(status)) { return; }
419 U_ASSERT(idx >= 0); // Failures should have been taken care of by `status`.
420 const UnitPreferenceMetadata *m = metadata_[idx];
421 outPreferences = unitPrefs_.getAlias() + m->prefsOffset;
422 preferenceCount = m->prefsCount;
423 }
424
425 } // namespace units
426 U_NAMESPACE_END
427
428 #endif /* #if !UCONFIG_NO_FORMATTING */
429