1 // © 2020 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include "unicode/utypes.h"
5
6 #if !UCONFIG_NO_FORMATTING
7
8 #include "bytesinkutil.h"
9 #include "cstring.h"
10 #include "number_decimalquantity.h"
11 #include "resource.h"
12 #include "uassert.h"
13 #include "unicode/locid.h"
14 #include "unicode/unistr.h"
15 #include "unicode/ures.h"
16 #include "units_data.h"
17 #include "uresimp.h"
18 #include "util.h"
19 #include <utility>
20
21 U_NAMESPACE_BEGIN
22 namespace units {
23
24 namespace {
25
26 using icu::number::impl::DecimalQuantity;
27
trimSpaces(CharString & factor,UErrorCode & status)28 void trimSpaces(CharString& factor, UErrorCode& status){
29 CharString trimmed;
30 for (int i = 0 ; i < factor.length(); i++) {
31 if (factor[i] == ' ') continue;
32
33 trimmed.append(factor[i], status);
34 }
35
36 factor = std::move(trimmed);
37 }
38
39 /**
40 * A ResourceSink that collects conversion rate information.
41 *
42 * This class is for use by ures_getAllItemsWithFallback.
43 */
44 class ConversionRateDataSink : public ResourceSink {
45 public:
46 /**
47 * Constructor.
48 * @param out The vector to which ConversionRateInfo instances are to be
49 * added. This vector must outlive the use of the ResourceSink.
50 */
ConversionRateDataSink(MaybeStackVector<ConversionRateInfo> * out)51 explicit ConversionRateDataSink(MaybeStackVector<ConversionRateInfo> *out) : outVector(out) {}
52
53 /**
54 * Method for use by `ures_getAllItemsWithFallback`. Adds the unit
55 * conversion rates that are found in `value` to the output vector.
56 *
57 * @param source This string must be "convertUnits": the resource that this
58 * class supports reading.
59 * @param value The "convertUnits" resource, containing unit conversion rate
60 * information.
61 * @param noFallback Ignored.
62 * @param status The standard ICU error code output parameter.
63 */
put(const char * source,ResourceValue & value,UBool,UErrorCode & status)64 void put(const char *source, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
65 if (U_FAILURE(status)) { return; }
66 if (uprv_strcmp(source, "convertUnits") != 0) {
67 // This is very strict, however it is the cheapest way to be sure
68 // that with `value`, we're looking at the convertUnits table.
69 status = U_ILLEGAL_ARGUMENT_ERROR;
70 return;
71 }
72 ResourceTable conversionRateTable = value.getTable(status);
73 const char *srcUnit;
74 // We're reusing `value`, which seems to be a common pattern:
75 for (int32_t unit = 0; conversionRateTable.getKeyAndValue(unit, srcUnit, value); unit++) {
76 ResourceTable unitTable = value.getTable(status);
77 const char *key;
78 UnicodeString baseUnit = ICU_Utility::makeBogusString();
79 UnicodeString factor = ICU_Utility::makeBogusString();
80 UnicodeString offset = ICU_Utility::makeBogusString();
81 for (int32_t i = 0; unitTable.getKeyAndValue(i, key, value); i++) {
82 if (uprv_strcmp(key, "target") == 0) {
83 baseUnit = value.getUnicodeString(status);
84 } else if (uprv_strcmp(key, "factor") == 0) {
85 factor = value.getUnicodeString(status);
86 } else if (uprv_strcmp(key, "offset") == 0) {
87 offset = value.getUnicodeString(status);
88 }
89 }
90 if (U_FAILURE(status)) { return; }
91 if (baseUnit.isBogus() || factor.isBogus()) {
92 // We could not find a usable conversion rate: bad resource.
93 status = U_MISSING_RESOURCE_ERROR;
94 return;
95 }
96
97 // We don't have this ConversionRateInfo yet: add it.
98 ConversionRateInfo *cr = outVector->emplaceBack();
99 if (!cr) {
100 status = U_MEMORY_ALLOCATION_ERROR;
101 return;
102 } else {
103 cr->sourceUnit.append(srcUnit, status);
104 cr->baseUnit.appendInvariantChars(baseUnit, status);
105 cr->factor.appendInvariantChars(factor, status);
106 trimSpaces(cr->factor, status);
107 if (!offset.isBogus()) cr->offset.appendInvariantChars(offset, status);
108 }
109 }
110 return;
111 }
112
113 private:
114 MaybeStackVector<ConversionRateInfo> *outVector;
115 };
116
operator <(const UnitPreferenceMetadata & a,const UnitPreferenceMetadata & b)117 bool operator<(const UnitPreferenceMetadata &a, const UnitPreferenceMetadata &b) {
118 return a.compareTo(b) < 0;
119 }
120
121 /**
122 * A ResourceSink that collects unit preferences information.
123 *
124 * This class is for use by ures_getAllItemsWithFallback.
125 */
126 class UnitPreferencesSink : public ResourceSink {
127 public:
128 /**
129 * Constructor.
130 * @param outPrefs The vector to which UnitPreference instances are to be
131 * added. This vector must outlive the use of the ResourceSink.
132 * @param outMetadata The vector to which UnitPreferenceMetadata instances
133 * are to be added. This vector must outlive the use of the ResourceSink.
134 */
UnitPreferencesSink(MaybeStackVector<UnitPreference> * outPrefs,MaybeStackVector<UnitPreferenceMetadata> * outMetadata)135 explicit UnitPreferencesSink(MaybeStackVector<UnitPreference> *outPrefs,
136 MaybeStackVector<UnitPreferenceMetadata> *outMetadata)
137 : preferences(outPrefs), metadata(outMetadata) {}
138
139 /**
140 * Method for use by `ures_getAllItemsWithFallback`. Adds the unit
141 * preferences info that are found in `value` to the output vector.
142 *
143 * @param source This string must be "unitPreferenceData": the resource that
144 * this class supports reading.
145 * @param value The "unitPreferenceData" resource, containing unit
146 * preferences data.
147 * @param noFallback Ignored.
148 * @param status The standard ICU error code output parameter. Note: if an
149 * error is returned, outPrefs and outMetadata may be inconsistent.
150 */
put(const char * key,ResourceValue & value,UBool,UErrorCode & status)151 void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
152 if (U_FAILURE(status)) { return; }
153 if (uprv_strcmp(key, "unitPreferenceData") != 0) {
154 // This is very strict, however it is the cheapest way to be sure
155 // that with `value`, we're looking at the convertUnits table.
156 status = U_ILLEGAL_ARGUMENT_ERROR;
157 return;
158 }
159 // The unitPreferenceData structure (see data/misc/units.txt) contains a
160 // hierarchy of category/usage/region, within which are a set of
161 // preferences. Hence three for-loops and another loop for the
162 // preferences themselves:
163 ResourceTable unitPreferenceDataTable = value.getTable(status);
164 const char *category;
165 for (int32_t i = 0; unitPreferenceDataTable.getKeyAndValue(i, category, value); i++) {
166 ResourceTable categoryTable = value.getTable(status);
167 const char *usage;
168 for (int32_t j = 0; categoryTable.getKeyAndValue(j, usage, value); j++) {
169 ResourceTable regionTable = value.getTable(status);
170 const char *region;
171 for (int32_t k = 0; regionTable.getKeyAndValue(k, region, value); k++) {
172 // `value` now contains the set of preferences for
173 // category/usage/region.
174 ResourceArray unitPrefs = value.getArray(status);
175 if (U_FAILURE(status)) { return; }
176 int32_t prefLen = unitPrefs.getSize();
177
178 // Update metadata for this set of preferences.
179 UnitPreferenceMetadata *meta = metadata->emplaceBack(
180 category, usage, region, preferences->length(), prefLen, status);
181 if (!meta) {
182 status = U_MEMORY_ALLOCATION_ERROR;
183 return;
184 }
185 if (U_FAILURE(status)) { return; }
186 if (metadata->length() > 1) {
187 // Verify that unit preferences are sorted and
188 // without duplicates.
189 if (!(*(*metadata)[metadata->length() - 2] <
190 *(*metadata)[metadata->length() - 1])) {
191 status = U_INVALID_FORMAT_ERROR;
192 return;
193 }
194 }
195
196 // Collect the individual preferences.
197 for (int32_t i = 0; unitPrefs.getValue(i, value); i++) {
198 UnitPreference *up = preferences->emplaceBack();
199 if (!up) {
200 status = U_MEMORY_ALLOCATION_ERROR;
201 return;
202 }
203 ResourceTable unitPref = value.getTable(status);
204 if (U_FAILURE(status)) { return; }
205 for (int32_t i = 0; unitPref.getKeyAndValue(i, key, value); ++i) {
206 if (uprv_strcmp(key, "unit") == 0) {
207 int32_t length;
208 const UChar *u = value.getString(length, status);
209 up->unit.appendInvariantChars(u, length, status);
210 } else if (uprv_strcmp(key, "geq") == 0) {
211 int32_t length;
212 const UChar *g = value.getString(length, status);
213 CharString geq;
214 geq.appendInvariantChars(g, length, status);
215 DecimalQuantity dq;
216 dq.setToDecNumber(geq.data(), status);
217 up->geq = dq.toDouble();
218 } else if (uprv_strcmp(key, "skeleton") == 0) {
219 up->skeleton = value.getUnicodeString(status);
220 }
221 }
222 }
223 }
224 }
225 }
226 }
227
228 private:
229 MaybeStackVector<UnitPreference> *preferences;
230 MaybeStackVector<UnitPreferenceMetadata> *metadata;
231 };
232
binarySearch(const MaybeStackVector<UnitPreferenceMetadata> * metadata,const UnitPreferenceMetadata & desired,bool * foundCategory,bool * foundUsage,bool * foundRegion,UErrorCode & status)233 int32_t binarySearch(const MaybeStackVector<UnitPreferenceMetadata> *metadata,
234 const UnitPreferenceMetadata &desired, bool *foundCategory, bool *foundUsage,
235 bool *foundRegion, UErrorCode &status) {
236 if (U_FAILURE(status)) { return -1; }
237 int32_t start = 0;
238 int32_t end = metadata->length();
239 *foundCategory = false;
240 *foundUsage = false;
241 *foundRegion = false;
242 while (start < end) {
243 int32_t mid = (start + end) / 2;
244 int32_t cmp = (*metadata)[mid]->compareTo(desired, foundCategory, foundUsage, foundRegion);
245 if (cmp < 0) {
246 start = mid + 1;
247 } else if (cmp > 0) {
248 end = mid;
249 } else {
250 return mid;
251 }
252 }
253 return -1;
254 }
255
256 /**
257 * Finds the UnitPreferenceMetadata instance that matches the given category,
258 * usage and region: if missing, region falls back to "001", and usage
259 * repeatedly drops tailing components, eventually trying "default"
260 * ("land-agriculture-grain" -> "land-agriculture" -> "land" -> "default").
261 *
262 * @param metadata The full list of UnitPreferenceMetadata instances.
263 * @param category The category to search for. See getUnitCategory().
264 * @param usage The usage for which formatting preferences is needed. If the
265 * given usage is not known, automatic fallback occurs, see function description
266 * above.
267 * @param region The region for which preferences are needed. If there are no
268 * region-specific preferences, this function automatically falls back to the
269 * "001" region (global).
270 * @param status The standard ICU error code output parameter.
271 * * If an invalid category is given, status will be U_ILLEGAL_ARGUMENT_ERROR.
272 * * If fallback to "default" or "001" didn't resolve, status will be
273 * U_MISSING_RESOURCE.
274 * @return The index into the metadata vector which represents the appropriate
275 * preferences. If appropriate preferences are not found, -1 is returned.
276 */
getPreferenceMetadataIndex(const MaybeStackVector<UnitPreferenceMetadata> * metadata,StringPiece category,StringPiece usage,StringPiece region,UErrorCode & status)277 int32_t getPreferenceMetadataIndex(const MaybeStackVector<UnitPreferenceMetadata> *metadata,
278 StringPiece category, StringPiece usage, StringPiece region,
279 UErrorCode &status) {
280 if (U_FAILURE(status)) { return -1; }
281 bool foundCategory, foundUsage, foundRegion;
282 UnitPreferenceMetadata desired(category, usage, region, -1, -1, status);
283 int32_t idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status);
284 if (U_FAILURE(status)) { return -1; }
285 if (idx >= 0) { return idx; }
286 if (!foundCategory) {
287 // TODO: failures can happen if units::getUnitCategory returns a category
288 // that does not appear in unitPreferenceData. Do we want a unit test that
289 // checks unitPreferenceData has full coverage of categories? Or just trust
290 // CLDR?
291 status = U_ILLEGAL_ARGUMENT_ERROR;
292 return -1;
293 }
294 U_ASSERT(foundCategory);
295 while (!foundUsage) {
296 int32_t lastDashIdx = desired.usage.lastIndexOf('-');
297 if (lastDashIdx > 0) {
298 desired.usage.truncate(lastDashIdx);
299 } else if (uprv_strcmp(desired.usage.data(), "default") != 0) {
300 desired.usage.truncate(0).append("default", status);
301 } else {
302 // "default" is not supposed to be missing for any valid category.
303 status = U_MISSING_RESOURCE_ERROR;
304 return -1;
305 }
306 idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status);
307 if (U_FAILURE(status)) { return -1; }
308 }
309 U_ASSERT(foundCategory);
310 U_ASSERT(foundUsage);
311 if (!foundRegion) {
312 if (uprv_strcmp(desired.region.data(), "001") != 0) {
313 desired.region.truncate(0).append("001", status);
314 idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status);
315 }
316 if (!foundRegion) {
317 // "001" is not supposed to be missing for any valid usage.
318 status = U_MISSING_RESOURCE_ERROR;
319 return -1;
320 }
321 }
322 U_ASSERT(foundCategory);
323 U_ASSERT(foundUsage);
324 U_ASSERT(foundRegion);
325 U_ASSERT(idx >= 0);
326 return idx;
327 }
328
329 } // namespace
330
UnitPreferenceMetadata(StringPiece category,StringPiece usage,StringPiece region,int32_t prefsOffset,int32_t prefsCount,UErrorCode & status)331 UnitPreferenceMetadata::UnitPreferenceMetadata(StringPiece category, StringPiece usage,
332 StringPiece region, int32_t prefsOffset,
333 int32_t prefsCount, UErrorCode &status) {
334 this->category.append(category, status);
335 this->usage.append(usage, status);
336 this->region.append(region, status);
337 this->prefsOffset = prefsOffset;
338 this->prefsCount = prefsCount;
339 }
340
compareTo(const UnitPreferenceMetadata & other) const341 int32_t UnitPreferenceMetadata::compareTo(const UnitPreferenceMetadata &other) const {
342 int32_t cmp = uprv_strcmp(category.data(), other.category.data());
343 if (cmp == 0) {
344 cmp = uprv_strcmp(usage.data(), other.usage.data());
345 }
346 if (cmp == 0) {
347 cmp = uprv_strcmp(region.data(), other.region.data());
348 }
349 return cmp;
350 }
351
compareTo(const UnitPreferenceMetadata & other,bool * foundCategory,bool * foundUsage,bool * foundRegion) const352 int32_t UnitPreferenceMetadata::compareTo(const UnitPreferenceMetadata &other, bool *foundCategory,
353 bool *foundUsage, bool *foundRegion) const {
354 int32_t cmp = uprv_strcmp(category.data(), other.category.data());
355 if (cmp == 0) {
356 *foundCategory = true;
357 cmp = uprv_strcmp(usage.data(), other.usage.data());
358 }
359 if (cmp == 0) {
360 *foundUsage = true;
361 cmp = uprv_strcmp(region.data(), other.region.data());
362 }
363 if (cmp == 0) {
364 *foundRegion = true;
365 }
366 return cmp;
367 }
368
369 // TODO: this may be unnecessary. Fold into ConversionRates class? Or move to anonymous namespace?
getAllConversionRates(MaybeStackVector<ConversionRateInfo> & result,UErrorCode & status)370 void U_I18N_API getAllConversionRates(MaybeStackVector<ConversionRateInfo> &result, UErrorCode &status) {
371 LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status));
372 ConversionRateDataSink sink(&result);
373 ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", sink, status);
374 }
375
extractConversionInfo(StringPiece source,UErrorCode & status) const376 const ConversionRateInfo *ConversionRates::extractConversionInfo(StringPiece source,
377 UErrorCode &status) const {
378 for (size_t i = 0, n = conversionInfo_.length(); i < n; ++i) {
379 if (conversionInfo_[i]->sourceUnit.toStringPiece() == source) return conversionInfo_[i];
380 }
381
382 status = U_INTERNAL_PROGRAM_ERROR;
383 return nullptr;
384 }
385
UnitPreferences(UErrorCode & status)386 U_I18N_API UnitPreferences::UnitPreferences(UErrorCode &status) {
387 LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status));
388 UnitPreferencesSink sink(&unitPrefs_, &metadata_);
389 ures_getAllItemsWithFallback(unitsBundle.getAlias(), "unitPreferenceData", sink, status);
390 }
391
getKeyWordValue(const Locale & locale,StringPiece kw,UErrorCode & status)392 CharString getKeyWordValue(const Locale &locale, StringPiece kw, UErrorCode &status) {
393 CharString result;
394 if (U_FAILURE(status)) { return result; }
395 {
396 CharStringByteSink sink(&result);
397 locale.getKeywordValue(kw, sink, status);
398 }
399 if (U_SUCCESS(status) && result.isEmpty()) {
400 status = U_MISSING_RESOURCE_ERROR;
401 }
402 return result;
403 }
404
405 MaybeStackVector<UnitPreference>
getPreferencesFor(StringPiece category,StringPiece usage,const Locale & locale,UErrorCode & status) const406 U_I18N_API UnitPreferences::getPreferencesFor(StringPiece category, StringPiece usage,
407 const Locale &locale, UErrorCode &status) const {
408
409 MaybeStackVector<UnitPreference> result;
410
411 // TODO: remove this once all the categories are allowed.
412 UErrorCode internalMuStatus = U_ZERO_ERROR;
413 if (category.compare("temperature") == 0) {
414 CharString localeUnitCharString = getKeyWordValue(locale, "mu", internalMuStatus);
415 if (U_SUCCESS(internalMuStatus)) {
416 // TODO: use the unit category as Java especially when all the categories are allowed..
417 if (localeUnitCharString == "celsius" //
418 || localeUnitCharString == "fahrenheit" //
419 || localeUnitCharString == "kelvin" //
420 ) {
421 UnitPreference unitPref;
422 unitPref.unit.append(localeUnitCharString, status);
423 result.emplaceBackAndCheckErrorCode(status, unitPref);
424 return result;
425 }
426 }
427 }
428
429 CharString region(locale.getCountry(), status);
430
431 // Check the locale system tag, e.g `ms=metric`.
432 UErrorCode internalMeasureTagStatus = U_ZERO_ERROR;
433 CharString localeSystem = getKeyWordValue(locale, "measure", internalMeasureTagStatus);
434 bool isLocaleSystem = false;
435 if (U_SUCCESS(internalMeasureTagStatus)) {
436 if (localeSystem == "metric") {
437 region.clear();
438 region.append("001", status);
439 isLocaleSystem = true;
440 } else if (localeSystem == "ussystem") {
441 region.clear();
442 region.append("US", status);
443 isLocaleSystem = true;
444 } else if (localeSystem == "uksystem") {
445 region.clear();
446 region.append("GB", status);
447 isLocaleSystem = true;
448 }
449 }
450
451 // Check the region tag, e.g. `rg=uszzz`.
452 if (!isLocaleSystem) {
453 UErrorCode internalRgTagStatus = U_ZERO_ERROR;
454 CharString localeRegion = getKeyWordValue(locale, "rg", internalRgTagStatus);
455 if (U_SUCCESS(internalRgTagStatus) && localeRegion.length() >= 3) {
456 if (localeRegion == "default") {
457 region.clear();
458 region.append(localeRegion, status);
459 } else if (localeRegion[0] >= '0' && localeRegion[0] <= '9') {
460 region.clear();
461 region.append(localeRegion.data(), 3, status);
462 } else {
463 // Take the first two character and capitalize them.
464 region.clear();
465 region.append(uprv_toupper(localeRegion[0]), status);
466 region.append(uprv_toupper(localeRegion[1]), status);
467 }
468 }
469 }
470
471 int32_t idx =
472 getPreferenceMetadataIndex(&metadata_, category, usage, region.toStringPiece(), status);
473 if (U_FAILURE(status)) {
474 return result;
475 }
476
477 U_ASSERT(idx >= 0); // Failures should have been taken care of by `status`.
478 const UnitPreferenceMetadata *m = metadata_[idx];
479 for (int32_t i = 0; i < m->prefsCount; i++) {
480 result.emplaceBackAndCheckErrorCode(status, *(unitPrefs_[i + m->prefsOffset]));
481 }
482 return result;
483 }
484
485 } // namespace units
486 U_NAMESPACE_END
487
488 #endif /* #if !UCONFIG_NO_FORMATTING */
489