• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2020 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 
8 #include "bytesinkutil.h"
9 #include "cstring.h"
10 #include "number_decimalquantity.h"
11 #include "resource.h"
12 #include "uassert.h"
13 #include "unicode/locid.h"
14 #include "unicode/unistr.h"
15 #include "unicode/ures.h"
16 #include "units_data.h"
17 #include "uresimp.h"
18 #include "util.h"
19 #include <utility>
20 
21 U_NAMESPACE_BEGIN
22 namespace units {
23 
24 namespace {
25 
26 using icu::number::impl::DecimalQuantity;
27 
trimSpaces(CharString & factor,UErrorCode & status)28 void trimSpaces(CharString& factor, UErrorCode& status){
29    CharString trimmed;
30    for (int i = 0 ; i < factor.length(); i++) {
31        if (factor[i] == ' ') continue;
32 
33        trimmed.append(factor[i], status);
34    }
35 
36    factor = std::move(trimmed);
37 }
38 
39 /**
40  * A ResourceSink that collects conversion rate information.
41  *
42  * This class is for use by ures_getAllItemsWithFallback.
43  */
44 class ConversionRateDataSink : public ResourceSink {
45   public:
46     /**
47      * Constructor.
48      * @param out The vector to which ConversionRateInfo instances are to be
49      * added. This vector must outlive the use of the ResourceSink.
50      */
ConversionRateDataSink(MaybeStackVector<ConversionRateInfo> * out)51     explicit ConversionRateDataSink(MaybeStackVector<ConversionRateInfo> *out) : outVector(out) {}
52 
53     /**
54      * Method for use by `ures_getAllItemsWithFallback`. Adds the unit
55      * conversion rates that are found in `value` to the output vector.
56      *
57      * @param source This string must be "convertUnits": the resource that this
58      * class supports reading.
59      * @param value The "convertUnits" resource, containing unit conversion rate
60      * information.
61      * @param noFallback Ignored.
62      * @param status The standard ICU error code output parameter.
63      */
put(const char * source,ResourceValue & value,UBool,UErrorCode & status)64     void put(const char *source, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
65         if (U_FAILURE(status)) { return; }
66         if (uprv_strcmp(source, "convertUnits") != 0) {
67             // This is very strict, however it is the cheapest way to be sure
68             // that with `value`, we're looking at the convertUnits table.
69             status = U_ILLEGAL_ARGUMENT_ERROR;
70             return;
71         }
72         ResourceTable conversionRateTable = value.getTable(status);
73         const char *srcUnit;
74         // We're reusing `value`, which seems to be a common pattern:
75         for (int32_t unit = 0; conversionRateTable.getKeyAndValue(unit, srcUnit, value); unit++) {
76             ResourceTable unitTable = value.getTable(status);
77             const char *key;
78             UnicodeString baseUnit = ICU_Utility::makeBogusString();
79             UnicodeString factor = ICU_Utility::makeBogusString();
80             UnicodeString offset = ICU_Utility::makeBogusString();
81             for (int32_t i = 0; unitTable.getKeyAndValue(i, key, value); i++) {
82                 if (uprv_strcmp(key, "target") == 0) {
83                     baseUnit = value.getUnicodeString(status);
84                 } else if (uprv_strcmp(key, "factor") == 0) {
85                     factor = value.getUnicodeString(status);
86                 } else if (uprv_strcmp(key, "offset") == 0) {
87                     offset = value.getUnicodeString(status);
88                 }
89             }
90             if (U_FAILURE(status)) { return; }
91             if (baseUnit.isBogus() || factor.isBogus()) {
92                 // We could not find a usable conversion rate: bad resource.
93                 status = U_MISSING_RESOURCE_ERROR;
94                 return;
95             }
96 
97             // We don't have this ConversionRateInfo yet: add it.
98             ConversionRateInfo *cr = outVector->emplaceBack();
99             if (!cr) {
100                 status = U_MEMORY_ALLOCATION_ERROR;
101                 return;
102             } else {
103                 cr->sourceUnit.append(srcUnit, status);
104                 cr->baseUnit.appendInvariantChars(baseUnit, status);
105                 cr->factor.appendInvariantChars(factor, status);
106                 trimSpaces(cr->factor, status);
107                 if (!offset.isBogus()) cr->offset.appendInvariantChars(offset, status);
108             }
109         }
110         return;
111     }
112 
113   private:
114     MaybeStackVector<ConversionRateInfo> *outVector;
115 };
116 
operator <(const UnitPreferenceMetadata & a,const UnitPreferenceMetadata & b)117 bool operator<(const UnitPreferenceMetadata &a, const UnitPreferenceMetadata &b) {
118     return a.compareTo(b) < 0;
119 }
120 
121 /**
122  * A ResourceSink that collects unit preferences information.
123  *
124  * This class is for use by ures_getAllItemsWithFallback.
125  */
126 class UnitPreferencesSink : public ResourceSink {
127   public:
128     /**
129      * Constructor.
130      * @param outPrefs The vector to which UnitPreference instances are to be
131      * added. This vector must outlive the use of the ResourceSink.
132      * @param outMetadata  The vector to which UnitPreferenceMetadata instances
133      * are to be added. This vector must outlive the use of the ResourceSink.
134      */
UnitPreferencesSink(MaybeStackVector<UnitPreference> * outPrefs,MaybeStackVector<UnitPreferenceMetadata> * outMetadata)135     explicit UnitPreferencesSink(MaybeStackVector<UnitPreference> *outPrefs,
136                                  MaybeStackVector<UnitPreferenceMetadata> *outMetadata)
137         : preferences(outPrefs), metadata(outMetadata) {}
138 
139     /**
140      * Method for use by `ures_getAllItemsWithFallback`. Adds the unit
141      * preferences info that are found in `value` to the output vector.
142      *
143      * @param source This string must be "unitPreferenceData": the resource that
144      * this class supports reading.
145      * @param value The "unitPreferenceData" resource, containing unit
146      * preferences data.
147      * @param noFallback Ignored.
148      * @param status The standard ICU error code output parameter. Note: if an
149      * error is returned, outPrefs and outMetadata may be inconsistent.
150      */
put(const char * key,ResourceValue & value,UBool,UErrorCode & status)151     void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
152         if (U_FAILURE(status)) { return; }
153         if (uprv_strcmp(key, "unitPreferenceData") != 0) {
154             // This is very strict, however it is the cheapest way to be sure
155             // that with `value`, we're looking at the convertUnits table.
156             status = U_ILLEGAL_ARGUMENT_ERROR;
157             return;
158         }
159         // The unitPreferenceData structure (see data/misc/units.txt) contains a
160         // hierarchy of category/usage/region, within which are a set of
161         // preferences. Hence three for-loops and another loop for the
162         // preferences themselves:
163         ResourceTable unitPreferenceDataTable = value.getTable(status);
164         const char *category;
165         for (int32_t i = 0; unitPreferenceDataTable.getKeyAndValue(i, category, value); i++) {
166             ResourceTable categoryTable = value.getTable(status);
167             const char *usage;
168             for (int32_t j = 0; categoryTable.getKeyAndValue(j, usage, value); j++) {
169                 ResourceTable regionTable = value.getTable(status);
170                 const char *region;
171                 for (int32_t k = 0; regionTable.getKeyAndValue(k, region, value); k++) {
172                     // `value` now contains the set of preferences for
173                     // category/usage/region.
174                     ResourceArray unitPrefs = value.getArray(status);
175                     if (U_FAILURE(status)) { return; }
176                     int32_t prefLen = unitPrefs.getSize();
177 
178                     // Update metadata for this set of preferences.
179                     UnitPreferenceMetadata *meta = metadata->emplaceBack(
180                         category, usage, region, preferences->length(), prefLen, status);
181                     if (!meta) {
182                         status = U_MEMORY_ALLOCATION_ERROR;
183                         return;
184                     }
185                     if (U_FAILURE(status)) { return; }
186                     if (metadata->length() > 1) {
187                         // Verify that unit preferences are sorted and
188                         // without duplicates.
189                         if (!(*(*metadata)[metadata->length() - 2] <
190                               *(*metadata)[metadata->length() - 1])) {
191                             status = U_INVALID_FORMAT_ERROR;
192                             return;
193                         }
194                     }
195 
196                     // Collect the individual preferences.
197                     for (int32_t i = 0; unitPrefs.getValue(i, value); i++) {
198                         UnitPreference *up = preferences->emplaceBack();
199                         if (!up) {
200                             status = U_MEMORY_ALLOCATION_ERROR;
201                             return;
202                         }
203                         ResourceTable unitPref = value.getTable(status);
204                         if (U_FAILURE(status)) { return; }
205                         for (int32_t i = 0; unitPref.getKeyAndValue(i, key, value); ++i) {
206                             if (uprv_strcmp(key, "unit") == 0) {
207                                 int32_t length;
208                                 const UChar *u = value.getString(length, status);
209                                 up->unit.appendInvariantChars(u, length, status);
210                             } else if (uprv_strcmp(key, "geq") == 0) {
211                                 int32_t length;
212                                 const UChar *g = value.getString(length, status);
213                                 CharString geq;
214                                 geq.appendInvariantChars(g, length, status);
215                                 DecimalQuantity dq;
216                                 dq.setToDecNumber(geq.data(), status);
217                                 up->geq = dq.toDouble();
218                             } else if (uprv_strcmp(key, "skeleton") == 0) {
219                                 up->skeleton = value.getUnicodeString(status);
220                             }
221                         }
222                     }
223                 }
224             }
225         }
226     }
227 
228   private:
229     MaybeStackVector<UnitPreference> *preferences;
230     MaybeStackVector<UnitPreferenceMetadata> *metadata;
231 };
232 
binarySearch(const MaybeStackVector<UnitPreferenceMetadata> * metadata,const UnitPreferenceMetadata & desired,bool * foundCategory,bool * foundUsage,bool * foundRegion,UErrorCode & status)233 int32_t binarySearch(const MaybeStackVector<UnitPreferenceMetadata> *metadata,
234                      const UnitPreferenceMetadata &desired, bool *foundCategory, bool *foundUsage,
235                      bool *foundRegion, UErrorCode &status) {
236     if (U_FAILURE(status)) { return -1; }
237     int32_t start = 0;
238     int32_t end = metadata->length();
239     *foundCategory = false;
240     *foundUsage = false;
241     *foundRegion = false;
242     while (start < end) {
243         int32_t mid = (start + end) / 2;
244         int32_t cmp = (*metadata)[mid]->compareTo(desired, foundCategory, foundUsage, foundRegion);
245         if (cmp < 0) {
246             start = mid + 1;
247         } else if (cmp > 0) {
248             end = mid;
249         } else {
250             return mid;
251         }
252     }
253     return -1;
254 }
255 
256 /**
257  * Finds the UnitPreferenceMetadata instance that matches the given category,
258  * usage and region: if missing, region falls back to "001", and usage
259  * repeatedly drops tailing components, eventually trying "default"
260  * ("land-agriculture-grain" -> "land-agriculture" -> "land" -> "default").
261  *
262  * @param metadata The full list of UnitPreferenceMetadata instances.
263  * @param category The category to search for. See getUnitCategory().
264  * @param usage The usage for which formatting preferences is needed. If the
265  * given usage is not known, automatic fallback occurs, see function description
266  * above.
267  * @param region The region for which preferences are needed. If there are no
268  * region-specific preferences, this function automatically falls back to the
269  * "001" region (global).
270  * @param status The standard ICU error code output parameter.
271  *   * If an invalid category is given, status will be U_ILLEGAL_ARGUMENT_ERROR.
272  *   * If fallback to "default" or "001" didn't resolve, status will be
273  *     U_MISSING_RESOURCE.
274  * @return The index into the metadata vector which represents the appropriate
275  * preferences. If appropriate preferences are not found, -1 is returned.
276  */
getPreferenceMetadataIndex(const MaybeStackVector<UnitPreferenceMetadata> * metadata,StringPiece category,StringPiece usage,StringPiece region,UErrorCode & status)277 int32_t getPreferenceMetadataIndex(const MaybeStackVector<UnitPreferenceMetadata> *metadata,
278                                    StringPiece category, StringPiece usage, StringPiece region,
279                                    UErrorCode &status) {
280     if (U_FAILURE(status)) { return -1; }
281     bool foundCategory, foundUsage, foundRegion;
282     UnitPreferenceMetadata desired(category, usage, region, -1, -1, status);
283     int32_t idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status);
284     if (U_FAILURE(status)) { return -1; }
285     if (idx >= 0) { return idx; }
286     if (!foundCategory) {
287         // TODO: failures can happen if units::getUnitCategory returns a category
288         // that does not appear in unitPreferenceData. Do we want a unit test that
289         // checks unitPreferenceData has full coverage of categories? Or just trust
290         // CLDR?
291         status = U_ILLEGAL_ARGUMENT_ERROR;
292         return -1;
293     }
294     U_ASSERT(foundCategory);
295     while (!foundUsage) {
296         int32_t lastDashIdx = desired.usage.lastIndexOf('-');
297         if (lastDashIdx > 0) {
298             desired.usage.truncate(lastDashIdx);
299         } else if (uprv_strcmp(desired.usage.data(), "default") != 0) {
300             desired.usage.truncate(0).append("default", status);
301         } else {
302             // "default" is not supposed to be missing for any valid category.
303             status = U_MISSING_RESOURCE_ERROR;
304             return -1;
305         }
306         idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status);
307         if (U_FAILURE(status)) { return -1; }
308     }
309     U_ASSERT(foundCategory);
310     U_ASSERT(foundUsage);
311     if (!foundRegion) {
312         if (uprv_strcmp(desired.region.data(), "001") != 0) {
313             desired.region.truncate(0).append("001", status);
314             idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status);
315         }
316         if (!foundRegion) {
317             // "001" is not supposed to be missing for any valid usage.
318             status = U_MISSING_RESOURCE_ERROR;
319             return -1;
320         }
321     }
322     U_ASSERT(foundCategory);
323     U_ASSERT(foundUsage);
324     U_ASSERT(foundRegion);
325     U_ASSERT(idx >= 0);
326     return idx;
327 }
328 
329 } // namespace
330 
UnitPreferenceMetadata(StringPiece category,StringPiece usage,StringPiece region,int32_t prefsOffset,int32_t prefsCount,UErrorCode & status)331 UnitPreferenceMetadata::UnitPreferenceMetadata(StringPiece category, StringPiece usage,
332                                                StringPiece region, int32_t prefsOffset,
333                                                int32_t prefsCount, UErrorCode &status) {
334     this->category.append(category, status);
335     this->usage.append(usage, status);
336     this->region.append(region, status);
337     this->prefsOffset = prefsOffset;
338     this->prefsCount = prefsCount;
339 }
340 
compareTo(const UnitPreferenceMetadata & other) const341 int32_t UnitPreferenceMetadata::compareTo(const UnitPreferenceMetadata &other) const {
342     int32_t cmp = uprv_strcmp(category.data(), other.category.data());
343     if (cmp == 0) {
344         cmp = uprv_strcmp(usage.data(), other.usage.data());
345     }
346     if (cmp == 0) {
347         cmp = uprv_strcmp(region.data(), other.region.data());
348     }
349     return cmp;
350 }
351 
compareTo(const UnitPreferenceMetadata & other,bool * foundCategory,bool * foundUsage,bool * foundRegion) const352 int32_t UnitPreferenceMetadata::compareTo(const UnitPreferenceMetadata &other, bool *foundCategory,
353                                           bool *foundUsage, bool *foundRegion) const {
354     int32_t cmp = uprv_strcmp(category.data(), other.category.data());
355     if (cmp == 0) {
356         *foundCategory = true;
357         cmp = uprv_strcmp(usage.data(), other.usage.data());
358     }
359     if (cmp == 0) {
360         *foundUsage = true;
361         cmp = uprv_strcmp(region.data(), other.region.data());
362     }
363     if (cmp == 0) {
364         *foundRegion = true;
365     }
366     return cmp;
367 }
368 
369 // TODO: this may be unnecessary. Fold into ConversionRates class? Or move to anonymous namespace?
getAllConversionRates(MaybeStackVector<ConversionRateInfo> & result,UErrorCode & status)370 void U_I18N_API getAllConversionRates(MaybeStackVector<ConversionRateInfo> &result, UErrorCode &status) {
371     LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status));
372     ConversionRateDataSink sink(&result);
373     ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", sink, status);
374 }
375 
extractConversionInfo(StringPiece source,UErrorCode & status) const376 const ConversionRateInfo *ConversionRates::extractConversionInfo(StringPiece source,
377                                                                  UErrorCode &status) const {
378     for (size_t i = 0, n = conversionInfo_.length(); i < n; ++i) {
379         if (conversionInfo_[i]->sourceUnit.toStringPiece() == source) return conversionInfo_[i];
380     }
381 
382     status = U_INTERNAL_PROGRAM_ERROR;
383     return nullptr;
384 }
385 
UnitPreferences(UErrorCode & status)386 U_I18N_API UnitPreferences::UnitPreferences(UErrorCode &status) {
387     LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status));
388     UnitPreferencesSink sink(&unitPrefs_, &metadata_);
389     ures_getAllItemsWithFallback(unitsBundle.getAlias(), "unitPreferenceData", sink, status);
390 }
391 
getKeyWordValue(const Locale & locale,StringPiece kw,UErrorCode & status)392 CharString getKeyWordValue(const Locale &locale, StringPiece kw, UErrorCode &status) {
393     CharString result;
394     if (U_FAILURE(status)) { return result; }
395     {
396         CharStringByteSink sink(&result);
397         locale.getKeywordValue(kw, sink, status);
398     }
399     if (U_SUCCESS(status) && result.isEmpty()) {
400         status = U_MISSING_RESOURCE_ERROR;
401     }
402     return result;
403 }
404 
405 MaybeStackVector<UnitPreference>
getPreferencesFor(StringPiece category,StringPiece usage,const Locale & locale,UErrorCode & status) const406     U_I18N_API UnitPreferences::getPreferencesFor(StringPiece category, StringPiece usage,
407                                                   const Locale &locale, UErrorCode &status) const {
408 
409     MaybeStackVector<UnitPreference> result;
410 
411     // TODO: remove this once all the categories are allowed.
412     UErrorCode internalMuStatus = U_ZERO_ERROR;
413     if (category.compare("temperature") == 0) {
414         CharString localeUnitCharString = getKeyWordValue(locale, "mu", internalMuStatus);
415         if (U_SUCCESS(internalMuStatus)) {
416             // TODO: use the unit category as Java especially when all the categories are allowed..
417             if (localeUnitCharString == "celsius"       //
418                 || localeUnitCharString == "fahrenheit" //
419                 || localeUnitCharString == "kelvin"     //
420             ) {
421                 UnitPreference unitPref;
422                 unitPref.unit.append(localeUnitCharString, status);
423                 result.emplaceBackAndCheckErrorCode(status, unitPref);
424                 return result;
425             }
426         }
427     }
428 
429     CharString region(locale.getCountry(), status);
430 
431     // Check the locale system tag, e.g `ms=metric`.
432     UErrorCode internalMeasureTagStatus = U_ZERO_ERROR;
433     CharString localeSystem = getKeyWordValue(locale, "measure", internalMeasureTagStatus);
434     bool isLocaleSystem = false;
435     if (U_SUCCESS(internalMeasureTagStatus)) {
436         if (localeSystem == "metric") {
437             region.clear();
438             region.append("001", status);
439             isLocaleSystem = true;
440         } else if (localeSystem == "ussystem") {
441             region.clear();
442             region.append("US", status);
443             isLocaleSystem = true;
444         } else if (localeSystem == "uksystem") {
445             region.clear();
446             region.append("GB", status);
447             isLocaleSystem = true;
448         }
449     }
450 
451     // Check the region tag, e.g. `rg=uszzz`.
452     if (!isLocaleSystem) {
453         UErrorCode internalRgTagStatus = U_ZERO_ERROR;
454         CharString localeRegion = getKeyWordValue(locale, "rg", internalRgTagStatus);
455         if (U_SUCCESS(internalRgTagStatus) && localeRegion.length() >= 3) {
456             if (localeRegion == "default") {
457                 region.clear();
458                 region.append(localeRegion, status);
459             } else if (localeRegion[0] >= '0' && localeRegion[0] <= '9') {
460                 region.clear();
461                 region.append(localeRegion.data(), 3, status);
462             } else {
463                 // Take the first two character and capitalize them.
464                 region.clear();
465                 region.append(uprv_toupper(localeRegion[0]), status);
466                 region.append(uprv_toupper(localeRegion[1]), status);
467             }
468         }
469     }
470 
471     int32_t idx =
472         getPreferenceMetadataIndex(&metadata_, category, usage, region.toStringPiece(), status);
473     if (U_FAILURE(status)) {
474         return result;
475     }
476 
477     U_ASSERT(idx >= 0); // Failures should have been taken care of by `status`.
478     const UnitPreferenceMetadata *m = metadata_[idx];
479     for (int32_t i = 0; i < m->prefsCount; i++) {
480         result.emplaceBackAndCheckErrorCode(status, *(unitPrefs_[i + m->prefsOffset]));
481     }
482     return result;
483 }
484 
485 } // namespace units
486 U_NAMESPACE_END
487 
488 #endif /* #if !UCONFIG_NO_FORMATTING */
489