1 // © 2020 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include "unicode/utypes.h"
5
6 #if !UCONFIG_NO_FORMATTING
7
8 #include "cstring.h"
9 #include "number_decimalquantity.h"
10 #include "resource.h"
11 #include "uassert.h"
12 #include "unicode/unistr.h"
13 #include "unicode/ures.h"
14 #include "units_data.h"
15 #include "uresimp.h"
16 #include "util.h"
17 #include <utility>
18
19 U_NAMESPACE_BEGIN
20 namespace units {
21
22 namespace {
23
24 using icu::number::impl::DecimalQuantity;
25
trimSpaces(CharString & factor,UErrorCode & status)26 void trimSpaces(CharString& factor, UErrorCode& status){
27 CharString trimmed;
28 for (int i = 0 ; i < factor.length(); i++) {
29 if (factor[i] == ' ') continue;
30
31 trimmed.append(factor[i], status);
32 }
33
34 factor = std::move(trimmed);
35 }
36
37 /**
38 * A ResourceSink that collects conversion rate information.
39 *
40 * This class is for use by ures_getAllItemsWithFallback.
41 */
42 class ConversionRateDataSink : public ResourceSink {
43 public:
44 /**
45 * Constructor.
46 * @param out The vector to which ConversionRateInfo instances are to be
47 * added. This vector must outlive the use of the ResourceSink.
48 */
ConversionRateDataSink(MaybeStackVector<ConversionRateInfo> * out)49 explicit ConversionRateDataSink(MaybeStackVector<ConversionRateInfo> *out) : outVector(out) {}
50
51 /**
52 * Method for use by `ures_getAllItemsWithFallback`. Adds the unit
53 * conversion rates that are found in `value` to the output vector.
54 *
55 * @param source This string must be "convertUnits": the resource that this
56 * class supports reading.
57 * @param value The "convertUnits" resource, containing unit conversion rate
58 * information.
59 * @param noFallback Ignored.
60 * @param status The standard ICU error code output parameter.
61 */
put(const char * source,ResourceValue & value,UBool,UErrorCode & status)62 void put(const char *source, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) {
63 if (U_FAILURE(status)) { return; }
64 if (uprv_strcmp(source, "convertUnits") != 0) {
65 // This is very strict, however it is the cheapest way to be sure
66 // that with `value`, we're looking at the convertUnits table.
67 status = U_ILLEGAL_ARGUMENT_ERROR;
68 return;
69 }
70 ResourceTable conversionRateTable = value.getTable(status);
71 const char *srcUnit;
72 // We're reusing `value`, which seems to be a common pattern:
73 for (int32_t unit = 0; conversionRateTable.getKeyAndValue(unit, srcUnit, value); unit++) {
74 ResourceTable unitTable = value.getTable(status);
75 const char *key;
76 UnicodeString baseUnit = ICU_Utility::makeBogusString();
77 UnicodeString factor = ICU_Utility::makeBogusString();
78 UnicodeString offset = ICU_Utility::makeBogusString();
79 for (int32_t i = 0; unitTable.getKeyAndValue(i, key, value); i++) {
80 if (uprv_strcmp(key, "target") == 0) {
81 baseUnit = value.getUnicodeString(status);
82 } else if (uprv_strcmp(key, "factor") == 0) {
83 factor = value.getUnicodeString(status);
84 } else if (uprv_strcmp(key, "offset") == 0) {
85 offset = value.getUnicodeString(status);
86 }
87 }
88 if (U_FAILURE(status)) { return; }
89 if (baseUnit.isBogus() || factor.isBogus()) {
90 // We could not find a usable conversion rate: bad resource.
91 status = U_MISSING_RESOURCE_ERROR;
92 return;
93 }
94
95 // We don't have this ConversionRateInfo yet: add it.
96 ConversionRateInfo *cr = outVector->emplaceBack();
97 if (!cr) {
98 status = U_MEMORY_ALLOCATION_ERROR;
99 return;
100 } else {
101 cr->sourceUnit.append(srcUnit, status);
102 cr->baseUnit.appendInvariantChars(baseUnit, status);
103 cr->factor.appendInvariantChars(factor, status);
104 trimSpaces(cr->factor, status);
105 if (!offset.isBogus()) cr->offset.appendInvariantChars(offset, status);
106 }
107 }
108 return;
109 }
110
111 private:
112 MaybeStackVector<ConversionRateInfo> *outVector;
113 };
114
operator <(const UnitPreferenceMetadata & a,const UnitPreferenceMetadata & b)115 bool operator<(const UnitPreferenceMetadata &a, const UnitPreferenceMetadata &b) {
116 return a.compareTo(b) < 0;
117 }
118
119 /**
120 * A ResourceSink that collects unit preferences information.
121 *
122 * This class is for use by ures_getAllItemsWithFallback.
123 */
124 class UnitPreferencesSink : public ResourceSink {
125 public:
126 /**
127 * Constructor.
128 * @param outPrefs The vector to which UnitPreference instances are to be
129 * added. This vector must outlive the use of the ResourceSink.
130 * @param outMetadata The vector to which UnitPreferenceMetadata instances
131 * are to be added. This vector must outlive the use of the ResourceSink.
132 */
UnitPreferencesSink(MaybeStackVector<UnitPreference> * outPrefs,MaybeStackVector<UnitPreferenceMetadata> * outMetadata)133 explicit UnitPreferencesSink(MaybeStackVector<UnitPreference> *outPrefs,
134 MaybeStackVector<UnitPreferenceMetadata> *outMetadata)
135 : preferences(outPrefs), metadata(outMetadata) {}
136
137 /**
138 * Method for use by `ures_getAllItemsWithFallback`. Adds the unit
139 * preferences info that are found in `value` to the output vector.
140 *
141 * @param source This string must be "unitPreferenceData": the resource that
142 * this class supports reading.
143 * @param value The "unitPreferenceData" resource, containing unit
144 * preferences data.
145 * @param noFallback Ignored.
146 * @param status The standard ICU error code output parameter. Note: if an
147 * error is returned, outPrefs and outMetadata may be inconsistent.
148 */
put(const char * key,ResourceValue & value,UBool,UErrorCode & status)149 void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) {
150 if (U_FAILURE(status)) { return; }
151 if (uprv_strcmp(key, "unitPreferenceData") != 0) {
152 // This is very strict, however it is the cheapest way to be sure
153 // that with `value`, we're looking at the convertUnits table.
154 status = U_ILLEGAL_ARGUMENT_ERROR;
155 return;
156 }
157 // The unitPreferenceData structure (see data/misc/units.txt) contains a
158 // hierarchy of category/usage/region, within which are a set of
159 // preferences. Hence three for-loops and another loop for the
160 // preferences themselves:
161 ResourceTable unitPreferenceDataTable = value.getTable(status);
162 const char *category;
163 for (int32_t i = 0; unitPreferenceDataTable.getKeyAndValue(i, category, value); i++) {
164 ResourceTable categoryTable = value.getTable(status);
165 const char *usage;
166 for (int32_t j = 0; categoryTable.getKeyAndValue(j, usage, value); j++) {
167 ResourceTable regionTable = value.getTable(status);
168 const char *region;
169 for (int32_t k = 0; regionTable.getKeyAndValue(k, region, value); k++) {
170 // `value` now contains the set of preferences for
171 // category/usage/region.
172 ResourceArray unitPrefs = value.getArray(status);
173 if (U_FAILURE(status)) { return; }
174 int32_t prefLen = unitPrefs.getSize();
175
176 // Update metadata for this set of preferences.
177 UnitPreferenceMetadata *meta = metadata->emplaceBack(
178 category, usage, region, preferences->length(), prefLen, status);
179 if (!meta) {
180 status = U_MEMORY_ALLOCATION_ERROR;
181 return;
182 }
183 if (U_FAILURE(status)) { return; }
184 if (metadata->length() > 1) {
185 // Verify that unit preferences are sorted and
186 // without duplicates.
187 if (!(*(*metadata)[metadata->length() - 2] <
188 *(*metadata)[metadata->length() - 1])) {
189 status = U_INVALID_FORMAT_ERROR;
190 return;
191 }
192 }
193
194 // Collect the individual preferences.
195 for (int32_t i = 0; unitPrefs.getValue(i, value); i++) {
196 UnitPreference *up = preferences->emplaceBack();
197 if (!up) {
198 status = U_MEMORY_ALLOCATION_ERROR;
199 return;
200 }
201 ResourceTable unitPref = value.getTable(status);
202 if (U_FAILURE(status)) { return; }
203 for (int32_t i = 0; unitPref.getKeyAndValue(i, key, value); ++i) {
204 if (uprv_strcmp(key, "unit") == 0) {
205 int32_t length;
206 const UChar *u = value.getString(length, status);
207 up->unit.appendInvariantChars(u, length, status);
208 } else if (uprv_strcmp(key, "geq") == 0) {
209 int32_t length;
210 const UChar *g = value.getString(length, status);
211 CharString geq;
212 geq.appendInvariantChars(g, length, status);
213 DecimalQuantity dq;
214 dq.setToDecNumber(geq.data(), status);
215 up->geq = dq.toDouble();
216 } else if (uprv_strcmp(key, "skeleton") == 0) {
217 up->skeleton = value.getUnicodeString(status);
218 }
219 }
220 }
221 }
222 }
223 }
224 }
225
226 private:
227 MaybeStackVector<UnitPreference> *preferences;
228 MaybeStackVector<UnitPreferenceMetadata> *metadata;
229 };
230
binarySearch(const MaybeStackVector<UnitPreferenceMetadata> * metadata,const UnitPreferenceMetadata & desired,bool * foundCategory,bool * foundUsage,bool * foundRegion,UErrorCode & status)231 int32_t binarySearch(const MaybeStackVector<UnitPreferenceMetadata> *metadata,
232 const UnitPreferenceMetadata &desired, bool *foundCategory, bool *foundUsage,
233 bool *foundRegion, UErrorCode &status) {
234 if (U_FAILURE(status)) { return -1; }
235 int32_t start = 0;
236 int32_t end = metadata->length();
237 *foundCategory = false;
238 *foundUsage = false;
239 *foundRegion = false;
240 while (start < end) {
241 int32_t mid = (start + end) / 2;
242 int32_t cmp = (*metadata)[mid]->compareTo(desired, foundCategory, foundUsage, foundRegion);
243 if (cmp < 0) {
244 start = mid + 1;
245 } else if (cmp > 0) {
246 end = mid;
247 } else {
248 return mid;
249 }
250 }
251 return -1;
252 }
253
254 /**
255 * Finds the UnitPreferenceMetadata instance that matches the given category,
256 * usage and region: if missing, region falls back to "001", and usage
257 * repeatedly drops tailing components, eventually trying "default"
258 * ("land-agriculture-grain" -> "land-agriculture" -> "land" -> "default").
259 *
260 * @param metadata The full list of UnitPreferenceMetadata instances.
261 * @param category The category to search for. See getUnitCategory().
262 * @param usage The usage for which formatting preferences is needed. If the
263 * given usage is not known, automatic fallback occurs, see function description
264 * above.
265 * @param region The region for which preferences are needed. If there are no
266 * region-specific preferences, this function automatically falls back to the
267 * "001" region (global).
268 * @param status The standard ICU error code output parameter.
269 * * If an invalid category is given, status will be U_ILLEGAL_ARGUMENT_ERROR.
270 * * If fallback to "default" or "001" didn't resolve, status will be
271 * U_MISSING_RESOURCE.
272 * @return The index into the metadata vector which represents the appropriate
273 * preferences. If appropriate preferences are not found, -1 is returned.
274 */
getPreferenceMetadataIndex(const MaybeStackVector<UnitPreferenceMetadata> * metadata,StringPiece category,StringPiece usage,StringPiece region,UErrorCode & status)275 int32_t getPreferenceMetadataIndex(const MaybeStackVector<UnitPreferenceMetadata> *metadata,
276 StringPiece category, StringPiece usage, StringPiece region,
277 UErrorCode &status) {
278 if (U_FAILURE(status)) { return -1; }
279 bool foundCategory, foundUsage, foundRegion;
280 UnitPreferenceMetadata desired(category, usage, region, -1, -1, status);
281 int32_t idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status);
282 if (U_FAILURE(status)) { return -1; }
283 if (idx >= 0) { return idx; }
284 if (!foundCategory) {
285 // TODO: failures can happen if units::getUnitCategory returns a category
286 // that does not appear in unitPreferenceData. Do we want a unit test that
287 // checks unitPreferenceData has full coverage of categories? Or just trust
288 // CLDR?
289 status = U_ILLEGAL_ARGUMENT_ERROR;
290 return -1;
291 }
292 U_ASSERT(foundCategory);
293 while (!foundUsage) {
294 int32_t lastDashIdx = desired.usage.lastIndexOf('-');
295 if (lastDashIdx > 0) {
296 desired.usage.truncate(lastDashIdx);
297 } else if (uprv_strcmp(desired.usage.data(), "default") != 0) {
298 desired.usage.truncate(0).append("default", status);
299 } else {
300 // "default" is not supposed to be missing for any valid category.
301 status = U_MISSING_RESOURCE_ERROR;
302 return -1;
303 }
304 idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status);
305 if (U_FAILURE(status)) { return -1; }
306 }
307 U_ASSERT(foundCategory);
308 U_ASSERT(foundUsage);
309 if (!foundRegion) {
310 if (uprv_strcmp(desired.region.data(), "001") != 0) {
311 desired.region.truncate(0).append("001", status);
312 idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status);
313 }
314 if (!foundRegion) {
315 // "001" is not supposed to be missing for any valid usage.
316 status = U_MISSING_RESOURCE_ERROR;
317 return -1;
318 }
319 }
320 U_ASSERT(foundCategory);
321 U_ASSERT(foundUsage);
322 U_ASSERT(foundRegion);
323 U_ASSERT(idx >= 0);
324 return idx;
325 }
326
327 } // namespace
328
UnitPreferenceMetadata(StringPiece category,StringPiece usage,StringPiece region,int32_t prefsOffset,int32_t prefsCount,UErrorCode & status)329 UnitPreferenceMetadata::UnitPreferenceMetadata(StringPiece category, StringPiece usage,
330 StringPiece region, int32_t prefsOffset,
331 int32_t prefsCount, UErrorCode &status) {
332 this->category.append(category, status);
333 this->usage.append(usage, status);
334 this->region.append(region, status);
335 this->prefsOffset = prefsOffset;
336 this->prefsCount = prefsCount;
337 }
338
compareTo(const UnitPreferenceMetadata & other) const339 int32_t UnitPreferenceMetadata::compareTo(const UnitPreferenceMetadata &other) const {
340 int32_t cmp = uprv_strcmp(category.data(), other.category.data());
341 if (cmp == 0) {
342 cmp = uprv_strcmp(usage.data(), other.usage.data());
343 }
344 if (cmp == 0) {
345 cmp = uprv_strcmp(region.data(), other.region.data());
346 }
347 return cmp;
348 }
349
compareTo(const UnitPreferenceMetadata & other,bool * foundCategory,bool * foundUsage,bool * foundRegion) const350 int32_t UnitPreferenceMetadata::compareTo(const UnitPreferenceMetadata &other, bool *foundCategory,
351 bool *foundUsage, bool *foundRegion) const {
352 int32_t cmp = uprv_strcmp(category.data(), other.category.data());
353 if (cmp == 0) {
354 *foundCategory = true;
355 cmp = uprv_strcmp(usage.data(), other.usage.data());
356 }
357 if (cmp == 0) {
358 *foundUsage = true;
359 cmp = uprv_strcmp(region.data(), other.region.data());
360 }
361 if (cmp == 0) {
362 *foundRegion = true;
363 }
364 return cmp;
365 }
366
367 // TODO: this may be unnecessary. Fold into ConversionRates class? Or move to anonymous namespace?
getAllConversionRates(MaybeStackVector<ConversionRateInfo> & result,UErrorCode & status)368 void U_I18N_API getAllConversionRates(MaybeStackVector<ConversionRateInfo> &result, UErrorCode &status) {
369 LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status));
370 ConversionRateDataSink sink(&result);
371 ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", sink, status);
372 }
373
extractConversionInfo(StringPiece source,UErrorCode & status) const374 const ConversionRateInfo *ConversionRates::extractConversionInfo(StringPiece source,
375 UErrorCode &status) const {
376 for (size_t i = 0, n = conversionInfo_.length(); i < n; ++i) {
377 if (conversionInfo_[i]->sourceUnit.toStringPiece() == source) return conversionInfo_[i];
378 }
379
380 status = U_INTERNAL_PROGRAM_ERROR;
381 return nullptr;
382 }
383
UnitPreferences(UErrorCode & status)384 U_I18N_API UnitPreferences::UnitPreferences(UErrorCode &status) {
385 LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status));
386 UnitPreferencesSink sink(&unitPrefs_, &metadata_);
387 ures_getAllItemsWithFallback(unitsBundle.getAlias(), "unitPreferenceData", sink, status);
388 }
389
390 // TODO: make outPreferences const?
391 //
// TODO: consider replacing `const UnitPreference *const *&outPreferences` with
// a slice class of some kind.
getPreferencesFor(StringPiece category,StringPiece usage,StringPiece region,const UnitPreference * const * & outPreferences,int32_t & preferenceCount,UErrorCode & status) const394 void U_I18N_API UnitPreferences::getPreferencesFor(StringPiece category, StringPiece usage,
395 StringPiece region,
396 const UnitPreference *const *&outPreferences,
397 int32_t &preferenceCount, UErrorCode &status) const {
398 int32_t idx = getPreferenceMetadataIndex(&metadata_, category, usage, region, status);
399 if (U_FAILURE(status)) {
400 outPreferences = nullptr;
401 preferenceCount = 0;
402 return;
403 }
404 U_ASSERT(idx >= 0); // Failures should have been taken care of by `status`.
405 const UnitPreferenceMetadata *m = metadata_[idx];
406 outPreferences = unitPrefs_.getAlias() + m->prefsOffset;
407 preferenceCount = m->prefsCount;
408 }
409
410 } // namespace units
411 U_NAMESPACE_END
412
413 #endif /* #if !UCONFIG_NO_FORMATTING */
414